# Python 写的腾讯视频电视剧列表爬虫：抓取结果保存为 CSV 文件；更换 channel 关键字即可改为抓取电影、纪录片等。
import csv
import os
import winreg

import requests
from lxml import etree
# Scrape the Tencent Video channel listing page by page and save the result
# as a CSV file on the current user's Windows desktop.

# Channel slug used in both the listing URL and the referer.
# NOTE(review): switching this (e.g. to a movie channel) should work per the
# author's note, but the output file name below still says "电视剧列表".
CHANNEL = "tv"
PAGE_SIZE = 30
# NOTE(review): the original script starts at offset 30, not 0, so the first
# PAGE_SIZE entries may never be fetched -- confirm whether that is intended.
FIRST_OFFSET = 30
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

LIST_URL = ("https://v.qq.com/x/bu/pagesheet/list?_all=1&append=1"
            "&channel={channel}&listpage=1&offset={offset}&pagesize={size}")
REQUEST_HEADERS = {
    "referer": "https://v.qq.com/channel/{0}?listpage=1&channel={0}&sort=18&_all=1".format(CHANNEL),
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36 Maxthon/5.3.8.2000',
}

# One union query: lxml returns the matches in document order, so every
# entry's link is followed by its title and then its optional fields.
FIELD_XPATH = "|".join((
    '//div[@class="list_item"]/a/@href',                   # link
    '//div[@class="list_item"]/a/@title',                  # title
    '//div[@class="figure_caption"]/text()',               # episode caption
    '//div/a/img[@class="mark_v mark_v_VIP"]/@alt',        # badges
    '//div/a/img[@class="mark_v mark_v_预告片"]/@alt',
    '//div/a/img[@class="mark_v mark_v_会员付费解锁"]/@alt',
    '//div/a/img[@class="mark_v mark_v_互动"]/@alt',
    '//div/div/div/text()',                                # description
))
PAGER_XPATH = '//div/button/@_stat2'

LINK_PREFIX = "https://v.qq.com"
BADGES = frozenset(("VIP", "预告片", "会员付费解锁", "互动"))
CSV_COLUMNS = ['片名', '集数', 'VIP', '简介', '链接']
SHELL_FOLDERS_KEY = r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"


def build_records(tokens):
    """Group a flat, document-ordered token list into one dict per entry.

    A token starting with ``LINK_PREFIX`` opens a new record and the next
    token is taken as its title.  Until the next link appears, a token that
    is one of ``BADGES`` fills "VIP", a token ending in "集" fills "集数",
    and the first remaining token fills "简介".  Badge and caption may come
    in either order.  Blank tokens and tokens before the first link are
    ignored.  Every record carries all five keys (missing fields are "").
    """
    records = []
    current = None
    awaiting_title = False
    for raw in tokens:
        token = str(raw).strip()
        if not token:
            continue  # whitespace-only text nodes carry no information
        if token.startswith(LINK_PREFIX):
            # A fresh dict per entry, so records never alias each other.
            current = {"链接": token, "片名": "", "集数": "", "VIP": "", "简介": ""}
            records.append(current)
            awaiting_title = True
        elif current is None:
            continue  # stray text before the first entry
        elif awaiting_title:
            current["片名"] = token
            awaiting_title = False
        elif token in BADGES and not current["VIP"]:
            current["VIP"] = token
        elif token.endswith("集") and not current["集数"]:
            current["集数"] = token
        elif not current["简介"]:
            current["简介"] = token
    return records


def fetch_page(offset):
    """Download one listing page.

    Returns ``(tokens, pager_marks)``: the field tokens matched by
    ``FIELD_XPATH`` and the ``_stat2`` attribute values of the page's
    buttons.  Both are empty lists when the response has no parsable HTML.
    """
    url = LIST_URL.format(channel=CHANNEL, offset=offset, size=PAGE_SIZE)
    response = requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT)
    response.encoding = "utf-8"
    tree = etree.HTML(response.text)
    if tree is None:
        return [], []
    return tree.xpath(FIELD_XPATH), tree.xpath(PAGER_XPATH)


def desktop_dir():
    """Return the current user's Desktop path from the Windows registry."""
    with winreg.OpenKey(winreg.HKEY_CURRENT_USER, SHELL_FOLDERS_KEY) as folders:
        location, _value_type = winreg.QueryValueEx(folders, "Desktop")
    return location


def save_csv(records, path):
    """Write *records* (dicts keyed by ``CSV_COLUMNS``) to *path*.

    The file is encoded as GB18030, as in the original script.
    """
    with open(path, 'w', newline='', encoding="GB18030") as handle:
        writer = csv.DictWriter(handle, CSV_COLUMNS)
        writer.writeheader()
        writer.writerows(records)


def main():
    """Scrape every listing page, then write the CSV to the desktop."""
    records = []
    offset = FIRST_OFFSET
    while True:
        tokens, pager_marks = fetch_page(offset)
        if not pager_marks:
            # No pager buttons on the page: treat it as the end of the
            # listing (the original relied on an IndexError for this).
            break
        # The last button's _stat2 value is read as the last page number.
        last_page = int(str(pager_marks[-1]))
        print(offset)
        if last_page * PAGE_SIZE <= offset:
            break  # this offset is already past the final page
        records.extend(build_records(tokens))
        offset += PAGE_SIZE

    target = os.path.join(desktop_dir(), "腾讯视频" + '电视剧列表' + ".csv")
    save_csv(records, target)
    print("saved {} records to {}".format(len(records), target))


if __name__ == "__main__":
    main()
# 页: [1]