import time
from urllib.parse import urljoin

import pandas as pd
import requests
import schedule
from lxml import etree
# Browser-like request headers passed to requests.get by the scraper.
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39"
    ),
}
# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot block the scheduler loop indefinitely.
_REQUEST_TIMEOUT = 10

# Anchors holding the tag labels on a model's detail page.
_TAG_XPATH = '//*[@id="modeldeals"]/div[2]/div/a'


def _fetch_tree(target_url):
    """Download *target_url* and return its parsed HTML root, or None on failure."""
    try:
        response = requests.get(
            target_url, headers=header, timeout=_REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        print('Request to {} failed: {}'.format(target_url, exc))
        return None
    try:
        # lxml may return None or raise for empty/unparsable documents;
        # both cases are reported to the caller as None.
        return etree.HTML(response.text)
    except (etree.LxmlError, ValueError) as exc:
        print('Could not parse {}: {}'.format(target_url, exc))
        return None


def get_msg(top_n=10, csv_path='test.csv'):
    """Scrape the cgmodel.com sales ranking and append it to a CSV file.

    For every entry of the ``sale_ranking`` list on the home page, the model
    name and price are read, the model's detail page is fetched, and the tag
    labels found there are collected. The rows (index = model name, columns
    = price and tags) are appended to *csv_path* without a header row.

    Failures are reported with ``print`` and skipped rather than raised, so
    one bad run does not terminate the scheduler that calls this function.

    Args:
        top_n: Maximum number of ranking entries to record.
        csv_path: Path of the CSV file the rows are appended to.
    """
    print('I am working...')
    url = 'https://www.cgmodel.com/'
    tree = _fetch_tree(url)
    if tree is None:
        return

    msg_dic = {}
    for entry in tree.xpath('//ul[@class="sale_ranking"]/li')[:top_n]:
        anchors = entry.xpath('./p/a')
        prices = entry.xpath('./span')
        links = entry.xpath('./p/a/@href')
        if not (anchors and prices and links):
            # Skip a malformed entry instead of aborting the whole run.
            continue

        # urljoin copes with relative, root-relative and absolute hrefs,
        # whereas plain concatenation would produce "//" or a broken URL.
        msg_tree = _fetch_tree(urljoin(url, links[0]))
        tags = []
        if msg_tree is not None:
            # One query returns every tag anchor in document order.
            tags = [anchor.text for anchor in msg_tree.xpath(_TAG_XPATH)]

        msg_dic[anchors[0].text] = {'price': prices[0].text, 'tags': tags}

    if not msg_dic:
        print('No ranking entries found; nothing written.')
        return

    df = pd.DataFrame(msg_dic).transpose()
    # 'ANSI' is a codec alias only on Windows and cannot encode characters
    # outside the local code page. 'utf-8-sig' is portable and adds the BOM
    # that lets Excel detect UTF-8, avoiding the garbled text plain UTF-8 gave.
    df.to_csv(csv_path, mode='a', encoding='utf-8-sig', header=False)
# Run the scraper every 10 seconds; the short interval is for testing.
schedule.every(10).seconds.do(get_msg)

# Poll the scheduler once per second so that due jobs get executed.
while True:
    schedule.run_pending()
    time.sleep(1)