# [Forum post metadata] Thread starter (楼主) | Posted 2020-1-9 22:27:26 | Show all posts (显示全部楼层)
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import random
import xlwt
import time
from apscheduler.schedulers.blocking import BlockingScheduler
import datetime
def job():
    """Scrape ``<h4>`` headings from the page at ``url`` and save them to disk.

    Downloads the page named by the module-level ``url``, takes the text of
    up to the first 32 ``<h4>`` elements, removes every "CN" substring plus
    surrounding whitespace, and writes one entry per line to a fixed file on
    the desktop while echoing each entry to stdout. When done, prints the
    current local time.

    ``url`` must be assigned at module level before the scheduler fires this
    job. The output is plain text written to a path ending in ``.xls``; it is
    not a real Excel workbook.
    """
    max_titles = 32
    output_path = r'C:/Users/zc050310/Desktop/taptap爬虫数据定时下载.xls'

    html = urlopen(url).read().decode('utf-8')
    soup = BeautifulSoup(html, features='lxml')

    # Select once and slice: avoids re-running the selector on every
    # iteration and avoids an IndexError when the page has fewer headings
    # than expected.
    headings = soup.select("h4")[:max_titles]

    # The context manager guarantees the file is closed even if an
    # iteration raises.
    with open(output_path, 'w') as f:
        for heading in headings:
            title = heading.get_text().strip().replace("CN", "").strip()
            print(title, file=f)
            print(title)

    # Only ``import time`` is in scope, so the helpers must be qualified;
    # the bare names raised NameError.
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
if __name__ == '__main__':
    # BlockingScheduler: runs a single task in this process; the scheduler
    # is the only thing running, and it operates in blocking mode.
    scheduler = BlockingScheduler()

    # Ask for the run time first, then register the job with the 'date'
    # trigger so it executes exactly once at that moment.
    run_time = input('请输入时间,格式如2020-01-09 10:56:00 : ')
    scheduler.add_job(job, trigger='date', run_date=run_time)

    # job() reads this module-level name when it fires, so it must be set
    # before the scheduler starts.
    url = input("请输入网址,格式如https://www.taptap.com/top/download : ")

    scheduler.start()
|
|