|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 909646372 于 2020-2-17 20:38 编辑
import urllib.request
import re
def getcontent(url,page):
headers = ("User-Agent" ,"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36")
opener = urllib.request.build_opener()
opener.addheaders = [headers]
urllib.request.install_opener(opener)
data = urllib.request.urlopen(url).read().decode("utf-8")
userpat = 'target="_blank" title="(.*?)">'
contentpat = '<div class="content">(.*?)</div>'
userlist=re.compile(userpat,re.S).findall(data)
contentlist=re.compile(contentpat,re.S).findall(data)
x=1
for content in contentlist:
content = content.replace("\n","")
name = "content"+str(x)
exec(name+'=content')
x+=1
y=1
for user in userlist:
name = "content" + str(y)
print('user' + str(page) +str(y) + "is:" + user)
print("the content is ")
exec("print("+name+")")
print('\n')
y+=1
for i in range(1,30):
url = 'http://www.qiushibaike.com/8hr/page/'+str(i)
getcontent(url,i)
现在网站好像在升级进不去 有没有大神能解答下下面那个exec的问题QAQ |
|