为什么打印不出来啊?求大佬帮帮忙
import requestsimport re
def parse_page(url):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
}
response = requests.get(url, headers=headers)
text = response.text
titles = re.findall(r'<div\sclass="cont">.*?<b>(.*?)</b>', text, re.DOTALL)
dynasties = re.findall(r'<p class="source">.*?<a.*>(.*?)</a>',text)
authors = re.findall(r'<p class="source">.*?<a.*?>.*?<a.*?>(.*?)(</a>',text,re.DOTALL)
contents_tag = re.findall(r'<div class="contson".*?>(.*?)</div>',text,re.DOTALL)
contents = []
for content in contents:
x =re.sub(r'<.*?>',"",content)
contents.append(x.strip())
for value in zip(titles,dynasties,authors,contents):
title,dynasty,author,content = value
poems =[]
poem ={
'title':title,
'dynasty':dynasty,
'author':author,
'content':content
}
poems.append(poem)
for poem in poems:
print(poem)
print('='*40)
def main():
url = 'https://www.gushiwen.org/default_1.aspx'
for x in range(1,10):
url = 'https://www.gushiwen.org/default_%s.aspx' %x
parse_page(url)
if __name__ == ' __main__':
main()
if __name__ == ' __main__':
后边的'__main__'多了一个空格
页:
[1]