你怕是在梦里哦 发表于 2020-4-14 21:52:24

为什么打印不出来啊?求大佬帮帮忙

import requests
import re

def parse_page(url):
    headers = {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',

    }
    response = requests.get(url, headers=headers)
    text = response.text
    titles = re.findall(r'<div\sclass="cont">.*?<b>(.*?)</b>', text, re.DOTALL)
    dynasties = re.findall(r'<p class="source">.*?<a.*>(.*?)</a>',text)
    authors = re.findall(r'<p class="source">.*?<a.*?>.*?<a.*?>(.*?)(</a>',text,re.DOTALL)
    contents_tag = re.findall(r'<div class="contson".*?>(.*?)</div>',text,re.DOTALL)
    contents = []
    for content in contents:
      x =re.sub(r'<.*?>',"",content)
      contents.append(x.strip())
    for value in zip(titles,dynasties,authors,contents):
      title,dynasty,author,content = value
      poems =[]
      poem ={
            'title':title,
            'dynasty':dynasty,
            'author':author,
            'content':content
      }
      poems.append(poem)
    for poem in poems:
      print(poem)
      print('='*40)


def main():
    url = 'https://www.gushiwen.org/default_1.aspx'
    for x in range(1,10):
      url = 'https://www.gushiwen.org/default_%s.aspx' %x

    parse_page(url)
if __name__ == ' __main__':
    main()

snaker 发表于 2020-4-15 09:29:02

if __name__ == ' __main__':
后边的'__main__'多了一个空格
页: [1]
查看完整版本: 为什么打印不出来啊?求大佬帮帮忙