|

楼主 |
发表于 2018-7-27 11:09:55
|
显示全部楼层
import requests
from bs4 import BeautifulSoup
import time
headers={'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
def get_info(url):
wb_data=requests.get(url,headers=headers)
soup=BeautifulSoup(wb_data.text,"html.parser")
authors=soup.select('div.article block untagged mb15 typs_hot > div.author clearfix > a > h2')
full_contents=soup.select('div.article block untagged mb15 typs_hot > a.contentHerf > div.content > span')
ages=soup.select('#content-left > div > div.author.clearfix > div.articleGender manIcon')
smiles=soup.select('a.contentHerf > div.stats > span.stats-vote > a >i.number')
for author,full_content,age,smile in zip(authors,full_contents,ages,smiles):
data={
'author':author.get_text(),
'full_content':full_content.get_text(),
'age':age.get_text(),
'smile':smile.get_text(),
}
print(data)
if __name__=='__main__':
urls=['https://www.qiushibaike.com/hot/page/{}/'.format(str(i)) for i in range(1,4)]
for url in urls:
get_info(url)
time.sleep(2)
可以帮忙再看下 这个代码吗 还是一样的情况 都是没有显示 |
|