from urllib.parse import urljoin

import pandas as pd
import requests as res
from bs4 import BeautifulSoup as bs  # used to parse the fetched HTML
# Listing endpoint; ``%s`` is replaced by the 1-based page number.
_LIST_URL = 'https://www.readnovel.com/all?pageSize=10&gender=2&catId=-1&isFinish=-1&isVip=-1&size=-1&updT=-1&orderBy=0&pageNum=%s'
# Base against which the (possibly relative) book links are resolved.
_SITE_ROOT = 'https://www.readnovel.com/'
_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36'}
# Seconds to wait for the server before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 10
_DEFAULT_CSV_PATH = r"D:\大数据学习资料\python学习\作业\09_2-1作业\pachong-1.csv"
# Deletes commas, spaces, CR, LF and the ideographic space in a single pass.
_STRIP_TABLE = str.maketrans('', '', ', \r\n\u3000')


def _clean_text(tag):
    """Return the text of *tag* with commas and whitespace removed.

    A missing tag (``None``) yields an empty string so that one incomplete
    entry does not abort the whole scrape.
    """
    if tag is None:
        return ''
    return tag.text.translate(_STRIP_TABLE)


def _parse_book(book):
    """Turn one ``div.book-info`` element into a 7-item row.

    The row order is: title, author, book URL, category, status,
    word count, introduction.
    NOTE(review): the meaning of the ``org``/``red``/``blue`` spans is
    inferred from the original variable names - confirm against the page.
    """
    link = book.a  # first anchor inside the entry carries title and href
    title = link.text if link is not None else ''
    href = link.get('href') if link is not None else None
    return [
        title,
        _clean_text(book.find('a', class_='default')),
        # urljoin copes with absolute, root-relative and protocol-relative
        # hrefs; plain concatenation produced malformed URLs for those.
        urljoin(_SITE_ROOT, href or ''),
        _clean_text(book.find('span', class_='org')),
        _clean_text(book.find('span', class_='red')),
        _clean_text(book.find('span', class_='blue')),
        _clean_text(book.find('p', class_='intro')),
    ]


def _fetch_page(page_num):
    """Download listing page *page_num* and return its rows (may be empty)."""
    response = res.get(_LIST_URL % page_num, headers=_HEADERS,
                       timeout=_REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors rather than parsing an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'  # force UTF-8 so the text is not garbled
    soup = bs(response.text, 'lxml')
    listing = soup.find('div', class_='right-book-list')
    if listing is None:
        # No book list on this page: contribute no rows.
        return []
    return [_parse_book(book)
            for book in listing.find_all('div', class_='book-info')]


def getWb(p, out_path=_DEFAULT_CSV_PATH):
    """Scrape listing pages 1..p of readnovel.com and save them as one CSV.

    Rows from every page are accumulated and written once, so the file
    contains all requested pages rather than only the last one.

    Args:
        p: Number of listing pages to fetch, starting at page 1. A value
            below 1 fetches nothing and writes an empty table.
        out_path: Destination CSV path. Defaults to the path the script
            has always written to.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-success HTTP status.
    """
    rows = []
    for page_num in range(1, p + 1):
        rows.extend(_fetch_page(page_num))
    # The file stays GBK-encoded as before; with the default strict error
    # handling, characters GBK cannot represent raise UnicodeEncodeError.
    pd.DataFrame(rows).to_csv(out_path, encoding='gbk')
# Script entry point: runs the scraper for listing pages 1 through 5.
getWb(5)