| 
 | 
 
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册  
 
x
 
目标网页存在分页。我观察到翻页时,请求表单中的 ddlPage 参数会发生变化;可是用下面的代码爬取时,每次拿到的都是同一页的内容,求大神指点。
- import requests
 
 - from bs4 import BeautifulSoup
 
 - import re
 
 - import time
 
  
 
def gethtml():
    """Request and print two result pages for the keyword hard-coded in the URL.

    Sends one POST per page index to the search-results URL, passing the
    index as the ``ddlPage`` form field together with a browser-like
    ``User-Agent`` header, then prints the response body followed by an
    end-of-page marker line.

    Returns:
        None. All output goes to stdout.
    """
    url = "https://www.zjgrc.com/posSearchRslt.aspx?textPosKey=沙钢集团"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
    }
    for page_index in range(0, 2):
        # Build a fresh payload for each page. A dict literal keeps only the
        # last of any duplicated key, so two "ddlPage" values cannot live in
        # one dict; the loop index has to supply the value instead.
        data = {"ddlPage": page_index}

        # NOTE(review): the target is an .aspx page, so the server presumably
        # also expects the hidden postback fields (__VIEWSTATE,
        # __VIEWSTATEGENERATOR, __EVENTVALIDATION, __EVENTTARGET) copied from
        # the previous response. Confirm against the request the browser
        # sends when changing pages; ddlPage alone may not switch pages.
        r = requests.post(url, headers=headers, data=data)

        # Pause between requests so the site is not hit in a tight loop.
        time.sleep(2)

        print(r.text)
        print("结束")


if __name__ == '__main__':
    gethtml()
 
  复制代码 
- import requests
 
 - import re
 
  
# Search-results URL; the textPosKey value is the percent-encoded (UTF-8)
# form of the same keyword that is sent below as 'hfKey'.
url = 'https://www.zjgrc.com/posSearchRslt.aspx?textPosKey=%E6%B2%99%E9%92%A2%E9%9B%86%E5%9B%A2'
headers = {'User-Agent': 'Mozilla/5.0'}

# Page 1: a plain GET.
res = requests.get(url, headers=headers)

# Extract the hidden form fields needed for the paging postback from the
# first page's HTML. Each lookup takes the first regex match and raises
# IndexError if the field is absent.
first_page_html = res.text
state, state_generator, event_validation = (
    re.findall(pattern, first_page_html)[0]
    for pattern in (
        '__VIEWSTATE" value="(.*?)" />',
        '__VIEWSTATEGENERATOR" value="(.*?)" />',
        '__EVENTVALIDATION" value="(.*?)" />',
    )
)

# Page 2: POST the form back with 'lbNext' as the event target.
# Fields are added in the same order as the form payload is meant to be sent.
data = {
    '__EVENTTARGET': 'lbNext',
    '__EVENTARGUMENT': '',
    '__LASTFOCUS': '',
}
data['__VIEWSTATE'] = state
data['__VIEWSTATEGENERATOR'] = state_generator
data['__EVENTVALIDATION'] = event_validation
data['hfKey'] = '沙钢集团'
# The search-box inputs and their watermark client-state fields are sent empty.
data.update(dict.fromkeys(
    (
        'txtDw',
        'txtDw_TextBoxWatermarkExtender_ClientState',
        'txtPos',
        'txtPos_TextBoxWatermarkExtender_ClientState',
    ),
    '',
))
data['ddlPage'] = 0

res1 = requests.post(url, headers=headers, data=data)
print(res1.text)
 
  复制代码 
 
 
 |   
 
 
 
 |