|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
这个网页的搜索结果是分页显示的。我观察到翻页时表单里的 ddlPage 参数发生了变化,可是用下面的代码爬取时仍然只能爬到一页,求大神指点。
import re
import time

import requests
from bs4 import BeautifulSoup


def _hidden_field(html, name):
    """Return the value of the hidden form input called *name* in *html*.

    Raises:
        ValueError: if the page does not contain the field, which means the
            postback cannot be built from this response.
    """
    match = re.search(re.escape(name) + r'" value="(.*?)" />', html)
    if match is None:
        raise ValueError(f"hidden field {name!r} not found in the response")
    return match.group(1)


def gethtml(pages=2):
    """Fetch and print the first *pages* result pages of the job search.

    The result list is an ASP.NET WebForms page: paging is a postback, so a
    bare ``ddlPage`` value is not enough.  Each "next page" request has to
    echo back the hidden state fields of the page currently shown and name
    the "next" link (``lbNext``) as the event target.

    Args:
        pages: Number of result pages to fetch, starting from the first one.

    Raises:
        ValueError: if *pages* is smaller than 1 or a response lacks one of
            the hidden state fields.
        requests.RequestException: on network errors or HTTP error statuses.
    """
    if pages < 1:
        raise ValueError("pages must be at least 1")

    url = "https://www.zjgrc.com/posSearchRslt.aspx?textPosKey=沙钢集团"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
    }

    # A session sends the headers on every request and keeps any cookies the
    # server sets between the page loads.
    with requests.Session() as session:
        session.headers.update(headers)

        # The first page is a plain GET; it also delivers the state fields
        # needed to ask for the following page.
        r = session.get(url, timeout=10)
        r.raise_for_status()
        print(r.text)

        for current_page in range(pages - 1):
            data = {
                "__EVENTTARGET": "lbNext",
                "__EVENTARGUMENT": "",
                "__LASTFOCUS": "",
                # The state must come from the page shown right now, so it is
                # re-read from the latest response on every iteration.
                "__VIEWSTATE": _hidden_field(r.text, "__VIEWSTATE"),
                "__VIEWSTATEGENERATOR": _hidden_field(r.text, "__VIEWSTATEGENERATOR"),
                "__EVENTVALIDATION": _hidden_field(r.text, "__EVENTVALIDATION"),
                "hfKey": "沙钢集团",
                "txtDw": "",
                "txtDw_TextBoxWatermarkExtender_ClientState": "",
                "txtPos": "",
                "txtPos_TextBoxWatermarkExtender_ClientState": "",
                # NOTE(review): assumed to be the 0-based index of the page
                # currently displayed (0 when leaving page one) -- confirm
                # against the form data the browser sends.
                "ddlPage": current_page,
            }
            time.sleep(2)  # pause between requests to go easy on the server
            r = session.post(url, data=data, timeout=10)
            r.raise_for_status()
            print(r.text)

    print("结束")


if __name__ == '__main__':
    gethtml()
复制代码
import re

import requests

url = 'https://www.zjgrc.com/posSearchRslt.aspx?textPosKey=%E6%B2%99%E9%92%A2%E9%9B%86%E5%9B%A2'
headers = {
    'User-Agent': 'Mozilla/5.0',
}
# Without a timeout a stalled connection would block the script forever.
timeout = 10


def hidden_field(html, name):
    """Return the value of the hidden form input called *name* in *html*.

    Raises:
        ValueError: if the field is missing, instead of the bare IndexError
            that indexing an empty ``re.findall`` result would produce.
    """
    match = re.search(re.escape(name) + r'" value="(.*?)" />', html)
    if match is None:
        raise ValueError(f'hidden field {name!r} not found in the response')
    return match.group(1)


# First page
res = requests.get(url, headers=headers, timeout=timeout)
res.raise_for_status()

# Read the paging parameters: the hidden WebForms state fields that have to
# be echoed back in the postback.
state = hidden_field(res.text, '__VIEWSTATE')
state_generator = hidden_field(res.text, '__VIEWSTATEGENERATOR')
event_validation = hidden_field(res.text, '__EVENTVALIDATION')

# Turn the page (second page): post back with the "next" link as the event
# target.
data = {
    '__EVENTTARGET': 'lbNext',
    '__EVENTARGUMENT': '',
    '__LASTFOCUS': '',
    '__VIEWSTATE': state,
    '__VIEWSTATEGENERATOR': state_generator,
    '__EVENTVALIDATION': event_validation,
    'hfKey': '沙钢集团',
    'txtDw': '',
    'txtDw_TextBoxWatermarkExtender_ClientState': '',
    'txtPos': '',
    'txtPos_TextBoxWatermarkExtender_ClientState': '',
    'ddlPage': 0,
}
res1 = requests.post(url, headers=headers, data=data, timeout=timeout)
# Fail loudly rather than printing an error page as if it were page two.
res1.raise_for_status()
print(res1.text)
复制代码
|
|