|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import urllib.request
- import urllib.parse
- from bs4 import BeautifulSoup
- import time
-
- fs = ''
- url = "http://app2.bjepb.gov.cn/cpout/showout/ListAccept.aspx"
- postdata = urllib.parse.urlencode({'_EVENTTARGET':"",
- '_EVENTARGUMENT':"",
- '_VIEWSTATE':"0VaODZY15P3qOZrMI07a5B4TT0zdOWdZzKAHt0PGEAD1QpUaHiGef4xvkOdq9AeMPCH9MxgqpQ3fUEDmIanoXK+lrexKoT+fnrUVQX9L5Rko8C4jcF3uGooYzx+QBeOkBazikPtl9U41TVaSyCwxvIhV5YZ9DfDy5adLR99njfXc6zBJloXBiTukCH7+4664L0W4IavHQ/I4c8vlD5Cv3bLajBEbxWD8hrpZOBhZzILpUbXGcbFZAvwIf95RUSPeVgvI4uvJbE+m4yi2FLwMJppePbuT1bW0k95k6B7u+NtY0LdW8MQUE2P8JvLuap06W10Y/+4Qsm02KzSr9og6pFDcZ2BXQM2l+zsAv1XrtmNE+0KaTKVpOx8EbGB0GwKjp0Z2EbtKM8G6Nkt/Ep46uInfLZXO7oqf+EvH17EWB3b3HgXHOD7jzeA48rAPU8zH4jN3yAGyve9IBwqthSJyQVhJdOCBhKMi+qTpVIp0GSXYxbcC7KGYyo7JX+kmZNfcvZk36pw7nrzdzHOXc7jZkmbxxee9qEdq9Mx91ftliQYGLm22Qs9WYOP53wQRRJLqnjlcbdHBRLCFz8uKVjQKtMgetw+So1sKWYWCdUTHuxzaKv2niaJWxnQOrtOMWKEdIR/3O1Y1h12EWxdyvTddU0hBocGT2kjGWnYMSQATpKhiRhZWkyqoLTvAeeIQT/kwX9ZK/uYka2I1TxQA8iDA3G2JCplj7oCEJeWzQXkj2/feBFbTpx/4Mpe82IB8Lt2R9+SnEeMfPmK8BXSuC2izk2L7jwGm2Hs7qMyjb/b97bEnmyBHSYefW71dNLZZA+vz5fTFRpesQiRxxHbW1d2VgMKnoEdHmEjMhWYX9MnNRbv2YMY455YaD4gl8KcPcEDqfs/RK5+xJCh2x8R0ex8sP1kFLv7oS5pNe+JD5bY2zhIp93RFttZSJA6w8Kl++CC2+R9cU9xMslC3z0izWS1Qg9Ejg/FH0fGChTR+t2fHsx8MIflpMir+U6Fx3gpObyN2YolyAWGOyNlPLWxELh3IMJQfK6F5eC3hAwZnlHJ0brwYGkJxsrUBe8BIqYJUiFjx",
- 'ddlcounty':"市级",
- 'txtAppName': "",'AspNetPager_input': '5', 'AspNetPager': '跳转'})
- postdata = postdata.encode('utf-8')
- req = urllib.request.Request(url, postdata)
- req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0')
- time.sleep(1.5)
- res = urllib.request.urlopen(req)
- html=res.read().decode('utf-8')
- print(res.status, res.reason)
-
- if( res.status != 200 ):
- exit()
-
- print('ok')
- soup=BeautifulSoup(html,'html.parser')
- trs=str(soup) #根据表格的style来定位表格,也可以通过id和第几个来定位,请参考函数重载
- fs=fs+trs
- res.close()
- f=open(r'C:/temp/group1.txt','w',encoding='utf-8') #这个例子没有写新建文件的语句,需要自己提前在相应目录建好空文件,才能通过
- f.write(fs)
- f.close
-
复制代码
总是显示第一页,无法跳转到第五页,好像POST表单没有提交,请各位大神帮帮小弟!!!谢谢,谢谢
一个简单例子,自己完善吧~
这个页面的 字符集 设置得有些蛋疼~
- import re
- import requests as req
- url = r'http://app2.bjepb.gov.cn/cpout/showout/ListAccept.aspx'
- UA = {'User-Agent':'Mozilla/5.0'}
- formdata = {
- '__EVENTTARGET':"",
- '__EVENTARGUMENT':"",
- '__VIEWSTATE':'',
- 'ddlcounty':"市级",
- 'txtAppName': "",
- 'AspNetPager_input': '5',
- 'AspNetPager': '跳转'
- }
- ssn = req.Session()
- ssn.headers = UA
- rsp = ssn.get(url)
- html = rsp.text
- vs = re.findall(r'<input[^>]+?name="__VIEWSTATE"[^>]+?value="([^"]+)',html)[0]
- formdata['__VIEWSTATE'] = vs
- rsp = ssn.post(url, data=formdata)
- html = rsp.text
- pg = ''.join(re.findall(r'(当前页:).+?(\d+)',html, re.S)[0])
- print(pg)
|
|