# -*- coding: utf-8 -*-
import io
import re

import requests
from bs4 import BeautifulSoup as BS
# HTTP request headers sent with every POST to the search endpoint on
# app1.sfda.gov.cn.
# NOTE(review): the values look like they were copied from a browser session;
# the hard-coded session cookies presumably expire and need refreshing -- confirm.
header = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh-TW;q=0.8,en;q=0.6',
    'cache-control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Cookie': 'JSESSIONID=5A60972F1D138C44FDC5DCFAF35FF10C.7; yunsuo_session_verify=81bdd2a48997d3be44e0dace7317b5e2; _gscu_1586185021=79432943chubrl65; _gscs_1586185021=79432943027gyc65|pv:3; _gscbrs_1586185021=1',
    'Host': 'app1.sfda.gov.cn',
    'Origin': 'http://app1.sfda.gov.cn',
    'Referer': 'http://app1.sfda.gov.cn/datasearch/face3/base.jsp?tableId=114&tableName=TABLE114&title=%B9%FA%BC%D2%CA%B3%C6%B7%B0%B2%C8%AB%BC%E0%B6%BD%B3%E9%BC%EC%A3%A8%B2%BB%BA%CF%B8%F1%B2%FA%C6%B7%A3%A9&bcId=143106776907834761101199700381',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 YaBrowser/16.10.0.2564 Yowser/2.5 Safari/537.36',
}
# Second set of HTTP request headers, addressed to app2.sfda.gov.cn.
# NOTE(review): nothing in the visible part of this file references this
# dict; presumably it is meant for requests to the app2 host -- confirm.
header1 = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh-TW;q=0.8,en;q=0.6',
    'Connection': 'keep-alive',
    'Cookie': 'JSESSIONID=2E1717F762EA71C817FA42DB6C62152F; _gscu_1358151024=79444730bj86pz52; _gscs_1358151024=79444730m72inv52|pv:2; _gscbrs_1358151024=1; _gscu_32481467=79444748p7qr7q10; _gscs_32481467=794447489k0m2310|pv:6; _gscbrs_32481467=1; yunsuo_session_verify=1f510b976fd2a2174cda347e273a586f; srcurl=687474703a2f2f617070322e736664612e676f762e636e2f64617461736561726368702f696e646578312e646f3f7461626c6549643d313134267461626c654e616d653d5441424c45313134267461626c65566965773d25453925413325394625453525393325383125453625384125424425453625413325383028254534254238253844254535253930253838254546254246254244253230254546254246254244292649643d353435; security_session_mid_verify=e405dc4b87e82b5c6b5059338ce5c3e1',
    'Host': 'app2.sfda.gov.cn',
    'Referer': 'http://app2.sfda.gov.cn/datasearchp/index1.do?tableId=114&tableName=TABLE114&tableView=%CA%B3%C6%B7%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD(%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD&security_verify_data=313932302c31303830',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 YaBrowser/16.10.0.2564 Yowser/2.5 Safari/537.36',
}
# Scrape result pages 1..19 of the TABLE114 search listing on app1.sfda.gov.cn
# and append, for every record link found, its numeric ID and link text to
# badrecord.txt (UTF-8), separated by dashed rules.

# Compiled once: matches every "Id=<digits>" occurrence in a link's markup.
ID_PATTERN = re.compile(r'Id=\d{1,}')

# io.open with an explicit encoding behaves the same on Python 2 and 3 and
# does not depend on the platform's default encoding; the `with` block
# guarantees the file is closed even if a request or the parsing raises.
# The u'' literals keep every write valid text on both Python versions
# (io.open's text mode accepts only unicode strings on Python 2).
with io.open('badrecord.txt', 'a', encoding='utf-8') as f:
    f.write(u'食药监不合格食品记录\n')
    f.write(u'=' * 60 + u'\n')
    for p in range(1, 20):
        # `curstart` selects the result page; the rest of the query string
        # is fixed.
        url = (
            "http://app1.sfda.gov.cn/datasearch/face3/search.jsp?tableId=114&State=1&bcId=143106776907834761101199700381&State=1&tableName=TABLE114&State=1&viewtitleName=COLUMN1490&State=1&viewsubTitleName=COLUMN1486&State=1&curstart="
            + str(p)
            + "&State=1&tableView=%25E5%259B%25BD%25E5%25AE%25B6%25E9%25A3%259F%25E5%2593%2581%25E5%25AE%2589%25E5%2585%25A8%25E7%259B%2591%25E7%259D%25A3%25E6%258A%25BD%25E6%25A3%2580%25EF%25BC%2588%25E4%25B8%258D%25E5%2590%2588%25E6%25A0%25BC%25E4%25BA%25A7%25E5%2593%2581%25EF%25BC%2589&State=1"
        )
        # A timeout keeps an unresponsive server from hanging the script
        # forever (requests waits indefinitely by default).
        res = requests.post(url, headers=header, timeout=30)
        bs = BS(res.text, 'lxml')
        # Collect the anchors once per page instead of re-running the search
        # for every index.
        for anchor in bs.find_all('a'):
            title = anchor.string
            ids = ID_PATTERN.findall(str(anchor))
            # Skip anchors that are not record links: `.string` is None when
            # the tag has no single text child, and a record link must carry
            # at least two "Id=" matches.
            if title is None or len(ids) < 2:
                continue
            # The second match is the record ID; the first is presumably the
            # "tableId=" parameter of the link -- TODO confirm. [3:] strips
            # the leading "Id=".
            record_id = ids[1][3:]
            # Keep everything as text; encoding to UTF-8 happens once, at the
            # file boundary.
            f.write(record_id + u'\n' + title + u'\n' + u'-' * 60 + u'\n')
|