|  | 
 
 
 楼主|
发表于 2019-9-10 15:26:19
|
显示全部楼层 
| 复制代码
import requests
from lxml import etree
import time
import csv
# Output CSV file, opened for text writing as UTF-8.  newline='' is the
# setting the csv module expects for files it writes to.  The handle is not
# closed within this section.
fp = open(
    'C:/Users/Administrator/Desktop/simu.csv',
    mode='wt',
    newline='',
    encoding='utf-8',
)
writer = csv.writer(fp)

# Header row: the source URL column followed by the ten scraped fields.
writer.writerow((
    '网址',
    '企业名称',
    '负责人',
    '成立时间',
    '注册地址',
    '办公地址',
    '实缴资本',
    '机构类型',
    '全职人数',
    '资格人数',
    '机构网址',
))

# Request headers carrying a desktop-browser User-Agent string.
headers = dict([
    ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'),
])
# Common prefix of the table-cell XPaths below.
# NOTE(review): browsers insert <tbody> when rendering, but the tree lxml
# builds from the raw HTML may not contain it -- if every table field comes
# back empty, try dropping '/tbody' from this prefix.
_TABLE_XPATH = '/html/body/div/div[2]/div/table/tbody'

# (column name, XPath) pairs, in the order of the CSV columns that follow
# '网址' and '企业名称' in the header row.
_FIELD_XPATHS = (
    ('负责人', _TABLE_XPATH + '/tr[22]/td[2]'),
    ('成立时间', _TABLE_XPATH + '/tr[7]/td[4]'),
    ('注册地址', _TABLE_XPATH + '/tr[8]/td[2]'),
    ('办公地址', _TABLE_XPATH + '/tr[9]/td[2]'),
    ('实缴资本', _TABLE_XPATH + '/tr[10]/td[4]'),
    ('机构类型', _TABLE_XPATH + '/tr[12]/td[2]'),
    ('全职人数', _TABLE_XPATH + '/tr[13]/td[2]'),
    ('资格人数', _TABLE_XPATH + '/tr[13]/td[4]'),
    ('机构网址', _TABLE_XPATH + '/tr[14]/td[2]/a'),
)


def _first_text(selector, path):
    """Return the text of the first node matching *path*, or '' if none."""
    nodes = selector.xpath(path)
    if not nodes:
        return ''
    # .text is None for an element without leading text.
    return nodes[0].text or ''


def get_info(url):
    """Fetch *url* and return the scraped fields as a tuple.

    The tuple holds, in order: 企业名称, 负责人, 成立时间, 注册地址, 办公地址,
    实缴资本, 机构类型, 全职人数, 资格人数, 机构网址 -- i.e. the CSV columns
    after the leading '网址' column.  A field whose node is missing from the
    page is returned as an empty string instead of raising, so one incomplete
    page does not abort the whole run.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx response (via ``raise_for_status``).
    """
    # A timeout keeps a stalled connection from hanging the crawl forever.
    res = requests.get(url, headers=headers, timeout=30)
    res.raise_for_status()
    res.encoding = res.apparent_encoding
    selector = etree.HTML(res.text)

    # The original code drops the last six characters of this node's text.
    name = _first_text(selector, '//*[@id="complaint1"]')[:-6]
    return (name,) + tuple(
        _first_text(selector, path) for _, path in _FIELD_XPATHS
    )


# NOTE(review): these URLs contain '/api/pof/manager' with page/size query
# parameters, which looks like a paginated listing endpoint rather than an
# HTML detail page containing the table the XPaths above expect -- confirm
# that this is the page you intend to parse.
urls = ['http://gs.amac.org.cn/amac-infodisc/api/pof/manager?rand=0.9208465968384951&page={}&size=100'.format(number) for number in range(1, 244)]

try:
    for url in urls:
        # get_info returns the fields; they are not visible here as globals.
        writer.writerow((url,) + get_info(url))
        # Pause two seconds between requests.
        time.sleep(2)
finally:
    # Close the CSV even if a request or parse step raised.
    fp.close()
 
   
 已经很努力了,但还没搞好,求大佬指点。
 
 @kaohsing
 | 
 |