这个总统数据库怎么爬?
https://www.worldpresidentsdb.com/Yang-Shangkun/
https://www.worldpresidentsdb.com/list/gender/female/
import requests
from lxml import etree
def main():
    """Print the paragraph text of every page linked from the female-presidents list.

    Downloads the list page, collects every ``href`` found under the
    ``div.list-group`` element, then downloads each linked page and prints
    the text nodes matched by ``//div/p//text()`` as a list of strings.

    Raises:
        requests.RequestException: if the list page cannot be fetched
            (connection error, timeout, or HTTP error status). Failures on
            individual linked pages are reported and skipped instead.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    base_url = 'https://www.worldpresidentsdb.com'
    list_url = 'https://www.worldpresidentsdb.com/list/gender/female/'
    headers = {'user-agent': 'firefox'}
    # requests applies no timeout by default, so a stalled server would hang
    # the script forever without this.
    timeout = 10

    # One session reuses the underlying connection for all requests to the host.
    with requests.Session() as session:
        session.headers.update(headers)

        list_response = session.get(list_url, timeout=timeout)
        list_response.raise_for_status()
        list_page = etree.HTML(list_response.text)
        hrefs = list_page.xpath('//div[@class="list-group"]//@href')

        for href in hrefs:
            # urljoin handles root-relative, relative and absolute hrefs alike.
            page_url = urljoin(base_url, href)
            try:
                page_response = session.get(page_url, timeout=timeout)
                page_response.raise_for_status()
            except requests.RequestException as exc:
                # One broken link should not abort the whole crawl.
                print(f'skipped {page_url}: {exc}')
                continue

            page = etree.HTML(page_response.text)
            info = page.xpath('//div/p//text()')
            print(info)


if __name__ == '__main__':
    main()
页:
[1]