|
发表于 2021-10-13 18:22:16
|
显示全部楼层
本楼为最佳答案
import sys
from urllib.parse import urljoin

import requests
from lxml import etree

# Listing page whose "list-group" block links to the individual profile pages.
LIST_URL = 'https://www.worldpresidentsdb.com/list/gender/female/'
# Sent with every request; the original script used this same user-agent value.
HEADERS = {'user-agent': 'firefox'}
# Seconds to wait for the server before giving up; requests has no default
# timeout, so without this a stalled connection would block forever.
REQUEST_TIMEOUT = 10


def _fetch_tree(session, url):
    """Download *url* with *session* and return it parsed by ``etree.HTML``.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx response (via ``raise_for_status``).
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return etree.HTML(response.text)


def main():
    """Print the paragraph text of every page linked from the listing page.

    Fetches ``LIST_URL``, collects every ``href`` found under the
    ``div.list-group`` element, downloads each linked page and prints the
    list of text nodes found in ``<p>`` elements under a ``div`` whose class
    contains ``col-md-8``.

    A failure on the listing page propagates to the caller; a failure on an
    individual linked page is reported on stderr and that page is skipped.
    """
    # One session reuses the underlying connection across all requests.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        listing = _fetch_tree(session, LIST_URL)
        hrefs = listing.xpath('//div[@class="list-group"]//@href')

        for href in hrefs:
            # urljoin resolves root-relative, relative and absolute hrefs
            # alike, unlike plain string concatenation with the site root.
            page_url = urljoin(LIST_URL, href)
            try:
                page = _fetch_tree(session, page_url)
            except requests.RequestException as exc:
                print(f'skipping {page_url}: {exc}', file=sys.stderr)
                continue
            info = page.xpath('//div[contains(@class,"col-md-8")]/p//text()')
            print(info)


if __name__ == '__main__':
    main()
复制代码 |
|