|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import sys
from urllib.parse import urljoin

import requests
from lxml import etree
def main(timeout=10.0):
    """Print the paragraph text of every page linked from the male-gender list.

    Fetches the list page on worldpresidentsdb.com, collects every ``href``
    found inside the ``list-group`` container, requests each linked page and
    prints the list of text nodes found in the ``<p>`` children of the
    ``col-md-8`` column.

    Args:
        timeout: Seconds to wait for each HTTP response before giving up.

    Raises:
        requests.RequestException: If the list page itself cannot be fetched
            or answers with an HTTP error status.
    """
    base_url = 'https://www.worldpresidentsdb.com'
    list_url = f'{base_url}/list/gender/male/'

    # One session reuses the connection across the many detail-page requests.
    with requests.Session() as session:
        session.headers.update({'user-agent': 'firefox'})

        response = session.get(list_url, timeout=timeout)
        # Without the listing there is nothing to crawl, so fail loudly.
        response.raise_for_status()
        listing = etree.HTML(response.text)
        hrefs = listing.xpath('//div[@class="list-group"]//@href')

        for href in hrefs:
            # urljoin copes with site-relative, page-relative and absolute
            # hrefs; plain string concatenation only works for the first.
            detail_url = urljoin(list_url, href)
            try:
                response = session.get(detail_url, timeout=timeout)
                response.raise_for_status()
            except requests.RequestException as exc:
                # One unreachable page should not abort the whole crawl.
                print(f'skipping {detail_url}: {exc}', file=sys.stderr)
                continue
            page = etree.HTML(response.text)
            info = page.xpath('//div[contains(@class,"col-md-8")]/p//text()')
            print(info)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
|