爬虫
有没有大哥知道怎么爬谷歌学术的数据?{:10_256:}
import requests
import bs4
# Google Scholar citations page for a single user id.
url = 'https://scholar.google.com.hk/citations?hl=zh-CN&user=lyQCGAEAAAAJ'

# Send a desktop Chrome user-agent string with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
}

# Plain GET with no explicit proxy configuration.
response = requests.get(url=url, headers=headers)
ValueError: check_hostname requires server_hostname
import requests
import bs4
# Google Scholar citations page for a single user id.
url = 'https://scholar.google.com.hk/citations?hl=zh-CN&user=lyQCGAEAAAAJ'

# Proxy server address. You have to find a working one yourself; any proxy
# that can reach https://scholar.google.com.hk will do. The scheme is written
# out explicitly because the requests documentation says proxy URLs must
# include it.
proxies = {
    'http': 'http://127.0.0.1:8080',
    'https': 'http://127.0.0.1:8080',
}

# Send a desktop Chrome user-agent string with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
}

# The keyword argument is ``proxies`` (plural). Passing ``proxy=`` raises
# TypeError (unexpected keyword argument) before any request is sent.
# A timeout is set so an unreachable proxy cannot block the script forever.
response = requests.get(url, headers=headers, proxies=proxies, timeout=10)

# Prints the Response object's repr, e.g. "<Response [200]>".
print(response)
页:
[1]