|
楼主 |
发表于 2021-7-11 20:25:05
|
显示全部楼层
附上源码
import random
import time

from bs4 import BeautifulSoup
import requests
from requests.exceptions import RequestException
def get_everypage(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The response text when the server answers with HTTP 200;
        ``None`` for any other status code or when the request itself
        fails (connection error, timeout, invalid URL, ...).
    """
    # SECURITY NOTE(review): this is a live session cookie pasted from a
    # browser. It is a credential and it expires; load it from configuration
    # or the environment instead of committing it to source.
    headers = {
        "cookie": 'WOSSID=6Flhk7wg343DcjCLDsu; dotmatics.elementalKey=SLsLWlMhrHnTjDerSrlG; _abck=E0972FB1A1A656FBF443EDFAE01EDDD6~0~YAAQHg/TFz2yWRt6AQAAPmpGlQacrDgoJYgEgGVT6WJMPoM8uwVa8dzlY0+zpo5QebFHWzFvXQw8Uqv1heQpWJPviQUI7TV2tQXWa/imgc5lMHaK0PA/1z92VSZxXxwx6UgyRskE44eBun7IkJPyp9b7V6YSUdwEuNWUiCTrPCM1ahH2vmp19I2ZY/oEjLmkRA2AtQO/qTTmi1Ga5c3FX93WRnrI9K6rIqtW5DQdJXjvJ2DkHgLqONR156zJP4aYnAuIfioByW0fNqjI/4+Q8yunSB/RbCqlz7njleP2Vz/cAu/Vrek+6Hr+0zvKbvKP8vMWaJKt0VkR3HLkSKrAz3lPiX+/XGoCmIlMeXXcIhug+ru75sMKR8oaBMB+mWlN4KoMftI43El9rtCfq+X4fOUdGNYVO3MqtGvq3OF2DA==~-1~-1~-1; _sp_id.4e8e=c177493d-bda1-4d36-a4c8-bed18645f71d.1625824058.6.1626002058.1625975509.1388605b-c901-40a8-9f21-fec3151e74bc; bm_sz=D7A11AE05D0DDBA88B32DFB0A8942127~YAAQmqzbFxOO5Xl6AQAAiQQmlQz6dTdPFmUfrPA9YxSKmCZLkng13VIDdYhYjVZlDiHzNhMWVhQRgpWsFdu6Aru2asaAeJLXP/xWvlokka8Ot8O0ZIMaSzpeZ1OikmAQXNTdfbdahfEGcIQ0GVlVFUlf/1lTkYsPG0w+B6LrO9v2mBLBg/iQA4PSjXiz2FHDGafLhQTB; ak_bmsc=213297722EC649F3819D33AE45D92634~000000000000000000000000000000~YAAQdP4ZuOs3ooJ6AQAAZ/szlQznYjV2HsVsg6ohm/kQUg1upNJIbLlHGpogtlaBXawYsen/3xKBAuG2P7tqdqsliGKwA+OWHVOLURBQ03duX5A56ZsPmKzP2ROQsrkyRPgWd/WTg3KyenJWOfu4oeSUKVEVSOSEJUd5cyOfioPjacFvUIM4LZ4Ajf7F2zyqwiM8S2NfingQdET/Lm9NR4dQEADZoihmNuU2eJZa+hAOswz5SmwNcPXsjMoIHKC2O0MRI3xTnZNL+EXDWVid4KgqwMYk2eIVESvgUvTgX+7Oy5yU2y1eS0nff4A4stBy5qGKrrOKaPTbl6B9+bBu8VHNsWUfDFU8zBt+see1fJkc7XX7GBdd05V8oJgYvuC2ZY4yYMcvhbQfhsXH02X7; _sp_ses.4e8e=*; bm_sv=227084F008EC7F65748128D4B1CB6905~SLon73rFuAC+a+WvdIR9guie6Hr47ZX2TyHn/bc9vUvpUi4Ckm/ht+xq7WN3CVXafr5HptnX3UCbmZkVXV4E/lGPcEPIkytOs0Bc2A6G3E4HvTZcq6I3C3VNYGYNoM2QUAjJHFrO9doKBZmmrKLFHwZMoC5N3R5CvWJ/VhvBvz0=',
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
    }
    try:
        # requests has no default timeout; without one a stalled server
        # would block this call indefinitely.
        r = requests.get(url, headers=headers, timeout=30)
    except RequestException as exc:
        print(f"Request to {url} failed: {exc}")
        return None
    if r.status_code == 200:
        # Debug output kept from the original: dump the raw page.
        print(r.text)
        return r.text
    return None
# Wait a random 50-70 seconds before hitting the site. The original
# `range(50-70)` evaluated to `range(-20)`, which time.sleep() rejects
# with a TypeError.
time.sleep(random.uniform(50, 70))
html = get_everypage("https://www.webofscience.com/wos/alldb/summary/46b4259a-0f73-4a02-8e5a-680f2dd57349-0076862d/relevance/1")
if html is None:
    # get_everypage returns None on any non-200 response or request
    # failure; BeautifulSoup cannot parse None, so stop with a clear message.
    raise SystemExit("Failed to download the page; nothing to parse.")
soup = BeautifulSoup(html, "lxml")
# Collect every <span class="mat-checkbox-label"> element into one list.
items0 = soup.find_all(name="span", attrs={"class": "mat-checkbox-label"})
print("------------------------""\n", items0)
|