|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 wcq15759797758 于 2021-7-6 10:57 编辑
中国知网专利库的精彩推荐
[code]import requests # 网络请求模块
import time,random # 时间模块
import re
from bs4 import BeautifulSoup
class Crawl:
    """Download a URL and extract record fields from the raw response text.

    The response body is scanned with regular expressions for the
    ``"Name"``, ``"Unit"``, ``"FileTitle"`` and ``"PublishDate"`` string
    fields; the n-th match of each field is combined into the n-th record.
    """

    # (output key, pattern) pairs, in the order the keys appear in each record.
    _FIELD_PATTERNS = (
        ('name', re.compile(r'"Name":"(.*?)",')),
        ('Unit', re.compile(r'"Unit":"(.*?)",')),
        ('Title', re.compile(r'"FileTitle":"(.*?)",')),
        ('Date', re.compile(r'"PublishDate":"(.*?)",')),
    )

    def __init__(self):
        # Request headers sent with every call made by this crawler.
        # NOTE(review): the Referer points at zhihu.com, not at the host
        # being requested -- confirm this is intentional.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36',
            'Referer': 'https://www.zhihu.com/',
        }

    @classmethod
    def _parse_items(cls, text):
        """Return the list of record dicts found in *text*.

        Each field is matched independently across the whole text and the
        match lists are zipped together, so the result is as long as the
        shortest match list (extra matches of other fields are dropped).
        """
        keys = [key for key, _ in cls._FIELD_PATTERNS]
        columns = [pattern.findall(text) for _, pattern in cls._FIELD_PATTERNS]
        return [dict(zip(keys, row)) for row in zip(*columns)]

    def get_json(self, json_url, timeout=10):
        """Fetch *json_url*, print every extracted record and return them.

        Args:
            json_url: Address to request with this crawler's headers.
            timeout: Seconds to wait for the server before giving up;
                without it a stalled connection would block forever.

        Returns:
            A list of dicts with the keys ``name``, ``Unit``, ``Title`` and
            ``Date`` (empty when nothing matched).

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        response = requests.get(json_url, headers=self.headers, timeout=timeout)
        items = self._parse_items(response.text)
        for item in items:
            print(item)
        return items
if __name__ == '__main__':
    # Endpoint address that was found in the response returned by the page.
    json_url = (
        'https://recsys.cnki.net/RCDService/api/RecSysOpenApi/Papers'
        '?idenID=&clientID=3210408220706642159&userIP='
        '&platformURL=kns.cnki.net%2Fkns8%40%2FKNS8&productID=SCDB'
    )
    # Build the crawler object, then run a single fetch against the endpoint.
    c = Crawl()
    text = c.get_json(json_url)
从页面返回的响应中找到了 json_url |
|