使用requests库
从豆瓣中选择一部自己喜欢的电影,爬取电影的100 条短评和评分,并将短评和评分保存为csv 格式。使用BeautifulSoup4 库分析HTML 数据。
使用requests 库实现网络爬虫程序
编写一个程序
使用 requests库爬取 2021年全国大学排名,并按格式输出各大学数据,最后绘制 前 10名大学分数柱状图
排名 学校名称 省市 总分 标签(一流大学A类,985,211) 50 先自己动手做,有代码问题再来问。 这活二百鱼币我给你干了,不贵吧。 隔这小猿搜题呢? skyrimz 发表于 2021-6-27 09:54
隔这小猿搜题呢?
我怀疑你在打广告{:10_256:} skyrimz 发表于 2021-6-27 09:54
隔这小猿搜题呢?
搁这搁着呢{:10_292:}
import json
import requests
def getHTMLText(url, timeout=30):
    """Fetch *url* with HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded response body, or ``""`` when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        # Use the charset detected from the body rather than the one
        # announced in the headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are turned into the empty-string
        # sentinel; programming errors and Ctrl-C still propagate.
        return ""
def printUnivList(ulist, html, num):
    """Parse ranking JSON and print a table of the first *num* universities.

    Args:
        ulist: List that is extended in place with one
            ``[rank, name, score, tags]`` row (all strings) per university.
        html: JSON text; the rows are read from ``data -> rankings`` and each
            row must provide ``rankOverall``, ``univNameCn``, ``score`` and
            ``univTags``.
        num: Maximum number of universities to collect and print.

    Raises:
        json.JSONDecodeError: If *html* is not valid JSON (e.g. an empty
            string).
        KeyError: If an expected key is missing from the payload.
    """
    # NOTE(review): these module-level names are kept from the original code;
    # after the call they hold the values of the last processed row.
    global name
    global score
    data = json.loads(html)
    content = data['data']['rankings']
    # Slicing (rather than indexing up to num) tolerates payloads that hold
    # fewer than num rows.
    for row in content[:num]:
        index = row['rankOverall']
        name = row['univNameCn']
        score = row['score']
        # Join the tags into plain text: a raw list cannot be used with the
        # "^10" format spec below and would print with brackets and quotes.
        category = ", ".join(str(tag) for tag in (row['univTags'] or []))
        ulist.append([str(index), name, str(score), category])
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}\t{4:^10}"
    # chr(12288) is the full-width (CJK) space; it is used as the fill
    # character of the name column.
    print(tplt.format("排名", "学校名称", "总分", chr(12288), "标签"))
    for u in ulist[:num]:
        print(tplt.format(u[0], u[1], u[2], chr(12288), u[3]))
def main():
    """Download the ranking JSON (year=2021 query) and print the first ten rows."""
    ranking_url = 'https://www.shanghairanking.cn/api/pub/v1/bcur?bcur_type=11&year=2021'
    rows = []
    page = getHTMLText(ranking_url)
    printUnivList(rows, page, 10)


main()
少写了省份。
输出的标签 ['一流大学A类', '985', '211'] 怎么去掉 [] 和引号?
ulist.append() 为什么没有 str() 会报错 TypeError?
{:5_99:}
import requests
import pandas
from bs4 import BeautifulSoup
# Fetch one page of short comments for a Douban book, print them and save
# them to comments.csv.
# NOTE(review): Douban appears to reject the default python-requests
# User-Agent, so a browser-like one is sent -- confirm against the live site.
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(
    'https://book.douban.com/subject/1084336/comments/',
    headers=headers,
    timeout=30,  # do not hang forever on a stalled connection
)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
r = response.text
soup = BeautifulSoup(r, 'lxml')
# Every short comment is rendered as <span class="short">.
pattern = soup.find_all('span', 'short')
# get_text() always yields a string, whereas .string is None as soon as the
# span contains more than one child node.
comments = [item.get_text() for item in pattern]
for comment in comments:
    print(comment)
f = pandas.DataFrame(comments)
# The "utf-8_sig" codec writes a BOM at the start of the file.
f.to_csv('comments.csv', encoding='utf-8_sig')
怎么用代理对付反爬虫呀
import json
import requests
def getHTMLText(url, timeout=30):
    """Return the body of *url* decoded as UTF-8, or ``""`` on failure.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        The response text, or an empty string when the request raises a
        ``requests`` error or the server returns an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Catch only request failures; a bare ``except`` would also hide
        # KeyboardInterrupt and genuine bugs.
        return ""
    # The text is decoded as UTF-8 regardless of the response headers.
    r.encoding = "utf-8"
    return r.text
def printUnivList(ulist, html, num):
    """Parse ranking JSON and print rank, name, province, score and tags.

    Args:
        ulist: List that is extended in place with one
            ``[rank, name, province, score, tags]`` row (all strings) per
            university.
        html: JSON text; the rows are read from ``data -> rankings`` and each
            row must provide ``rankOverall``, ``univNameCn``, ``province``,
            ``score`` and ``univTags``.
        num: Maximum number of universities to collect and print.

    Raises:
        json.JSONDecodeError: If *html* is not valid JSON (e.g. an empty
            string).
        KeyError: If an expected key is missing from the payload.
    """
    data = json.loads(html)
    # NOTE(review): the full ranking list is published as a module-level
    # name, as in the original code -- presumably for a later plotting step;
    # confirm before removing.
    global content
    content = data['data']['rankings']
    # Slice instead of indexing so fewer than num rows is not an error.
    for row in content[:num]:
        index = row['rankOverall']
        name = row['univNameCn']
        province = row['province']
        score = row['score']
        # Joining the tags gives "a, b, c" directly, without the brackets and
        # quotes that str(list) would add.
        category = ", ".join(str(tag) for tag in (row['univTags'] or []))
        ulist.append([str(index), name, province, str(score), category])
    tplt = "{0:^10}\t{1:^10}\t{2:^10}\t{3:^10}\t{5:^10}"
    # chr(12288) is the full-width (CJK) space. It is passed as argument 4,
    # which this template does not reference.
    print(tplt.format("排名", "学校名称", "省市", "总分", chr(12288), "标签"))
    for u in ulist[:num]:
        print(tplt.format(u[0], u[1], u[2], u[3], chr(12288), u[4]))
def main():
    """Fetch the ranking JSON (year=2021 query) and print its first ten rows."""
    source = 'https://www.shanghairanking.cn/api/pub/v1/bcur?bcur_type=11&year=2021'
    collected = []
    printUnivList(collected, getHTMLText(source), 10)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
剩画图了
页:
[1]