异常:名称未定义
import requestsimport xlsxwriter
def get_json(index):
url = "https://study.163.com/p/search/studycourse.json"
payload = {
"activityId": 0,
"keyword": "python",
"orderType": 5,
"pageSive": 50,
"priceType": -1,
"qualityType": 0,
"searchTimeType": -1,
}
headers = {
"accept": "application/json",
"host": "study.163.com",
"content-type": "application/jion",
"origin": "hrrps://study.163.com",
"user-afent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) Applewedkit/537.36 (KHTML, LIKE Gecko) Chrome/76.0.3809.132 Safari/537.36"
}
try:
response = requests.post(url,json=payload,headers=headers)
开始执行
Traceback (most recent call last):
File "D:\桌面\爬虫1 网易云python课程.py", line 67, in <module>
main(index)
NameError: name 'index' is not defined
我抄书上的代码,总是抛出这个错误说是未定义是咋回事
contennt_json = response.json()
if content_json and content_json['code'] == 0:
return content_json
return None
except Exception as e:
print('出错了')
print(e)
return None
def get_content(content_json):
if "result" in content_json:
return cintent_json['result']['list']
def save_excel(content,index):
for num,item in enumerte(content):
row = 50*index + (num+1)
worksheet.write(row,0,item['productId'])
worksheet.write(row,1,item['courseId'])
worksheet.write(row,2,item['productName'])
worksheet.write(row,3,item['productType'])
worksheet.write(row,4,item['provider'])
worksheet.write(row,5,item['score'])
worksheet.write(row,6,item['scoreLevel'])
worksheet.write(row,7,item['learnerCount'])
worksheet.write(row,8,item['lessonCount'])
worksheet.write(row,9,item['lectorName'])
worksheet.write(row,10,item['originaPrice'])
worksheet.write(row,11,item['discountPrice'])
worksheet.write(row,12,item['discountRate'])
worksheet.write(row,13,item['imgUrl'])
worksheet.write(row,14,item['bigImgUrl'])
worksheet.write(row,15,item['description'])
def main(index):
content_json = get_json(index)
content = get_content(content_json)
save_excel(content,index)
if True:
print('开始执行')
workbook = xlsxwriter.Workbook("网易云python.xlsx")
worksheet = workbook.add_worksheet("first_sheet")
main(index)
workbook.close()
print('结束运行') index没赋值 suchocolate 发表于 2020-11-28 19:18
index没赋值
index 不是在def中定义的参数吗?
麒麟永生 发表于 2020-11-28 19:24
index 不是在def中定义的参数吗?
函数里的是形式参数,调用的时候要传入实际参数。然而实参并没有定义。 suchocolate 发表于 2020-11-28 19:25
函数里的是形式参数,调用的时候要传入实际参数
那怎么直接调用,因为我感觉这里貌似不太需要参数?
麒麟永生 发表于 2020-11-28 19:27
那怎么直接调用,因为我感觉这里貌似不太需要参数?
你这个爬虫的目的是啥,不知道目的我只能从语法上猜,你试试这样:# _*_ coding: utf-8 _*_
# Developer: suchocolate
# Date: 11/28/2020 19:28
# File name: ixrp.py
# Development tool: PyCharm
import requests
import xlsxwriter
def get_json(index):
url = "https://study.163.com/p/search/studycourse.json"
payload = {
"activityId": 0,
"keyword": "python",
"orderType": 5,
"pageSive": 50,
"priceType": -1,
"qualityType": 0,
"searchTimeType": -1,
}
headers = {
"accept": "application/json",
"host": "study.163.com",
"content-type": "application/jion",
"origin": "hrrps://study.163.com",
"user-afent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) Applewedkit/537.36 (KHTML, LIKE Gecko) Chrome/76.0.3809.132 Safari/537.36"
}
try:
response = requests.post(url, json=payload, headers=headers)
contennt_json = response.json()
if contennt_json and contennt_json['code'] == 0:
return contennt_json
return None
except Exception as e:
print('出错了')
print(e)
raise None
def get_content(content_json):
if "result" in content_json:
return content_json['result']['list']
def save_excel(content, index):
for num, item in enumerate(content):
row = 50 * index + (num + 1)
worksheet.write(row, 0, item['productId'])
worksheet.write(row, 1, item['courseId'])
worksheet.write(row, 2, item['productName'])
worksheet.write(row, 3, item['productType'])
worksheet.write(row, 4, item['provider'])
worksheet.write(row, 5, item['score'])
worksheet.write(row, 6, item['scoreLevel'])
worksheet.write(row, 7, item['learnerCount'])
worksheet.write(row, 8, item['lessonCount'])
worksheet.write(row, 9, item['lectorName'])
worksheet.write(row, 10, item['originaPrice'])
worksheet.write(row, 11, item['discountPrice'])
worksheet.write(row, 12, item['discountRate'])
worksheet.write(row, 13, item['imgUrl'])
worksheet.write(row, 14, item['bigImgUrl'])
worksheet.write(row, 15, item['description'])
def main(index):
content_json = get_json(index)
content = get_content(content_json)
save_excel(content, index)
if True:
print('开始执行')
workbook = xlsxwriter.Workbook("网易云python.xlsx")
worksheet = workbook.add_worksheet("first_sheet")
index = 5
main(index)
workbook.close()
print('结束运行') suchocolate 发表于 2020-11-28 19:33
你这个爬虫的目的是啥,不知道目的我只能从语法上猜,你试试这样:
目的是:爬取网易云课堂的关于python的课程信息,并写入slxs的表格中
suchocolate 发表于 2020-11-28 19:33
你这个爬虫的目的是啥,不知道目的我只能从语法上猜,你试试这样:
您好,我能加您的QQ或者微信吗,这玩意发图片不知道咋搞 suchocolate 发表于 2020-11-28 19:33
你这个爬虫的目的是啥,不知道目的我只能从语法上猜,你试试这样:
我试过您刚发的这一段代码了,直接加个index不行,会导致前面写的函数段用不了
本帖最后由 suchocolate 于 2020-11-28 20:36 编辑
麒麟永生 发表于 2020-11-28 19:42
我试过您刚发的这一段代码了,直接加个index不行,会导致前面写的函数段用不了
你的那个库我没有,我用openpyxl访问下载成功了。你的headers里,字典里有几个拼错的字母,导致拿不到数据,我在我这个代码里都改好了,你自己改改吧。import requests
from openpyxl import Workbook
def get_json():
url = 'https://study.163.com/p/search/studycourse.json'
data = {"pageIndex": 1, "pageSize": 50, "relativeOffset": 0, "keyword": "python", "searchTimeType": -1,
"orderType": 50, "priceType": -1, "activityId": 0, "qualityType": 0}
headers = {'Host': 'study.163.com',
'User-Agent': 'Mozilla',
'Accept': 'application/json',
'Content-Type': 'application/json',
'Origin': 'https://study.163.com'}
try:
r = requests.post(url, headers=headers, json=data)
# print(r.json()['result']['list'])
return r.json()['result']['list']
except Exception as e:
print(e)
def save_data(json_list):
wb = Workbook()
ws = wb.active
# print(json_list)
for n, item in enumerate(json_list):
r = n + 1
ws.cell(row=r, column=1, value=item['productId'])
ws.cell(row=r, column=2, value=item['courseId'])
ws.cell(row=r, column=3, value=item['productName'])
ws.cell(row=r, column=4, value=item['productType'])
ws.cell(row=r, column=5, value=item['provider'])
ws.cell(row=r, column=6, value=item['score'])
ws.cell(row=r, column=7, value=item['scoreLevel'])
ws.cell(row=r, column=8, value=item['learnerCount'])
ws.cell(row=r, column=9, value=item['lessonCount'])
ws.cell(row=r, column=10, value=item['lectorName'])
ws.cell(row=r, column=11, value=item['originalPrice'])
ws.cell(row=r, column=12, value=item['discountPrice'])
ws.cell(row=r, column=13, value=item['discountRate'])
ws.cell(row=r, column=14, value=item['imgUrl'])
ws.cell(row=r, column=15, value=item['bigImgUrl'])
ws.cell(row=r, column=16, value=item['description'])
wb.save('test.xlsx')
def main():
j_data = get_json()
save_data(j_data)
if __name__ == '__main__':
main()
页:
[1]