麒麟永生 发表于 2020-11-28 19:14:35

异常:名称未定义

import requests
import xlsxwriter

def get_json(index):
    url = "https://study.163.com/p/search/studycourse.json"
    payload = {
      "activityId": 0,
      "keyword": "python",
      "orderType": 5,
      "pageSive": 50,
      "priceType": -1,
      "qualityType": 0,
      "searchTimeType": -1,
    }

    headers = {
      "accept": "application/json",
      "host": "study.163.com",
      "content-type": "application/jion",
      "origin": "hrrps://study.163.com",
      "user-afent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) Applewedkit/537.36 (KHTML, LIKE Gecko) Chrome/76.0.3809.132 Safari/537.36"
    }
    try:
      response = requests.post(url,json=payload,headers=headers)



开始执行
Traceback (most recent call last):
File "D:\桌面\爬虫1 网易云python课程.py", line 67, in <module>
    main(index)
NameError: name 'index' is not defined


我抄书上的代码,总是抛出这个错误说是未定义是咋回事
      contennt_json = response.json()
      if content_json and content_json['code'] == 0:
            return content_json
      return None
    except Exception as e:
      print('出错了')
      print(e)
      return None

def get_content(content_json):
    if "result" in content_json:
      return cintent_json['result']['list']
   
def save_excel(content,index):
    for num,item in enumerte(content):
      row = 50*index + (num+1)
      worksheet.write(row,0,item['productId'])
      worksheet.write(row,1,item['courseId'])
      worksheet.write(row,2,item['productName'])
      worksheet.write(row,3,item['productType'])
      worksheet.write(row,4,item['provider'])
      worksheet.write(row,5,item['score'])
      worksheet.write(row,6,item['scoreLevel'])
      worksheet.write(row,7,item['learnerCount'])
      worksheet.write(row,8,item['lessonCount'])
      worksheet.write(row,9,item['lectorName'])
      worksheet.write(row,10,item['originaPrice'])
      worksheet.write(row,11,item['discountPrice'])
      worksheet.write(row,12,item['discountRate'])
      worksheet.write(row,13,item['imgUrl'])
      worksheet.write(row,14,item['bigImgUrl'])
      worksheet.write(row,15,item['description'])

def main(index):
    content_json = get_json(index)
    content = get_content(content_json)
    save_excel(content,index)

if True:
    print('开始执行')
    workbook = xlsxwriter.Workbook("网易云python.xlsx")
    worksheet = workbook.add_worksheet("first_sheet")
    main(index)
    workbook.close()
    print('结束运行')

suchocolate 发表于 2020-11-28 19:18:38

index没赋值

麒麟永生 发表于 2020-11-28 19:24:00

suchocolate 发表于 2020-11-28 19:18
index没赋值

index 不是在def中定义的参数吗?

suchocolate 发表于 2020-11-28 19:25:26

麒麟永生 发表于 2020-11-28 19:24
index 不是在def中定义的参数吗?

函数里的是形式参数,调用的时候要传入实际参数。然而实参并没有定义。

麒麟永生 发表于 2020-11-28 19:27:15

suchocolate 发表于 2020-11-28 19:25
函数里的是形式参数,调用的时候要传入实际参数

那怎么直接调用,因为我感觉这里貌似不太需要参数?

suchocolate 发表于 2020-11-28 19:33:07

麒麟永生 发表于 2020-11-28 19:27
那怎么直接调用,因为我感觉这里貌似不太需要参数?

你这个爬虫的目的是啥,不知道目的我只能从语法上猜,你试试这样:# _*_ coding: utf-8 _*_
# Developer: suchocolate
# Date: 11/28/2020 19:28
# File name: ixrp.py
# Development tool: PyCharm
import requests
import xlsxwriter


def get_json(index):
    url = "https://study.163.com/p/search/studycourse.json"
    payload = {
      "activityId": 0,
      "keyword": "python",
      "orderType": 5,
      "pageSive": 50,
      "priceType": -1,
      "qualityType": 0,
      "searchTimeType": -1,
    }

    headers = {
      "accept": "application/json",
      "host": "study.163.com",
      "content-type": "application/jion",
      "origin": "hrrps://study.163.com",
      "user-afent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) Applewedkit/537.36 (KHTML, LIKE Gecko) Chrome/76.0.3809.132 Safari/537.36"
    }
    try:
      response = requests.post(url, json=payload, headers=headers)
      contennt_json = response.json()
      if contennt_json and contennt_json['code'] == 0:
            return contennt_json
      return None
    except Exception as e:
      print('出错了')
      print(e)
      raise None


def get_content(content_json):
    if "result" in content_json:
      return content_json['result']['list']


def save_excel(content, index):
    for num, item in enumerate(content):
      row = 50 * index + (num + 1)
      worksheet.write(row, 0, item['productId'])
      worksheet.write(row, 1, item['courseId'])
      worksheet.write(row, 2, item['productName'])
      worksheet.write(row, 3, item['productType'])
      worksheet.write(row, 4, item['provider'])
      worksheet.write(row, 5, item['score'])
      worksheet.write(row, 6, item['scoreLevel'])
      worksheet.write(row, 7, item['learnerCount'])
      worksheet.write(row, 8, item['lessonCount'])
      worksheet.write(row, 9, item['lectorName'])
      worksheet.write(row, 10, item['originaPrice'])
      worksheet.write(row, 11, item['discountPrice'])
      worksheet.write(row, 12, item['discountRate'])
      worksheet.write(row, 13, item['imgUrl'])
      worksheet.write(row, 14, item['bigImgUrl'])
      worksheet.write(row, 15, item['description'])


def main(index):
    content_json = get_json(index)
    content = get_content(content_json)
    save_excel(content, index)


if True:
    print('开始执行')
    workbook = xlsxwriter.Workbook("网易云python.xlsx")
    worksheet = workbook.add_worksheet("first_sheet")
    index = 5
    main(index)
    workbook.close()
    print('结束运行')

麒麟永生 发表于 2020-11-28 19:37:06

suchocolate 发表于 2020-11-28 19:33
你这个爬虫的目的是啥,不知道目的我只能从语法上猜,你试试这样:

目的是:爬取网易云课堂的关于python的课程信息,并写入slxs的表格中

麒麟永生 发表于 2020-11-28 19:41:21

suchocolate 发表于 2020-11-28 19:33
你这个爬虫的目的是啥,不知道目的我只能从语法上猜,你试试这样:

您好,我能加您的QQ或者微信吗,这玩意发图片不知道咋搞

麒麟永生 发表于 2020-11-28 19:42:26

suchocolate 发表于 2020-11-28 19:33
你这个爬虫的目的是啥,不知道目的我只能从语法上猜,你试试这样:

我试过您刚发的这一段代码了,直接加个index不行,会导致前面写的函数段用不了

suchocolate 发表于 2020-11-28 20:34:20

本帖最后由 suchocolate 于 2020-11-28 20:36 编辑

麒麟永生 发表于 2020-11-28 19:42
我试过您刚发的这一段代码了,直接加个index不行,会导致前面写的函数段用不了

你的那个库我没有,我用openpyxl访问下载成功了。你的headers里,字典里有几个拼错的字母,导致拿不到数据,我在我这个代码里都改好了,你自己改改吧。import requests
from openpyxl import Workbook


def get_json():
    url = 'https://study.163.com/p/search/studycourse.json'
    data = {"pageIndex": 1, "pageSize": 50, "relativeOffset": 0, "keyword": "python", "searchTimeType": -1,
               "orderType": 50, "priceType": -1, "activityId": 0, "qualityType": 0}
    headers = {'Host': 'study.163.com',
               'User-Agent': 'Mozilla',
                'Accept': 'application/json',
                'Content-Type': 'application/json',
                'Origin': 'https://study.163.com'}
    try:
      r = requests.post(url, headers=headers, json=data)
      # print(r.json()['result']['list'])
      return r.json()['result']['list']
    except Exception as e:
      print(e)


def save_data(json_list):
    wb = Workbook()
    ws = wb.active
    # print(json_list)
    for n, item in enumerate(json_list):
      r = n + 1
      ws.cell(row=r, column=1, value=item['productId'])
      ws.cell(row=r, column=2, value=item['courseId'])
      ws.cell(row=r, column=3, value=item['productName'])
      ws.cell(row=r, column=4, value=item['productType'])
      ws.cell(row=r, column=5, value=item['provider'])
      ws.cell(row=r, column=6, value=item['score'])
      ws.cell(row=r, column=7, value=item['scoreLevel'])
      ws.cell(row=r, column=8, value=item['learnerCount'])
      ws.cell(row=r, column=9, value=item['lessonCount'])
      ws.cell(row=r, column=10, value=item['lectorName'])
      ws.cell(row=r, column=11, value=item['originalPrice'])
      ws.cell(row=r, column=12, value=item['discountPrice'])
      ws.cell(row=r, column=13, value=item['discountRate'])
      ws.cell(row=r, column=14, value=item['imgUrl'])
      ws.cell(row=r, column=15, value=item['bigImgUrl'])
      ws.cell(row=r, column=16, value=item['description'])
    wb.save('test.xlsx')


def main():
    j_data = get_json()
    save_data(j_data)


if __name__ == '__main__':
    main()

页: [1]
查看完整版本: 异常:名称未定义