ailulululu 发表于 2017-12-21 22:19:41

来个 大佬帮忙指导一下

不知道怎么 解决 这种 错误

==================================================================
import urllib.request
import os
def open_html(url):
    res = urllib.request.Request(url)
    res.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
    respones = urllib.request.urlopen(res)
    html = respones.read()

    return html

def get_num(url):
    res = urllib.request.Request(url)
    res.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
    respones = urllib.request.urlopen(res)
    html = respones.read().decode('utf-8')
    # print(html)
    a = html.find('index-') + 6
    b = html.find('\.',a)
    return html

def find_adds(url):
    res = urllib.request.Request(url)
    res.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
    respones = urllib.request.urlopen(res)
    html = respones.read().decode('utf-8')
    print(html)
    img_adds = []
    a = html.find('img src=')
    b = html.find('.jpg', a)
    if b != -1:
      img_adds.append(html)
    while a != -1:
      b = html.find('.jpg',a)
      print(b)
      if b != -1:
            img_adds.append(html)
      else:
            b = a + 9
      a = html.find('img src=',b)
    # print(img_adds)
    return img_adds
def save_img(mm,img_adds):
    for adds in img_adds:
      img_name = adds.split('/')[-1]
      with open(img_name, 'wb') as f:
            img = open_html(adds)
            f.write(img)
def mmmm(mm='tutu',pages=5):
    os.mkdir(mm)
    os.chdir(mm)

    url = 'https://www.4493.com/star/meizi/'
    num = int(get_num(url))
    for i in range(pages):
      num -= i
      get_url = url + 'a/more_' + str(num) + '.html'
      img_adds = find_adds(get_url)
      save_img(mm,img_adds,)


if __name__ == '__main__':
    mmmm()

ba21 发表于 2017-12-21 23:09:35

html = respones.read().decode('gb2312')

ailulululu 发表于 2017-12-21 23:21:53

ba21 发表于 2017-12-21 23:09
html = respones.read().decode('gb2312')

试试 先谢谢大佬
页: [1]
查看完整版本: 来个 大佬帮忙指导一下