|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
报错:UnicodeEncodeError: 'ascii' codec can't encode characters in position 334-339: ordinal not in range(128)
- import urllib.request
- import os
- import re
- def url_open(url):
- head={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763'}
- req=urllib.request.Request(url,headers=head)
- response=urllib.request.urlopen(req)
- html=response.read()
- return html
- def url_each(url):
- html=url_open(url).decode('utf-8')
- each_addrs=re.findall(r'href="(/.*/.*/\d{4}\.html)',html) #href="/meinv/qiaotun/1030.html"
- return each_addrs
-
- def save_imgs(img_addrs):
- for each in img_addrs:
- html=url_open(each)
- with open(each[59:66]+'.jpg','wb') as f:
- f.write(html)
- def findandsave_imgs(urlofeach=[]):
- img_addrs=[]
- for each in urlofeach:
- for i in range(1,12):
- if i==1:
- each='http://nvshen2.92demo.com'+each
- else:
- each=('nvshen2.92demo.com'+each)[:-8]+'_'+str(i)+'.html' #http://nvshen2.92demo.com/meinv/qingchun/1042_5.html
- html=url_open(each).decode('utf-8')
- img_addrs=re.findall(r' src="(http://img-2.92kaifa\.com.*\.jpg)"',html) #src="http://img-2.92kaifa.com/nvshen/d/file/bigpic/2016/10/19/ccjec3k5ofd.jpg">
- save_imgs(img_addrs)
-
- folder='ooxx'
- pages=int(input('请输入页数:'))
- os.mkdir(folder)
- os.chdir(folder)
- url='http://nvshen2.92demo.com/index.html'
- for i in range(1,pages+1):
- print('第%d页' % i)
- if i!=1:
- page_url='http://nvshen2.92demo.com/index_'+str(i)+'.html'
- else:
- page_url=url
- urlofeach=url_each(page_url)
- findandsave_imgs(urlofeach)
复制代码
不知道该怎么改,但是可参考链接:https://blog.csdn.net/songyu0120/article/details/88680562
- sys.stdout.encoding
- 'cp936'
复制代码
|
|