|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 mashiro666 于 2020-3-24 01:44 编辑
import urllib.request
import os
head = {}
head['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
def get_page(url):
req = urllib.request.Request(url,headers=head)
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')
a = html.find('page-numbers current') + 21
b = html.find('<', a)
return html[a:b]
def find_imgs(url):
req = urllib.request.Request(url, headers=head)
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')
img_addrs = []
a = html.find('src=')
while a != -1:
b = html.find('.jpg', a, a+255)
if b != -1:
img_addrs.append(html[a + 5:b + 4]) #[]左闭右开
else:
b = a + 5
a = html.find('src=', b)
return img_addrs
def save_imgs(folder,img_address):
for each in img_address:
filename = each.split('/')[-1] #用/分割,拿出最后一个
with open(filename, 'wb') as f:
req = urllib.request.Request(each,headers=head)
response = urllib.request.urlopen(req)
img = response.read()
f.write(img)
#文件名mm,爬5页
def download_mzitu(folder='mm',pages=5):
#修改文件地址
#创建文件夹
os.mkdir(folder)
os.chdir(folder)
#网站地址
url = 'https://www.mzitu.com/mm/'
#获得页面的地址
page_num = int(get_page(url))
for i in range(pages):
page_num += 1
page_url = url + 'page/' + str(page_num) + '/'
img_address = find_imgs(page_url)
save_imgs(folder, img_address)
if __name__ == '__main__':
print('test')
download_mzitu(folder='mm',pages=5)
Traceback (most recent call last):
File "E:/python__pycharm/056meizitu.py", line 67, in <module>
download_mzitu(folder='mm',pages=5)
File "E:/python__pycharm/056meizitu.py", line 56, in download_mzitu
page_num = int(get_page(url))
ValueError: invalid literal for int() with base 10: '>1'
我仿照小甲鱼写的代码,但是看不懂我哪里错了,可以帮忙看看哪里错了吗
|
|