import requests
from bs4 import BeautifulSoup
from PIL import Image as ims  # Pillow, aliased as "ims" to match the calls below

dizhi = []     # local filenames of the downloaded originals
zai_xian = []  # paths of the generated thumbnails


# Fetch a page
def huo_qu(page_url):
    # Add a request header so the site does not reject the script
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Mobile Safari/537.36 Edg/89.0.774.45'}
    res = requests.get(page_url, headers=headers)
    return res
# Find the image download links on the page and collect them
def xun_zhao(res):
    # Parse the HTML with BeautifulSoup
    soup = BeautifulSoup(res.text, features="lxml")
    img_addrs = []
    # Walk every <img> tag
    for link in soup.find_all('img'):
        # Keep each new src value, stopping once 16 links have been collected
        src = link.get('src')
        if src and src not in img_addrs:
            img_addrs.append(src)
            if len(img_addrs) >= 16:
                break
    return img_addrs
# Save the images, then generate 100x100 thumbnails
def bao_cun(img_addrs):
    for each in img_addrs:
        filename = each.split('/')[-1]
        r = requests.get(each)
        with open(filename, 'wb') as f:
            f.write(r.content)
        dizhi.append(filename)
        # Once the last image has been downloaded, build thumbnails for display
        if each == img_addrs[-1]:
            caiqu = 0
            for sd in dizhi[:]:
                im1 = ims.open(sd)
                im2 = im1.copy()
                im2.thumbnail((100, 100))
                baga = 'xianshi/' + str(caiqu) + '.png'  # the xianshi/ directory must already exist
                zai_xian.append(baga)
                im2.save(baga)
                caiqu += 1
                # Reset the counter so thumbnail names cycle through 0-15
                if caiqu == 16:
                    caiqu = 0
                if sd == dizhi[-1]:
                    caiqu = 0
# Main logic: build the page URL for the chosen category and download its avatars
def download_tx(temp, pages):
    fenglei = {'欧美头像': 'oumei/', '清新头像': 'xiaoqingxin/', '恐怖头像': 'kongbu/',
               '可爱头像': 'keai/', '小孩头像': 'xiaohai/', '卡通头像': 'katong/',
               '古风头像': 'gufeng/', '搞笑头像': 'gaoxiao/'}
    url = 'http://www.imeitou.com/' + fenglei[temp]
    # The first page of each category uses a different URL pattern from the later pages,
    # so only pages >= 2 get the index_<n>.html suffix
    if pages == 1:
        page_url = url
    else:
        page_url = url + 'index_' + str(pages) + '.html'  # build the full page URL
    res = huo_qu(page_url)
    img_addrs = xun_zhao(res)
    bao_cun(img_addrs)
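
# --- Usage sketch (not part of the original script). Assuming the functions above
# --- live in this module and the xianshi/ directory exists, a single call downloads
# --- one page of a category; the category name and page number below are examples.
if __name__ == '__main__':
    download_tx('欧美头像', 1)    # page 1 uses the bare category URL
    # download_tx('可爱头像', 2)  # later pages resolve to .../index_2.html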