|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
这里使用xpath爬取妹子图(https://www.mzitu.com/mm/page/2/)
附上代码
import os

import requests
from lxml import etree
def get_url(page):
    """Build the address of one listing page.

    Args:
        page: Page number; it is converted with ``str()`` and placed in
            the URL path.

    Returns:
        The string ``https://www.mzitu.com/mm/page/<page>/``.
    """
    return 'https://www.mzitu.com/mm/page/' + str(page) + '/'
def url_open(url):
    """Send a GET request for ``url`` and return the response.

    The request carries a fixed ``User-Agent`` and a ``Referer`` pointing at
    the site's listing page.
    NOTE(review): presumably the site rejects requests without these
    headers -- confirm against the live site.

    Args:
        url: Address to fetch (a listing page or an image).

    Returns:
        The ``requests.Response`` object; the HTTP status is not checked
        here, so callers must inspect it themselves.

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7',
        'Referer': 'https://www.mzitu.com/mm/',
    }
    # Without a timeout a single stalled connection blocks the crawl forever.
    res = requests.get(url, headers=headers, timeout=30)
    return res
def find_img(res):
    """Extract image addresses from a listing page using XPath.

    Args:
        res: Response object whose ``text`` attribute holds the page HTML.

    Returns:
        A list with the ``data-original`` attribute of every ``<img>``
        found under ``<ul id="pins">``, in document order.
    """
    html = etree.HTML(res.text)
    result = html.xpath('//ul[@id="pins"]//li//a//img/@data-original')
    return result
def find_name(res):
    """Extract image names from a listing page using XPath.

    Args:
        res: Response object whose ``text`` attribute holds the page HTML.

    Returns:
        A list with the ``alt`` attribute of every ``<img>`` found under
        ``<ul id="pins">``, in document order.
    """
    html = etree.HTML(res.text)
    result = html.xpath('//ul[@id="pins"]//li//a//img/@alt')
    return result
def main(pages):
    """Download the images of listing pages ``1 .. pages - 1``.

    Creates the folder ``妹子`` in the current working directory (if it does
    not exist yet), changes into it, and saves every image found on each
    listing page as ``<alt text>.jpg``.

    Args:
        pages: Exclusive upper bound of the page range; pass ``n + 1`` to
            crawl ``n`` pages.
    """
    # exist_ok lets the script be re-run without failing on the existing folder.
    os.makedirs('妹子', exist_ok=True)
    os.chdir('妹子')
    for page in range(1, pages):
        res = url_open(get_url(page))
        # zip pairs each address with its name and stops at the shorter list,
        # so a missing alt attribute cannot cause an IndexError.
        for img_url, name in zip(find_img(res), find_name(res)):
            img = url_open(img_url)
            if img.status_code != 200:
                # Do not store an error page under a .jpg name.
                print('跳过(HTTP %d): %s' % (img.status_code, img_url))
                continue
            # Alt text is free-form; replace characters that are path
            # separators or not allowed in file names.
            safe_name = ''.join(
                '_' if ch in '\\/:*?"<>|' else ch for ch in name
            )
            with open(safe_name + '.jpg', 'wb') as f:
                f.write(img.content)
-
if __name__ == '__main__':
    # main() treats its argument as an exclusive upper bound, so add one to
    # crawl exactly the number of pages the user asked for.
    pages = int(input('请输入要爬取的页数:')) + 1
    main(pages)
复制代码 |
-
|