|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
所需的 easygui 和 requests_html 模块直接用 pip install 安装即可,效果图已贴出。
下面就开始我们的学习(XXOO)吧,代码如下:
import os
import urllib.parse
import urllib.request
import urllib.robotparser

import easygui as g
from requests_html import HTMLSession
# Target site: the category listing page the crawler starts from.
url = "https://www.lnlnl.cn/meitu/"
# Characters that Windows does not permit in file or directory names.
_FORBIDDEN_NAME_CHARS = '|\\/*:"<>?'


def dir_name_process(name):
    """Return *name* with characters that are illegal in directory names removed.

    Args:
        name: Raw title text taken from the web page.

    Returns:
        The same text with every character from ``_FORBIDDEN_NAME_CHARS``
        dropped. The result may be empty if *name* held nothing else.
    """
    return "".join(ch for ch in name if ch not in _FORBIDDEN_NAME_CHARS)
def session_generate(url):
    """Fetch *url* through a fresh ``HTMLSession`` and return the response.

    Despite the name, the value returned is the response produced by
    ``session.get(url)`` (callers read its ``.html`` attribute), not the
    session object itself.
    """
    session = HTMLSession()
    return session.get(url)
def judge(url, link):
    """Return whether the site's robots.txt allows fetching *link*.

    Args:
        url: Any URL on the target site; only its scheme and host are used
            to locate ``robots.txt``.
        link: The URL that is about to be crawled.

    Returns:
        The result of ``RobotFileParser.can_fetch('*', link)``.
    """
    # robots.txt lives at the site root and is spelled "robots", so resolve
    # "/robots.txt" against the site URL instead of appending to its path.
    robots_url = urllib.parse.urljoin(url, '/robots.txt')
    parser = urllib.robotparser.RobotFileParser()
    parser.set_url(robots_url)
    parser.read()
    return parser.can_fetch('*', link)
def choose_item(url):
    """Let the user pick a category and return that category's link.

    Args:
        url: Page whose category anchors are offered in a choice dialog.

    Returns:
        The ``href`` attribute of the anchor whose text the user selected,
        or ``None`` when the page has no category anchors or the dialog is
        cancelled.
    """
    response = session_generate(url)
    anchors = response.html.find(
        "#main > div > div > div > div.title > span > a:nth-child(n)")
    if not anchors:
        return None

    picked = g.choicebox(msg="选择你喜欢的类型,嘿嘿",
                         choices=[anchor.text for anchor in anchors])
    if picked is None:
        # The dialog was cancelled.
        return None

    for anchor in anchors:
        if anchor.text == picked:
            return anchor.attrs['href']
    return None
def page_fetch(url, link):
    """Check robots.txt, then yield the URL of each listing page the user picks.

    This is a generator. It yields nothing when crawling is disallowed or
    when the user cancels / selects no page.

    Args:
        url: Site URL handed to ``judge`` for the robots.txt lookup.
        link: URL of the chosen category; it is offered as page 0.

    Yields:
        The URL of every selected page, in the order the dialog returns them.
    """
    if not judge(url, link):  # consult the robots protocol first
        g.msgbox(msg="哎呀,站长不让你获取图")
        return
    g.msgbox(msg="恭喜,站长:小子,可以取我的图", title='询问站长是否可以爬取')

    response = session_generate(link)
    pager_links = response.html.xpath('//*[@id="main"]/div/div/div/div/a/@href')
    # De-duplicate the pager links; the category page itself goes first.
    pages = [link] + sorted(set(pager_links))

    chosen = g.multchoicebox(
        msg="共有%d页的内容,请选择您想要的页数(从0页开始<( ̄3 ̄)>哼!)" % len(pages),
        choices=range(len(pages)))
    if not chosen:
        # None (cancelled) or an empty selection: nothing to download.
        return

    for index in chosen:
        index = int(index)
        print("正在下载%d页" % index)
        yield pages[index]
def page_download(link):
    """Yield ``(name, href)`` for each theme on a listing page the user selects.

    This is a generator. It yields nothing when the user cancels the dialog
    or selects no theme.

    Args:
        link: URL of the listing page to read themes from.

    Yields:
        Tuples of the theme title (the ``alt`` text of the anchor's image)
        and the first ``href`` found in that anchor.
    """
    response = session_generate(link)
    anchors = response.html.xpath('//*[@id="main"]/div/div/div/ul/li/div/a')
    # Read each title once; it is both the dialog entry and the match key.
    titles = [anchor.xpath("//img/@alt")[0] for anchor in anchors]

    chosen = g.multchoicebox(
        msg="共有%d个主题,选择一下吧ヾ(❀╹◡╹)ノ~" % len(anchors),
        choices=titles)
    if not chosen:
        return
    chosen = set(chosen)

    for title, anchor in zip(titles, anchors):
        if title in chosen:
            print("正在下载的主题:%s" % title)
            yield (title, anchor.xpath("//@href")[0])
# Browser-like User-Agent sent with every image request.
_USER_AGENT = ("Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
               "AppleWebKit/537.36 (KHTML, like Gecko) "
               "Chrome/77.0.3865.90 Mobile Safari/537.36")

# File names ending in one of these are kept as they are.
_IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")


def _picture_filename(picture_url):
    """Derive a local file name from the last path segment of *picture_url*."""
    # Use only the URL path so a query string never ends up in the file name.
    name = urllib.parse.urlsplit(picture_url).path.rsplit('/', 1)[-1]
    if not name.lower().endswith(_IMAGE_SUFFIXES):
        name += ".jpg"
    return name


def get_picture_links(link):
    """Download every picture of one theme page into the current directory.

    A failed picture is reported on stdout and skipped so the remaining
    pictures are still attempted.

    Args:
        link: URL of the theme page whose ``<img>`` sources are downloaded.
    """
    response = session_generate(link)
    picture_urls = response.html.xpath(
        '//*[@id="main"]/div/div[1]/div[1]/div[2]/p/img/@src')
    for picture_url in picture_urls:
        try:
            req = urllib.request.Request(picture_url)
            req.add_header('User-Agent', _USER_AGENT)
            with urllib.request.urlopen(req) as remote:
                data = remote.read()
            with open(_picture_filename(picture_url), 'wb') as out:
                out.write(data)
        except Exception:
            # Best effort: any failure costs one picture, not the whole run.
            # Unlike a bare except, Ctrl-C still interrupts the program.
            print("一张图下载失败(||๐_๐) 地址:", picture_url)
    print("当前主题下载完成!ヾ(❀╹◡╹)ノ~")
def main():
    """Run the interactive download: confirm, pick a folder, pick what to fetch."""
    choice = g.ccbox(msg="你是否已满18岁?", title="FBI Warning",
                     choices=["别多BB快点开车!", "我还差一丢丢"])
    if not choice:
        g.msgbox(msg="这不是去幼儿园的车欧,快点换车", title="温馨提示")
        return

    g.msgbox(msg="每个主题中图片都不少,别下的太多", title="上车前的安全提示")
    base_dir = g.diropenbox(msg="选择你想要保存XXOO的位置")
    if base_dir is None:
        # The folder dialog was cancelled, so there is nowhere to save to.
        return
    dir_path = os.path.join(os.path.abspath(base_dir), "XXOO")
    # exist_ok lets the program be run again on the same folder.
    os.makedirs(dir_path, exist_ok=True)
    os.chdir(dir_path)

    link = choose_item(url)
    if link is None:
        return

    for fetch_link in page_fetch(url, link):
        if fetch_link is None:
            continue
        for name, download_link in page_download(fetch_link):
            # Fall back to a fixed name when the title has no usable character.
            theme_dir = os.path.join(dir_path,
                                     dir_name_process(name) or "untitled")
            os.makedirs(theme_dir, exist_ok=True)
            # get_picture_links writes into the current working directory.
            os.chdir(theme_dir)
            get_picture_links(download_link)
    print("下载任务结束(* ̄3 ̄)╭♡❀小花花砸你")


if __name__ == "__main__":
    main()
复制代码
如果有疑问或改进的方法,欢迎跟帖,不胜感激(这次去除了一些话,应该能过审核吧)
|
|