|
发表于 2018-10-26 20:35:13
|
显示全部楼层
我下载完啦。用了最笨的方法，对其它链接没有通用性，也就是只能下载这本漫画，如果要下载其它的需要改。
- import urllib.request
- from bs4 import BeautifulSoup as bs
- import re
- import os
- from urllib import parse
- from urllib.request import quote
- import io
- from PIL import Image
def urlopen(url):
    """Fetch *url* and return the raw response body as bytes.

    A desktop-browser User-Agent header is attached because the target
    site rejects the default urllib User-Agent.
    """
    req = urllib.request.Request(url)
    req.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    )
    # Use a context manager so the response/socket is closed promptly;
    # the original never closed it (resource leak).
    with urllib.request.urlopen(req) as resp:
        return resp.read()
def url_list(url):
    """Return the absolute URL of every chapter linked from the index page."""
    soup = bs(urlopen(url).decode('utf-8'), 'lxml')
    # First <div> of the page, then every tag of class "mhlistbody"
    # inside it — calling a tag is BeautifulSoup shorthand for find_all().
    listbody = soup.div(class_="mhlistbody")[0]
    # Each <a> holds a site-relative chapter path; prefix the host.
    return ['http://www.manhuatai.com' + anchor.attrs['href']
            for anchor in listbody.find_all('a')]
def content(url):
    """Download every chapter of the comic indexed at *url*.

    Creates ./苍穹/<chapter-title>/ and saves each page there as
    <chapter-title><page-no>.jpg.  The regexes below are hard-coded to
    manhuatai.com's page source, so this only works for that site.

    Side effects: changes the process working directory (ends inside
    the 苍穹 directory, matching the original behaviour). Returns None.
    """
    # makedirs(..., exist_ok=True): a re-run after a partial download
    # no longer crashes on the pre-existing directory (os.mkdir did).
    os.makedirs('苍穹', exist_ok=True)
    os.chdir('苍穹')
    chapters = url_list(url)
    print('一共有:' + str(len(chapters)) + '个链接')

    for chapter_url in chapters:
        html = urlopen(chapter_url).decode('utf-8')

        # Chapter number, e.g. pagename:"第123话" -> ('pagename:"第', '123', '话').
        num_match = re.findall(r'(pagename:"第)(\d*)(话)', html)
        if len(num_match) == 0:
            # Not a regular numbered chapter page — skip it.
            continue

        # Chapter title from the page <h1>; doubles as the directory name.
        h1 = bs(html, 'lxml').h1.string
        os.makedirs(h1, exist_ok=True)
        os.chdir(h1)

        # Image URLs start with the upper-cased character found right
        # after mhid:" (first pinyin letter of the comic id).
        capital = re.search(r'mhid:".', html).group()[-1].capitalize()

        # Middle path segment of the image URL.
        name = re.search(r'(mhname:")(.*?)(")', html).group(2) + '拆分版'

        nmu = num_match[0][1] + '话'

        # Number of page images in this chapter.
        page = int(re.search(r'(totalimg:)(\d*)(,)', html).group(2))

        # Chapters whose image path carries an extra version suffix
        # on this site (discovered empirically by the author).
        list3 = [583]
        list2 = [625, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685,
                 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696]
        ce = int(nmu[:-1])
        if ce in list2:
            nmu = str(ce) + '话v'
        if ce in list3:
            nmu = str(ce) + '话v1'

        # Build the full, percent-encoded URL of every page image.
        cont_list = []
        for page_no in range(1, page + 1):
            path = (capital + '/' + name + '/' + nmu + '/'
                    + str(page_no) + '.jpg-mht.middle.webp')
            cont_list.append('https://mhpic.jumanhua.com/comic/' + parse.quote(path))

        # enumerate replaces cont_list.index(i), which was O(n^2) per
        # chapter and wrong whenever two URLs were identical.
        for idx, img_url in enumerate(cont_list, start=1):
            fi_name = h1 + str(idx) + '.jpg'
            print(fi_name)
            img = Image.open(io.BytesIO(urlopen(img_url)))
            img.save(fi_name, 'JPEG')
        os.chdir(os.pardir)
-
- url = "http://www.manhuatai.com/doupocangqiong/"
- list1 = content(url)
复制代码 |
|