I've finished downloading it, using the most brute-force method. It isn't generic at all: it can only download this one comic, and the code would need changes to download anything else (there's a short note after the code on what would need changing).
import urllib.request
from bs4 import BeautifulSoup as bs
import re
import os
from urllib import parse
import io
from PIL import Image
def urlopen(url):
    req = urllib.request.Request(url)
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36")
    html = urllib.request.urlopen(req)
    html = html.read()
    return html
# urlopen: open the link with a browser User-Agent header and return the raw bytes
def url_list(url):
    html = urlopen(url)
    htm = html.decode('utf-8')
    htm = bs(htm, 'lxml')
    cont = htm.find_all('div', class_="mhlistbody")
    cont = cont[0]
    cont = cont.find_all('a')
    urllist = []
    for i in cont:
        # each <a> holds a relative chapter link; turn it into an absolute URL
        i = i.attrs
        i = i['href']
        i = 'http://www.manhuatai.com' + i
        urllist.append(i)
    return urllist
def content(url):
    os.mkdir('苍穹')
    os.chdir('苍穹')
    urllist = url_list(url)
    print('There are ' + str(len(urllist)) + ' chapter links in total')
    for i in urllist:
        html = urlopen(i)
        html = html.decode('utf-8')
        # find the chapter number, e.g. pagename:"第123话"
        nmu = re.findall(r'(pagename:"第)(\d*)(话)', html)
        if len(nmu) == 0:
            continue
        # the chapter title, used as the folder name
        htm = bs(html, 'lxml')
        h1 = htm.h1.string
        os.mkdir(h1)
        os.chdir(h1)
        # the capital letter in the image URL: it's the capitalized first pinyin
        # letter of the comic's name, taken from the mhid field
        capital = re.search(r'mhid:".', html)
        capital = capital.group()
        capital = capital[-1]
        capital = capital.capitalize()
        # the comic name that sits in the middle of the image URL
        name = re.search(r'(mhname:")(.*?)(")', html)
        name = name.group(2)
        name = name + '拆分版'
        nmu = nmu[0]
        nmu = nmu[1]
        nmu = nmu + '话'
        # how many images this chapter has
        page = re.search(r'(totalimg:)(\d*)(,)', html)
        page = int(page.group(2))
        cont_list = []
        # these chapter numbers use a slightly different path on the image server
        list3 = [583]
        list2 = [625, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696]
        ce = int(nmu[:-1])
        if ce in list2:
            nmu = str(ce) + '话v'
        if ce in list3:
            nmu = str(ce) + '话v1'
        # build the URL of every image in this chapter
        for i in range(1, page + 1):
            img_url = capital + '/' + name + '/' + nmu + '/' + str(i) + '.jpg-mht.middle.webp'
            img_url = 'https://mhpic.jumanhua.com/comic/' + parse.quote(img_url)
            cont_list.append(img_url)
        # download each image and re-save the webp as a JPEG
        for i in cont_list:
            fi_name = h1 + str(cont_list.index(i) + 1) + '.jpg'
            print(fi_name)
            cont = urlopen(i)
            img = Image.open(io.BytesIO(cont))
            img.save(fi_name, 'JPEG')
        os.chdir(os.pardir)
url = "http://www.manhuatai.com/doupocangqiong/"
list1 = content(url)
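For anyone who wants to adapt it: the chapter-list scraping in url_list() is the most reusable part, while content() hard-codes the '苍穹' output folder and the special-case chapter lists (list2/list3), which are specific to this comic. A quick way to check whether another series would even get past the first step is something like the snippet below. The series path in it is just a made-up placeholder, and I haven't verified that other manhuatai.com pages expose the same mhid/mhname/totalimg fields.

# Rough sanity check only: reuses url_list() defined above.
# 'yaoshenji' is a hypothetical placeholder path -- replace it with a real
# series page from manhuatai.com before running.
test_url = 'http://www.manhuatai.com/yaoshenji/'
chapters = url_list(test_url)
print('Found ' + str(len(chapters)) + ' chapter links')
print(chapters[:3])

If that prints sensible chapter URLs, the remaining changes would be the output folder name in content() and, most likely, the special-case chapter lists.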