|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import urllib.request
# Image addresses collected so far: find_picture() appends to this list and
# down_picture() reads from it.
picsite = []
# Address of the first page to crawl.
url = "http://jandan.net/girl/"
# Number of pages to crawl, read interactively (the prompt asks for the page
# count). NOTE: this runs at module level, so it also prompts on import.
i = int(input("输入要爬行的页数:"))
def openurl(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    A browser-like ``User-Agent`` header is attached to the request.

    Args:
        url: Address to fetch; anything ``urllib.request.Request`` accepts.

    Returns:
        The complete response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36')
    # The context manager closes the connection even when read() or decode()
    # raises, so repeated calls do not leak sockets.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')
# Find the image addresses on a page
def find_picture(url):
    """Collect the image addresses found on the page at *url*.

    The page is fetched with ``openurl`` and scanned for every ``img src=``
    attribute whose value is wrapped in double quotes. Each value is resolved
    against *url* and appended to the module-level ``picsite`` list.

    Args:
        url: Address of the page to scan.

    Returns:
        None. Results are appended to ``picsite``.
    """
    from urllib.parse import urljoin

    marker = "img src="
    html = openurl(url)
    pos = html.find(marker)
    while pos != -1:
        # Skip the marker plus the opening quote that follows it.
        start = pos + len(marker) + 1
        end = html.find('"', start)
        if end == -1:
            # Unterminated attribute: nothing reliable left to extract.
            break
        src = html[start:end]
        if src:
            # urljoin turns protocol-relative sources ("//host/pic.jpg") into
            # full addresses and leaves already-absolute ones untouched.
            picsite.append(urljoin(url, src))
        pos = html.find(marker, end)
# Find the address of the next page
def openurl_next(url):
    """Return the address of the page linked as "Older Comments" on *url*.

    The page is fetched with ``openurl``. The address is taken to start 22
    characters after the beginning of the text ``Older Comments`` and to run
    up to the next double quote.

    Args:
        url: Address of the current page.

    Returns:
        The next page's address resolved against *url*, or ``None`` when the
        page has no "Older Comments" link or the link is not terminated.
    """
    from urllib.parse import urljoin

    marker = "Older Comments"
    # Distance from the start of the marker to the address: the 14-character
    # marker plus 8 characters of markup (presumably `" href="` - confirm
    # against the live page).
    offset = 22
    html = openurl(url)
    pos = html.find(marker)
    if pos == -1:
        # No older page is linked; do not build an address from random text.
        return None
    start = pos + offset
    end = html.find('"', start)
    if end == -1:
        return None
    return urljoin(url, html[start:end])
# Download the images
def down_picture():
    """Download every address in ``picsite`` into the current directory.

    Files are named after the address's position in ``picsite`` (``0.jpg``,
    ``1.jpg``, ...); an existing file with the same name is overwritten.

    Raises:
        urllib.error.URLError: If a download cannot be completed.
        OSError: If a file cannot be written.
    """
    for index, pic_url in enumerate(picsite):
        # Close each connection as soon as its body has been read, so that a
        # long list of images does not accumulate open sockets.
        with urllib.request.urlopen(pic_url) as response:
            data = response.read()
        with open(str(index) + '.jpg', "wb") as f:
            f.write(data)
# Run the crawl
def start(i):
    """Crawl *i* pages starting at ``url``, then download the images found.

    Each page is scanned with ``find_picture``; the following page is located
    with ``openurl_next``. After the last page, ``down_picture`` saves
    everything that was collected.

    Args:
        i: Number of pages to crawl. Zero or a negative number crawls no
            pages at all.
    """
    aurl = url
    pages_left = i
    # "> 0" instead of plain truthiness, so a negative count cannot loop
    # forever.
    while pages_left > 0:
        find_picture(aurl)
        pages_left -= 1
        if pages_left > 0:
            # Only look up the next page when it will actually be crawled;
            # this avoids one wasted request after the final page.
            aurl = openurl_next(aurl)
            if not aurl:
                # No further page address available: stop paging early.
                break
    down_picture()
if __name__ == "__main__":
    # Start crawling only when run as a script; `i` is the page count read
    # at module level.
    start(i)
|
|