|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import urllib.request
import os
import random
import time
def set_proxy():
    """Install a global urllib opener that uses a randomly chosen proxy.

    One ``ip:port`` entry is picked from the hard-coded pool and registered
    for the ``https`` scheme only; the resulting opener is installed
    process-wide, so every later ``urllib.request.urlopen`` call uses it.
    """
    proxy_pool = [
        '123.7.61.8:53281',
        '42.48.118.106:50038',
        '119.254.94.105:58999',
        '61.138.33.20:808',
    ]
    # Choose exactly once: calling random.choice() on the chosen string again
    # would yield a single character instead of a full "ip:port" address.
    proxy_support = urllib.request.ProxyHandler({'https': random.choice(proxy_pool)})
    opener = urllib.request.build_opener(proxy_support)
    urllib.request.install_opener(opener)
def url_open(url):
    """Fetch ``url`` with browser-like headers and return the response body.

    A proxy is (re)selected via ``set_proxy()`` before every request.

    Args:
        url: Address of the page or image to download.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on network or
            HTTP failure.
    """
    set_proxy()
    req = urllib.request.Request(url)
    req.add_header('user-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0')
    # Present the site itself as the referring page for the request.
    req.add_header('Referer', 'http://www.mmjpg.com/')
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req) as response:
        return response.read()
def get_address(html, mm_collect):
    """Append the album URLs found on an index page to ``mm_collect``.

    Only the part of the page between ``<div class="main">`` and the pager
    summary element is scanned. Inside it, every ``<span class="title">`` is
    expected to be followed directly by ``<a href="URL" target=...>``; the
    URL between the quotes is extracted.

    Args:
        html: Decoded page source (``str``).
        mm_collect: List that receives the extracted URLs (mutated in place).

    Returns:
        None. Nothing is appended when the main container is missing.
    """
    title_tag = '<span class="title">'
    # The marker ends with U+5171 (the "total" character of the pager text).
    # It is written as an escape so the literal survives re-encoding of this
    # file. NOTE(review): reconstructed from a garbled literal - confirm
    # against the live page markup.
    end_marker = '<em class="info">\u5171'

    start = html.find('<div class="main">')
    if start == -1:
        return
    # The end marker is searched 255 characters past the container start;
    # presumably this skips an earlier occurrence - TODO confirm.
    end = html.find(end_marker, start + 255)
    section = html[start:end] if end != -1 else html[start:]

    pos = section.find(title_tag)
    while pos != -1:
        target = section.find('target', pos)
        if target == -1:
            # No further link attributes, so no further URLs can be cut out.
            break
        # +29 skips '<span class="title"><a href="'; -2 drops the closing
        # quote and the space in front of 'target'.
        mm_collect.append(section[pos + 29:target - 2])
        pos = section.find(title_tag, target)
def get_page(html):
    """Return the number of pages of an album, read from its pager.

    The pager is taken as the text between the "previous post" link (or its
    "no more" placeholder) and the "all pictures" button. The last page
    number is the text of the ``<a>`` element that follows ``<i></i>``.

    Args:
        html: Decoded album page source (``str``).

    Returns:
        The page count as ``int`` (also printed to stdout).

    Raises:
        ValueError: If the pager markers are missing or the link text is not
            an integer.
    """
    # Non-ASCII markers are spelled as escapes so they survive re-encoding of
    # this file. NOTE(review): reconstructed from garbled literals - confirm
    # against the live page markup.
    no_previous = '\u6ca1\u6709\u4e86'        # "no more"
    previous_post = '\u4e0a\u4e00\u7bc7'      # "previous post"
    all_pictures = '\u5168\u90e8\u56fe\u7247'  # "all pictures"

    start = html.find(no_previous)
    if start == -1:
        start = html.find(previous_post)
    if start == -1:
        raise ValueError('pager start marker not found in page')
    end = html.find(all_pictures, start)
    pager = html[start:end] if end != -1 else html[start:]

    separator = pager.find('<i></i>')
    if separator == -1:
        raise ValueError('pager separator <i></i> not found in page')
    # +15 lands inside the opening tag of the link that follows '<i></i>'.
    link_start = separator + 15
    link_end = pager.find('</a>', link_start)
    if link_end == -1:
        raise ValueError('last-page link not found in pager')
    link = pager[link_start:link_end]
    # The link text is whatever follows the '>' that closes the opening tag.
    page_count = int(link[link.find('>') + 1:])
    print(page_count)
    return page_count
def open_mm(mm_collect,pic_address):#鎵撳紑瀛愮浉鍐岋紝鐢熸垚鍏蜂綋鍥剧墖鐨勫湴鍧 |
|