飞雪霓裳 发表于 2018-11-28 22:06:09

刚下了个 Notepad，试试代码格式能行不？完了自动删帖。

import urllib.request
import os
import random
import time
def set_proxy():
    """Install a global urllib opener that uses a randomly chosen HTTPS proxy.

    One ``host:port`` entry is picked from a hard-coded list and registered
    for the ``https`` scheme through ``urllib.request.install_opener``, so it
    becomes the opener used by later ``urllib.request.urlopen`` calls.
    """
    proxy_pool = [
        '123.7.61.8:53281',
        '42.48.118.106:50038',
        '119.254.94.105:58999',
        '61.138.33.20:808',
    ]

    # Choose exactly once: calling random.choice() again on the selected
    # string would return a single character instead of a host:port pair.
    proxy_support = urllib.request.ProxyHandler({'https': random.choice(proxy_pool)})
    opener = urllib.request.build_opener(proxy_support)
    urllib.request.install_opener(opener)


def url_open(url):
    """Open *url* with browser-like headers and return the raw response body.

    A proxy is (re)selected via ``set_proxy()`` before every request. The
    request carries a desktop-browser User-Agent and uses the site's home
    page as Referer.

    Args:
        url: Address of the page or resource to download.

    Returns:
        The response body as ``bytes``; decoding is left to the caller.
    """
    set_proxy()
    req = urllib.request.Request(url)
    req.add_header('user-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0')
    # Present the site itself as the page we came from.
    req.add_header('Referer', 'http://www.mmjpg.com/')
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(req) as response:
        return response.read()

def get_address(html, mm_collect):
    """Collect the album links found on an index page into *mm_collect*.

    The search is limited to the region that starts at ``<div class="main">``
    and ends at the ``<em class="info">共`` marker. Inside it, every
    ``<span class="title">`` that is followed by a ``target`` attribute
    contributes the value of the ``href`` that sits between the two.

    NOTE(review): the posted code had lost its slice expressions
    (``html = html`` and ``mm_collect.append(html)``) and the closing quote of
    one string literal; the extraction below is a reconstruction and assumes
    markup shaped like ``<span class="title"><a href="URL" target=...>`` --
    confirm against the live page.

    Args:
        html: Page markup as ``str`` (already decoded).
        mm_collect: List that receives the extracted URLs; mutated in place.
    """
    title_tag = '<span class="title">'

    region_start = html.find('<div class="main">')
    if region_start == -1:
        # No listing container on this page, so there is nothing to collect.
        return
    region_end = html.find('<em class="info">共', region_start + 255)
    if region_end == -1:
        region_end = len(html)
    region = html[region_start:region_end]

    pos = region.find(title_tag)
    while pos != -1:
        target = region.find('target', pos)
        if target != -1:
            href = region.find('href="', pos, target)
            if href != -1:
                url_start = href + len('href="')
                url_end = region.find('"', url_start)
                if url_end != -1:
                    mm_collect.append(region[url_start:url_end])
            resume = target
        else:
            # Step past the current tag so the scan always advances.
            resume = pos + len(title_tag)
        pos = region.find(title_tag, resume)

def get_page(html):
    """Return the page count of an album, read from its pagination bar.

    The pagination bar is taken to be the text between the "previous" control
    (``没有了`` on the first page, otherwise ``上一篇``) and the ``全部图片``
    control. Within it, the link that follows ``<i></i>`` holds the last page
    number as its text.

    NOTE(review): the posted code had lost its slice expressions
    (``html = html`` / ``int(html)``) and the closing quotes of two string
    literals; the slicing here is a reconstruction -- confirm against the
    live page.

    Args:
        html: Album page markup as ``str`` (already decoded).

    Returns:
        The number of pages as ``int``. The value is also printed.

    Raises:
        ValueError: If the pagination markers are missing or the link text is
            not an integer.
    """
    start = html.find('没有了')
    if start == -1:
        start = html.find('上一篇')
    end = html.find('全部图片', start) if start != -1 else -1
    if start == -1 or end == -1:
        raise ValueError('pagination bar not found in page')
    pager = html[start:end]

    gap = pager.find('<i></i>')
    if gap == -1:
        raise ValueError('last-page link not found in pagination bar')
    link_start = gap + len('<i></i>')
    link_end = pager.find(r'</a>', link_start)
    if link_end == -1:
        raise ValueError('last-page link not found in pagination bar')

    # The slice looks like '<a href="...">50'; the count is the text after
    # the final '>'.
    count = int(pager[link_start:link_end].rpartition('>')[2])
    print(count)
    return count

def open_mm(mm_collect,pic_address):#鎵撳紑瀛愮浉鍐岋紝鐢熸垚鍏蜂綋鍥剧墖鐨勫湴鍧

飞雪霓裳 发表于 2018-11-28 22:07:00

发现好像删不了帖子了。
页: [1]
查看完整版本: 刚下了个 Notepad，试试代码格式能行不？完了自动删帖。