I wrote a script that rotates through IP proxies to bump the view count of a Baidu Zhidao question. In testing it fetches the page just fine, but the view count never increases. Why is that? The code is below:
import re
import urllib.request
import random

# Fetch the source of the proxy-list page
def get_html(url):
    res = urllib.request.Request(url)
    res.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')
    html = urllib.request.urlopen(res).read().decode('utf-8')
    return html

# Pull ip:port proxies out of the page with a regex
def get_ip(html):
    z = r'(?:(?:[01]?\d?\d|2[0-4]\d|25[0-5])\.){3}(?:[01]?\d?\d|2[0-4]\d|25[0-5]):\d{2,5}'  # ports can have up to 5 digits
    iplist = re.findall(z, html)
    return iplist

# Request the target page (the one whose views I want to bump) through a randomly chosen proxy
def to_url(url, iplist):
    proxy_support = urllib.request.ProxyHandler({'http': random.choice(iplist)})
    opener = urllib.request.build_opener(proxy_support)
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')]
    urllib.request.install_opener(opener)
    html = urllib.request.urlopen(url).read().decode('gbk')  # I'm used to writing it this way; convenient for testing
    #print(html)

if __name__ == '__main__':
    url = 'http://zhidao.baidu.com/question/1605454116924284547.html'
    iplist = get_ip(get_html('http://www.youdaili.net/Daili/guonei/3053.html'))
    x = 1
    for each in range(10):
        try:
            to_url(url, iplist)
            print('Succeeded %d time(s)' % x)
            x += 1
        except Exception:
            print('Request failed; will keep trying')
            continue
Could someone more experienced please take a look?
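One thing the script never verifies is whether a chosen proxy actually works: free proxies scraped from lists like that are often dead or transparent, in which case the request either fails or still arrives from your own IP, so the target site sees no new visitor. Below is a minimal sketch of such a check, assuming httpbin.org/ip as a public IP-echo service; the check_proxy helper is illustrative and not part of the original script:

import urllib.request

def check_proxy(proxy):
    # Route one request through the given proxy and print the IP the server
    # sees, to confirm the proxy is really being applied and is alive.
    # httpbin.org/ip is only an assumed public echo service for illustration.
    handler = urllib.request.ProxyHandler({'http': proxy})
    opener = urllib.request.build_opener(handler)
    opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
    try:
        # Use the opener directly instead of install_opener(), so the check
        # does not touch urllib.request's global state.
        with opener.open('http://httpbin.org/ip', timeout=10) as resp:
            print(proxy, '->', resp.read().decode('utf-8'))
            return True
    except Exception as e:
        print(proxy, 'failed:', e)
        return False

Filtering the list once before the main loop, e.g. iplist = [p for p in iplist if check_proxy(p)], would at least rule out dead or transparent proxies as the reason the view count stays flat.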