|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import urllib.request
import urllib.parse
import random
import time
def readPage(url):
    """Fetch ``url`` and return the response body decoded as UTF-8 text.

    A User-Agent header is picked at random from a small fixed pool on
    every call so that successive requests do not all present the same
    browser signature.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: The request failed or timed out (5 s limit).
        UnicodeDecodeError: The response body is not valid UTF-8.
    """
    # NOTE: the first and third entries are identical, so the Chrome
    # signature is chosen twice as often as the Opera one.
    header_list = [
        {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'},
        {'User-Agent': 'Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50'},
        {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'},
    ]
    headers = random.choice(header_list)
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if read/decode raises.
    with urllib.request.urlopen(req, timeout=5) as res:
        return res.read().decode('utf-8')
def writePage(filename, html):
    """Write ``html`` to ``filename`` as UTF-8 text and print a success notice.

    An existing file with the same name is overwritten.

    Args:
        filename: Path of the file to create or overwrite.
        html: Text content to store.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
    print('写入成功')
def workOn():
    """Interactively download a range of list pages from one Tieba forum.

    Prompts for the forum name and an inclusive start/end page number,
    then fetches each page with ``readPage`` and stores it via
    ``writePage`` in the current directory as ``第<page>页.html``.

    Raises:
        ValueError: A page number entered at the prompt is not an integer.
    """
    name = input('请输入贴吧名:')
    begin = int(input('请输入起始页'))
    end = int(input('请输入中止页'))
    baseurl = 'http://tieba.baidu.com/f?'
    for i in range(begin, end + 1):
        # Page i is requested with offset (i - 1) * 50, i.e. the code
        # assumes 50 entries per list page.
        pn = (i - 1) * 50
        # Build the URL: encode both parameters together so the result is
        # "kw=<name>&pn=<offset>" (the "=" after pn was previously missing).
        url = baseurl + urllib.parse.urlencode({'kw': name, 'pn': pn})
        html = readPage(url)
        filename = '第' + str(i) + '页.html'
        writePage(filename, html)
# Run the interactive downloader only when executed as a script, not on import.
if __name__ == '__main__':
    workOn()
|
|