|
|
马上注册,结交更多好友,享用更多功能^_^
您需要登录才可以下载或查看,没有账号?立即注册
x
# -*- coding:utf-8 -*-
import urllib.request
import urllib.parse
def tieba_spidr(url, begin_page, end_page):
    """Download the page at ``url`` and save it to a file named 'douban'.

    Args:
        url: Address of the page to fetch.
        begin_page: Accepted but not used by this function.
        end_page: Accepted but not used by this function.
    """
    # Only the single URL passed in is fetched; no paging happens here.
    page_bytes = load_page(url, 'douban')
    write_page(page_bytes, 'douban')
def load_page(url, filename):
    """Fetch ``url`` and return the raw response body.

    The request is sent with a browser-style ``User-Agent`` header instead
    of urllib's default one (presumably because the target site rejects the
    default agent -- confirm against the site's behaviour).

    Args:
        url: Address to request.
        filename: Not used; kept so existing callers keep working.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails (``HTTPError`` is a subclass).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
    }
    request = urllib.request.Request(url, headers=headers)
    # Close the response deterministically instead of leaking the
    # underlying connection until garbage collection.
    with urllib.request.urlopen(request) as response:
        return response.read()
def write_page(html, filename):
    """Decode ``html`` as UTF-8 and write the text to ``filename``.

    Args:
        html: Encoded page content; must provide ``decode`` (e.g. ``bytes``).
        filename: Path of the file to create or overwrite.

    Raises:
        UnicodeDecodeError: If ``html`` is not valid UTF-8. The target file
            is left untouched in that case.
    """
    # Decode before opening: mode 'w' truncates the file immediately, so a
    # decode failure after the open would wipe any existing content.
    text = html.decode('utf-8')
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(text)
if __name__ == '__main__':
    # Script entry point: build the review-list URL with a ``start`` query
    # parameter, echo it, then download and save that page.
    begin_page, end_page = 40, 80
    query = urllib.parse.urlencode({"start": 40})
    url = 'https://movie.douban.com/subject/27060077/reviews?' + query
    print(url)
    tieba_spidr(url, begin_page, end_page)
|
-
这是运行后的结果截图
|