|
发表于 2020-5-30 10:04:16
|
显示全部楼层
本楼为最佳答案
用 for 循环,把 50*page 作为 URL 中的 pn 参数,逐页访问
给你看下当时我爬的代码:
- from urllib.request import Request,urlopen
- from urllib.parse import urlencode
- from fake_useragent import UserAgent
def get_html(url):
    """Download *url* and return the raw response body as bytes.

    The request carries a ``User-Agent`` header taken from
    ``fake_useragent.UserAgent().chrome``.

    Args:
        url: Full URL to fetch.

    Returns:
        The undecoded response body (``bytes``).

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` on network or
            HTTP failures.
    """
    headers = {
        'User-Agent': UserAgent().chrome,
    }
    request = Request(url, headers=headers)
    # Use the response as a context manager so the underlying connection is
    # closed even if read() raises; the original never closed it.
    with urlopen(request) as response:
        return response.read()
def save_html(filename, html_bytes):
    """Write *html_bytes* to *filename* in binary mode, replacing any existing file.

    Args:
        filename: Path of the file to create or overwrite.
        html_bytes: Raw bytes to store.
    """
    out_file = open(filename, mode='wb')
    try:
        out_file.write(html_bytes)
    finally:
        # Always release the file handle, even if the write fails.
        out_file.close()
def main():
    """Interactively download forum listing pages and save them as HTML files.

    Prompts for a keyword and a page count, then for each page index builds
    the query string (``pn`` = index * 50, ``kw`` = keyword), fetches the
    page with ``get_html`` and stores it with ``save_html`` under the
    relative name ``第<N>页.html`` (N starting at 1).
    """
    content = input('请输入要下载的内容:')
    while True:
        try:
            num = int(input('请输入要下载多少页:'))
        except ValueError:
            # int() raises ValueError for non-numeric text; the original
            # caught TypeError, so bad input crashed instead of re-prompting.
            print('你输入的页数不是整数!请重新输入!')
        else:
            break
    base_url = 'https://tieba.baidu.com/f?ie=utf-8&{}'
    for pn in range(num):
        # pn is sent as page_index * 50 — presumably the offset of the first
        # thread on that page; confirm against the site's paging scheme.
        args = urlencode({
            'pn': pn * 50,
            'kw': content,
        })
        filename = '第' + str(pn + 1) + '页.html'
        print('正在下载' + filename)
        html_bytes = get_html(base_url.format(args))
        save_html(filename, html_bytes)
# Run the interactive downloader only when executed as a script, not on import.
if __name__ == "__main__":
    main()
复制代码 |
|