|  | 
 
 发表于 2020-5-30 10:04:16
|
显示全部楼层
   本楼为最佳答案 
|   
 用for循环,把 50*page 作为 url 中的 pn 参数来逐页访问
 
 给你看下当时我爬的代码:
 
 复制代码
from urllib.request import Request,urlopen
from urllib.parse import urlencode
from fake_useragent import UserAgent
def get_html(url):
    """Download ``url`` and return the raw response body.

    The request carries a ``User-Agent`` header taken from
    ``UserAgent().chrome``. Errors raised while opening or reading the
    URL are not caught here and propagate to the caller.

    Args:
        url: Address to fetch.

    Returns:
        The undecoded response body as returned by ``response.read()``.
    """
    headers = {
        'User-Agent': UserAgent().chrome
    }
    request = Request(url, headers=headers)
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, rather than whenever the
    # response object happens to be garbage-collected.
    with urlopen(request) as response:
        return response.read()
def save_html(filename, html_bytes):
    """Write ``html_bytes`` to ``filename`` in binary mode.

    An existing file at ``filename`` is overwritten.

    Args:
        filename: Path of the file to create or overwrite.
        html_bytes: Bytes to store in the file.
    """
    with open(filename, mode='wb') as out_file:
        out_file.write(html_bytes)
def main():
    """Interactively download Tieba listing pages and save them as HTML.

    Prompts for a keyword and a page count, then for each page builds the
    ``https://tieba.baidu.com/f`` URL, fetches it with ``get_html`` and
    stores it with ``save_html`` under the relative name ``第N页.html``
    (N starting at 1).
    """
    content = input('请输入要下载的内容:')
    while True:
        try:
            num = int(input('请输入要下载多少页:'))
        except ValueError:
            # int() signals non-numeric text with ValueError (TypeError is
            # never raised for a str argument), so this is the exception
            # that has to be caught for the re-prompt loop to work.
            print('你输入的页数不是整数!请重新输入!')
        else:
            break
    base_url = 'https://tieba.baidu.com/f?ie=utf-8&{}'
    for pn in range(num):
        # The pn query parameter is sent as the zero-based page index * 50.
        query = urlencode({
            'pn': pn * 50,
            'kw': content,
        })
        filename = '第' + str(pn + 1) + '页.html'
        print('正在下载' + filename)
        html_bytes = get_html(base_url.format(query))
        save_html(filename, html_bytes)
# Run the interactive downloader only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
 | 
 |