|
发表于 2020-11-11 09:18:47
|
显示全部楼层
本楼为最佳答案
__name__ == '__main__' 在主程序里也是有作用的
在 Windows 操作系统中由于没有 fork,multiprocessing 在创建子进程时会重新 import 主模块;如果创建子进程的代码写在模块顶层,子进程 import 时会再次执行它,导致递归地创建进程。所以必须把创建子进程的部分写在 if __name__ == '__main__': 中。
- import urllib.request as request
- from multiprocessing import Pool
- import os, re
# Browser-like User-Agent string used for outgoing requests.
User_Agent = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) '
    'Gecko/20100101 Firefox/47.0'
)
# Header mapping passed to urllib's Request objects.
headers = {'User-Agent': User_Agent}
def get_page(url):
    """Fetch ``url`` and return its body decoded as UTF-8.

    The request is sent with the module-level ``headers`` mapping.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text on success. On any failure the original
        ``url`` is returned instead, so a callback chained after this
        function still receives a string identifying the page.
    """
    try:
        # Build the request inside the ``try`` so a malformed URL is reported
        # as a failed request instead of raising out of the worker.
        req = request.Request(url=url, headers=headers)
        # The context manager closes the HTTP response once it has been read.
        with request.urlopen(req) as response:
            res = response.read().decode('utf-8')
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        print(f'{os.getpid():<6d}请求失败')
        return url
    else:
        print(f'{os.getpid():<6d}请求成功')
        return res
-
def get_title(res):
    """Print the contents of the first ``<title>`` element found in ``res``.

    Args:
        res: Text to search. When used as the ``apply_async`` callback for
            ``get_page`` this is either the page text or, if the request
            failed, the URL itself.

    Returns:
        None. The outcome is printed, prefixed with the current process id.
    """
    # IGNORECASE accepts ``<TITLE>``; DOTALL lets the title span several
    # lines; ``[^>]*`` tolerates attributes on the opening tag.
    if title := re.search(
        r'<title\b[^>]*>(.*?)</title>', res, re.IGNORECASE | re.DOTALL
    ):
        # Strip the surrounding whitespace that multi-line titles carry.
        print(f'{os.getpid():<6d}解析网页标题为 {title.group(1).strip()}')
    else:
        print(f'{os.getpid():<6d}解析不到 {res} 的网页标题')
# Pool creation must stay under this guard: where worker processes are started
# by re-importing this module (e.g. on Windows, which has no fork), top-level
# pool creation would run again in every child.
if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.bilibili.com',
        'https://twitter.com',
        'https://cn.bing.com/',
        'https://mail.163.com/',
        'https://dict.hjenglish.com/',
        'https://github.com/',
    ]

    # The context manager terminates the workers if submitting a task raises.
    # Pool.__exit__ calls terminate(), so close()/join() must run inside the
    # block to let the queued downloads finish.
    with Pool(3) as p:
        for url in urls:
            # get_title is invoked with whatever get_page returns.
            p.apply_async(get_page, args=(url,), callback=get_title)

        p.close()
        p.join()
    print('完成')
复制代码 |
|