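# The `if __name__ == '__main__':` guard matters even in a script that is only ever run as the main program.
# Windows has no fork(), so multiprocessing starts each child process by importing the main module again. Process-creating code left at module level would be re-executed in every child and spawn processes recursively, so the part that creates child processes must be placed inside `if __name__ == '__main__':`.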
import urllib.request as request
from multiprocessing import Pool
import os, re
# User-Agent string sent with every request built from `headers`.
User_Agent = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) '
    'Gecko/20100101 Firefox/47.0'
)
# Header mapping handed to urllib's Request; it carries only the User-Agent.
headers = {'User-Agent': User_Agent}
def get_page(url, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    The request is sent with the module-level ``headers``. A one-line
    status message prefixed with the current process id is printed in
    either case.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on blocking network operations before
            the attempt is treated as failed.

    Returns:
        The decoded page text on success. On any failure (malformed URL,
        network error, timeout, body that is not valid UTF-8) the
        original *url* is returned instead, so the caller can still
        report which address failed.
    """
    try:
        # Building the Request can itself raise (e.g. unknown URL scheme),
        # so it is covered by the same failure path as the download.
        req = request.Request(url=url, headers=headers)
        # The context manager closes the response even if read/decode fails.
        with request.urlopen(req, timeout=timeout) as resp:
            res = resp.read().decode('utf-8')
    except Exception:
        # Deliberately broad: this is a best-effort fetch. Unlike a bare
        # ``except:`` it lets KeyboardInterrupt/SystemExit propagate.
        print(f'{os.getpid():<6d}请求失败')
        return url
    else:
        print(f'{os.getpid():<6d}请求成功')
        return res
# Captures the text between <title ...> and </title>. IGNORECASE accepts
# <TITLE>, and DOTALL lets the captured text span several lines.
_TITLE_RE = re.compile(
    r'<title\b[^>]*>(.*?)</title\s*>',
    re.IGNORECASE | re.DOTALL,
)


def get_title(res):
    """Print the ``<title>`` text found in *res*.

    Args:
        res: Text to search. When no title element is found, *res* itself
            is echoed in the "not found" message.

    Returns:
        None. The outcome is reported on stdout, prefixed with the id of
        the process that runs this function.
    """
    if match := _TITLE_RE.search(res):
        # Collapse newlines/indentation inside the element to single spaces
        # so the title is always reported on one line.
        title = ' '.join(match.group(1).split())
        print(f'{os.getpid():<6d}解析网页标题为 {title}')
    else:
        print(f'{os.getpid():<6d}解析不到 {res} 的网页标题')
if __name__ == '__main__':
    # Pool creation stays under this guard: on platforms that start workers
    # by importing this module, module-level pool creation would run again
    # in every child process.
    urls = [
        'https://www.baidu.com',
        'https://www.bilibili.com',
        'https://twitter.com',
        'https://cn.bing.com/',
        'https://mail.163.com/',
        'https://dict.hjenglish.com/',
        'https://github.com/',
    ]
    with Pool(3) as pool:
        for url in urls:
            # get_page runs in a worker process; its return value is passed
            # to get_title. error_callback surfaces any exception raised by
            # the task, which would otherwise be dropped because the
            # AsyncResult is never inspected. `url=url` binds the current
            # value to avoid the late-binding closure pitfall.
            pool.apply_async(
                get_page,
                args=(url,),
                callback=get_title,
                error_callback=lambda exc, url=url: print(f'{url} 任务异常: {exc!r}'),
            )
        # Stop accepting work and wait for all queued tasks before the
        # `with` block exits (Pool.__exit__ terminates the pool).
        pool.close()
        pool.join()
    print('完成')