import re
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
def _build_search_url(word):
    """Return the Baidu Baike search URL for the keyword *word*."""
    # urlencode() already returns a percent-encoded ``str``. Encoding it to
    # bytes would make ``%s`` interpolate the bytes repr ("b'word=...'") and
    # corrupt the query string.
    query = urllib.parse.urlencode({'word': word})
    return 'http://baike.baidu.com/search/word?%s' % query


def _build_entry_url(href):
    """Return an absolute, ASCII-only URL for the link target *href*."""
    # urljoin() copes with root-relative ("/view/1.htm"), scheme-relative and
    # already-absolute hrefs without producing a doubled slash.
    absolute = urllib.parse.urljoin('https://baike.baidu.com/', href)
    # Escape only what is not legal in a URL (e.g. non-ASCII characters).
    # Reserved delimiters and '%' stay untouched so that query strings and
    # existing percent-escapes are not mangled or double-encoded.
    return urllib.parse.quote(absolute, safe=":/?#[]@!$&'()*+,;=%")


def main():
    """Prompt for a keyword and list the matching Baidu Baike entries.

    Reads the keyword from standard input, fetches the search result page,
    and for every link whose ``href`` contains ``view`` fetches the linked
    page and prints one line: the link text, followed by the text of the
    linked page's first ``<h2>`` when there is one, then ``->`` and the
    URL of the linked page.

    Raises:
        urllib.error.URLError: if any of the HTTP requests fails.
    """
    word = input('请输入搜索关键字: ')

    # Close the connection as soon as the body has been read.
    with urllib.request.urlopen(_build_search_url(word)) as response:
        html = response.read()
    # Raw bytes are handed to BeautifulSoup for both pages, so the parser
    # handles character decoding the same way everywhere.
    soup = BeautifulSoup(html, 'html.parser')

    for each in soup.find_all(href=re.compile('view')):
        content = each.text
        url2 = _build_entry_url(each['href'])
        with urllib.request.urlopen(url2) as response2:
            html2 = response2.read()
        soup2 = BeautifulSoup(html2, 'html.parser')
        if soup2.h2:
            # Append the first <h2> text of the linked page, if present.
            content = ''.join([content, soup2.h2.text])
        print('%s -> %s' % (content, url2))
# Script entry point: start the interactive search only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|