|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import urllib.request
- import urllib.parse
- import re
- from bs4 import BeautifulSoup
- def main():
- keyword = input("key words :")
- keyword = urllib.parse.urlencode({"word":keyword})
- response = \
- urllib.request.urlopen("http://baike.baidu.com/search/word? % s" % \
- keyword)
- html = response.read()
- soup = BeautifulSoup(html, "html.parser")
- for each in soup.find_all(href=re.compile("view")):
- content =''.join([each.text])
- url2 = ''.join(["http://baike.baidu.com", each["href"]])
- response2 = urllib.request.urlopen(url2)
- html2 = response2.read()
- soup2 = BeautifulSoup(html2, "html.parser")
- if soup2.h2:
- content = ''.join([content, soup2, h2.txt])
- content = ''.join([content, "->", url2])
- print(content)
- if __name__=="__main__":
- main()
复制代码
IDE报错
- key words :qq
- Traceback (most recent call last):
- File "C:\Users\Martin\Desktop\Python_Course\p14_92.py", line 26, in <module>
- main()
- File "C:\Users\Martin\Desktop\Python_Course\p14_92.py", line 11, in main
- keyword)
- File "D:\Program\Python\lib\urllib\request.py", line 223, in urlopen
- return opener.open(url, data, timeout)
- File "D:\Program\Python\lib\urllib\request.py", line 532, in open
- response = meth(req, response)
- File "D:\Program\Python\lib\urllib\request.py", line 642, in http_response
- 'http', request, response, code, msg, hdrs)
- File "D:\Program\Python\lib\urllib\request.py", line 570, in error
- return self._call_chain(*args)
- File "D:\Program\Python\lib\urllib\request.py", line 504, in _call_chain
- result = func(*args)
- File "D:\Program\Python\lib\urllib\request.py", line 650, in http_error_default
- raise HTTPError(req.full_url, code, msg, hdrs, fp)
- urllib.error.HTTPError: HTTP Error 400: Bad Request
- >>>
复制代码 |
|