|
楼主 |
发表于 2017-5-25 17:54:24
|
显示全部楼层
- import urllib.request
- import urllib.parse
- import re
- from bs4 import BeautifulSoup
- def main():
- keyword = input('请输入关键词:')
- keyword = urllib.parse.urlencode({'word':keyword})
- response = \
- urllib.request.urlopen('http://baike.baidu.com/item/word? % s' % \
- keyword)
- html = response.read()
- soup = BeautifulSoup(html,'html.parser')
- for each in soup.find_all(href = re.compile('view')):
- content = ''.join([each.text])
- url2 = ''.join(['http://baike.baidu.com',each['href']])
- response2 = urllib.request.urlopen(url2)
- html2 = response2.read()
- soup2 = BeautifulSoup(html2,'html.parse')
- if soup2.h2:
- content = ''.join([content,soup2.h2.text])
- content = ''.join([content,'->',url2])
- print(content)
- if __name__ == '__main__':
- main()
复制代码
报错:
Traceback (most recent call last):
File "C:\Users\Python练习\BeautifulSoup.py", line 28, in <module>
main()
File "C:\Users\Python练习\BeautifulSoup.py", line 12, in main
keyword)
File "C:\Python32\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Python32\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Python32\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python32\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Python32\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Python32\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 400: Bad Request |
|