|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
这是代码:
- import urllib.parse
- import urllib.request
- import re
- from bs4 import BeautifulSoup
- def main():  # Prompt for a keyword, query Baidu Baike with it, and print one line per matching link.
- keyword = input ("请输入关键字:")
- keyword = urllib.parse.urlencode({"word":keyword})  # now the percent-encoded query string "word=<keyword>"
-
- response =\
- urllib.request.urlopen("http://baike.baidu.com/sarch/word? % s" % \
- keyword)  # NOTE(review): "% s" still formats like %s, so the URL keeps the space after "?"; the reported HTTP 400 is raised by this call
- html = response.read()
- soup = BeautifulSoup(html,"html.parser")
- for each in soup.find_all(href = re.compile("view")):  # every tag whose href matches the regex "view"
- content = ''.join([each.text])  # start the output line with the link text
- url2 = ''.join(["http://baike.baidu.com",each["href"]])  # site root + the link's href
- response2 = urllib.request.urlopen(url2)
- html2 = response2.read()
- soup2 = BeautifulSoup(html2,"html.parse")  # NOTE(review): differs from the "html.parser" used above - looks like a typo; confirm
- if soup2.h2:  # the linked page has an <h2>
- content = ''.join([content,soup2.h2.text])  # append that heading's text
- content = ''.join([content,"->",url2])  # finish the line with "->" and the page URL
- print(content)
- if __name__ == "__main__":  # run main() only when executed as a script
- main()
复制代码
错误信息:
Message=HTTP Error 400: Bad Request
Source=
StackTrace:
File "D:\Python\PythonApplication22\PythonApplication22\PythonApplication22.py", line 12, in main
keyword)
File "D:\Python\PythonApplication22\PythonApplication22\PythonApplication22.py", line 29, in <module>
main()
表示满脸懵
url的%s那里你加多余的空格干嘛。改成这样就行了:
def main():
    """Search Baidu Baike for a keyword and print each matching entry.

    Prompts for a keyword, fetches the search page, and for every link
    whose ``href`` contains "view" prints one line of the form
    ``<link text><sub-heading>-><entry URL>``. The sub-heading is the text
    of the entry page's first ``<h2>`` and is omitted when the page has none.

    Raises:
        urllib.error.URLError: if a page cannot be fetched (``HTTPError``,
            a subclass, for non-success status codes).
    """
    keyword = input("请输入关键字:")
    # urlencode already percent-escapes the keyword, so the query string is
    # appended verbatim: a space between "?" and the query was what produced
    # the HTTP 400 in the original attempt.
    query = urllib.parse.urlencode({"word": keyword})
    search_url = "http://baike.baidu.com/search/word?%s" % query
    # Close the connection as soon as the body has been read.
    with urllib.request.urlopen(search_url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    for each in soup.find_all(href=re.compile("view")):
        # urljoin copes with both site-relative and absolute hrefs.
        url2 = urllib.parse.urljoin("http://baike.baidu.com", each["href"])
        with urllib.request.urlopen(url2) as response2:
            html2 = response2.read()
        # The built-in parser is named "html.parser"; "html.parse" is not a
        # known parser and makes BeautifulSoup raise FeatureNotFound.
        soup2 = BeautifulSoup(html2, "html.parser")
        content = each.text
        if soup2.h2:
            content += soup2.h2.text
        print("".join([content, "->", url2]))


if __name__ == "__main__":
    main()
复制代码
|
|