|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
#把百度百科里 搜索 猪八戒,检测其是否有府标题,如有,将副标题也一并打印出来
import urllib.request
import urllib.parse
import re
from bs4 import BeautifulSoup
def main():
keyword='猪八戒'
keyword=urllib.parse.urlencode({"word":keyword})
response=\ # 请问,这句的格式为什么写成这样。\ 换行 \换行
urllib.request.urlopen("http://baike.baidu.com/search/word? %s"%\
keyword)
html=response.read()
soup=BeautifulSoup(html,"html.parser")
for each in soup.find_all(h=re.compile("view")):
content=''.join([each.text])
url2="".join(["http://baike.baidu.com",each["href"]])
response2= urllib.request.urlopen(rul2)
html2=response2.read()
soup2=BeautifulSoup(htm2,"html.parser")
if soup2.h2:
content=''.join([content,soup2.h2.text])
content=''.join([content,"_>",url2])
print(content)
main()
运行后,请问错在哪里?
RESTART: C:/Users/Administrator/AppData/Local/Programs/Python/Python36-32/my programe/beautifulsoup3.py
Traceback (most recent call last):
File "C:/Users/Administrator/AppData/Local/Programs/Python/Python36-32/my programe/beautifulsoup3.py", line 26, in <module>
main()
File "C:/Users/Administrator/AppData/Local/Programs/Python/Python36-32/my programe/beautifulsoup3.py", line 11, in main
keyword)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 400: Bad Request
>>>
|
|