|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests as r
import re
import ssl
import time
ssl._create_default_https_context = ssl._create_unverified_context
header = {'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
response = r.get('https://www.vmgirls.com/13344.html', headers=header)
html = response.text
urls = re.findall('<a href=".*?" alt=".*?" title=".*?">', html)
print(urls)
for url in urls:
time.sleep(1)
file_name = url.split('/')[-1]
response = r.get(url, headers=header)
with open(file_name, 'wb') as f:
f.write(response.content)
f.close()
============= RESTART: /Users/sarahqian/Desktop/编程文件/Python/爬虫1.py =============
['<a href="https://static.vmgirls.com/image/2019/12/2019122210292813-scaled.jpeg" alt="少女情怀总是诗" title="少女情怀总是诗">', '<a href="https://static.vmgirls.com/image/2019/12/2019122210294290-scaled.jpeg" alt="少女情怀总是诗" title="少女情怀总是诗">', '<a href="https://static.vmgirls.com/image/2019/12/2019122210295639-scaled.jpeg" alt="少女情怀总是诗" title="少女情怀总是诗">', '<a href="https://static.vmgirls.com/image/2019/12/2019122210300913-scaled.jpeg" alt="少女情怀总是诗" title="少女情怀总是诗">', '<a href="https://static.vmgirls.com/image/2019/12/2019122210301954-scaled.jpeg" alt="少女情怀总是诗" title="少女情怀总是诗">']
Traceback (most recent call last):
File "/Users/sarahqian/Desktop/编程文件/Python/爬虫1.py", line 19, in <module>
response = r.get(url, headers=header)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/sessions.py", line 637, in send
adapter = self.get_adapter(url=request.url)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/sessions.py", line 728, in get_adapter
raise InvalidSchema("No connection adapters were found for {!r}".format(url))
requests.exceptions.InvalidSchema: No connection adapters were found for '<a href="https://static.vmgirls.com/image/2019/12/2019122210292813-scaled.jpeg" alt="少女情怀总是诗" title="少女情怀总是诗">' |
|