|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 a1296715528 于 2018-4-29 22:13 编辑
求大佬解释：
import urllib.request as u
import re
import chardet
import os
def openurl(url):
    """Fetch *url* and return the raw, undecoded response body.

    The request is sent with a desktop-browser ``User-Agent`` header.
    Any exception raised by ``urlopen`` propagates to the caller.

    Args:
        url: The URL to fetch.

    Returns:
        The response body as ``bytes``.
    """
    req = u.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299')
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, even if read() raises.
    with u.urlopen(req) as response:
        return response.read()
def trans(html):
    """Decode raw page bytes to text using the encoding guessed by chardet.

    Args:
        html: Undecoded page content (``bytes``).

    Returns:
        The decoded page as ``str``.
    """
    code = chardet.detect(html)['encoding']
    if code is None:
        # chardet could not guess an encoding; decode(None) would raise
        # TypeError, so fall back to UTF-8 instead.
        code = 'utf-8'
    elif code.upper() == 'GB2312':
        # The original compared against 'GB2313', a typo that never matched.
        # GBK is a superset of GB2312, so decoding as GBK also handles pages
        # that contain characters outside the GB2312 range.
        code = 'GBK'
    return html.decode(code)
# Disabled helper: the whole definition is wrapped in a bare string literal,
# so it is never defined or executed. If enabled, it would print each
# alt="..." attribute value (word characters only) found in the page, after
# the matches are passed through delrep.
'''def findname_dict(html):
aim = r'alt="(\w+)"'
aimlist = delrep(re.findall(aim, html))
for each in aimlist:
print(each)'''
def delrep(list1):
    """Remove duplicate items from *list1* in place and return it.

    For every value that occurs more than once, only its last occurrence is
    kept; the relative order of the surviving items is preserved. The
    original implementation only handled values occurring exactly twice and
    left values occurring three or more times untouched.

    Args:
        list1: The list to de-duplicate. It is modified in place.

    Returns:
        The same list object, now free of duplicates.
    """
    try:
        # dict keeps insertion order; walking backwards means the first key
        # seen for each value is its LAST occurrence in the original list.
        unique = list(dict.fromkeys(reversed(list1)))
    except TypeError:
        # Unhashable items (e.g. lists): fall back to equality comparison.
        unique = []
        for item in reversed(list1):
            if item not in unique:
                unique.append(item)
    unique.reverse()
    # Slice-assign so callers holding a reference to list1 see the result.
    list1[:] = unique
    return list1
def findimg(html):
    """Print the number of ``.jpg`` URLs found in ``src="..."`` attributes.

    The regex matches are passed through ``delrep`` before being counted.

    Args:
        html: The decoded page text to search.
    """
    jpg_src = re.compile(r' src="([^"]+\.jpg)"')
    found = jpg_src.findall(html)
    remaining = delrep(found)
    print(len(remaining))
    # Disabled download step, kept for reference (it was wrapped in a string
    # literal in the original, so it never ran):
    # for each in remaining:
    #     filename = each.split("/")[-1]
    #     try:
    #         a = u.urlretrieve(each, filename)
    #     except ValueError:
    #         pass
def main():
    """Fetch the imomoe.com front page, decode it, and print its .jpg count."""
    home_page = 'http://www.imomoe.com/'
    raw_bytes = openurl(home_page)
    page_text = trans(raw_bytes)
    # findname_dict(page_text)
    findimg(page_text)
# Script entry guard: the body runs only when this file is executed directly.
if __name__ == '__main__':
# The mkdir/chdir calls below are inside a string literal, so no output
# directory is created or entered; the main() call follows this string.
'''os.mkdir('ooxx')
os.chdir('ooxx')'''
main()
这段代码没有问题，但是加了一点东西之后就有问题了，请看图。
请问各位大佬，这是为什么？我要怎样才能爬到这些图？
|
-
这是我想爬的东西
-
上述代码运行无问题
-
在前面加了一个双引号,出问题了
-
匹配到的图片数量变成 0 了
|