|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
一段抓取知乎妹子的爬虫代码不能正常运行,总是出现问题说缩进有问题,但是我找不出来在哪里,请大神帮忙找找并修改!谢谢!
问题:运行时总显示 unexpected indent(缩进错误)
源码如下:
#encoding:utf-8
import urllib.request
import urllib
import re
import os
from bs4 import BeautifulSoup
# Target Zhihu question page to scrape images from.
url='https://www.zhihu.com/question/40753170'
# NOTE(review): network I/O happens at import time; Zhihu may also reject
# requests without a User-Agent header — confirm this still returns 200.
urlop=urllib.request.urlopen(url)
# Raw HTML of the page, decoded as UTF-8; reused by the functions below.
data=urlop.read().decode('utf-8')
# Module-level soup object; gettitle() reads it instead of re-fetching.
bs=BeautifulSoup(data)
def gettitle(url):
    """Return the question title text from the page.

    Note: *url* is unused; the function reads the module-level ``bs``
    BeautifulSoup object built at import time (kept for interface
    compatibility with existing callers).
    """
    # The question title lives in a span with this editable-content class.
    title = bs.find('span', {"class": "zm-editable-content"})
    return title.get_text()
def getpicurl(url):
    """Return a compiled regex whose group 1 captures https image URLs
    from ``img ... src="..."`` fragments in raw HTML.

    Note: *url* is unused; kept for interface compatibility.
    """
    # Raw string avoids backslash-escaping the quotes; non-greedy so each
    # match stops at the closing quote of its own src attribute.
    return re.compile(r'img.+?src="(https.+?)"')
def downpics():
    """Download every unique image from the question page into
    ``f:/知乎/<question title>/``, naming files 1.<ext>, 2.<ext>, ...

    Relies on module globals: ``url``, ``data``, ``gettitle``, ``getpicurl``.
    Failed downloads are skipped (best-effort).
    """
    title = gettitle(url)
    print(title)
    dirpath = 'f:/知乎/' + title + "/"
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)  # create a folder named after the question title
    pics = getpicurl(url)
    a = 1  # running index used as the saved file name
    urls = []
    for x in pics.findall(data):  # drop duplicate links, preserving order
        if x not in urls:
            urls.append(x)
    for x in urls:
        try:
            imgdata = urllib.request.urlopen(x).read()
            b = x.rfind(".")  # keep the original file extension
            imgpath = str(dirpath) + str(a) + x[b:]
            print(imgpath)
            print(x)
            a += 1
            # Context manager guarantees the file is closed even on error.
            with open(imgpath, 'wb') as f:
                f.write(imgdata)
        except Exception:
            # Best-effort: skip images that fail to download or save.
            # (Narrowed from a bare except so Ctrl-C still works.)
            continue
downpics();
本帖最后由 无符号整形 于 2016-9-17 11:33 编辑
- def gettitle(url):
- title=bs.find('span',{"class":"zm-editable-content"})#找到标题
- title=title.get_text()
- return(title)
复制代码
=>
- def gettitle(url):
- title=bs.find('span',{"class":"zm-editable-content"})#找到标题
- title=title.get_text()
- return(title)
复制代码
- for x in urls:
- try:
- imgdata=urllib.request.urlopen(x).read()
- b = (x.rfind("."))
- imgpath=str(dirpath)+str(a)+x[b:]
- print(imgpath)
- print(x)
- a+=1
- file=open(imgpath,'wb')
- file.write(imgdata)
- file.flush()
- file.close()
- except:
- continue
复制代码
=>
- for x in urls:
- try:
- imgdata=urllib.request.urlopen(x).read()
- b = (x.rfind("."))
- imgpath=str(dirpath)+str(a)+x[b:]
- print(imgpath)
- print(x)
- a+=1
- file=open(imgpath,'wb')
- file.write(imgdata)
- file.flush()
- file.close()
- except:
- continue
复制代码
- def downpics():
- title=gettitle(url)
- print(title)
- dirpath='f:/知乎/'+title+"/"
- if not os.path.exists(dirpath):
- os.makedirs(dirpath)#生成了问题标题相应的文件夹
- pics=getpicurl(url)
- a=1
- urls=[]
复制代码
=>
- def downpics():
- title=gettitle(url)
- print(title)
- dirpath='f:/知乎/'+title+"/"
- if not os.path.exists(dirpath):
- os.makedirs(dirpath)#生成了问题标题相应的文件夹
- pics=getpicurl(url)
- a=1
- urls=[]
复制代码
|
|