|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import re
from urllib import request
class Spider():
ur1 = "https://www.panda.tv/cate/lol"
root_pattern = '<div class="video-info">([\s\S]*?)</div>'
name_pattern = '</i>([\s\S]*?)</span>'
namer_pattern ='</i>([\s\S]*?)</span>'
def __fetch_content(self):
r = request.urlopen(Spider.ur1)
htmls = r.read()
htmls = str(htmls,encoding = "utf-8")
return htmls
def __analysis(self,htmls):
root_html = re.findall(Spider.root_pattern,htmls)
anchors=[]
for abc in root_html:
name = re.findall(Spider.name_pattern,abc)
number = re.findall(Spider.namer_pattern,abc)
anchor = {'name':name,'number':number}
anchors.append(anchor)
print(anchors)
a = 1
def __refine(self,anchors):
a = lambda anchor:{
'name':anchor['name'][0].strip(),
'number':anchor['number'][0]
}
return map(a, anchors)
def go(self):
htmls = self.__fetch_content()
anchors = self.__analysis(htmls)
anchors = list(self.__refine(anchors))
print(anchors)
spider = Spider()
spider.go()
啊啊啊 为什么会报对象不可迭代啊 |
|