|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import requests
- from lxml import etree
- class Dynamic(object):
- def __init__(self):
- self.url = "https://www.soogif.com"
- self.headers={"User-Agent:Mozilla/5.0 (Windows NT 5.1)AppleWebKit/537.36(KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.7.0.18838"}
- #1. 发送请求获取响应
- def send_request(self, url):
- response = requests.get(url, headers=self.headers)
- return response.content
- #2. 数据筛选
- def data_cleansing(self,data, rel):
- element = etree.HTML(data)
- html_xpath = element.xpath(rel)
- return html_xpath
-
- #3. 保存数据
- def sava_data(self,data,name):
- a = "images/"+name
- with open(a,"wb") as f:
- f.write(data)
-
- #4. 主要逻辑
- def run(self):
- response_data = self.send_request(self.url)
- img_xpath = "//div['@class=center']/a/@href"
- image_url_list = self.data_cleansing(response_data, img_xpath)
- image_url = self.url+ image_url_list[17]
- page = [image_url+"?pageSize=28&pageNumber={}".format(i) for i in range(0,12)]
- for page_num in page:
- print(page_num)
- page_data = self.send_request(page_num)
- gage_xpath = '//div[@class="up clearfix"]/a/figure/div/video/@src'
- page_url_list = self.data_cleansing(page_data, page_xpath)
- for page_url in page_url_lsit:
- mp4_data= self.send_request(page_url)
- image_name = page_url[-10:]
- self.sava_data(mp4_data, image_name)
-
- if __name__ == '__main__':
- graph = Dynamic()
- graph.run()
复制代码
怎么打印不出来,哪里错了 |
|