|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
python + chromedriver实现自动批量保存百度盘分享
已将提取百度云分享的Python代码封装成了类BaiduPanExtractor
import functools
import threading
import time

from selenium import webdriver
class BaiduPanExtractor:
    """Automate saving Baidu Pan share links into the logged-in user's disk.

    The class drives Chrome through chromedriver: it opens the Baidu Pan
    page, waits until the login has completed, opens a share link, submits
    its extraction code and stores the shared content under a chosen folder,
    creating missing folders on the way.

    Tunable class attributes (may be overridden per instance):
        retry_interval: seconds slept before every element-lookup attempt.
        max_attempts: failed attempts after which a lookup gives up.
        poll_interval: seconds slept between URL checks while waiting.
        home_url_marker: URL substring that indicates a completed login.
    """

    retry_interval = 1
    max_attempts = 2
    poll_interval = 0.2
    home_url_marker = "https://pan.baidu.com/disk/home?"

    def __init__(self):
        self.browser = None
        self.password = None
        self.account = None
        self.executable_path = None
        self.login_status = False  # set to True once the login is detected

    def open_browser(self, executable_path="chromedriver"):
        """Start Chrome through chromedriver and keep the driver in ``self.browser``.

        :param executable_path: path of the chromedriver executable
        :raises Exception: whatever Selenium raised when the driver could not
            be started; a hint is printed first.
        """
        self.executable_path = executable_path
        try:
            try:
                # Selenium 4 takes the driver path through a Service object.
                from selenium.webdriver.chrome.service import Service

                self.browser = webdriver.Chrome(service=Service(executable_path))
            except (ImportError, TypeError):
                # Selenium 3 only understands the ``executable_path`` keyword.
                self.browser = webdriver.Chrome(executable_path=executable_path)
        except Exception:
            print("请指定Chromedriver.exe的路径")
            # Re-raise: continuing with ``self.browser is None`` would only
            # fail later with a far less helpful AttributeError.
            raise

    # Plain function used as a decorator inside the class body (not a method).
    def decorator_exception(fun):
        """Retry ``fun`` until it succeeds or ``max_attempts`` attempts failed.

        Every attempt is preceded by a ``retry_interval`` pause so the page
        has time to render; the last failure is re-raised unchanged. The
        settings are read from the first positional argument (``self``),
        with the historical values 1 s / 2 attempts as fallback.
        """

        @functools.wraps(fun)
        def wrap(*args, **kwargs):
            owner = args[0] if args else None
            pause = getattr(owner, "retry_interval", 1)
            attempts = max(1, getattr(owner, "max_attempts", 2))
            for attempt in range(1, attempts + 1):
                time.sleep(pause)
                try:
                    return fun(*args, **kwargs)
                except Exception:
                    if attempt == attempts:
                        raise

        return wrap

    def open_baidu_pan(self):
        """Open the Baidu Pan landing page, which shows the login form."""
        self.browser.get("http://pan.baidu.com")

    def check_login_status(self):
        """Block until the browser URL contains ``home_url_marker``.

        Sets ``self.login_status`` to True before returning and returns
        immediately when it is already True. The URL is polled every
        ``poll_interval`` seconds instead of spinning on it.
        """
        while not self.login_status:
            if self.home_url_marker in self.browser.current_url:
                self.login_status = True
            else:
                time.sleep(self.poll_interval)

    @decorator_exception
    def click_save2disk(self):
        """Click the "save to my disk" button of the share page."""
        self.browser.find_element("class name", "tools-share-save-hb").click()

    @decorator_exception
    def click_create_new_folder(self):
        """Click the "new folder" button of the save dialog."""
        self.browser.find_element("xpath", "//a[@data-button-id='b17']").click()

    @decorator_exception
    def click_confirm(self):
        """Click the "confirm" button of the save dialog."""
        self.browser.find_element("xpath", "//a[@node-type='confirm']").click()

    @decorator_exception
    def click_sure(self):
        """Click the check-mark button that confirms a new folder name."""
        self.browser.find_element("class name", "sure").click()

    def save(self, file_path):
        """Select the folder ``file_path`` in the dialog and confirm the save."""
        self.locate_folder(file_path)
        self.click_confirm()

    def is_path_exist(self, save_path):
        """Return True when the folder ``save_path`` is listed in the dialog.

        The folder is clicked as a side effect, see :meth:`locate_folder`.
        """
        return self.locate_folder(save_path)

    @staticmethod
    def _xpath_literal(text):
        """Return ``text`` as an XPath string literal, whatever quotes it holds."""
        if "'" not in text:
            return "'{}'".format(text)
        if '"' not in text:
            return '"{}"'.format(text)
        # XPath 1.0 has no escape character: glue the pieces with concat().
        pieces = ("'{}'".format(piece) for piece in text.split("'"))
        return "concat({})".format(", \"'\", ".join(pieces))

    def locate_folder(self, file_name):
        """Click the tree entry whose ``node-path`` attribute equals ``file_name``.

        :return: True when the entry was found and clicked, False when it
            could not be found within ``max_attempts`` attempts; in that
            case the folder is treated as missing.
        """
        pattern = "//span[@node-path={}]/../..".format(self._xpath_literal(file_name))
        for _ in range(max(1, self.max_attempts)):
            time.sleep(self.retry_interval)
            try:
                self.browser.find_element("xpath", pattern).click()
                return True
            except Exception:
                continue
        return False

    @decorator_exception
    def create_new_folder(self, file_path):
        """Create the folder ``file_path`` below the currently selected folder.

        Only the last path component is typed into the name field.
        """
        self.click_create_new_folder()
        name_input = self.browser.find_element("class name", "shareFolderInput")
        name_input.clear()
        # The former slice ``file_path[file_path.rfind("/"):]`` kept the
        # leading "/" and returned a single character for slash-less input.
        name_input.send_keys(file_path.rsplit("/", 1)[-1])
        self.click_sure()

    @decorator_exception
    def submit_login_form(self):
        """Fill in the stored account and password, submit, and wait for login.

        The wait never times out, so that a verification step requested by
        the site can be completed by hand.
        """
        username = self.browser.find_element("class name", "pass-text-input-userName")
        password = self.browser.find_element("class name", "pass-text-input-password")
        username.clear()
        password.clear()
        username.send_keys(self.account)
        password.send_keys(self.password)
        self.browser.find_element("id", "TANGRAM__PSP_4__submit").click()
        self.check_login_status()

    def extract_share(self, share_url, share_url_code):
        """Open ``share_url``, submit its extraction code and wait for the redirect."""
        self.browser.get(share_url)
        self.browser.find_element("id", "accessCode").send_keys(share_url_code)
        # Remember the URL *before* clicking: reading it afterwards could
        # already see the redirected URL and then wait forever.
        url_before = self.browser.current_url
        self.browser.find_element("class name", "submit-a").click()
        while url_before == self.browser.current_url:
            time.sleep(self.poll_interval)

    @staticmethod
    def _folder_chain(save_path):
        """Return the cumulative folder paths that lead to ``save_path``.

        ``"/a/b"`` gives ``["/a", "/a/b"]``. Empty components (missing
        leading slash, trailing or doubled slashes) are ignored.
        """
        chain = []
        current = ""
        for name in save_path.split("/"):
            if name:
                current = current + "/" + name
                chain.append(current)
        return chain

    def save_file(self, save_path):
        """Save the selected share content under ``save_path``.

        Every folder on the way is selected in turn and created when it is
        missing. An empty path or ``"/"`` saves to the root folder.
        """
        chain = self._folder_chain(save_path)
        for folder in chain:
            if not self.is_path_exist(folder):
                self.create_new_folder(folder)
        self.save(chain[-1] if chain else "/")

    def login(self, account=None, password=None):
        """Log in to Baidu Pan and return once the login has completed.

        :param account: Baidu account name
        :param password: password of that account

        When either value is empty the browser is opened and the user logs
        in by hand. Otherwise the form is submitted automatically; the site
        may still request a verification that has to be done by hand.
        """
        self.account = account
        self.password = password
        self.open_baidu_pan()
        if self.account and self.password:
            self.submit_login_form()
        # Poll on this thread: a WebDriver must not be used from several
        # threads at once.
        self.check_login_status()

    def check_share_type(self):
        """Return "FOLDER" when the current URL contains "#list", else "FILE"."""
        if "#list" in self.browser.current_url:
            return "FOLDER"
        return "FILE"

    @decorator_exception
    def click_file_name_checkbox(self):
        """Click the "file name" header checkbox of the share listing."""
        self.browser.find_element("xpath", "//div[@class='QxJxtg']/div/ul/li/div").click()

    def save2baidu_pan_disk(self, share_url, share_url_code, save_path="/"):
        """Save the content behind ``share_url`` under ``save_path``.

        :param share_url: share link
        :param share_url_code: extraction code of the share link
        :param save_path: destination folder, the root folder by default
        """
        self.extract_share(share_url, share_url_code)
        if self.check_share_type() == "FOLDER":
            self.click_file_name_checkbox()
        self.click_save2disk()
        self.save_file(save_path)
复制代码
使用方法:
# Usage example: start the browser, log in, then save one share link.
chromedriver_executable = "指定你的chromedriver的路径"
account = "你的百度云"
password = "你的百度云密码"

baidupan_extractor = BaiduPanExtractor()
baidupan_extractor.open_browser(executable_path=chromedriver_executable)
# Credentials are passed positionally, in (account, password) order.
baidupan_extractor.login(account, password)
baidupan_extractor.save2baidu_pan_disk(
    share_url="百度云分享链接",
    share_url_code="百度云分享链接提取码",
    save_path="指定文件保存路径",
)
复制代码
账户和密码可以不用输入:运行代码后会打开浏览器并弹出百度网盘登录界面,可在该界面手动输入账户和密码登录并完成验证;登录成功后,浏览器会自动进入你填写的百度云分享链接,并提取文件。
写脚本的原因是,在github上发现某大神分享了很多关于计算机知识的电子书,
一个个手动点击保存过于繁琐,于是写了个脚本,将该页的链接和提取码爬取下来,并配合chromedriver模拟手动输入提取码、点击提取文件等过程。
|
|