马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 Daniel_Zhang 于 2021-1-8 21:27 编辑
模块在此:
文件名 get_data_set.py
注意修改一下 LOCATION 变成自己的文件夹:

import pickle, os
import easygui as g
import get_total_time as data
# Directory (with trailing slash) in which the pickled data set is stored.
# Adjust this to your own folder before running.
LOCATION = os.getcwd() + '/html_css_js_flask/learning_progress_count/'

# NOTE: these two statements run at import time, so importing this module
# prompts for the URL and fetches the durations immediately.
url = input('enter the url address\n')
my_time = data.get_durations(url)
def add_data_set(my_time, location=None):
    """Pickle *my_time* into the ``html_learn_progress.testing`` data file.

    The file is created if it does not exist and overwritten otherwise.

    Args:
        my_time: The object to store (the callers pass a list of durations).
        location: Directory to write into. Defaults to the module-level
            ``LOCATION`` when omitted.
    """
    if location is None:
        location = LOCATION
    target = os.path.join(location, 'html_learn_progress.testing')
    # 'wb' = write binary; the context manager closes the file even when
    # pickle.dump raises.
    with open(target, 'wb') as pickle_file:
        pickle.dump(my_time, pickle_file)
def data_set_read():
    """Load the pickled data set, report its size in a dialog and return it.

    When this module is imported (rather than run as a script) the data set
    is first written with ``add_data_set(my_time)`` so the file exists
    before it is read. When run as a script the loaded data is also printed.

    Returns:
        The object unpickled from ``html_learn_progress.testing``.
    """
    if __name__ != '__main__':
        add_data_set(my_time)

    # 'rb' = read binary. pickle must only be used on trusted files; this one
    # is the file written by add_data_set. The context manager guarantees
    # the handle is closed.
    with open(LOCATION + 'html_learn_progress.testing', 'rb') as pickle_file:
        my_list2 = pickle.load(pickle_file)

    if __name__ == '__main__':
        print(my_list2)  # show the data

    length_data_set = len(my_list2)
    g.msgbox(
        msg='data set insert successful :)' + '\n\n' + 'total insert: ' + str(length_data_set),
        title='System Warning',
        ok_button='Get it !',
    )
    return my_list2
if __name__ == '__main__':
    # Script entry point: store the freshly fetched durations, then read
    # them back (which prints them and shows the confirmation dialog).
    add_data_set(my_time)
    data_set_read()
又是一个模块,爬虫模块,自动获取数据,文件名: get_total_time.py

import re, ssl
import requests
def open_url(url):
    """Download *url* and return the response body as text.

    A copy of the body is also written to ``testing_new.txt`` in the current
    working directory, so the page can be inspected when deciding how to
    write the regular expressions that parse it.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body.
    """
    # NOTE(review): the Cookie below embeds what look like real session
    # credentials; consider loading it from configuration instead.
    headers = {
        'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        'Accept': 'text/html',
        'Cookie': "_uuid=1DBA4F96-2E63-8488-DC25-B8623EFF40E773841infoc; buvid3=FE0D3174-E871-4A3E-877C-A4ED86E20523155831infoc; LIVE_BUVID=AUTO8515670521735348; sid=l765gx48; DedeUserID=33717177; DedeUserID__ckMd5=be4de02fd64f0e56; SESSDATA=cf65a5e0%2C1569644183%2Cc4de7381; bili_jct=1e8cdbb5755b4ecd0346761a121650f5; CURRENT_FNVAL=16; stardustvideo=1; rpdid=|(umY))|ukl~0J'ulY~uJm)kJ; UM_distinctid=16ce0e51cf0abc-02da63c2df0b4b-5373e62-1fa400-16ce0e51cf18d8; stardustpgcv=0606; im_notify_type_33717177=0; finger=b3372c5f; CURRENT_QUALITY=112; bp_t_offset_33717177=300203628285382610"
    }
    # NOTE(review): this replaces the stdlib default HTTPS context factory
    # process-wide with one that skips certificate verification. It is kept
    # for backward compatibility; requests presumably does not consult this
    # hook -- confirm and remove if so.
    ssl._create_default_https_context = ssl._create_unverified_context

    # Fetch first, so a failed request neither truncates nor leaks the dump
    # file. The timeout keeps a stalled connection from hanging forever.
    html = requests.get(url, headers=headers, timeout=30).text

    # Explicit UTF-8 so non-ASCII page content can be written regardless of
    # the platform's default encoding.
    with open('testing_new.txt', 'w', encoding='utf-8') as dump_file:
        dump_file.write(html)
    return html
def get_durations(url):
    """Return the duration of every video in the playlist found at *url*.

    The page is fetched with ``open_url``. The first ``"cid"`` value in the
    page is used to locate the start of the playlist array, and every
    ``"duration"`` value (in seconds) inside it is converted.

    Args:
        url: Address of the video page.

    Returns:
        A list of ``[minutes, seconds]`` pairs, one per video.

    Raises:
        ValueError: If the page contains no ``"cid"`` entry.
    """
    html = open_url(url)

    # cid of the first video; it anchors the search for the full playlist.
    first_cid = re.search(r'"cid":(.+?),', html)
    if first_cid is None:
        raise ValueError('no "cid" entry found in the page at ' + str(url))
    cid = first_cid.group(1)

    # The playlist is the array that starts with the first video's cid.
    # The cid is escaped so it is always matched literally.
    playlist_pattern = r'\[{"cid":' + re.escape(cid) + r'.+?]'

    final_result = []
    for playlist in re.findall(playlist_pattern, html):
        # Capture the number directly instead of re-parsing the text of a
        # stringified list.
        for seconds in re.findall(r'"duration":\s*(-?\d+)', playlist):
            # Convert seconds to [minutes, seconds].
            final_result.append(list(divmod(int(seconds), 60)))
    return final_result
if __name__ == '__main__':
    # Manual run: prompt for a page address and parse it. The result is
    # discarded; the fetched page is saved by open_url for inspection.
    url = input('enter\n')
    get_durations(url)
主程序在此,文件名 time_adding.py

import pickle
import easygui as g
import get_data_set as data_set
# Seconds of material already completed; starts at zero and is updated by
# calculate().
sum_second = 0
def calculate(my_list2, units_done=None):
    """Show a dialog with the hours completed and the completion percentage.

    Args:
        my_list2: Sequence of ``[minutes, seconds]`` entries, one per unit.
        units_done: How many leading units count as completed. When omitted,
            the module-level ``already_take`` set by the script entry point
            is used.

    Side effects:
        Stores the completed time in seconds in the module-level
        ``sum_second`` and opens a message box.
    """
    global sum_second
    if units_done is None:
        units_done = already_take

    total_time = 0
    finished = 0
    for index, entry in enumerate(my_list2):
        length = 60 * int(entry[0]) + int(entry[1])
        total_time += length
        if index < units_done:
            finished += length

    # Assign rather than accumulate, so repeated calls do not inflate the
    # completed time.
    sum_second = finished

    # An empty data set has no total time; report 0 % instead of dividing
    # by zero.
    percentage = (sum_second / total_time) * 100 if total_time else 0.0

    string1 = "hours already take: " + str(sum_second/(60*60)) +' / ' + str(total_time/(60*60))
    string2 = "current percentage: " + str(percentage) + ' %'
    g.msgbox(msg = string1 + '\n\n' + string2,title='System Warning',ok_button='Get it !')
if __name__ == "__main__":
    # Script entry point: ask how many units are finished, load the data
    # set and show the progress dialog.
    already_take = input('how many unit you already take up to now?\n')
    try:
        already_take = int(already_take)
    except ValueError:
        g.msgbox(msg = 'seems enter is wrong, please check it and enter again!', title='System Warning',ok_button='Get it !')
        # exit() is injected by the site module for interactive use and may
        # be absent; raising SystemExit ends the program the same way.
        raise SystemExit
    my_list2 = data_set.data_set_read()
    calculate(my_list2)
P.S. 三个文件请放在同一个文件夹下面。
只需要输入指定的 bilibili 视频链接(含有 BV 号或者 av 号的那个链接),以及自己学完了多少个章节(学完了第一讲就输入 1,以此类推)。