求助(爬虫代码)
import osimport time
from concurrent.futures import ThreadPoolExecutor, wait
from requests import get, head
import sys
class Downloader:
def __init__(self, url, nums, file):
self.url = url
self.num = nums
self.name = file
self.getSize = 0
self.info = {
'main' : {
'progress' : 0,
'speed' : ''
},
'sub' : {
'progress' : ,
'stat' :
}
}
r = head(self.url)
while r.status_code == 302:
self.url = r.headers['Location']
print("该url以重定向至{}".format(self.url))
r = head(self.url)
self.size = int(r.headers['Content-Length'])
print("该文件大小为: {} bytes".format(self.size))
def down(self, start, end, thread_id, chunk_size=10240):
raw_start = start
for _ in range(10):
try:
headers = {'Range' : 'bytes={}-{}'.format(start, end)}
r = get(self.url, headers=headers, timeout=10, stream=True)
print(f"线程{thread_id}链接成功")
size = 0
with open(self.name, 'rb+') as fp:
fp.seek(start)
for chunk in r.iter_content(chunk_size=chunk_size):
if chunk:
self.getSize += chunk_size
fp.write(chunk)
start += chunk_size
size += chunk_size
progress = round(size / (end - raw_start) * 100, 2)
self.info['sub']['progress'] = progress
self.info['sub']['stat'] = 1
return
except Exception as e:
print(e)
self.down(start, end, thread_id)
print(f"{start}-{end}, 下载失败")
self.info['sub']['stat'] = 0
def show(self):
while True:
speed = self.getSize
time.sleep(0.5)
speed = int((self.getSize - speed) * 2 / 1024)
if speed > 1024:
speed = f"{round(speed / 1024, 2)} M/s"
else:
speed = f"{speed} KB/s"
progress = round(self,getsize / self.size * 100, 2)
self.info['main']['progress'] = progress
self.info['main']['speed'] = speed
print(self.info)
if progress >= 100:
break
def run(self):
fp = open(self.name ,'wb')
print(f"正在初始化下载文件: {self.name}")
fp.truncate(self.size)
print("初始化文件完成")
start_time = time.time()
fp.close()
part = self.size // self.num
pool = ThreadPoolExecutor(max_workers=self.num + 1)
futures = []
for i in range(self.num):
start = part * i
if i == self.num - 1:
end = self.size
else:
end = start + part - 1
futures.append(pool.submit(self.show))
printf(f'正在使用{self.num}个线程下载……')
wait(futures)
end_time = time.time()
speed = int(self.size / 1024 / (end_time - start_time))
if speed > 1024:
speed = f"{round(speed / 1024, 2)} M/s"
else:
speed = f"{speed} KB/s"
print(f"{self.name} 下载完成, 平均速度: {speed}")
if __name__ == '__main__':
debug = 1
if debug:
url = 'https://www.bilibili.com/read/cv6032187'
down = Downloader(url, 8, os.path.basename(url))
else:
url = sys.argv
file = sys.argv
thread_num = int(sys.argv)
down = Downloader(url, thread_num, file)
down.run()
报错如下:
Traceback (most recent call last):
File "g:\Programming\Python\text.py", line 105, in <module>
down = Downloader(url, 8, os.path.basename(url))
File "g:\Programming\Python\text.py", line 29, in __init__
self.size = int(r.headers['Content-Length'])
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\structures.py", line
54, in __getitem__
return self._store
KeyError: 'content-length'
大佬指导指导@zltzlt @qiuyouzhi @heidern0612 @老八秘制 @WangJS MIke_python小小 发表于 2020-5-17 08:39
@heidern0612 @老八秘制 @WangJS
1,不用@那么多的人,有大佬看到就帮你了
2,显示字典没有这个项,你打印一下字典看看 qiuyouzhi 发表于 2020-5-17 08:48
1,不用@那么多的人,有大佬看到就帮你了
2,显示字典没有这个项,你打印一下字典看看
我这是照着别人抄的
所以不懂
还请你帮我改一下代码吧 MIke_python小小 发表于 2020-5-17 08:49
我这是照着别人抄的
所以不懂
改好了,有点投机取巧:
import os
import time
from concurrent.futures import ThreadPoolExecutor, wait
from requests import get, head
import sys
class Downloader:
def __init__(self, url, nums, file):
self.url = url
self.num = nums
self.name = file
self.getSize = 0
self.info = {
'main' : {
'progress' : 0,
'speed' : ''
},
'sub' : {
'progress' : ,
'stat' :
}
}
r = head(self.url)
while r.status_code == 302:
self.url = r.headers['Location']
print("该url以重定向至{}".format(self.url))
r = head(self.url)
try:
self.size = int(r.headers['content-length'])
except:
self.size = 33901
print("该文件大小为: {} bytes".format(self.size))
def down(self, start, end, thread_id, chunk_size=10240):
raw_start = start
for _ in range(10):
try:
headers = {'Range' : 'bytes={}-{}'.format(start, end)}
r = get(self.url, headers=headers, timeout=10, stream=True)
print(f"线程{thread_id}链接成功")
size = 0
with open(self.name, 'rb+') as fp:
fp.seek(start)
for chunk in r.iter_content(chunk_size=chunk_size):
if chunk:
self.getSize += chunk_size
fp.write(chunk)
start += chunk_size
size += chunk_size
progress = round(size / (end - raw_start) * 100, 2)
self.info['sub']['progress'] = progress
self.info['sub']['stat'] = 1
return
except Exception as e:
print(e)
self.down(start, end, thread_id)
print(f"{start}-{end}, 下载失败")
self.info['sub']['stat'] = 0
def show(self):
while True:
speed = self.getSize
time.sleep(0.5)
speed = int((self.getSize - speed) * 2 / 1024)
if speed > 1024:
speed = f"{round(speed / 1024, 2)} M/s"
else:
speed = f"{speed} KB/s"
progress = round(self,getsize / self.size * 100, 2)
self.info['main']['progress'] = progress
self.info['main']['speed'] = speed
print(self.info)
if progress >= 100:
break
def run(self):
fp = open(self.name ,'wb')
print(f"正在初始化下载文件: {self.name}")
fp.truncate(self.size)
print("初始化文件完成")
start_time = time.time()
fp.close()
part = self.size // self.num
pool = ThreadPoolExecutor(max_workers=self.num + 1)
futures = []
for i in range(self.num):
start = part * i
if i == self.num - 1:
end = self.size
else:
end = start + part - 1
futures.append(pool.submit(self.show))
print(f'正在使用{self.num}个线程下载……')
wait(futures)
end_time = time.time()
speed = int(self.size / 1024 / (end_time - start_time))
if speed > 1024:
speed = f"{round(speed / 1024, 2)} M/s"
else:
speed = f"{speed} KB/s"
print(f"{self.name} 下载完成, 平均速度: {speed}")
if __name__ == '__main__':
debug = 1
if debug:
url = 'https://www.bilibili.com/read/cv6032187'
down = Downloader(url, 8, os.path.basename(url))
else:
url = sys.argv
file = sys.argv
thread_num = int(sys.argv)
down = Downloader(url, thread_num, file)
down.run() qiuyouzhi 发表于 2020-5-17 08:56
改好了,有点投机取巧:
这是下载到哪里呀
怎么调整呀 MIke_python小小 发表于 2020-5-17 09:25
这是下载到哪里呀
怎么调整呀
程序文件所在位置
把程序放到别的地方,或者os.chdir qiuyouzhi 发表于 2020-5-17 09:26
程序文件所在位置
把程序放到别的地方,或者os.chdir
能写一个指定的代码吗 MIke_python小小 发表于 2020-5-17 09:27
能写一个指定的代码吗
说了啊,os.chdir qiuyouzhi 发表于 2020-5-17 09:28
说了啊,os.chdir
不会{:10_266:} MIke_python小小 发表于 2020-5-17 09:30
不会
os.chdir(字符串形式的目标路径)
还不会就去百度吧
页:
[1]