From 475f44e94cced47963039d458b63194696be63f0 Mon Sep 17 00:00:00 2001 From: Speyedr <38199900+Speyedr@users.noreply.github.com> Date: Sat, 20 Jul 2024 18:53:34 +1000 Subject: [PATCH] So, there are a couple of flaws in how `self._kwargs` is utilised and passed around, and I believe there are a few modifications to be made in certain places, both to fix the current bug *and* prevent a similar bug from happening in the future. The main flaw is that `self._kwargs` is passed down by reference instead of by value (since it is a dictionary and this is Python, of course). A callee may modify `self._kwargs` during its procedure (such as in the case of `Multidown()` adding the `range` header), and that modification is ultimately what causes issues when creating the next request. This can be fixed by: - Explicitly resetting `self._kwargs` when `Pypdl._reset()` is called. - While this still requires a `deepcopy()` operation every time the download is reset, it does also guard against any future mishandling or callee modification of `self._kwargs`! - I believe this has the most coverage of any fix, but the performance hit should be noted. I don't exactly have the internet speeds to notice a difference, but others might. 
--- pypdl/pypdl_manager.py | 3 ++ testing/pypdl.log | 95 ++++++++++++++++++++++++++++-------------- 2 files changed, 67 insertions(+), 31 deletions(-) diff --git a/pypdl/pypdl_manager.py b/pypdl/pypdl_manager.py index 502baaf..c4773f9 100644 --- a/pypdl/pypdl_manager.py +++ b/pypdl/pypdl_manager.py @@ -2,6 +2,7 @@ import time from collections import deque from concurrent.futures import Future, ThreadPoolExecutor +from copy import deepcopy from logging import Logger from pathlib import Path from threading import Event @@ -46,6 +47,7 @@ def __init__( "raise_for_status": True, } self._kwargs.update(kwargs) # this is where the "User-Agent" attribute is saved + self._orig_kwargs = deepcopy(self._kwargs) self._allow_reuse = allow_reuse self.size = None @@ -154,6 +156,7 @@ def _reset(self): self._workers.clear() self._interrupt.clear() self._stop = False + self._kwargs = deepcopy(self._orig_kwargs) self.size = None self.progress = 0 diff --git a/testing/pypdl.log b/testing/pypdl.log index 61df596..ec10f56 100644 --- a/testing/pypdl.log +++ b/testing/pypdl.log @@ -1,31 +1,64 @@ -(Pypdl) 19-07-24 16:07:59 - DEBUG: Reseted download manager -(Pypdl) 19-07-24 16:07:59 - DEBUG: Downloading, url: https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg attempt: 1 -(Pypdl) 19-07-24 16:07:59 - DEBUG: Obtaining header from https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg -(Pypdl) 19-07-24 16:08:00 - DEBUG: Ending HEAD request for https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg. 
I sent: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Sent headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: HEAD Response: 200 -HEAD Response headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Header accquired from head request -(Pypdl) 19-07-24 16:08:00 - DEBUG: Size accquired from header -(Pypdl) 19-07-24 16:08:00 - DEBUG: ETag accquired from header -(Pypdl) 19-07-24 16:08:00 - DEBUG: Segment table created: {'url': 'https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 469059, 'segment_size': 469060, 'segment_path': '3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg.0'}, 1: {'start': 469060, 'end': 938119, 'segment_size': 469060, 'segment_path': '3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg.1'}} -(Pypdl) 19-07-24 16:08:00 - DEBUG: Initiated waiting loop -(Pypdl) 19-07-24 16:08:00 - DEBUG: Multi-Segment download started -(Pypdl) 19-07-24 16:08:00 - DEBUG: Downloaded all segments -(Pypdl) 19-07-24 16:08:00 - DEBUG: Combining files -(Pypdl) 19-07-24 16:08:00 - DEBUG: Exit waiting loop, download completed -(Pypdl) 19-07-24 16:08:00 - DEBUG: Reseted download manager -(Pypdl) 19-07-24 16:08:00 - DEBUG: Downloading, url: https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg attempt: 1 -(Pypdl) 19-07-24 16:08:00 - DEBUG: Obtaining header from https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg -(Pypdl) 19-07-24 16:08:00 - DEBUG: Ending HEAD request for https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg. I sent: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Sent headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: HEAD Response: 206 -HEAD Response headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Ending GET request for https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg. 
I sent: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Sent headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: GET Response: 206 -GET Response headers: -(Pypdl) 19-07-24 16:08:00 - ERROR: Failed to obtain headers from https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg. -NoneType: None -(Pypdl) 19-07-24 16:08:00 - ERROR: (AttributeError) ['NoneType' object has no attribute 'get'] -(Pypdl) 19-07-24 16:08:00 - DEBUG: Download failed, url: https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg +(Pypdl) 20-07-24 18:48:29 - DEBUG: Reseted download manager +(Pypdl) 20-07-24 18:48:29 - DEBUG: Downloading, url: https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg attempt: 1 +(Pypdl) 20-07-24 18:48:29 - DEBUG: Obtaining header from https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg +(Pypdl) 20-07-24 18:48:30 - DEBUG: Ending HEAD request for https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg. 
I sent: +(Pypdl) 20-07-24 18:48:30 - DEBUG: Sent headers: +(Pypdl) 20-07-24 18:48:30 - DEBUG: HEAD Response: 200 +HEAD Response headers: +(Pypdl) 20-07-24 18:48:30 - DEBUG: Header accquired from head request +(Pypdl) 20-07-24 18:48:30 - DEBUG: Size accquired from header +(Pypdl) 20-07-24 18:48:30 - DEBUG: ETag accquired from header +(Pypdl) 20-07-24 18:48:30 - DEBUG: Segment table created: {'url': 'https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 469059, 'segment_size': 469060, 'segment_path': '3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg.0'}, 1: {'start': 469060, 'end': 938119, 'segment_size': 469060, 'segment_path': '3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg.1'}} +(Pypdl) 20-07-24 18:48:30 - DEBUG: Initiated waiting loop +(Pypdl) 20-07-24 18:48:30 - DEBUG: Multi-Segment download started +(Pypdl) 20-07-24 18:48:30 - DEBUG: Downloaded all segments +(Pypdl) 20-07-24 18:48:30 - DEBUG: Combining files +(Pypdl) 20-07-24 18:48:30 - DEBUG: Exit waiting loop, download completed +(Pypdl) 20-07-24 18:48:30 - DEBUG: Reseted download manager +(Pypdl) 20-07-24 18:48:30 - DEBUG: Downloading, url: https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg attempt: 1 +(Pypdl) 20-07-24 18:48:30 - DEBUG: Obtaining header from https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg +(Pypdl) 20-07-24 18:48:30 - DEBUG: Ending HEAD request for https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg. 
I sent: +(Pypdl) 20-07-24 18:48:30 - DEBUG: Sent headers: +(Pypdl) 20-07-24 18:48:30 - DEBUG: HEAD Response: 200 +HEAD Response headers: +(Pypdl) 20-07-24 18:48:30 - DEBUG: Header accquired from head request +(Pypdl) 20-07-24 18:48:30 - DEBUG: Size accquired from header +(Pypdl) 20-07-24 18:48:30 - DEBUG: ETag accquired from header +(Pypdl) 20-07-24 18:48:30 - DEBUG: Segment table created: {'url': 'https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 263040, 'segment_size': 263041, 'segment_path': '55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg.0'}, 1: {'start': 263041, 'end': 526082, 'segment_size': 263042, 'segment_path': '55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg.1'}} +(Pypdl) 20-07-24 18:48:30 - DEBUG: Initiated waiting loop +(Pypdl) 20-07-24 18:48:30 - DEBUG: Multi-Segment download started +(Pypdl) 20-07-24 18:48:31 - DEBUG: Downloaded all segments +(Pypdl) 20-07-24 18:48:31 - DEBUG: Combining files +(Pypdl) 20-07-24 18:48:31 - DEBUG: Exit waiting loop, download completed +(Pypdl) 20-07-24 18:48:31 - DEBUG: Reseted download manager +(Pypdl) 20-07-24 18:48:31 - DEBUG: Downloading, url: https://safebooru.org//samples/4619/sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg attempt: 1 +(Pypdl) 20-07-24 18:48:31 - DEBUG: Obtaining header from https://safebooru.org//samples/4619/sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg +(Pypdl) 20-07-24 18:48:31 - DEBUG: Ending HEAD request for https://safebooru.org//samples/4619/sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg. 
I sent: +(Pypdl) 20-07-24 18:48:31 - DEBUG: Sent headers: +(Pypdl) 20-07-24 18:48:31 - DEBUG: HEAD Response: 200 +HEAD Response headers: +(Pypdl) 20-07-24 18:48:31 - DEBUG: Header accquired from head request +(Pypdl) 20-07-24 18:48:31 - DEBUG: Size accquired from header +(Pypdl) 20-07-24 18:48:31 - DEBUG: ETag accquired from header +(Pypdl) 20-07-24 18:48:31 - DEBUG: Segment table created: {'url': 'https://safebooru.org//samples/4619/sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 269221, 'segment_size': 269222, 'segment_path': 'sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg.0'}, 1: {'start': 269222, 'end': 538444, 'segment_size': 269223, 'segment_path': 'sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg.1'}} +(Pypdl) 20-07-24 18:48:31 - DEBUG: Initiated waiting loop +(Pypdl) 20-07-24 18:48:31 - DEBUG: Multi-Segment download started +(Pypdl) 20-07-24 18:48:32 - DEBUG: Downloaded all segments +(Pypdl) 20-07-24 18:48:32 - DEBUG: Combining files +(Pypdl) 20-07-24 18:48:32 - DEBUG: Exit waiting loop, download completed +(Pypdl) 20-07-24 18:48:32 - DEBUG: Reseted download manager +(Pypdl) 20-07-24 18:48:32 - DEBUG: Downloading, url: https://safebooru.org//samples/4619/sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg attempt: 1 +(Pypdl) 20-07-24 18:48:32 - DEBUG: Obtaining header from https://safebooru.org//samples/4619/sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg +(Pypdl) 20-07-24 18:48:32 - DEBUG: Ending HEAD request for https://safebooru.org//samples/4619/sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg. 
I sent: +(Pypdl) 20-07-24 18:48:32 - DEBUG: Sent headers: +(Pypdl) 20-07-24 18:48:32 - DEBUG: HEAD Response: 200 +HEAD Response headers: +(Pypdl) 20-07-24 18:48:32 - DEBUG: Header accquired from head request +(Pypdl) 20-07-24 18:48:32 - DEBUG: Size accquired from header +(Pypdl) 20-07-24 18:48:32 - DEBUG: ETag accquired from header +(Pypdl) 20-07-24 18:48:32 - DEBUG: Segment table created: {'url': 'https://safebooru.org//samples/4619/sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 229700, 'segment_size': 229701, 'segment_path': 'sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg.0'}, 1: {'start': 229701, 'end': 459402, 'segment_size': 229702, 'segment_path': 'sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg.1'}} +(Pypdl) 20-07-24 18:48:32 - DEBUG: Initiated waiting loop +(Pypdl) 20-07-24 18:48:32 - DEBUG: Multi-Segment download started +(Pypdl) 20-07-24 18:48:32 - DEBUG: Downloaded all segments +(Pypdl) 20-07-24 18:48:32 - DEBUG: Combining files +(Pypdl) 20-07-24 18:48:32 - DEBUG: Exit waiting loop, download completed