From 4a2a7db353673908ffc8c72512dcc01d19a6d5c3 Mon Sep 17 00:00:00 2001 From: Speyedr <38199900+Speyedr@users.noreply.github.com> Date: Sat, 20 Jul 2024 18:15:16 +1000 Subject: [PATCH] So, there are a couple of flaws in how `self._kwargs` is utilised and passed around, and I believe there are a few modifications to be made in certain places, both to fix the current bug *and* prevent a similar bug from happening in the future. The main flaw is that `self._kwargs` is passed down by reference instead of value (since it is a dictionary and this is Python, of course). A callee may modify `self._kwargs` during its procedure (such as in the case of `Multidown()` adding the `range` header) and this is ultimately what causes issues when creating the next request. This can be fixed by: - Not trusting the callee to properly handle `self._kwargs` and instead passing a `deepcopy()` to the callee. This is slow, but ensures that the dictionary passed to `Multidown()` can be modified at will. As demonstrated in this commit, this does fix the bug (and again proves its existence) but may not be suitable for a downloader whose main purpose is to be performant. - While this does fix the immediate bug, it does not prevent another such bug from occurring in a later extension of the program. 
--- pypdl/pypdl_manager.py | 3 +- testing/pypdl.log | 95 ++++++++++++++++++++++++++++-------------- 2 files changed, 66 insertions(+), 32 deletions(-) diff --git a/pypdl/pypdl_manager.py b/pypdl/pypdl_manager.py index 502baaf..27ae446 100644 --- a/pypdl/pypdl_manager.py +++ b/pypdl/pypdl_manager.py @@ -2,6 +2,7 @@ import time from collections import deque from concurrent.futures import Future, ThreadPoolExecutor +from copy import deepcopy from logging import Logger from pathlib import Path from threading import Event @@ -280,7 +281,7 @@ async def _multi_segment(self, segments, segment_table): self._workers.append(md) tasks.append( asyncio.create_task( - md.worker(segment_table, segment, session, **self._kwargs) # self._kwargs is accessed in this function + md.worker(segment_table, segment, session, **deepcopy(self._kwargs)) # self._kwargs is accessed in this function ) ) try: diff --git a/testing/pypdl.log b/testing/pypdl.log index 61df596..764ade0 100644 --- a/testing/pypdl.log +++ b/testing/pypdl.log @@ -1,31 +1,64 @@ -(Pypdl) 19-07-24 16:07:59 - DEBUG: Reseted download manager -(Pypdl) 19-07-24 16:07:59 - DEBUG: Downloading, url: https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg attempt: 1 -(Pypdl) 19-07-24 16:07:59 - DEBUG: Obtaining header from https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg -(Pypdl) 19-07-24 16:08:00 - DEBUG: Ending HEAD request for https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg. 
I sent: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Sent headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: HEAD Response: 200 -HEAD Response headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Header accquired from head request -(Pypdl) 19-07-24 16:08:00 - DEBUG: Size accquired from header -(Pypdl) 19-07-24 16:08:00 - DEBUG: ETag accquired from header -(Pypdl) 19-07-24 16:08:00 - DEBUG: Segment table created: {'url': 'https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 469059, 'segment_size': 469060, 'segment_path': '3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg.0'}, 1: {'start': 469060, 'end': 938119, 'segment_size': 469060, 'segment_path': '3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg.1'}} -(Pypdl) 19-07-24 16:08:00 - DEBUG: Initiated waiting loop -(Pypdl) 19-07-24 16:08:00 - DEBUG: Multi-Segment download started -(Pypdl) 19-07-24 16:08:00 - DEBUG: Downloaded all segments -(Pypdl) 19-07-24 16:08:00 - DEBUG: Combining files -(Pypdl) 19-07-24 16:08:00 - DEBUG: Exit waiting loop, download completed -(Pypdl) 19-07-24 16:08:00 - DEBUG: Reseted download manager -(Pypdl) 19-07-24 16:08:00 - DEBUG: Downloading, url: https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg attempt: 1 -(Pypdl) 19-07-24 16:08:00 - DEBUG: Obtaining header from https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg -(Pypdl) 19-07-24 16:08:00 - DEBUG: Ending HEAD request for https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg. I sent: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Sent headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: HEAD Response: 206 -HEAD Response headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Ending GET request for https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg. 
I sent: -(Pypdl) 19-07-24 16:08:00 - DEBUG: Sent headers: -(Pypdl) 19-07-24 16:08:00 - DEBUG: GET Response: 206 -GET Response headers: -(Pypdl) 19-07-24 16:08:00 - ERROR: Failed to obtain headers from https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg. -NoneType: None -(Pypdl) 19-07-24 16:08:00 - ERROR: (AttributeError) ['NoneType' object has no attribute 'get'] -(Pypdl) 19-07-24 16:08:00 - DEBUG: Download failed, url: https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg +(Pypdl) 20-07-24 18:04:37 - DEBUG: Reseted download manager +(Pypdl) 20-07-24 18:04:37 - DEBUG: Downloading, url: https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg attempt: 1 +(Pypdl) 20-07-24 18:04:37 - DEBUG: Obtaining header from https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg +(Pypdl) 20-07-24 18:04:37 - DEBUG: Ending HEAD request for https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg. 
I sent: +(Pypdl) 20-07-24 18:04:37 - DEBUG: Sent headers: +(Pypdl) 20-07-24 18:04:37 - DEBUG: HEAD Response: 200 +HEAD Response headers: +(Pypdl) 20-07-24 18:04:37 - DEBUG: Header accquired from head request +(Pypdl) 20-07-24 18:04:37 - DEBUG: Size accquired from header +(Pypdl) 20-07-24 18:04:37 - DEBUG: ETag accquired from header +(Pypdl) 20-07-24 18:04:37 - DEBUG: Segment table created: {'url': 'https://safebooru.org//images/4619/3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 469059, 'segment_size': 469060, 'segment_path': '3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg.0'}, 1: {'start': 469060, 'end': 938119, 'segment_size': 469060, 'segment_path': '3eb0ebb8b3a93515fa070f6be303527c48ffeed1.jpg.1'}} +(Pypdl) 20-07-24 18:04:37 - DEBUG: Initiated waiting loop +(Pypdl) 20-07-24 18:04:37 - DEBUG: Multi-Segment download started +(Pypdl) 20-07-24 18:04:37 - DEBUG: Downloaded all segments +(Pypdl) 20-07-24 18:04:37 - DEBUG: Combining files +(Pypdl) 20-07-24 18:04:37 - DEBUG: Exit waiting loop, download completed +(Pypdl) 20-07-24 18:04:37 - DEBUG: Reseted download manager +(Pypdl) 20-07-24 18:04:37 - DEBUG: Downloading, url: https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg attempt: 1 +(Pypdl) 20-07-24 18:04:37 - DEBUG: Obtaining header from https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg +(Pypdl) 20-07-24 18:04:38 - DEBUG: Ending HEAD request for https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg. 
I sent: +(Pypdl) 20-07-24 18:04:38 - DEBUG: Sent headers: +(Pypdl) 20-07-24 18:04:38 - DEBUG: HEAD Response: 200 +HEAD Response headers: +(Pypdl) 20-07-24 18:04:38 - DEBUG: Header accquired from head request +(Pypdl) 20-07-24 18:04:38 - DEBUG: Size accquired from header +(Pypdl) 20-07-24 18:04:38 - DEBUG: ETag accquired from header +(Pypdl) 20-07-24 18:04:38 - DEBUG: Segment table created: {'url': 'https://safebooru.org//images/4619/55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 263040, 'segment_size': 263041, 'segment_path': '55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg.0'}, 1: {'start': 263041, 'end': 526082, 'segment_size': 263042, 'segment_path': '55cdaa511197791cc818d0e9388e9f93afdd4c0d.jpg.1'}} +(Pypdl) 20-07-24 18:04:38 - DEBUG: Initiated waiting loop +(Pypdl) 20-07-24 18:04:38 - DEBUG: Multi-Segment download started +(Pypdl) 20-07-24 18:04:38 - DEBUG: Downloaded all segments +(Pypdl) 20-07-24 18:04:38 - DEBUG: Combining files +(Pypdl) 20-07-24 18:04:38 - DEBUG: Exit waiting loop, download completed +(Pypdl) 20-07-24 18:04:38 - DEBUG: Reseted download manager +(Pypdl) 20-07-24 18:04:38 - DEBUG: Downloading, url: https://safebooru.org//samples/4619/sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg attempt: 1 +(Pypdl) 20-07-24 18:04:38 - DEBUG: Obtaining header from https://safebooru.org//samples/4619/sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg +(Pypdl) 20-07-24 18:04:38 - DEBUG: Ending HEAD request for https://safebooru.org//samples/4619/sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg. 
I sent: +(Pypdl) 20-07-24 18:04:38 - DEBUG: Sent headers: +(Pypdl) 20-07-24 18:04:38 - DEBUG: HEAD Response: 200 +HEAD Response headers: +(Pypdl) 20-07-24 18:04:38 - DEBUG: Header accquired from head request +(Pypdl) 20-07-24 18:04:38 - DEBUG: Size accquired from header +(Pypdl) 20-07-24 18:04:38 - DEBUG: ETag accquired from header +(Pypdl) 20-07-24 18:04:38 - DEBUG: Segment table created: {'url': 'https://safebooru.org//samples/4619/sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 269221, 'segment_size': 269222, 'segment_path': 'sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg.0'}, 1: {'start': 269222, 'end': 538444, 'segment_size': 269223, 'segment_path': 'sample_93ed3885001db1f53ed3ccf4c2612886e1b53803.jpg.1'}} +(Pypdl) 20-07-24 18:04:38 - DEBUG: Initiated waiting loop +(Pypdl) 20-07-24 18:04:38 - DEBUG: Multi-Segment download started +(Pypdl) 20-07-24 18:04:39 - DEBUG: Downloaded all segments +(Pypdl) 20-07-24 18:04:39 - DEBUG: Combining files +(Pypdl) 20-07-24 18:04:39 - DEBUG: Exit waiting loop, download completed +(Pypdl) 20-07-24 18:04:39 - DEBUG: Reseted download manager +(Pypdl) 20-07-24 18:04:39 - DEBUG: Downloading, url: https://safebooru.org//samples/4619/sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg attempt: 1 +(Pypdl) 20-07-24 18:04:39 - DEBUG: Obtaining header from https://safebooru.org//samples/4619/sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg +(Pypdl) 20-07-24 18:04:39 - DEBUG: Ending HEAD request for https://safebooru.org//samples/4619/sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg. 
I sent: +(Pypdl) 20-07-24 18:04:39 - DEBUG: Sent headers: +(Pypdl) 20-07-24 18:04:39 - DEBUG: HEAD Response: 200 +HEAD Response headers: +(Pypdl) 20-07-24 18:04:39 - DEBUG: Header accquired from head request +(Pypdl) 20-07-24 18:04:39 - DEBUG: Size accquired from header +(Pypdl) 20-07-24 18:04:39 - DEBUG: ETag accquired from header +(Pypdl) 20-07-24 18:04:39 - DEBUG: Segment table created: {'url': 'https://safebooru.org//samples/4619/sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg', 'segments': 2, 'overwrite': True, 0: {'start': 0, 'end': 229700, 'segment_size': 229701, 'segment_path': 'sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg.0'}, 1: {'start': 229701, 'end': 459402, 'segment_size': 229702, 'segment_path': 'sample_3cde1a70e1edb5f365d8166db39262196d6c45ba.jpg.1'}} +(Pypdl) 20-07-24 18:04:39 - DEBUG: Initiated waiting loop +(Pypdl) 20-07-24 18:04:39 - DEBUG: Multi-Segment download started +(Pypdl) 20-07-24 18:04:39 - DEBUG: Downloaded all segments +(Pypdl) 20-07-24 18:04:40 - DEBUG: Combining files +(Pypdl) 20-07-24 18:04:40 - DEBUG: Exit waiting loop, download completed