From a81601c41117c8a5c4460736f8d2a0ea5b6f0df9 Mon Sep 17 00:00:00 2001 From: Jishnu M Date: Wed, 19 Jun 2024 13:59:08 +0530 Subject: [PATCH] Introduce wait flag, update docs --- README.md | 23 +++++++++++++++++++++-- pypdl/__init__.py | 2 +- pypdl/pypdl_factory.py | 3 +-- pypdl/pypdl_manager.py | 12 +++++++----- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 1b53310..416d1f9 100644 --- a/README.md +++ b/README.md @@ -187,10 +187,11 @@ if dl.completed: else: print('Hash is invalid') ``` -An example of using Pypdl object with `allow_reuse` set to `True` and custom logger: +An example of using Pypdl object to get size of the files with `allow_reuse` set to `True` and custom logger: ```py import logging +import time from pypdl import Pypdl urls = [ @@ -204,12 +205,27 @@ urls = [ # create a custom logger logger = logging.getLogger('custom') +size = [] + # create a pypdl object dl = Pypdl(allow_reuse=True, logger=logger) for url in urls: - dl.start(url, block=True) + dl.start(url, block=False) + + # waiting for the size and other preliminary data to be retrieved + while dl.wait: + time.sleep(0.1) + + # get the size of the file and add it to size list + size.append(dl.size) + + # do something + + while not dl.completed: + print(dl.progress) +print(size) # shutdown the downloader, this is essential when allow_reuse is enabled dl.shutdown() @@ -313,6 +329,8 @@ The `Pypdl` class represents a file downloader that can download a file from a g - `remaining`: The amount of data remaining to be downloaded, in bytes. - `failed`: A flag that indicates if the download failed. - `completed`: A flag that indicates if the download is complete. +- `wait`: A flag that is `True` while preliminary information (e.g., file size) is still being retrieved and becomes `False` once it is available. +- `logger`: The logger object used for logging messages. #### Methods @@ -379,6 +397,7 @@ The `PypdlFactory` class manages multiple instances of the `Pypdl` downloader. 
I - `completed`: A list of tuples where each tuple contains the URL of the download and the result of the download. - `failed`: A list of URLs for which the download failed. - `remaining`: A list of remaining download tasks. +- `logger`: The logger object used for logging messages. #### Methods diff --git a/pypdl/__init__.py b/pypdl/__init__.py index 6ae884c..24dad5e 100644 --- a/pypdl/__init__.py +++ b/pypdl/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.4.1" +__version__ = "1.4.2" from .pypdl_manager import Pypdl from .pypdl_factory import PypdlFactory diff --git a/pypdl/pypdl_factory.py b/pypdl/pypdl_factory.py index 6ed5382..eef9c67 100644 --- a/pypdl/pypdl_factory.py +++ b/pypdl/pypdl_factory.py @@ -154,12 +154,11 @@ def _execute(self, tasks, display): def _add_future(self, instance, task, futures): self.logger.debug("Adding new task") url, *kwargs = task - instance._status = None kwargs = kwargs[0] if kwargs else {} kwargs.update({"block": False, "display": False, "overwrite": False}) future = instance.start(url, **kwargs) futures[future] = (instance, url) - while instance._status is None: + while instance.wait: time.sleep(0.1) self.logger.debug("Added new task: %s", url) diff --git a/pypdl/pypdl_manager.py b/pypdl/pypdl_manager.py index cdf484c..df6922c 100644 --- a/pypdl/pypdl_manager.py +++ b/pypdl/pypdl_manager.py @@ -39,7 +39,6 @@ def __init__( ): self._pool = ThreadPoolExecutor(max_workers=2) self._workers = [] - self._status = 0 self._interrupt = Event() self._stop = False self._kwargs = { @@ -58,6 +57,7 @@ def __init__( self.remaining = None self.failed = False self.completed = False + self.wait = True self.logger = logger def start( @@ -99,7 +99,6 @@ def download(): for i in range(retries + 1): try: _url = mirror_func() if i > 0 and callable(mirror_func) else url - self._reset() self.logger.debug("Downloading, url: %s attempt: %s", _url, (i + 1)) result = self._execute( _url, @@ -116,16 +115,18 @@ def download(): print(f"Time elapsed: 
{seconds_to_hms(self.time_spent)}") return result + self._reset() time.sleep(3) except Exception as e: self.logger.error("(%s) [%s]", e.__class__.__name__, e) - self._status = 1 + self.wait = False self.failed = True self.logger.debug("Download failed, url: %s", _url) return None + self._reset() if self._allow_reuse: future = self._pool.submit(download) else: @@ -163,6 +164,7 @@ def _reset(self): self.remaining = None self.failed = False self.completed = False + self.wait = True self.logger.debug("Reseted download manager") def _execute( @@ -175,7 +177,7 @@ def _execute( ) if not overwrite and Path(file_path).exists(): - self._status = 1 + self.wait = False self.completed = True self.time_spent = time.time() - start_time self.logger.debug("File already exists, download completed") @@ -197,7 +199,7 @@ def _execute( recent_queue = deque([0] * 12, maxlen=12) download_mode = "Multi-Segment" if multisegment else "Single-Segment" interval = 0.5 - self._status = 1 + self.wait = False self.logger.debug("Initiated waiting loop") with ScreenCleaner(display): while True: