Skip to content

Commit

Permalink
switched to kwargs instead of specifying each parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
mjishnu committed Jan 17, 2024
1 parent b839b70 commit 97af6ab
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 132 deletions.
41 changes: 8 additions & 33 deletions pypdl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import requests
from reprint import output
from .utls import (
from utls import (
Multidown,
Singledown,
get_filename_from_headers,
Expand All @@ -20,7 +20,7 @@


class Downloader:
def __init__(self, headers={}, proxies=None, auth=None):
def __init__(self, **kwargs):
"""
Initializes the Downloader object.
Expand All @@ -38,6 +38,7 @@ def __init__(self, headers={}, proxies=None, auth=None):
self._Error = threading.Event() # event to signal any download errors
self._threads = [] # list of all worker threads
self._stop = threading.Event() # event to stop the download
self._kwargs = kwargs # keyword arguments

# public attributes
self.totalMB = 0 # total download size in MB
Expand All @@ -48,9 +49,6 @@ def __init__(self, headers={}, proxies=None, auth=None):
self.doneMB = 0 # amount of data downloaded in MB
self.eta = "99:59:59" # estimated time remaining for download completion
self.remaining = 0 # amount of data remaining to be downloaded
self.headers = headers # headers to be used in the download request
self.proxies = proxies # proxies to be used in the download request
self.auth = auth # proxy auth to be used in the download request
self.Failed = False # flag to indicate if download failure

def _download(
Expand All @@ -73,14 +71,7 @@ def _download(
multithread (bool): Whether to use multi-threaded download.
"""
# get the header information for the file
head = requests.head(
url,
timeout=20,
allow_redirects=True,
headers=self.headers,
proxies=self.proxies,
auth=self.auth,
)
head = requests.head(url, timeout=20, allow_redirects=True, **self._kwargs)

# get file name from headers
filename = get_filename_from_headers(head.headers)
Expand Down Expand Up @@ -114,15 +105,7 @@ def _download(
# if no range available in header or no size from header, use single thread
if not total or not head.headers.get("accept-ranges") or not multithread:
# create single-threaded download object
sd = Singledown(
url,
f_path,
self._stop,
self._Error,
self.headers,
self.proxies,
self.auth,
)
sd = Singledown(url, f_path, self._stop, self._Error, **self._kwargs)
# create single download worker thread
th = threading.Thread(target=sd.worker)
self._workers.append(sd)
Expand Down Expand Up @@ -173,15 +156,7 @@ def _download(
"completed": False,
}
# create multidownload object for each connection
md = Multidown(
self._dic,
i,
self._stop,
self._Error,
self.headers,
self.proxies,
self.auth,
)
md = Multidown(self._dic, i, self._stop, self._Error, **self._kwargs)
# create worker thread for each connection
th = threading.Thread(target=md.worker)
threads.append(th)
Expand Down Expand Up @@ -339,7 +314,7 @@ def start_thread():
if self._Error.is_set():
time.sleep(3)
# reset the downloader object
self.__init__(self.headers, self.proxies, self.auth)
self.__init__(**self._kwargs)

# get a new download URL to retry
_url = url
Expand Down Expand Up @@ -371,7 +346,7 @@ def start_thread():
print("Download Failed!")

# Initialize the downloader with stop Event
self.__init__(self.headers, self.proxies, self.auth)
self.__init__(**self._kwargs)
# Start the download process in a new thread
th = threading.Thread(target=start_thread)
th.start()
Expand Down
123 changes: 24 additions & 99 deletions pypdl/utls.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,36 @@
import os
import threading
import time
import copy
from pathlib import Path
from typing import Any, Dict, Tuple
from typing import Any, Dict
from urllib.parse import unquote, urlparse

import requests
from requests.structures import CaseInsensitiveDict


def get_filename_from_headers(headers: CaseInsensitiveDict[str]):
"""
Extracts desired file name from given Content-Disposition header
Parameters:
headers (dict): headers from requests.head or requests.get response
Returns:
Desired file name
"""
def get_filename_from_headers(headers: Dict) -> str:
content_disposition = headers.get("Content-Disposition")

if content_disposition and "filename=" in content_disposition:
filename_start = content_disposition.index("filename=") + len("filename=")
filename = content_disposition[filename_start:]
# Remove quotes and any leading or trailing spaces
filename = filename.strip(' "')
# Decode URL encoding
filename = unquote(filename)
filename = unquote(filename) # Decode URL encoding
return filename
return None


def get_filename_from_url(url: str):
"""
Extracts desired file name from given URL
Parameters:
url (str): URL of the file
Returns:
Desired file name
"""
def get_filename_from_url(url: str) -> str:
filename = unquote(urlparse(url).path.split("/")[-1])
return filename


def timestring(sec: int) -> str:
"""
Converts seconds to a string formatted as HH:MM:SS.
Parameters:
sec (int): The number of seconds.
Returns:
str: The formatted time string in the format HH:MM:SS.
"""

sec = int(sec)
m, s = divmod(sec, 60)
h, m = divmod(m, 60)
Expand All @@ -73,57 +48,27 @@ def __init__(
id: int,
stop: threading.Event,
error: threading.Event,
headers: Dict[str, str],
proxies: Dict[str, str],
auth: Tuple[str, str],
**kwargs,
):
"""
Initializes the Multidown object.
Parameters:
dic (dict): Dictionary containing download information for all parts.
Format: {start, curr, end, filepath, count, size, url, completed}
id (int): ID of this download part.
stop (threading.Event): Event to stop the download.
error (threading.Event): Event to indicate an error occurred.
headers (dict): User headers to be used in the download request.
"""
self.curr = 0 # current size of downloaded file
self.completed = 0 # whether the download for this part is complete
self.id = id # ID of this download part
self.dic = dic # dictionary containing download information for all parts
self.stop = stop # event to stop the download
self.error = error # event to indicate an error occurred
self.headers = headers # user headers
self.proxies = proxies # user proxies
self.auth = auth # user auth
self.curr = 0 # current size of downloaded part
self.completed = 0
self.id = id
self.dic = dic # {start, curr, end, filepath, size, url, completed}
self.stop = stop
self.error = error
self.kwargs = kwargs # Request Module kwargs

def getval(self, key: str) -> Any:
"""
Get the value of a key from the dictionary.
Parameters:
key (str): The key to retrieve the value for.
Returns:
Any: The value associated with the given key in the dictionary.
"""
return self.dic[self.id][key]

def setval(self, key: str, val: Any):
"""
Set the value of a key in the dictionary.
Parameters:
key (str): The key to set the value for.
val (Any): The value to set for the given key.
"""
self.dic[self.id][key] = val

def worker(self):
"""
Download a part of the file in multiple chunks.
"""

filepath = self.getval("filepath")
path = Path(filepath)
end = self.getval("end")
Expand All @@ -143,21 +88,20 @@ def worker(self):
print("corrupted file!")

url = self.getval("url")
# not updating self.header because it will reference the original headers dict and adding to it will cause bugs
headers = {"range": f"bytes={start}-{end}"}
headers.update(self.headers)
# not updating self.kwargs because it would reference the original headers dict and cause a wrong range
kwargs = copy.deepcopy(self.kwargs)
range_header = {"range": f"bytes={start}-{end}"}
kwargs.setdefault("headers", {}).update(range_header)

if self.curr != self.getval("size"):
try:
# download part
with requests.session() as s, open(path, "ab+") as f:
with s.get(
url,
headers=headers,
proxies=self.proxies,
auth=self.auth,
stream=True,
timeout=20,
**kwargs,
) as r:
for chunk in r.iter_content(1048576): # 1MB
if chunk:
Expand Down Expand Up @@ -187,29 +131,15 @@ def __init__(
path: str,
stop: threading.Event,
error: threading.Event,
headers: Dict[str, str],
proxies: Dict[str, str],
auth: Tuple[str, str],
**kwargs,
):
"""
Initializes the Singledown object.
Parameters:
url (str): The URL of the file to download.
path (str): The path to save the downloaded file.
stop (threading.Event): Event to stop the download.
error (threading.Event): Event to indicate an error occurred.
headers (dict): User headers to be used in the download request.
"""
self.curr = 0 # current size of downloaded file
self.completed = 0 # whether the download is complete
self.url = url # url of the file
self.path = path # path to save the file
self.stop = stop # event to stop the download
self.error = error # event to indicate an error occurred
self.headers = headers # user headers
self.proxies = proxies # user proxies
self.auth = auth # user auth
self.kwargs = kwargs # user kwargs

def worker(self):
"""
Expand All @@ -219,12 +149,7 @@ def worker(self):
try:
# download part
with requests.get(
self.url,
stream=True,
timeout=20,
headers=self.headers,
proxies=self.proxies,
auth=self.auth,
self.url, stream=True, timeout=20, **self.kwargs
) as r, open(self.path, "wb") as file:
for chunk in r.iter_content(1048576): # 1MB
if chunk:
Expand All @@ -236,6 +161,6 @@ def worker(self):
except Exception as e:
self.error.set()
time.sleep(1)
print(f"Error in thread {self.id}: ({e.__class__.__name__}: {e})")
print(f"Error in download thread: ({e.__class__.__name__}: {e})")
if flag:
self.completed = 1

0 comments on commit 97af6ab

Please sign in to comment.