From 680704f5db3fde5fa412ae24227d1e2f6c3175e0 Mon Sep 17 00:00:00 2001 From: Amirhossein Banavi Date: Sat, 13 Jan 2024 20:12:39 +0330 Subject: [PATCH 1/2] add support for resuming download --- .env.sample | 3 +- .gitignore | 1 + auth.py | 20 ++++++--- constants.py | 3 ++ data.py | 18 ++++++++ downloader.py | 107 +++++++++++++++++++++++++++++++++-------------- url_generator.py | 40 ++++++++++++------ videos_list.py | 19 +++++++-- 8 files changed, 157 insertions(+), 54 deletions(-) create mode 100644 data.py diff --git a/.env.sample b/.env.sample index e2c44db..d1caa6a 100644 --- a/.env.sample +++ b/.env.sample @@ -1,4 +1,5 @@ VOORIVEX_USERNAME=VOORIVEX_USERNAME VOORIVEX_PASSWORD=VOORIVEX_PASSWORD VOORIVEX_TARGET_DIRECTORY=DIRECTORY_TO_DOWNLOAD_LEAVE_EMPTY_TO_DOWNLOAD_ALL -SAVE_DIRECTORY=videos \ No newline at end of file +SAVE_DIRECTORY=videos +LOG_DL_FILE=.downloaded \ No newline at end of file diff --git a/.gitignore b/.gitignore index d147285..bc36cfb 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__ videos/ .env venv +.downloaded \ No newline at end of file diff --git a/auth.py b/auth.py index e960408..4afad39 100644 --- a/auth.py +++ b/auth.py @@ -21,10 +21,14 @@ def fetch_buildId(): def get_access_token(username, password): headers_login = {"Content-Type": "application/json"} data_login = {"username": username, "password": password} - response_login = requests.post(constants.LOGIN_API_URL, headers=headers_login, json=data_login) + response_login = requests.post( + constants.LOGIN_API_URL, headers=headers_login, json=data_login + ) if response_login.status_code != 201: - error_message = f"Login request failed with status code {response_login.status_code}." + error_message = ( + f"Login request failed with status code {response_login.status_code}." + ) try: error_details = response_login.json().get("error", "") error_message += f" Details: {error_details}" @@ -73,9 +77,13 @@ def auth(): time.sleep(1) # Get Access Token - success, access_token = get_access_token(constants.ACADEMY_USERNAME, constants.ACADEMY_PASSWORD) + success, access_token = get_access_token( + constants.ACADEMY_USERNAME, constants.ACADEMY_PASSWORD + ) if not success: - print(access_token) # In case of failure, the access_token variable will contain the error message. + print( + access_token + ) # In case of failure, the access_token variable will contain the error message. exit(1) print("Successfully logged in and obtained access token.") @@ -84,7 +92,9 @@ def auth(): # Fetch Next Token success, bearer_token = fetch_next_token(access_token, buildId) if not success: - print(bearer_token) # In case of failure, the bearer_token variable will contain the error message. + print( + bearer_token + ) # In case of failure, the bearer_token variable will contain the error message. exit(1) print("Next token fetched successfully.") diff --git a/constants.py b/constants.py index b266f4b..9f2be13 100644 --- a/constants.py +++ b/constants.py @@ -4,6 +4,7 @@ load_dotenv() + LOGIN_PAGE_URL = "https://voorivex.academy/pages/login/" LOGIN_API_URL = "https://api.voorivex.academy/auth/login" NEXT_TOKEN_URL = "https://voorivex.academy/_next/data/{}/download.json" @@ -18,3 +19,5 @@ ACADEMY_PASSWORD = os.getenv("VOORIVEX_PASSWORD") ACADEMY_TARGET_DIRECTORY = os.getenv("VOORIVEX_TARGET_DIRECTORY", "") SAVE_DIRECTORY = os.getenv("SAVE_DIRECTORY", "videos") + +LOG_DL_FILE = os.getenv("LOG_DL_FILE", ".downloaded") diff --git a/data.py b/data.py new file mode 100644 index 0000000..02bca34 --- /dev/null +++ b/data.py @@ -0,0 +1,18 @@ +import constants + + +def downloaded_videos(): + try: + with open(constants.LOG_DL_FILE, "r") as file: + downloaded_files = file.read().splitlines() + except FileNotFoundError: + # create file + open(constants.LOG_DL_FILE, "w").close() + downloaded_files = [] + + return downloaded_files + + +def log_download(key): + with open(constants.LOG_DL_FILE, "a") as file: + file.write(key + "\n") diff --git a/downloader.py b/downloader.py index 08fdb00..59eef4e 100644 --- a/downloader.py +++ b/downloader.py @@ -5,6 +5,7 @@ from tqdm import tqdm import constants +from data import downloaded_videos, log_download from url_generator import process_download_url from videos_list import get_videos_list @@ -12,41 +13,76 @@ def download_video(video_details): key = video_details.get("key", "") url = video_details.get("url", "") - video_name = video_details.get("title", "") - target_path = os.path.join(constants.SAVE_DIRECTORY, key) # Construct path from 'key' + target_path = os.path.join( + constants.SAVE_DIRECTORY, key + ) # Construct path from 'key' target_directory = os.path.dirname(target_path) # Get directory name without file - print(f"Video Key: {key}") - print(f"{video_name}: Downloading video...") - if not os.path.exists(target_directory): os.makedirs(target_directory) - response = requests.get(url, stream=True) + # if file exists but size doesn't match, resume download + headers = {} + existing_file_size = 0 + if os.path.exists(target_path): + # Get the existing file size + existing_file_size = os.path.getsize(target_path) + + # Get the file size with HEAD request + with requests.head(url) as response: + if response.status_code == 200: + remote_file_size = int(requests.head(url).headers["Content-Length"]) + else: + return ( + False, + f"Failed to get the video size with status code {response.status_code}.", + ) + + if existing_file_size == remote_file_size: + return True, f"Video already exists at {target_path}" + elif existing_file_size < remote_file_size: + print(f"{key}: Partially downloaded file found. Resuming download...") + # Set the starting point to the size of the existing file + headers = {"Range": f"bytes={existing_file_size}-"} + else: + print( + f"{key}: File already exists but size is not valid. Deleting and downloading..." + ) + os.remove(target_path) + + print(f"{key}: Starting download...") + + with requests.get(url, stream=True, headers=headers) as response: + if response.status_code not in [200, 206]: + error_message = f"Failed to start the video download with status code {response.status_code}." + try: + error_details = response.json().get("error", "") + error_message += f" Details: {error_details}" + except: + pass # If there's an error parsing the JSON, we'll just use the generic error message. + return False, error_message + + total_size = existing_file_size + int(response.headers.get("content-length", 0)) + block_size = 8192 # 8KB per piece + progress_bar = tqdm( + initial=existing_file_size, + total=total_size, + unit="iB", + unit_scale=True, + desc=key, + ) - if response.status_code != 200: - error_message = f"Failed to start the video download with status code {response.status_code}." try: - error_details = response.json().get("error", "") - error_message += f" Details: {error_details}" - except: - pass # If there's an error parsing the JSON, we'll just use the generic error message. - return False, error_message - - total_size = int(response.headers.get("content-length", 0)) - block_size = 8192 # 8KB per piece - progress_bar = tqdm(total=total_size, unit="iB", unit_scale=True) - - try: - with open(target_path, "wb") as video_file: - for chunk in response.iter_content(block_size): - progress_bar.update(len(chunk)) - video_file.write(chunk) - except Exception as e: - progress_bar.close() - return False, f"Error during writing the video file: {str(e)}" + with open(target_path, "ab") as video_file: + for chunk in response.iter_content(block_size): + if chunk: + progress_bar.update(len(chunk)) + video_file.write(chunk) + except Exception as e: + progress_bar.close() + return False, f"Error during writing the video file: {str(e)}" - progress_bar.close() + progress_bar.close() if total_size != 0 and progress_bar.n != total_size: return False, "Mismatch in downloaded content size." @@ -58,6 +94,7 @@ def download_videos(bearer_token): target_directory = constants.ACADEMY_TARGET_DIRECTORY success, videos_list = get_videos_list(bearer_token, target_directory) + total_videos = len(videos_list) if not success: print(videos_list) exit(1) @@ -67,18 +104,22 @@ def download_videos(bearer_token): exit(2) else: directory = target_directory if target_directory else "root" - print(f"Found {len(videos_list)} videos in {directory} directory.") + print(f"Found {total_videos} videos in {directory} directory.") print(f"Saving videos to {constants.SAVE_DIRECTORY}") + completed_videos = downloaded_videos() + # Loop through each file key and download the video for idx, file_key in enumerate(videos_list, start=1): # if file_key exists, skip it - if os.path.exists(os.path.join(constants.SAVE_DIRECTORY, file_key)): - print(f"File {idx} of {len(videos_list)} ({file_key}) already exists. Skipping...") + if file_key in completed_videos: + print( + f"{file_key}: File {idx} of {total_videos}, found in log file. Skipping..." + ) continue - print(f"Downloading {idx} of {len(videos_list)} videos...") + print(f"{file_key}: File {idx} of {total_videos} processing...") video_details = process_download_url(bearer_token, file_key) time.sleep(1) @@ -86,7 +127,9 @@ def download_videos(bearer_token): # Download the video success, result_or_error = download_video(video_details) if not success: + print(f"{file_key}: File {idx} of {total_videos}, failed to download.") print(f"\nERROR: {result_or_error}") exit(1) else: - print(f"\n{result_or_error}") + log_download(file_key) + print(f"{file_key}: {result_or_error}") diff --git a/url_generator.py b/url_generator.py index 9f76f7b..a8028b9 100644 --- a/url_generator.py +++ b/url_generator.py @@ -8,7 +8,9 @@ def remove_previous_video(bearer_token, video_key): headers_remove = {"Authorization": f"Bearer {bearer_token}"} data_remove = {"key": video_key} - response_remove = requests.post(constants.REMOVE_URL, headers=headers_remove, json=data_remove) + response_remove = requests.post( + constants.REMOVE_URL, headers=headers_remove, json=data_remove + ) if response_remove.status_code != 201: error_message = f"Removing the previous video failed with status code {response_remove.status_code}." @@ -25,7 +27,9 @@ def remove_previous_video(bearer_token, video_key): def request_video_generation(bearer_token, file_key): headers_generate = {"Authorization": f"Bearer {bearer_token}"} data_generate = {"key": file_key} - response_generate = requests.post(constants.LINK_GENERATOR_URL, headers=headers_generate, json=data_generate) + response_generate = requests.post( + constants.LINK_GENERATOR_URL, headers=headers_generate, json=data_generate + ) if response_generate.status_code != 201: error_message = f"Video generation request failed with status code {response_generate.status_code}." @@ -39,21 +43,23 @@ def request_video_generation(bearer_token, file_key): return True, None -def fetch_active_video_link(bearer_token, video_name): +def fetch_active_video_link(bearer_token, video_name, key): timeout = 60 # 60 seconds step_interval = 3 # every 3 seconds elapsed_time = 0 - print(f"{video_name}: Checking for active download link...") + print(f"{key}: Checking for active download link...") headers_video = {"Authorization": f"Bearer {bearer_token}"} while elapsed_time <= timeout: time.sleep(step_interval) elapsed_time += step_interval - response_video = requests.get(constants.GET_ACTIVE_LINK_URL, headers=headers_video) + response_video = requests.get( + constants.GET_ACTIVE_LINK_URL, headers=headers_video + ) if response_video.status_code != 200: - error_message = f"{video_name}: Failed to fetch the active video link with status code {response_video.status_code}." + error_message = f"{key}: Failed to fetch the active video link with status code {response_video.status_code}." try: error_details = response_video.json().get("error", "") error_message += f" Details: {error_details}" @@ -64,7 +70,7 @@ def fetch_active_video_link(bearer_token, video_name): video_data = response_video.json() if video_data.get("type") == "pending": - print(f"{video_name}: Download link generation is still pending. Waiting...") + print(f"{key}: Download link generation is still pending. Waiting...") continue if video_data.get("type") == "active" and video_data.get("videos"): @@ -72,10 +78,13 @@ def fetch_active_video_link(bearer_token, video_name): title = video_details.get("title", "") if title == video_name: - print(f"{video_name}: Active download link found.") + print(f"{key}: Active download link found.") return True, video_details - return False, f"{video_name}: Timeout reached without receiving an active video link." + return ( + False, + f"{key}: Timeout reached without receiving an active video link.", + ) def process_download_url(bearer_token, file_key): @@ -84,7 +93,8 @@ def process_download_url(bearer_token, file_key): # Remove previous video success, error_message = remove_previous_video(bearer_token, file_key) if not success: - print(error_message) # In case of failure, error_message will contain the specific error. + # In case of failure, error_message will contain the specific error. + print(error_message) exit(1) time.sleep(1) @@ -92,15 +102,19 @@ def process_download_url(bearer_token, file_key): # Request to generate the video download link success, error_message = request_video_generation(bearer_token, file_key) if not success: - print(error_message) # In case of failure, error_message will contain the specific error. + # In case of failure, error_message will contain the specific error. + print(error_message) exit(1) time.sleep(1) # Fetch the active video link - success, result_or_error = fetch_active_video_link(bearer_token, video_name) + success, result_or_error = fetch_active_video_link( + bearer_token, video_name, file_key + ) if not success: - print(result_or_error) # In case of failure, result_or_error will contain the specific error message. + # In case of failure, result_or_error will contain the specific error message. + print(result_or_error) exit(1) video_details = result_or_error # In case of success, result_or_error contains the video_details. return video_details diff --git a/videos_list.py b/videos_list.py index a499808..fe0fe9a 100644 --- a/videos_list.py +++ b/videos_list.py @@ -22,17 +22,30 @@ def extract_file_keys(folder, target_directory=""): def get_videos_list(bearer_token, target_directory=""): # Fetch all the files - response = requests.get(f"{constants.VIDEOS_LIST_URL}", headers={"Authorization": f"Bearer {bearer_token}"}) + response = requests.get( + f"{constants.VIDEOS_LIST_URL}", + headers={"Authorization": f"Bearer {bearer_token}"}, + ) video_list = response.json() all_file_keys = [] # Check if a specific target_directory is provided if target_directory: - root_folder = next((folder for folder in video_list if folder["key"] == target_directory.split("/")[0]), None) + root_folder = next( + ( + folder + for folder in video_list + if folder["key"] == target_directory.split("/")[0] + ), + None, + ) if root_folder: all_file_keys.extend(extract_file_keys(root_folder, target_directory)) else: - return False, f"The target directory to download videos not found: {target_directory}" + return ( + False, + f"The target directory to download videos not found: {target_directory}", + ) else: for root_folder in video_list: all_file_keys.extend(extract_file_keys(root_folder)) From ef4b88581d417f10067d2050330ae4edeb453cc6 Mon Sep 17 00:00:00 2001 From: Amirhossein Banavi Date: Sat, 13 Jan 2024 20:21:50 +0330 Subject: [PATCH 2/2] cleanup --- auth.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/auth.py b/auth.py index 4afad39..67aaa00 100644 --- a/auth.py +++ b/auth.py @@ -81,9 +81,8 @@ def auth(): constants.ACADEMY_USERNAME, constants.ACADEMY_PASSWORD ) if not success: - print( - access_token - ) # In case of failure, the access_token variable will contain the error message. + # In case of failure, the access_token variable will contain the error message. + print(access_token) exit(1) print("Successfully logged in and obtained access token.") @@ -92,9 +91,8 @@ def auth(): # Fetch Next Token success, bearer_token = fetch_next_token(access_token, buildId) if not success: - print( - bearer_token - ) # In case of failure, the bearer_token variable will contain the error message. + # In case of failure, the bearer_token variable will contain the error message. + print(bearer_token) exit(1) print("Next token fetched successfully.")