✨ Resume Download #3

Open · wants to merge 2 commits into main
3 changes: 2 additions & 1 deletion .env.sample
@@ -1,4 +1,5 @@
VOORIVEX_USERNAME=VOORIVEX_USERNAME
VOORIVEX_PASSWORD=VOORIVEX_PASSWORD
VOORIVEX_TARGET_DIRECTORY=DIRECTORY_TO_DOWNLOAD_LEAVE_EMPTY_TO_DOWNLOAD_ALL
SAVE_DIRECTORY=videos
SAVE_DIRECTORY=videos
LOG_DL_FILE=.downloaded
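
For reference, a filled-in .env based on the updated sample might look like the following. Every value below is a placeholder (the credentials and directory are hypothetical); per the sample, leaving VOORIVEX_TARGET_DIRECTORY empty downloads everything:

VOORIVEX_USERNAME=alice@example.com
VOORIVEX_PASSWORD=s3cret-placeholder
VOORIVEX_TARGET_DIRECTORY=course-a/week-1
SAVE_DIRECTORY=videos
LOG_DL_FILE=.downloaded
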
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@ __pycache__
videos/
.env
venv
.downloaded
18 changes: 13 additions & 5 deletions auth.py
@@ -21,10 +21,14 @@ def fetch_buildId():
def get_access_token(username, password):
headers_login = {"Content-Type": "application/json"}
data_login = {"username": username, "password": password}
response_login = requests.post(constants.LOGIN_API_URL, headers=headers_login, json=data_login)
response_login = requests.post(
constants.LOGIN_API_URL, headers=headers_login, json=data_login
)

if response_login.status_code != 201:
error_message = f"Login request failed with status code {response_login.status_code}."
error_message = (
f"Login request failed with status code {response_login.status_code}."
)
try:
error_details = response_login.json().get("error", "")
error_message += f" Details: {error_details}"
@@ -73,9 +77,12 @@ def auth():
time.sleep(1)

# Get Access Token
success, access_token = get_access_token(constants.ACADEMY_USERNAME, constants.ACADEMY_PASSWORD)
success, access_token = get_access_token(
constants.ACADEMY_USERNAME, constants.ACADEMY_PASSWORD
)
if not success:
print(access_token) # In case of failure, the access_token variable will contain the error message.
# In case of failure, the access_token variable will contain the error message.
print(access_token)
exit(1)
print("Successfully logged in and obtained access token.")

@@ -84,7 +91,8 @@ def auth():
# Fetch Next Token
success, bearer_token = fetch_next_token(access_token, buildId)
if not success:
print(bearer_token) # In case of failure, the bearer_token variable will contain the error message.
# In case of failure, the bearer_token variable will contain the error message.
print(bearer_token)
exit(1)
print("Next token fetched successfully.")

3 changes: 3 additions & 0 deletions constants.py
@@ -4,6 +4,7 @@

load_dotenv()


LOGIN_PAGE_URL = "https://voorivex.academy/pages/login/"
LOGIN_API_URL = "https://api.voorivex.academy/auth/login"
NEXT_TOKEN_URL = "https://voorivex.academy/_next/data/{}/download.json"
@@ -18,3 +19,5 @@
ACADEMY_PASSWORD = os.getenv("VOORIVEX_PASSWORD")
ACADEMY_TARGET_DIRECTORY = os.getenv("VOORIVEX_TARGET_DIRECTORY", "")
SAVE_DIRECTORY = os.getenv("SAVE_DIRECTORY", "videos")

LOG_DL_FILE = os.getenv("LOG_DL_FILE", ".downloaded")
18 changes: 18 additions & 0 deletions data.py
@@ -0,0 +1,18 @@
import constants


def downloaded_videos():
try:
with open(constants.LOG_DL_FILE, "r") as file:
downloaded_files = file.read().splitlines()
except FileNotFoundError:
# create file
open(constants.LOG_DL_FILE, "w").close()
downloaded_files = []

return downloaded_files


def log_download(key):
with open(constants.LOG_DL_FILE, "a") as file:
file.write(key + "\n")
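
The new data.py module keeps a plain-text log of completed downloads, one key per line, at the path given by LOG_DL_FILE. A minimal sketch of how the two helpers are meant to be used together, with a hypothetical key list standing in for the real API response:

from data import downloaded_videos, log_download

# Hypothetical keys; in the real flow these come from get_videos_list().
pending_keys = ["course-a/intro.mp4", "course-a/lab-1.mp4"]

completed = downloaded_videos()  # returns [] and creates the log file on first run
for key in pending_keys:
    if key in completed:
        continue  # logged in a previous run, so skip it
    # ... download the video here ...
    log_download(key)  # append the key so future runs skip it
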
107 changes: 75 additions & 32 deletions downloader.py
@@ -5,48 +5,84 @@
from tqdm import tqdm

import constants
from data import downloaded_videos, log_download
from url_generator import process_download_url
from videos_list import get_videos_list


def download_video(video_details):
key = video_details.get("key", "")
url = video_details.get("url", "")
video_name = video_details.get("title", "")
target_path = os.path.join(constants.SAVE_DIRECTORY, key) # Construct path from 'key'
target_path = os.path.join(
constants.SAVE_DIRECTORY, key
) # Construct path from 'key'
target_directory = os.path.dirname(target_path) # Get directory name without file

print(f"Video Key: {key}")
print(f"{video_name}: Downloading video...")

if not os.path.exists(target_directory):
os.makedirs(target_directory)

response = requests.get(url, stream=True)
# if file exists but size doesn't match, resume download
headers = {}
existing_file_size = 0
if os.path.exists(target_path):
# Get the existing file size
existing_file_size = os.path.getsize(target_path)

# Get the file size with HEAD request
with requests.head(url) as response:
if response.status_code == 200:
remote_file_size = int(requests.head(url).headers["Content-Length"])
else:
return (
False,
f"Failed to get the video size with status code {response.status_code}.",
)

if existing_file_size == remote_file_size:
return True, f"Video already exists at {target_path}"
elif existing_file_size < remote_file_size:
print(f"{key}: Partially downloaded file found. Resuming download...")
# Set the starting point to the size of the existing file
headers = {"Range": f"bytes={existing_file_size}-"}
else:
print(
f"{key}: File already exists but size is not valid. Deleting and downloading..."
)
os.remove(target_path)

print(f"{key}: Starting download...")

with requests.get(url, stream=True, headers=headers) as response:
if response.status_code not in [200, 206]:
error_message = f"Failed to start the video download with status code {response.status_code}."
try:
error_details = response.json().get("error", "")
error_message += f" Details: {error_details}"
except:
pass # If there's an error parsing the JSON, we'll just use the generic error message.
return False, error_message

total_size = existing_file_size + int(response.headers.get("content-length", 0))
block_size = 8192 # 8KB per piece
progress_bar = tqdm(
initial=existing_file_size,
total=total_size,
unit="iB",
unit_scale=True,
desc=key,
)

if response.status_code != 200:
error_message = f"Failed to start the video download with status code {response.status_code}."
try:
error_details = response.json().get("error", "")
error_message += f" Details: {error_details}"
except:
pass # If there's an error parsing the JSON, we'll just use the generic error message.
return False, error_message

total_size = int(response.headers.get("content-length", 0))
block_size = 8192 # 8KB per piece
progress_bar = tqdm(total=total_size, unit="iB", unit_scale=True)

try:
with open(target_path, "wb") as video_file:
for chunk in response.iter_content(block_size):
progress_bar.update(len(chunk))
video_file.write(chunk)
except Exception as e:
progress_bar.close()
return False, f"Error during writing the video file: {str(e)}"
with open(target_path, "ab") as video_file:
for chunk in response.iter_content(block_size):
if chunk:
progress_bar.update(len(chunk))
video_file.write(chunk)
except Exception as e:
progress_bar.close()
return False, f"Error during writing the video file: {str(e)}"

progress_bar.close()
progress_bar.close()

if total_size != 0 and progress_bar.n != total_size:
return False, "Mismatch in downloaded content size."
@@ -58,6 +94,7 @@ def download_videos(bearer_token):
target_directory = constants.ACADEMY_TARGET_DIRECTORY

success, videos_list = get_videos_list(bearer_token, target_directory)
total_videos = len(videos_list)
if not success:
print(videos_list)
exit(1)
@@ -67,26 +104,32 @@
exit(2)
else:
directory = target_directory if target_directory else "root"
print(f"Found {len(videos_list)} videos in {directory} directory.")
print(f"Found {total_videos} videos in {directory} directory.")

print(f"Saving videos to {constants.SAVE_DIRECTORY}")

completed_videos = downloaded_videos()

# Loop through each file key and download the video
for idx, file_key in enumerate(videos_list, start=1):
# if file_key exists, skip it
if os.path.exists(os.path.join(constants.SAVE_DIRECTORY, file_key)):
print(f"File {idx} of {len(videos_list)} ({file_key}) already exists. Skipping...")
if file_key in completed_videos:
print(
f"{file_key}: File {idx} of {total_videos}, found in log file. Skipping..."
)
continue

print(f"Downloading {idx} of {len(videos_list)} videos...")
print(f"{file_key}: File {idx} of {total_videos} processing...")

video_details = process_download_url(bearer_token, file_key)
time.sleep(1)

# Download the video
success, result_or_error = download_video(video_details)
if not success:
print(f"{file_key}: File {idx} of {total_videos}, failed to download.")
print(f"\nERROR: {result_or_error}")
exit(1)
else:
print(f"\n{result_or_error}")
log_download(file_key)
print(f"{file_key}: {result_or_error}")
40 changes: 27 additions & 13 deletions url_generator.py
@@ -8,7 +8,9 @@
def remove_previous_video(bearer_token, video_key):
headers_remove = {"Authorization": f"Bearer {bearer_token}"}
data_remove = {"key": video_key}
response_remove = requests.post(constants.REMOVE_URL, headers=headers_remove, json=data_remove)
response_remove = requests.post(
constants.REMOVE_URL, headers=headers_remove, json=data_remove
)

if response_remove.status_code != 201:
error_message = f"Removing the previous video failed with status code {response_remove.status_code}."
@@ -25,7 +27,9 @@ def remove_previous_video(bearer_token, video_key):
def request_video_generation(bearer_token, file_key):
headers_generate = {"Authorization": f"Bearer {bearer_token}"}
data_generate = {"key": file_key}
response_generate = requests.post(constants.LINK_GENERATOR_URL, headers=headers_generate, json=data_generate)
response_generate = requests.post(
constants.LINK_GENERATOR_URL, headers=headers_generate, json=data_generate
)

if response_generate.status_code != 201:
error_message = f"Video generation request failed with status code {response_generate.status_code}."
@@ -39,21 +43,23 @@ def request_video_generation(bearer_token, file_key):
return True, None


def fetch_active_video_link(bearer_token, video_name):
def fetch_active_video_link(bearer_token, video_name, key):
timeout = 60 # 60 seconds
step_interval = 3 # every 3 seconds
elapsed_time = 0

print(f"{video_name}: Checking for active download link...")
print(f"{key}: Checking for active download link...")
headers_video = {"Authorization": f"Bearer {bearer_token}"}

while elapsed_time <= timeout:
time.sleep(step_interval)
elapsed_time += step_interval

response_video = requests.get(constants.GET_ACTIVE_LINK_URL, headers=headers_video)
response_video = requests.get(
constants.GET_ACTIVE_LINK_URL, headers=headers_video
)
if response_video.status_code != 200:
error_message = f"{video_name}: Failed to fetch the active video link with status code {response_video.status_code}."
error_message = f"{key}: Failed to fetch the active video link with status code {response_video.status_code}."
try:
error_details = response_video.json().get("error", "")
error_message += f" Details: {error_details}"
@@ -64,18 +70,21 @@ def fetch_active_video_link(bearer_token, video_name):
video_data = response_video.json()

if video_data.get("type") == "pending":
print(f"{video_name}: Download link generation is still pending. Waiting...")
print(f"{key}: Download link generation is still pending. Waiting...")
continue

if video_data.get("type") == "active" and video_data.get("videos"):
video_details = video_data["videos"][0]
title = video_details.get("title", "")

if title == video_name:
print(f"{video_name}: Active download link found.")
print(f"{key}: Active download link found.")
return True, video_details

return False, f"{video_name}: Timeout reached without receiving an active video link."
return (
False,
f"{key}: Timeout reached without receiving an active video link.",
)


def process_download_url(bearer_token, file_key):
@@ -84,23 +93,28 @@ def process_download_url(bearer_token, file_key):
# Remove previous video
success, error_message = remove_previous_video(bearer_token, file_key)
if not success:
print(error_message) # In case of failure, error_message will contain the specific error.
# In case of failure, error_message will contain the specific error.
print(error_message)
exit(1)

time.sleep(1)

# Request to generate the video download link
success, error_message = request_video_generation(bearer_token, file_key)
if not success:
print(error_message) # In case of failure, error_message will contain the specific error.
# In case of failure, error_message will contain the specific error.
print(error_message)
exit(1)

time.sleep(1)

# Fetch the active video link
success, result_or_error = fetch_active_video_link(bearer_token, video_name)
success, result_or_error = fetch_active_video_link(
bearer_token, video_name, file_key
)
if not success:
print(result_or_error) # In case of failure, result_or_error will contain the specific error message.
# In case of failure, result_or_error will contain the specific error message.
print(result_or_error)
exit(1)
video_details = result_or_error # In case of success, result_or_error contains the video_details.
return video_details
19 changes: 16 additions & 3 deletions videos_list.py
@@ -22,17 +22,30 @@ def extract_file_keys(folder, target_directory=""):

def get_videos_list(bearer_token, target_directory=""):
# Fetch all the files
response = requests.get(f"{constants.VIDEOS_LIST_URL}", headers={"Authorization": f"Bearer {bearer_token}"})
response = requests.get(
f"{constants.VIDEOS_LIST_URL}",
headers={"Authorization": f"Bearer {bearer_token}"},
)
video_list = response.json()
all_file_keys = []

# Check if a specific target_directory is provided
if target_directory:
root_folder = next((folder for folder in video_list if folder["key"] == target_directory.split("/")[0]), None)
root_folder = next(
(
folder
for folder in video_list
if folder["key"] == target_directory.split("/")[0]
),
None,
)
if root_folder:
all_file_keys.extend(extract_file_keys(root_folder, target_directory))
else:
return False, f"The target directory to download videos not found: {target_directory}"
return (
False,
f"The target directory to download videos not found: {target_directory}",
)
else:
for root_folder in video_list:
all_file_keys.extend(extract_file_keys(root_folder))