✨ Resume Download #3

Open · wants to merge 2 commits into main
3 changes: 2 additions & 1 deletion .env.sample
@@ -1,4 +1,5 @@
VOORIVEX_USERNAME=VOORIVEX_USERNAME
VOORIVEX_PASSWORD=VOORIVEX_PASSWORD
VOORIVEX_TARGET_DIRECTORY=DIRECTORY_TO_DOWNLOAD_LEAVE_EMPTY_TO_DOWNLOAD_ALL
SAVE_DIRECTORY=videos
SAVE_DIRECTORY=videos
LOG_DL_FILE=.downloaded
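
For reference, a filled-in .env based on the updated sample might look like the following. Every value below is a placeholder (the credentials and directory are hypothetical); per the sample, leaving VOORIVEX_TARGET_DIRECTORY empty downloads everything:

VOORIVEX_USERNAME=alice@example.com
VOORIVEX_PASSWORD=s3cret-placeholder
VOORIVEX_TARGET_DIRECTORY=course-a/week-1
SAVE_DIRECTORY=videos
LOG_DL_FILE=.downloaded
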
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@ __pycache__
videos/
.env
venv
.downloaded
18 changes: 13 additions & 5 deletions auth.py
@@ -21,10 +21,14 @@ def fetch_buildId():
def get_access_token(username, password):
headers_login = {"Content-Type": "application/json"}
data_login = {"username": username, "password": password}
response_login = requests.post(constants.LOGIN_API_URL, headers=headers_login, json=data_login)
response_login = requests.post(
constants.LOGIN_API_URL, headers=headers_login, json=data_login
)

if response_login.status_code != 201:
error_message = f"Login request failed with status code {response_login.status_code}."
error_message = (
f"Login request failed with status code {response_login.status_code}."
)
try:
error_details = response_login.json().get("error", "")
error_message += f" Details: {error_details}"
@@ -73,9 +77,12 @@ def auth():
time.sleep(1)

# Get Access Token
success, access_token = get_access_token(constants.ACADEMY_USERNAME, constants.ACADEMY_PASSWORD)
success, access_token = get_access_token(
constants.ACADEMY_USERNAME, constants.ACADEMY_PASSWORD
)
if not success:
print(access_token) # In case of failure, the access_token variable will contain the error message.
# In case of failure, the access_token variable will contain the error message.
print(access_token)
exit(1)
print("Successfully logged in and obtained access token.")

@@ -84,7 +91,8 @@ def auth():
# Fetch Next Token
success, bearer_token = fetch_next_token(access_token, buildId)
if not success:
print(bearer_token) # In case of failure, the bearer_token variable will contain the error message.
# In case of failure, the bearer_token variable will contain the error message.
print(bearer_token)
exit(1)
print("Next token fetched successfully.")

3 changes: 3 additions & 0 deletions constants.py
@@ -4,6 +4,7 @@

load_dotenv()


LOGIN_PAGE_URL = "https://voorivex.academy/pages/login/"
LOGIN_API_URL = "https://api.voorivex.academy/auth/login"
NEXT_TOKEN_URL = "https://voorivex.academy/_next/data/{}/download.json"
@@ -18,3 +19,5 @@
ACADEMY_PASSWORD = os.getenv("VOORIVEX_PASSWORD")
ACADEMY_TARGET_DIRECTORY = os.getenv("VOORIVEX_TARGET_DIRECTORY", "")
SAVE_DIRECTORY = os.getenv("SAVE_DIRECTORY", "videos")

LOG_DL_FILE = os.getenv("LOG_DL_FILE", ".downloaded")
18 changes: 18 additions & 0 deletions data.py
@@ -0,0 +1,18 @@
import constants


def downloaded_videos():
try:
with open(constants.LOG_DL_FILE, "r") as file:
downloaded_files = file.read().splitlines()
except FileNotFoundError:
# create file
open(constants.LOG_DL_FILE, "w").close()
downloaded_files = []

return downloaded_files


def log_download(key):
with open(constants.LOG_DL_FILE, "a") as file:
file.write(key + "\n")
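
The new data.py module keeps a plain-text log of completed downloads, one key per line, at the path given by LOG_DL_FILE. A minimal sketch of how the two helpers are meant to be used together, with a hypothetical key list standing in for the real API response:

from data import downloaded_videos, log_download

# Hypothetical keys; in the real flow these come from get_videos_list().
pending_keys = ["course-a/intro.mp4", "course-a/lab-1.mp4"]

completed = downloaded_videos()  # returns [] and creates the log file on first run
for key in pending_keys:
    if key in completed:
        continue  # logged in a previous run, so skip it
    # ... download the video here ...
    log_download(key)  # append the key so future runs skip it
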
107 changes: 75 additions & 32 deletions downloader.py
@@ -5,48 +5,84 @@
from tqdm import tqdm

import constants
from data import downloaded_videos, log_download
from url_generator import process_download_url
from videos_list import get_videos_list


def download_video(video_details):
key = video_details.get("key", "")
url = video_details.get("url", "")
video_name = video_details.get("title", "")
target_path = os.path.join(constants.SAVE_DIRECTORY, key) # Construct path from 'key'
target_path = os.path.join(
constants.SAVE_DIRECTORY, key
) # Construct path from 'key'
target_directory = os.path.dirname(target_path) # Get directory name without file

print(f"Video Key: {key}")
print(f"{video_name}: Downloading video...")

if not os.path.exists(target_directory):
os.makedirs(target_directory)

response = requests.get(url, stream=True)
# if file exists but size doesn't match, resume download
headers = {}
existing_file_size = 0
if os.path.exists(target_path):
# Get the existing file size
existing_file_size = os.path.getsize(target_path)

# Get the file size with HEAD request
with requests.head(url) as response:
if response.status_code == 200:
remote_file_size = int(requests.head(url).headers["Content-Length"])
else:
return (
False,
f"Failed to get the video size with status code {response.status_code}.",
)

if existing_file_size == remote_file_size:
return True, f"Video already exists at {target_path}"
elif existing_file_size < remote_file_size:
print(f"{key}: Partially downloaded file found. Resuming download...")
# Set the starting point to the size of the existing file
headers = {"Range": f"bytes={existing_file_size}-"}
else:
print(
f"{key}: File already exists but size is not valid. Deleting and downloading..."
)
os.remove(target_path)

print(f"{key}: Starting download...")

with requests.get(url, stream=True, headers=headers) as response:
if response.status_code not in [200, 206]:
error_message = f"Failed to start the video download with status code {response.status_code}."
try:
error_details = response.json().get("error", "")
error_message += f" Details: {error_details}"
except:
pass # If there's an error parsing the JSON, we'll just use the generic error message.
return False, error_message

total_size = existing_file_size + int(response.headers.get("content-length", 0))
block_size = 8192 # 8KB per piece
progress_bar = tqdm(
initial=existing_file_size,
total=total_size,
unit="iB",
unit_scale=True,
desc=key,
)

if response.status_code != 200:
error_message = f"Failed to start the video download with status code {response.status_code}."
try:
error_details = response.json().get("error", "")
error_message += f" Details: {error_details}"
except:
pass # If there's an error parsing the JSON, we'll just use the generic error message.
return False, error_message

total_size = int(response.headers.get("content-length", 0))
block_size = 8192 # 8KB per piece
progress_bar = tqdm(total=total_size, unit="iB", unit_scale=True)

try:
with open(target_path, "wb") as video_file:
for chunk in response.iter_content(block_size):
progress_bar.update(len(chunk))
video_file.write(chunk)
except Exception as e:
progress_bar.close()
return False, f"Error during writing the video file: {str(e)}"
with open(target_path, "ab") as video_file:
for chunk in response.iter_content(block_size):
if chunk:
progress_bar.update(len(chunk))
video_file.write(chunk)
except Exception as e:
progress_bar.close()
return False, f"Error during writing the video file: {str(e)}"

progress_bar.close()
progress_bar.close()

if total_size != 0 and progress_bar.n != total_size:
return False, "Mismatch in downloaded content size."
@@ -58,6 +94,7 @@ def download_videos(bearer_token):
target_directory = constants.ACADEMY_TARGET_DIRECTORY

success, videos_list = get_videos_list(bearer_token, target_directory)
total_videos = len(videos_list)
if not success:
print(videos_list)
exit(1)
@@ -67,26 +104,32 @@
exit(2)
else:
directory = target_directory if target_directory else "root"
print(f"Found {len(videos_list)} videos in {directory} directory.")
print(f"Found {total_videos} videos in {directory} directory.")

print(f"Saving videos to {constants.SAVE_DIRECTORY}")

completed_videos = downloaded_videos()

# Loop through each file key and download the video
for idx, file_key in enumerate(videos_list, start=1):
# if file_key exists, skip it
if os.path.exists(os.path.join(constants.SAVE_DIRECTORY, file_key)):
print(f"File {idx} of {len(videos_list)} ({file_key}) already exists. Skipping...")
if file_key in completed_videos:
print(
f"{file_key}: File {idx} of {total_videos}, found in log file. Skipping..."
)
continue

print(f"Downloading {idx} of {len(videos_list)} videos...")
print(f"{file_key}: File {idx} of {total_videos} processing...")

video_details = process_download_url(bearer_token, file_key)
time.sleep(1)

# Download the video
success, result_or_error = download_video(video_details)
if not success:
print(f"{file_key}: File {idx} of {total_videos}, failed to download.")
print(f"\nERROR: {result_or_error}")
exit(1)
else:
print(f"\n{result_or_error}")
log_download(file_key)
print(f"{file_key}: {result_or_error}")
40 changes: 27 additions & 13 deletions url_generator.py
@@ -8,7 +8,9 @@
def remove_previous_video(bearer_token, video_key):
headers_remove = {"Authorization": f"Bearer {bearer_token}"}
data_remove = {"key": video_key}
response_remove = requests.post(constants.REMOVE_URL, headers=headers_remove, json=data_remove)
response_remove = requests.post(
constants.REMOVE_URL, headers=headers_remove, json=data_remove
)

if response_remove.status_code != 201:
error_message = f"Removing the previous video failed with status code {response_remove.status_code}."
@@ -25,7 +27,9 @@ def remove_previous_video(bearer_token, video_key):
def request_video_generation(bearer_token, file_key):
headers_generate = {"Authorization": f"Bearer {bearer_token}"}
data_generate = {"key": file_key}
response_generate = requests.post(constants.LINK_GENERATOR_URL, headers=headers_generate, json=data_generate)
response_generate = requests.post(
constants.LINK_GENERATOR_URL, headers=headers_generate, json=data_generate
)

if response_generate.status_code != 201:
error_message = f"Video generation request failed with status code {response_generate.status_code}."
@@ -39,21 +43,23 @@ def request_video_generation(bearer_token, file_key):
return True, None


def fetch_active_video_link(bearer_token, video_name):
def fetch_active_video_link(bearer_token, video_name, key):
timeout = 60 # 60 seconds
step_interval = 3 # every 3 seconds
elapsed_time = 0

print(f"{video_name}: Checking for active download link...")
print(f"{key}: Checking for active download link...")
headers_video = {"Authorization": f"Bearer {bearer_token}"}

while elapsed_time <= timeout:
time.sleep(step_interval)
elapsed_time += step_interval

response_video = requests.get(constants.GET_ACTIVE_LINK_URL, headers=headers_video)
response_video = requests.get(
constants.GET_ACTIVE_LINK_URL, headers=headers_video
)
if response_video.status_code != 200:
error_message = f"{video_name}: Failed to fetch the active video link with status code {response_video.status_code}."
error_message = f"{key}: Failed to fetch the active video link with status code {response_video.status_code}."
try:
error_details = response_video.json().get("error", "")
error_message += f" Details: {error_details}"
@@ -64,18 +70,21 @@ def fetch_active_video_link(bearer_token, video_name):
video_data = response_video.json()

if video_data.get("type") == "pending":
print(f"{video_name}: Download link generation is still pending. Waiting...")
print(f"{key}: Download link generation is still pending. Waiting...")
continue

if video_data.get("type") == "active" and video_data.get("videos"):
video_details = video_data["videos"][0]
title = video_details.get("title", "")

if title == video_name:
print(f"{video_name}: Active download link found.")
print(f"{key}: Active download link found.")
return True, video_details

return False, f"{video_name}: Timeout reached without receiving an active video link."
return (
False,
f"{key}: Timeout reached without receiving an active video link.",
)


def process_download_url(bearer_token, file_key):
@@ -84,23 +93,28 @@ def process_download_url(bearer_token, file_key):
# Remove previous video
success, error_message = remove_previous_video(bearer_token, file_key)
if not success:
print(error_message) # In case of failure, error_message will contain the specific error.
# In case of failure, error_message will contain the specific error.
print(error_message)
exit(1)

time.sleep(1)

# Request to generate the video download link
success, error_message = request_video_generation(bearer_token, file_key)
if not success:
print(error_message) # In case of failure, error_message will contain the specific error.
# In case of failure, error_message will contain the specific error.
print(error_message)
exit(1)

time.sleep(1)

# Fetch the active video link
success, result_or_error = fetch_active_video_link(bearer_token, video_name)
success, result_or_error = fetch_active_video_link(
bearer_token, video_name, file_key
)
if not success:
print(result_or_error) # In case of failure, result_or_error will contain the specific error message.
# In case of failure, result_or_error will contain the specific error message.
print(result_or_error)
exit(1)
video_details = result_or_error # In case of success, result_or_error contains the video_details.
return video_details
19 changes: 16 additions & 3 deletions videos_list.py
@@ -22,17 +22,30 @@ def extract_file_keys(folder, target_directory=""):

def get_videos_list(bearer_token, target_directory=""):
# Fetch all the files
response = requests.get(f"{constants.VIDEOS_LIST_URL}", headers={"Authorization": f"Bearer {bearer_token}"})
response = requests.get(
f"{constants.VIDEOS_LIST_URL}",
headers={"Authorization": f"Bearer {bearer_token}"},
)
video_list = response.json()
all_file_keys = []

# Check if a specific target_directory is provided
if target_directory:
root_folder = next((folder for folder in video_list if folder["key"] == target_directory.split("/")[0]), None)
root_folder = next(
(
folder
for folder in video_list
if folder["key"] == target_directory.split("/")[0]
),
None,
)
if root_folder:
all_file_keys.extend(extract_file_keys(root_folder, target_directory))
else:
return False, f"The target directory to download videos not found: {target_directory}"
return (
False,
f"The target directory to download videos not found: {target_directory}",
)
else:
for root_folder in video_list:
all_file_keys.extend(extract_file_keys(root_folder))