Skip to content

Commit

Permalink
feature/downloading-youtube-videos (#169)
Browse files Browse the repository at this point in the history
* Fixing documentation

* Adding thumbnail compression

* Fixing a small issue related to imports

* Removing duplicate code from the thumbnail compression methods and replacing the large video file

* Moving imports

* make it possible to download youtube videos

* formatting the youtube downloading functions and tests

* adding the conftest

* fixing comments

* fixing more comments

* fixing some more comments
  • Loading branch information
Shak2000 authored Mar 6, 2022
1 parent c5cd965 commit 9f1ede6
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 1 deletion.
3 changes: 3 additions & 0 deletions cdp_backend/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def resources_dir() -> Path:
EXAMPLE_VIDEO_FILENAME = "example_video.mp4"
EXAMPLE_MKV_VIDEO_FILENAME = "example_video.mkv"
EXAMPLE_VIDEO_HD_FILENAME = "example_video_large.mp4"
# One YouTube video (id "XALBGkjkUPQ") addressed through three URL shapes
EXAMPLE_YOUTUBE_VIDEO_EMBEDDED = "https://www.youtube.com/embed/XALBGkjkUPQ"
EXAMPLE_YOUTUBE_VIDEO_PARAMETER = "https://www.youtube.com/watch?v=XALBGkjkUPQ"
# NOTE(review): YouTube share links are normally "https://youtu.be/<id>";
# confirm this "watch?v=" form on the youtu.be host is the intended case.
EXAMPLE_YOUTUBE_VIDEO_SHORT = "https://youtu.be/watch?v=XALBGkjkUPQ"


@pytest.fixture
Expand Down
34 changes: 34 additions & 0 deletions cdp_backend/tests/utils/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
resource_copy,
)

from .. import test_utils
from ..conftest import (
EXAMPLE_MKV_VIDEO_FILENAME,
EXAMPLE_VIDEO_FILENAME,
EXAMPLE_VIDEO_HD_FILENAME,
EXAMPLE_YOUTUBE_VIDEO_EMBEDDED,
EXAMPLE_YOUTUBE_VIDEO_PARAMETER,
EXAMPLE_YOUTUBE_VIDEO_SHORT,
)

#############################################################################
Expand Down Expand Up @@ -211,6 +215,10 @@ def test_hover_thumbnail_generator(
os.remove(result)


@pytest.mark.skipif(
not test_utils.internet_is_available(),
reason="No internet connection",
)
@pytest.mark.parametrize(
"video_uri, expected",
[
Expand All @@ -224,3 +232,29 @@ def test_convert_video_to_mp4(
) -> None:
filepath = str(resources_dir / video_uri)
assert file_utils.convert_video_to_mp4(filepath) == str(resources_dir / expected)


@pytest.mark.skipif(
    not test_utils.internet_is_available(),
    reason="No internet connection",
)
@pytest.mark.parametrize(
    "youtube_uri, expected",
    [
        (EXAMPLE_YOUTUBE_VIDEO_EMBEDDED, "XALBGkjkUPQ.mp4"),
        (EXAMPLE_YOUTUBE_VIDEO_PARAMETER, "XALBGkjkUPQ.mp4"),
        (EXAMPLE_YOUTUBE_VIDEO_SHORT, "XALBGkjkUPQ.mp4"),
    ],
)
def test_youtube_downloader(
    resources_dir: Path,
    youtube_uri: str,
    expected: str,
) -> None:
    """
    Check that each supported YouTube URL shape is downloaded by
    ``resource_copy`` to ``<resources_dir>/<video id>.mp4``.

    The test is skipped when no internet connection is available.
    """
    actual_uri = file_utils.resource_copy(youtube_uri, resources_dir, True)
    try:
        expected_uri = str(resources_dir / expected)
        assert actual_uri == expected_uri
        assert Path(actual_uri).exists()
        assert Path(actual_uri).is_file()
    finally:
        # Always clean up, even when an assertion fails, so a failed run does
        # not leave the downloaded video behind in the resources directory.
        # Guarded so a missing file cannot mask the original assertion error.
        if os.path.isfile(actual_uri):
            os.remove(actual_uri)
43 changes: 42 additions & 1 deletion cdp_backend/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import aiohttp
import fsspec
from fsspec.core import url_to_fs
from yt_dlp import YoutubeDL

###############################################################################

Expand Down Expand Up @@ -90,7 +91,12 @@ def resource_copy(
# Ensure dst doesn't exist
dst = Path(dst).resolve()
if dst.is_dir():
dst = dst / uri.split("/")[-1]
if "v=" in str(uri):
# Split by youtube video query parameter
dst = dst / uri.split("v=")[-1]
else:
# Split by the last "/"
dst = dst / uri.split("/")[-1]

# Ensure filename is less than 255 chars
# Otherwise this can raise an OSError for too long of a filename
Expand All @@ -105,6 +111,9 @@ def resource_copy(
log.info(f"Beginning resource copy from: {uri}")
# Get file system
try:
if uri.find("youtube.com") >= 0 or uri.find("youtu.be") >= 0:
return youtube_copy(uri, dst, overwrite)

kwargs = {}

# Set custom timeout for http resources
Expand All @@ -126,6 +135,38 @@ def resource_copy(
raise e


def youtube_copy(uri: str, dst: Path, overwrite: bool = False) -> str:
    """
    Copy a video from YouTube to a local destination on the machine.

    Parameters
    ----------
    uri: str
        The url of the YouTube video to copy.
    dst: Path
        The location to download the file to. Any suffix already present is
        replaced with ".mp4".
    overwrite: bool
        Boolean value indicating whether or not to overwrite a local video with
        the same name if it already exists.
        Default: False (raise instead of overwriting)

    Returns
    -------
    dst: str
        The location of the downloaded file.

    Raises
    ------
    FileExistsError
        A file already exists at the ".mp4" destination and overwrite is False.
    """
    # The download is requested in mp4 format, so force a matching suffix
    dst = Path(dst).with_suffix(".mp4")

    # Ensure dst isn't an existing file unless the caller allows overwriting
    if dst.is_file() and not overwrite:
        raise FileExistsError(dst)

    ydl_opts = {"outtmpl": str(dst), "format": "mp4"}
    if overwrite:
        # By default yt-dlp leaves an already-present video file in place,
        # which would make overwrite=True a silent no-op; request replacement.
        ydl_opts["overwrites"] = True

    with YoutubeDL(ydl_opts) as ydl:
        ydl.download([uri])

    return str(dst)


def split_audio(
video_read_path: str,
audio_save_path: str,
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"spacy~=3.0",
"truecase~=0.0.12",
"webvtt-py~=0.4.6",
"yt-dlp~=2022.2.4"
]

test_requirements = [
Expand Down

0 comments on commit 9f1ede6

Please sign in to comment.