Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/downloading-youtube-videos #169

Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cdp_backend/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def resources_dir() -> Path:
EXAMPLE_VIDEO_FILENAME = "example_video.mp4"
EXAMPLE_MKV_VIDEO_FILENAME = "example_video.mkv"
EXAMPLE_VIDEO_HD_FILENAME = "example_video_large.mp4"
EXAMPLE_YOUTUBE_VIDEO_EMBEDDED = "https://www.youtube.com/embed/XALBGkjkUPQ"
EXAMPLE_YOUTUBE_VIDEO_PARAMETER = "https://www.youtube.com/watch?v=XALBGkjkUPQ"
EXAMPLE_YOUTUBE_VIDEO_SHORT = "https://youtu.be/watch?v=XALBGkjkUPQ"


@pytest.fixture
Expand Down
32 changes: 32 additions & 0 deletions cdp_backend/tests/utils/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
resource_copy,
)

from .. import test_utils
from ..conftest import (
EXAMPLE_MKV_VIDEO_FILENAME,
EXAMPLE_VIDEO_FILENAME,
EXAMPLE_VIDEO_HD_FILENAME,
EXAMPLE_YOUTUBE_VIDEO_EMBEDDED,
EXAMPLE_YOUTUBE_VIDEO_PARAMETER,
EXAMPLE_YOUTUBE_VIDEO_SHORT,
)

#############################################################################
Expand Down Expand Up @@ -211,6 +215,10 @@ def test_hover_thumbnail_generator(
os.remove(result)


@pytest.mark.skipif(
not test_utils.internet_is_available(),
reason="No internet connection",
)
@pytest.mark.parametrize(
"video_uri, expected",
[
Expand All @@ -224,3 +232,27 @@ def test_convert_video_to_mp4(
) -> None:
filepath = str(resources_dir / video_uri)
assert file_utils.convert_video_to_mp4(filepath) == str(resources_dir / expected)


@pytest.mark.skipif(
not test_utils.internet_is_available(),
reason="No internet connection",
)
@pytest.mark.parametrize(
"youtube_uri, expected",
[
(EXAMPLE_YOUTUBE_VIDEO_EMBEDDED, "XALBGkjkUPQ.mp4"),
(EXAMPLE_YOUTUBE_VIDEO_PARAMETER, "XALBGkjkUPQ.mp4"),
(EXAMPLE_YOUTUBE_VIDEO_SHORT, "XALBGkjkUPQ.mp4"),
],
)
def test_youtube_downloader(
Shak2000 marked this conversation as resolved.
Show resolved Hide resolved
resources_dir: Path,
youtube_uri: str,
expected: str,
) -> None:
actual_uri = file_utils.resource_copy(youtube_uri, resources_dir, True)
expected_uri = str(resources_dir / expected)
assert actual_uri == expected_uri

os.remove(actual_uri)
40 changes: 39 additions & 1 deletion cdp_backend/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import aiohttp
import fsspec
from fsspec.core import url_to_fs
from yt_dlp import YoutubeDL

###############################################################################

Expand Down Expand Up @@ -90,7 +91,10 @@ def resource_copy(
# Ensure dst doesn't exist
dst = Path(dst).resolve()
if dst.is_dir():
dst = dst / uri.split("/")[-1]
if "=" in str(uri):
Shak2000 marked this conversation as resolved.
Show resolved Hide resolved
dst = dst / uri.split("=")[-1]
else:
dst = dst / uri.split("/")[-1]

# Ensure filename is less than 255 chars
# Otherwise this can raise an OSError for too long of a filename
Expand All @@ -105,6 +109,9 @@ def resource_copy(
log.info(f"Beginning resource copy from: {uri}")
# Get file system
try:
if uri.find("youtube.com") >= 0 or uri.find("youtu.be") >= 0:
return youtube_copy(uri, dst, overwrite)

kwargs = {}

# Set custom timeout for http resources
Expand All @@ -126,6 +133,37 @@ def resource_copy(
raise e


def youtube_copy(uri: str, dst: Path, overwrite: bool = False) -> str:
"""
Copy a video from YouTube to a local destination on the machine.

Parameters
----------
uri: str
The url of the YouTube video to copy.
dst: Path
The location of the file to download. ".mp4" is always appended to this
path to form the final output file name.
overwrite: bool
Whether to proceed when a file already exists at the final destination.
When False, a FileExistsError is raised instead.
Default: False

Returns
-------
dst: str
The location of the downloaded file (the supplied path with ".mp4" appended).

Raises
------
FileExistsError
A file already exists at the final destination and overwrite is False.
"""
# NOTE: the suffix is appended by plain string concatenation, so a dst that
# already ends in ".mp4" becomes "<name>.mp4.mp4".
dst = Path(str(dst) + ".mp4")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was about to merge, but I think one last look caught this.

This will always overwrite the supplied dst correct?

I think there simply needs to be an if dst is not None before this statement.

Additionally the docstring / parameter typing should be updated to dst: Optional[Path] = None

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I assumed that dst is not empty since if dst is None, it would be assigned a value, as seen here. I can throw an exception if dst is None though. WDYT?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ohhhh, I see what you are doing. Yes. My mistake. Okay, so then my only nitpick would be that instead of doing this str(dst) + ".mp4" string addition, Path has a function called with_suffix that is "safer" imo.

Path(str(dst)).with_suffix(".mp4")

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But yes, you are correct. My mistake on the overwrite vs. simply adding the .mp4 suffix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just pushed a new change based on your feedback.


# Ensure dest isn't a file
if dst.is_file() and not overwrite:
raise FileExistsError(dst)

ydl_opts = {"outtmpl": str(dst), "format": "mp4"}
with YoutubeDL(ydl_opts) as ydl:
ydl.download([uri])
return str(dst)


def split_audio(
video_read_path: str,
audio_save_path: str,
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"spacy~=3.0",
"truecase~=0.0.12",
"webvtt-py~=0.4.6",
"yt-dlp~=2022.2.4"
]

test_requirements = [
Expand Down