diff --git a/README.md b/README.md index 5bdd9ea..eecb77f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ HuggingFace Space -Open In Colab Youtube [![Python application](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/badge.svg)](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/python-app.yml) +Open In Colab Youtube +[![Python application](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/badge.svg)](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/python-app.yml) Bleep That Sh*t! - A whisper app that bleeps out chosen words in YouTube videos | Product Hunt # bleep that sh*t @@ -30,19 +31,18 @@ https://github.com/neonwatty/bleep_that_sht/assets/16326421/63ebd7a0-46f6-4efd-8 To get setup to run the notebook / bleep your own videos / run the strealit demo first install the requirements for this project by pasting the below in your terminal. ```python -pip install -r requirements.txt +pip install -r requirements.streamlit ``` -You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well. +To install requirements for the gradio demo use this install +```python +pip install -r requirements.gradio +``` -## Instructions for bleeping your own **local** videos +You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well. 
-Start this streamlit demo locally that lets you drag and drop local video files to bleep -```python -python -m streamlit run bleep_that_sht/app_video_upload.py -``` ## Instructions for bleeping **youtube** videos via youtube / shorts url @@ -52,4 +52,20 @@ Start this streamlit demo locally that lets you enter in a youtube / shorts url python -m streamlit run bleep_that_sht/app_url_download.py ``` +Alternatively, you can start a gradio server with the same functionality: + +```python +python -m bleep_that_sht.gradio_app_url_download +``` + This is the version hosted in the HF space [![HuggingFace Space](https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg)](https://huggingface.co/spaces/neonwatty/bleep_that_sht). + + + +## Instructions for bleeping your own **local** videos + +Start this streamlit demo locally that lets you drag and drop local video files to bleep + +```python +python -m streamlit run bleep_that_sht/streamlit_app_video_upload.py +``` diff --git a/bleep_that_sht/create.py b/bleep_that_sht/create.py index e8605b8..c8c3514 100644 --- a/bleep_that_sht/create.py +++ b/bleep_that_sht/create.py @@ -33,6 +33,7 @@ def bleep_replace( bleep_words: list, timestamped_transcript: dict, ) -> None: + # # extract and save audio from original video # extract_audio(local_file_path=og_video_path, audio_filepath=og_audio_path) diff --git a/bleep_that_sht/gradio_app_url_download.py b/bleep_that_sht/gradio_app_url_download.py new file mode 100644 index 0000000..6f3c463 --- /dev/null +++ b/bleep_that_sht/gradio_app_url_download.py @@ -0,0 +1,187 @@ +from bleep_that_sht import main_dir +from bleep_that_sht.transcribe import avaliable_models +from bleep_that_sht.transcribe import transcribe +from bleep_that_sht.audio_extractor import extract_audio +from bleep_that_sht.create import bleep_replace +from bleep_that_sht.yt_download import download_video +import tempfile +import uuid +import os +import io +import gradio as gr + + +HF_TOKEN = None + +try: + HF_TOKEN = 
os.environ.get("HF_TOKEN") +except: + pass + + +print("Setting up Gradio interface...") +with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Bleep That Sh*t 🙊") as demo: + with gr.Tabs(): + with gr.TabItem("🎬 Bleep That Sh*t 🙊"): + with gr.Row(): + with gr.Column(scale=4): + url_input = gr.Textbox( + value="https://www.youtube.com/shorts/43BhDHYBG0o", + label="🔗 Paste YouTube / Shorts URL here", + placeholder="e.g., https://www.youtube.com/watch?v=.", + max_lines=1, + ) + + with gr.Row(): + with gr.Column(scale=8): + bleep_words = gr.Textbox( + placeholder="bleep keywords go here separated by commas", + label="bleep-word list", + value="treetz, ice, cream, chocolate, syrup, cookie, hooked, threats, treats, trees", + ) + with gr.Column(scale=3): + model_selection = gr.Dropdown(choices=avaliable_models, value="base", label="whisper model (base only in HF space)", info="whisper model selection", interactive=False) + with gr.Column(scale=4): + just_transcribe_button = gr.Button("Just Transcribe", variant="primary") + transcribe_and_bleep_button = gr.Button("Transcribe & Bleep", variant="primary") + + with tempfile.TemporaryDirectory() as tmpdirname: + with gr.Row(): + transcript_output = gr.Textbox(label="Video Transcript", placeholder="", max_lines=5, show_copy_button=True) + + with gr.Row(): + og_video = gr.Video( + visible=False, + show_download_button=True, + show_label=True, + label="original video", + format="mp4", + width="50vw", + height="50vw", + ) + + bleep_video = gr.Video( + visible=False, + show_download_button=True, + show_label=True, + label="bleeped video", + format="mp4", + width="50vw", + height="50vw", + ) + + @just_transcribe_button.click(inputs=[url_input, model_selection], outputs=[og_video, bleep_video, transcript_output]) + def just_transcribe(url_input, model_selection): + temporary_video_location = tmpdirname + "/original_" + str(uuid.uuid4()) + ".mp4" + temporary_audio_location = temporary_video_location.replace("mp4", "mp3") + + 
download_video(url_input, temporary_video_location) + filename = open(temporary_video_location, "rb") + byte_file = io.BytesIO(filename.read()) + with open(temporary_video_location, "wb") as out: + out.write(byte_file.read()) + + new_og_video = gr.Video( + value=temporary_video_location, + visible=True, + show_download_button=True, + show_label=True, + label="original video", + format="mp4", + width="50vw", + height="50vw", + ) + + new_bleep_video = gr.Video( + visible=False, + show_download_button=True, + show_label=True, + label="bleeped video", + format="mp4", + width="50vw", + height="50vw", + ) + + + extract_audio(temporary_video_location, temporary_audio_location) + transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection) + + return new_og_video, new_bleep_video, transcript + + + @transcribe_and_bleep_button.click(inputs=[url_input, model_selection, bleep_words], outputs=[og_video, bleep_video, transcript_output]) + def transcribe_and_bleep(url_input, model_selection, bleep_words): + if len(bleep_words) > 0: + temporary_video_location = tmpdirname + "/original_" + str(uuid.uuid4()) + ".mp4" + temporary_audio_location = temporary_video_location.replace("mp4", "mp3") + + download_video(url_input, temporary_video_location) + filename = open(temporary_video_location, "rb") + byte_file = io.BytesIO(filename.read()) + with open(temporary_video_location, "wb") as out: + out.write(byte_file.read()) + + new_og_video = gr.Video( + value=temporary_video_location, + visible=True, + show_download_button=True, + show_label=True, + label="original video", + format="mp4", + width="50vw", + height="50vw", + ) + + extract_audio(temporary_video_location, temporary_audio_location) + transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection) + + bleep_word_list = bleep_words.split(",") + bleep_word_list = [v.strip() for v in bleep_word_list if len(v.strip()) > 0] + 
bleep_video_output = temporary_video_location.replace("original", "bleep") + bleep_audio_output = bleep_video_output.replace("mp4", "mp3") + + bleep_replace( + temporary_video_location, + temporary_audio_location, + bleep_video_output, + bleep_audio_output, + bleep_word_list, + timestamped_transcript, + ) + + new_bleep_video = gr.Video( + value=bleep_video_output, + visible=True, + show_download_button=True, + show_label=True, + label="bleeped video", + format="mp4", + width="50vw", + height="50vw", + ) + + return new_og_video, new_bleep_video, transcript + else: + gr.Warning("bleep words empty!", duration=3) + return None, None, None + + with gr.TabItem("💡 About"): + with gr.Blocks() as about: + gr.Markdown( + ( + "### Bleep out words of your choice from an input video. \n" + "How it works: \n\n" + "1. Provided a youtube / shorts url \n" + "2. Choose your your desired bleep keywords \n" + "3. (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space) \n" + "4. (optional) Press 'Just Transcribe' to examine / download just the transcription of the video (can help in choosing bleep words) \n" + "5. Press 'Transcribe and bleep' to transcribe and replace all instances of your keywords with *beep* sounds \n\n" + "If you want to select your Whisper model / run longer videos pull and run the app locally. \n\n" + "Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription. \n\n" + "You do *not* need a GPU to run this locally. Larger models take more time to process locally, but its doable. 
\n" + ) + ) + +if __name__ == "__main__": + print("Launching Gradio interface...") + demo.launch() diff --git a/bleep_that_sht/app_url_download.py b/bleep_that_sht/streamlit_app_url_download.py similarity index 100% rename from bleep_that_sht/app_url_download.py rename to bleep_that_sht/streamlit_app_url_download.py diff --git a/bleep_that_sht/app_video_upload.py b/bleep_that_sht/streamlit_app_video_upload.py similarity index 100% rename from bleep_that_sht/app_video_upload.py rename to bleep_that_sht/streamlit_app_video_upload.py diff --git a/bleep_that_sht/yt_download.py b/bleep_that_sht/yt_download.py index a31ab0c..f51e670 100644 --- a/bleep_that_sht/yt_download.py +++ b/bleep_that_sht/yt_download.py @@ -1,4 +1,6 @@ -from pytube import YouTube +# from pytube import YouTube + +import yt_dlp import re @@ -15,21 +17,14 @@ def download_video(url: str, savepath: str, my_proxies: dict = {}) -> None: try: print("Downloading video from youtube...") if is_valid_youtube_url(url): - yt = YouTube(url, proxies=my_proxies) - audio_video_streams = ( - yt.streams.filter( - file_extension="mp4", - only_audio=False, - only_video=False, - progressive=True, - type="video", - ) - .order_by("resolution") - .asc() - ) - audio_video_itags = [v.itag for v in audio_video_streams] - first_choice_itag = audio_video_itags[0] - yt.streams.get_by_itag(first_choice_itag).download(filename=savepath) + ydl_opts = { + 'format': 'bestvideo[height<=720]+bestaudio/best', + 'merge_output_format': 'mp4', + 'outtmpl': savepath, + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([url]) + print("...done!") else: raise ValueError(f"invalid input url: {url}") diff --git a/requirements.gradio b/requirements.gradio new file mode 100644 index 0000000..e04eb08 --- /dev/null +++ b/requirements.gradio @@ -0,0 +1,4 @@ +whisper-timestamped +moviepy +yt-dlp +gradio diff --git a/requirements.streamlit b/requirements.streamlit new file mode 100644 index 0000000..79cb2e8 --- /dev/null +++ 
b/requirements.streamlit @@ -0,0 +1,4 @@ +whisper-timestamped +moviepy +yt-dlp +streamlit diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index f3abd7b..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -whisper-timestamped -moviepy -pydub -git+https://github.com/neonwatty/pytube.git -streamlit