Merge pull request #1 from neonwatty/gradio_revision

Gradio revision
neonwatty · Jul 17, 2024 · 110b5ee · 110b5ee
2 parents 7a30682 + a1b2720
commit 110b5ee
Show file tree

Hide file tree

Showing 9 changed files with 231 additions and 29 deletions.
diff --git a/README.md b/README.md
@@ -1,5 +1,6 @@
 <a href="https://huggingface.co/spaces/neonwatty/bleep_that_sht" target="_parent"><img src="https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg" alt="HuggingFace Space"/></a>
-<a href="https://colab.research.google.com/github/jermwatt/bleep_that_sht/blob/main/beep_that_sht_walkthrough.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>  <a href="https://www.youtube.com/watch?v=U8Ki9dD3HF0" target="_parent"><img src="https://badges.aleen42.com/src/youtube.svg" alt="Youtube"/></a> [![Python application](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/badge.svg)](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/python-app.yml)
+<a href="https://colab.research.google.com/github/jermwatt/bleep_that_sht/blob/main/beep_that_sht_walkthrough.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>  <a href="https://www.youtube.com/watch?v=U8Ki9dD3HF0" target="_parent"><img src="https://badges.aleen42.com/src/youtube.svg" alt="Youtube"/></a>
+[![Python application](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/badge.svg)](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/python-app.yml) <a href="https://www.producthunt.com/posts/bleep-that-sh-t?embed=true&utm_source=badge-featured&utm_medium=badge&utm_souce=badge-bleep&#0045;that&#0045;sh&#0045;t" target="_parent"><img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=470378&theme=light" alt="Bleep&#0032;That&#0032;Sh&#0042;t&#0033; - A&#0032;whisper&#0032;app&#0032;that&#0032;bleeps&#0032;out&#0032;chosen&#0032;words&#0032;in&#0032;YouTube&#0032;videos | Product Hunt" style="width: 250px; height: 45px;" /></a>
 
 
 # bleep that sh*t
@@ -30,19 +31,18 @@ https://github.com/neonwatty/bleep_that_sht/assets/16326421/63ebd7a0-46f6-4efd-8
 To get set up to run the notebook / bleep your own videos / run the streamlit demo, first install the requirements for this project by pasting the below in your terminal.
 
 ```python
-pip install -r requirements.txt
+pip install -r requirements.streamlit
 ```
 
-You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well.
+To install the requirements for the gradio demo instead, run
 
+```python
+pip install -r requirements.gradio
+```
 
-## Instructions for bleeping your own **local** videos
+You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well.
 
-Start this streamlit demo locally that lets you drag and drop local video files to bleep
 
-```python
-python -m streamlit run bleep_that_sht/app_video_upload.py
-```
 
 ## Instructions for bleeping **youtube** videos via youtube / shorts url
 
@@ -52,4 +52,20 @@ Start this streamlit demo locally that lets you enter in a youtube / shorts url
 python -m streamlit run bleep_that_sht/streamlit_app_url_download.py
 ```
 
+Alternatively you can start a gradio server with the same functionality
+
+```python
+python -m bleep_that_sht.gradio_app_url_download
+```
+
 This is the version hosted in the HF space [![HuggingFace Space](https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg)](https://huggingface.co/spaces/neonwatty/bleep_that_sht).
+
+
+
+## Instructions for bleeping your own **local** videos
+
+Start this streamlit demo locally that lets you drag and drop local video files to bleep
+
+```python
+python -m streamlit run bleep_that_sht/streamlit_app_video_upload.py
+```
diff --git a/bleep_that_sht/create.py b/bleep_that_sht/create.py
@@ -33,6 +33,7 @@ def bleep_replace(
     bleep_words: list,
     timestamped_transcript: dict,
 ) -> None:
+
     # # extract and save audio from original video
     # extract_audio(local_file_path=og_video_path, audio_filepath=og_audio_path)
 

diff --git a/bleep_that_sht/gradio_app_url_download.py b/bleep_that_sht/gradio_app_url_download.py
@@ -0,0 +1,187 @@
+from bleep_that_sht import main_dir
+from bleep_that_sht.transcribe import avaliable_models
+from bleep_that_sht.transcribe import transcribe
+from bleep_that_sht.audio_extractor import extract_audio
+from bleep_that_sht.create import bleep_replace
+from bleep_that_sht.yt_download import download_video
+import tempfile
+import uuid
+import os
+import io
+import gradio as gr
+
+
+# Optional Hugging Face token taken from the environment.  os.environ.get
+# returns None when the variable is unset instead of raising, so no
+# try/except (and in particular no bare ``except``) is needed here.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+
+
+print("Setting up Gradio interface...")
+with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Bleep That Sh*t 🙊") as demo:
+    with gr.Tabs():
+        with gr.TabItem("🎬 Bleep That Sh*t 🙊"):
+            with gr.Row():
+                with gr.Column(scale=4):
+                    url_input = gr.Textbox(
+                        value="https://www.youtube.com/shorts/43BhDHYBG0o",
+                        label="🔗 Paste YouTube / Shorts URL here",
+                        placeholder="e.g., https://www.youtube.com/watch?v=.",
+                        max_lines=1,
+                    )
+
+            with gr.Row():
+                with gr.Column(scale=8):
+                    bleep_words = gr.Textbox(
+                        placeholder="bleep keywords go here separated by commas",
+                        label="bleep-word list",
+                        value="treetz, ice, cream, chocolate, syrup, cookie, hooked, threats, treats, trees",
+                    )
+                with gr.Column(scale=3):
+                    model_selection = gr.Dropdown(choices=avaliable_models, value="base", label="whisper model (base only in HF space)", info="whisper model selection", interactive=False)
+                with gr.Column(scale=4):
+                    just_transcribe_button = gr.Button("Just Transcribe", variant="primary")
+                    transcribe_and_bleep_button = gr.Button("Transcribe & Bleep", variant="primary")
+
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                with gr.Row():
+                    transcript_output = gr.Textbox(label="Video Transcript", placeholder="", max_lines=5, show_copy_button=True)
+
+                with gr.Row():
+                    og_video = gr.Video(
+                        visible=False,
+                        show_download_button=True,
+                        show_label=True,
+                        label="original video",
+                        format="mp4",
+                        width="50vw",
+                        height="50vw",
+                    )
+
+                    bleep_video = gr.Video(
+                        visible=False,
+                        show_download_button=True,
+                        show_label=True,
+                        label="bleeped video",
+                        format="mp4",
+                        width="50vw",
+                        height="50vw",
+                    )
+
+                @just_transcribe_button.click(inputs=[url_input, model_selection], outputs=[og_video, bleep_video, transcript_output])
+                def just_transcribe(url_input, model_selection):
+                    """Download the video at ``url_input`` and transcribe its audio.
+
+                    Args:
+                        url_input: value of the URL textbox.
+                        model_selection: value of the whisper-model dropdown; passed
+                            to ``transcribe`` as ``model``.
+
+                    Returns:
+                        A 3-tuple matching the click outputs: a visible
+                        "original video" component pointing at the downloaded file,
+                        a hidden "bleeped video" component, and the transcript
+                        returned by ``transcribe``.
+                    """
+                    # This handler runs after the module-level
+                    # ``with tempfile.TemporaryDirectory()`` block has exited (which
+                    # removes the directory), so make sure the directory exists.
+                    os.makedirs(tmpdirname, exist_ok=True)
+
+                    # Build both paths from one stem so only the extension differs;
+                    # a blanket str.replace("mp4", "mp3") would also rewrite any
+                    # "mp4" that happens to occur in the directory name.
+                    file_stem = tmpdirname + "/original_" + str(uuid.uuid4())
+                    temporary_video_location = file_stem + ".mp4"
+                    temporary_audio_location = file_stem + ".mp3"
+
+                    download_video(url_input, temporary_video_location)
+
+                    # The original code read the download back and re-wrote the same
+                    # bytes in place; that round trip is kept, but both handles are
+                    # now closed deterministically.
+                    # NOTE(review): this looks like a no-op - confirm before removing.
+                    with open(temporary_video_location, "rb") as source:
+                        video_bytes = source.read()
+                    with open(temporary_video_location, "wb") as out:
+                        out.write(video_bytes)
+
+                    new_og_video = gr.Video(
+                        value=temporary_video_location,
+                        visible=True,
+                        show_download_button=True,
+                        show_label=True,
+                        label="original video",
+                        format="mp4",
+                        width="50vw",
+                        height="50vw",
+                    )
+
+                    # No bleeped output is produced here, so that player stays hidden.
+                    new_bleep_video = gr.Video(
+                        visible=False,
+                        show_download_button=True,
+                        show_label=True,
+                        label="bleeped video",
+                        format="mp4",
+                        width="50vw",
+                        height="50vw",
+                    )
+
+                    extract_audio(temporary_video_location, temporary_audio_location)
+                    # Only the plain transcript is shown; the timestamped one is unused.
+                    transcript, _ = transcribe(local_file_path=temporary_audio_location, model=model_selection)
+
+                    return new_og_video, new_bleep_video, transcript
+
+
+                @transcribe_and_bleep_button.click(inputs=[url_input, model_selection, bleep_words], outputs=[og_video, bleep_video, transcript_output])
+                def transcribe_and_bleep(url_input, model_selection, bleep_words):
+                    """Download a video, transcribe it, and bleep the requested words.
+
+                    Args:
+                        url_input: value of the URL textbox.
+                        model_selection: value of the whisper-model dropdown; passed
+                            to ``transcribe`` as ``model``.
+                        bleep_words: comma-separated keywords from the bleep-word
+                            textbox.
+
+                    Returns:
+                        A 3-tuple matching the click outputs: the "original video"
+                        component, the "bleeped video" component, and the transcript
+                        returned by ``transcribe``.  When no usable bleep word was
+                        given, a warning is shown and ``(None, None, None)`` is
+                        returned.
+                    """
+                    # Parse before validating so that input made only of commas and
+                    # whitespace is treated as empty instead of being processed with
+                    # an empty word list.
+                    bleep_word_list = [word.strip() for word in bleep_words.split(",") if word.strip()]
+                    if not bleep_word_list:
+                        gr.Warning("bleep words empty!", duration=3)
+                        return None, None, None
+
+                    # This handler runs after the module-level
+                    # ``with tempfile.TemporaryDirectory()`` block has exited (which
+                    # removes the directory), so make sure the directory exists.
+                    os.makedirs(tmpdirname, exist_ok=True)
+
+                    # Derive all four paths from one id rather than chaining
+                    # str.replace("original", "bleep") / replace("mp4", "mp3"), which
+                    # would also rewrite matches inside the directory name.
+                    run_id = str(uuid.uuid4())
+                    temporary_video_location = tmpdirname + "/original_" + run_id + ".mp4"
+                    temporary_audio_location = tmpdirname + "/original_" + run_id + ".mp3"
+                    bleep_video_output = tmpdirname + "/bleep_" + run_id + ".mp4"
+                    bleep_audio_output = tmpdirname + "/bleep_" + run_id + ".mp3"
+
+                    download_video(url_input, temporary_video_location)
+
+                    # The original code read the download back and re-wrote the same
+                    # bytes in place; that round trip is kept, but both handles are
+                    # now closed deterministically.
+                    # NOTE(review): this looks like a no-op - confirm before removing.
+                    with open(temporary_video_location, "rb") as source:
+                        video_bytes = source.read()
+                    with open(temporary_video_location, "wb") as out:
+                        out.write(video_bytes)
+
+                    new_og_video = gr.Video(
+                        value=temporary_video_location,
+                        visible=True,
+                        show_download_button=True,
+                        show_label=True,
+                        label="original video",
+                        format="mp4",
+                        width="50vw",
+                        height="50vw",
+                    )
+
+                    extract_audio(temporary_video_location, temporary_audio_location)
+                    transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection)
+
+                    bleep_replace(
+                        temporary_video_location,
+                        temporary_audio_location,
+                        bleep_video_output,
+                        bleep_audio_output,
+                        bleep_word_list,
+                        timestamped_transcript,
+                    )
+
+                    new_bleep_video = gr.Video(
+                        value=bleep_video_output,
+                        visible=True,
+                        show_download_button=True,
+                        show_label=True,
+                        label="bleeped video",
+                        format="mp4",
+                        width="50vw",
+                        height="50vw",
+                    )
+
+                    return new_og_video, new_bleep_video, transcript
+
+        # Static "About" tab: a single Markdown panel describing how to use the app.
+        with gr.TabItem("💡 About"):
+            with gr.Blocks() as about:
+                gr.Markdown(
+                    (
+                        "### Bleep out words of your choice from an input video.  \n"
+                        "How it works: \n\n"
+                        "1.  Provide a youtube / shorts url \n"
+                        "2.  Choose your desired bleep keywords \n"
+                        "3.  (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space) \n"
+                        "4.  (optional) Press 'Just Transcribe' to examine / download just the transcription of the video (can help in choosing bleep words) \n"
+                        "5.  Press 'Transcribe and bleep' to transcribe and replace all instances of your keywords with *beep* sounds \n\n"
+                        "If you want to select your Whisper model / run longer videos pull and run the app locally. \n\n"
+                        "Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription. \n\n"
+                        "You do *not* need a GPU to run this locally.  Larger models take more time to process locally, but it's doable. \n"
+                    )
+                )
+
+if __name__ == "__main__":
+    print("Launching Gradio interface...")
+    demo.launch()
diff --git a/bleep_that_sht/app_url_download.py → bleep_that_sht/streamlit_app_url_download.py b/bleep_that_sht/app_url_download.py → bleep_that_sht/streamlit_app_url_download.py
diff --git a/bleep_that_sht/app_video_upload.py → bleep_that_sht/streamlit_app_video_upload.py b/bleep_that_sht/app_video_upload.py → bleep_that_sht/streamlit_app_video_upload.py
diff --git a/bleep_that_sht/yt_download.py b/bleep_that_sht/yt_download.py
@@ -1,4 +1,6 @@
-from pytube import YouTube
+# from pytube import YouTube
+
+import yt_dlp
 import re
 
 
@@ -15,21 +17,14 @@ def download_video(url: str, savepath: str, my_proxies: dict = {}) -> None:
     try:
         print("Downloading video from youtube...")
         if is_valid_youtube_url(url):
-            yt = YouTube(url, proxies=my_proxies)
-            audio_video_streams = (
-                yt.streams.filter(
-                    file_extension="mp4",
-                    only_audio=False,
-                    only_video=False,
-                    progressive=True,
-                    type="video",
-                )
-                .order_by("resolution")
-                .asc()
-            )
-            audio_video_itags = [v.itag for v in audio_video_streams]
-            first_choice_itag = audio_video_itags[0]
-            yt.streams.get_by_itag(first_choice_itag).download(filename=savepath)
+            ydl_opts = {
+                'format': 'bestvideo[height<=720]+bestaudio/best', 
+                'merge_output_format': 'mp4',
+                'outtmpl': savepath,
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([url])
+
             print("...done!")
         else:
             raise ValueError(f"invalid input url: {url}")

diff --git a/requirements.gradio b/requirements.gradio
@@ -0,0 +1,4 @@
+whisper-timestamped
+moviepy
+yt-dlp
+gradio
diff --git a/requirements.streamlit b/requirements.streamlit
@@ -0,0 +1,4 @@
+whisper-timestamped
+moviepy
+yt-dlp
+streamlit
diff --git a/requirements.txt b/requirements.txt