diff --git a/README.md b/README.md
index 5bdd9ea..eecb77f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
- [![Python application](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/badge.svg)](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/python-app.yml)
+
+[![Python application](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/badge.svg)](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml)
# bleep that sh*t
@@ -30,19 +31,18 @@ https://github.com/neonwatty/bleep_that_sht/assets/16326421/63ebd7a0-46f6-4efd-8
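-To get setup to run the notebook / bleep your own videos / run the strealit demo first install the requirements for this project by pasting the below in your terminal.
+To get set up to run the notebook / bleep your own videos / run the streamlit demo, first install the requirements for this project by pasting the below in your terminal.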
```python
-pip install -r requirements.txt
+pip install -r requirements.streamlit
```
-You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well.
+To install the requirements for the gradio demo instead, run
+```python
+pip install -r requirements.gradio
+```
-## Instructions for bleeping your own **local** videos
+You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well.
-Start this streamlit demo locally that lets you drag and drop local video files to bleep
-```python
-python -m streamlit run bleep_that_sht/app_video_upload.py
-```
## Instructions for bleeping **youtube** videos via youtube / shorts url
@@ -52,4 +52,20 @@ Start this streamlit demo locally that lets you enter in a youtube / shorts url
-python -m streamlit run bleep_that_sht/app_url_download.py
+python -m streamlit run bleep_that_sht/streamlit_app_url_download.py
```
+Alternatively you can start a gradio server with the same functionality
+
+```python
+python -m bleep_that_sht.gradio_app_url_download
+```
+
This is the version hosted in the HF space [![HuggingFace Space](https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg)](https://huggingface.co/spaces/neonwatty/bleep_that_sht).
+
+
+
+## Instructions for bleeping your own **local** videos
+
+Start this streamlit demo locally that lets you drag and drop local video files to bleep
+
+```python
+python -m streamlit run bleep_that_sht/streamlit_app_video_upload.py
+```
diff --git a/bleep_that_sht/create.py b/bleep_that_sht/create.py
index e8605b8..c8c3514 100644
--- a/bleep_that_sht/create.py
+++ b/bleep_that_sht/create.py
@@ -33,6 +33,7 @@ def bleep_replace(
bleep_words: list,
timestamped_transcript: dict,
) -> None:
+
# # extract and save audio from original video
# extract_audio(local_file_path=og_video_path, audio_filepath=og_audio_path)
diff --git a/bleep_that_sht/gradio_app_url_download.py b/bleep_that_sht/gradio_app_url_download.py
new file mode 100644
index 0000000..6f3c463
--- /dev/null
+++ b/bleep_that_sht/gradio_app_url_download.py
@@ -0,0 +1,172 @@
+from bleep_that_sht import main_dir
+from bleep_that_sht.transcribe import avaliable_models
+from bleep_that_sht.transcribe import transcribe
+from bleep_that_sht.audio_extractor import extract_audio
+from bleep_that_sht.create import bleep_replace
+from bleep_that_sht.yt_download import download_video
+import atexit
+import os
+import shutil
+import tempfile
+import uuid
+import gradio as gr
+
+
+# Optional Hugging Face token; None when the variable is not set.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+
+# Scratch directory for downloaded / generated media. The click handlers
+# write into it while the server is running, so it must outlive the UI
+# construction below; it is removed when the interpreter exits.
+WORK_DIR = tempfile.mkdtemp(prefix="bleep_that_sht_")
+atexit.register(shutil.rmtree, WORK_DIR, ignore_errors=True)
+
+
+def _video_component(label, value=None, visible=False):
+    """Build a gr.Video with the settings shared by every video pane."""
+    return gr.Video(
+        value=value,
+        visible=visible,
+        show_download_button=True,
+        show_label=True,
+        label=label,
+        format="mp4",
+        width="50vw",
+        height="50vw",
+    )
+
+
+def _media_paths():
+    """Return fresh (video, audio, bleeped video, bleeped audio) paths in WORK_DIR."""
+    run_id = uuid.uuid4()
+    return tuple(
+        os.path.join(WORK_DIR, f"{stem}_{run_id}.{ext}")
+        for stem in ("original", "bleep")
+        for ext in ("mp4", "mp3")
+    )
+
+
+def _download_and_transcribe(url, model, video_path, audio_path):
+    """Download `url` to `video_path`, extract its audio and transcribe it.
+
+    Returns the (transcript, timestamped_transcript) pair from transcribe().
+    """
+    download_video(url, video_path)
+    if not os.path.isfile(video_path):
+        # Surface a failed download in the UI instead of failing later
+        # inside the audio extraction.
+        raise gr.Error(f"could not download a video from: {url}")
+    extract_audio(video_path, audio_path)
+    return transcribe(local_file_path=audio_path, model=model)
+
+
+print("Setting up Gradio interface...")
+with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Bleep That Sh*t 🙊") as demo:
+    with gr.Tabs():
+        with gr.TabItem("🎬 Bleep That Sh*t 🙊"):
+            with gr.Row():
+                with gr.Column(scale=4):
+                    url_input = gr.Textbox(
+                        value="https://www.youtube.com/shorts/43BhDHYBG0o",
+                        label="🔗 Paste YouTube / Shorts URL here",
+                        placeholder="e.g., https://www.youtube.com/watch?v=.",
+                        max_lines=1,
+                    )
+
+            with gr.Row():
+                with gr.Column(scale=8):
+                    bleep_words = gr.Textbox(
+                        placeholder="bleep keywords go here separated by commas",
+                        label="bleep-word list",
+                        value="treetz, ice, cream, chocolate, syrup, cookie, hooked, threats, treats, trees",
+                    )
+                with gr.Column(scale=3):
+                    model_selection = gr.Dropdown(
+                        choices=avaliable_models,
+                        value="base",
+                        label="whisper model (base only in HF space)",
+                        info="whisper model selection",
+                        interactive=False,
+                    )
+                with gr.Column(scale=4):
+                    just_transcribe_button = gr.Button("Just Transcribe", variant="primary")
+                    transcribe_and_bleep_button = gr.Button("Transcribe & Bleep", variant="primary")
+
+            with gr.Row():
+                transcript_output = gr.Textbox(
+                    label="Video Transcript",
+                    placeholder="",
+                    max_lines=5,
+                    show_copy_button=True,
+                )
+
+            with gr.Row():
+                og_video = _video_component("original video")
+                bleep_video = _video_component("bleeped video")
+
+            @just_transcribe_button.click(
+                inputs=[url_input, model_selection],
+                outputs=[og_video, bleep_video, transcript_output],
+            )
+            def just_transcribe(url_input, model_selection):
+                """Download the video and show it next to its transcript."""
+                video_path, audio_path, _, _ = _media_paths()
+                transcript, _ = _download_and_transcribe(
+                    url_input, model_selection, video_path, audio_path
+                )
+                return (
+                    _video_component("original video", value=video_path, visible=True),
+                    _video_component("bleeped video"),
+                    transcript,
+                )
+
+            @transcribe_and_bleep_button.click(
+                inputs=[url_input, model_selection, bleep_words],
+                outputs=[og_video, bleep_video, transcript_output],
+            )
+            def transcribe_and_bleep(url_input, model_selection, bleep_words):
+                """Download, transcribe and bleep the listed words out of the video."""
+                # Parse first so input made only of commas / spaces is also
+                # treated as empty.
+                bleep_word_list = [w.strip() for w in bleep_words.split(",") if w.strip()]
+                if not bleep_word_list:
+                    gr.Warning("bleep words empty!", duration=3)
+                    return None, None, None
+
+                video_path, audio_path, bleep_video_path, bleep_audio_path = _media_paths()
+                transcript, timestamped_transcript = _download_and_transcribe(
+                    url_input, model_selection, video_path, audio_path
+                )
+                bleep_replace(
+                    video_path,
+                    audio_path,
+                    bleep_video_path,
+                    bleep_audio_path,
+                    bleep_word_list,
+                    timestamped_transcript,
+                )
+                return (
+                    _video_component("original video", value=video_path, visible=True),
+                    _video_component("bleeped video", value=bleep_video_path, visible=True),
+                    transcript,
+                )
+
+        with gr.TabItem("💡 About"):
+            gr.Markdown(
+                (
+                    "### Bleep out words of your choice from an input video. \n"
+                    "How it works: \n\n"
+                    "1. Provide a youtube / shorts url \n"
+                    "2. Choose your desired bleep keywords \n"
+                    "3. (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space) \n"
+                    "4. (optional) Press 'Just Transcribe' to examine / download just the transcription of the video (can help in choosing bleep words) \n"
+                    "5. Press 'Transcribe and bleep' to transcribe and replace all instances of your keywords with *beep* sounds \n\n"
+                    "If you want to select your Whisper model / run longer videos pull and run the app locally. \n\n"
+                    "Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription. \n\n"
+                    "You do *not* need a GPU to run this locally. Larger models take more time to process locally, but it's doable. \n"
+                )
+            )
+
+if __name__ == "__main__":
+    print("Launching Gradio interface...")
+    demo.launch()
diff --git a/bleep_that_sht/app_url_download.py b/bleep_that_sht/streamlit_app_url_download.py
similarity index 100%
rename from bleep_that_sht/app_url_download.py
rename to bleep_that_sht/streamlit_app_url_download.py
diff --git a/bleep_that_sht/app_video_upload.py b/bleep_that_sht/streamlit_app_video_upload.py
similarity index 100%
rename from bleep_that_sht/app_video_upload.py
rename to bleep_that_sht/streamlit_app_video_upload.py
diff --git a/bleep_that_sht/yt_download.py b/bleep_that_sht/yt_download.py
index a31ab0c..f51e670 100644
--- a/bleep_that_sht/yt_download.py
+++ b/bleep_that_sht/yt_download.py
@@ -1,4 +1,4 @@
-from pytube import YouTube
+import yt_dlp
 import re
@@ -15,21 +15,20 @@ def download_video(url: str, savepath: str, my_proxies: dict = {}) -> None:
     try:
         print("Downloading video from youtube...")
         if is_valid_youtube_url(url):
-            yt = YouTube(url, proxies=my_proxies)
-            audio_video_streams = (
-                yt.streams.filter(
-                    file_extension="mp4",
-                    only_audio=False,
-                    only_video=False,
-                    progressive=True,
-                    type="video",
-                )
-                .order_by("resolution")
-                .asc()
-            )
-            audio_video_itags = [v.itag for v in audio_video_streams]
-            first_choice_itag = audio_video_itags[0]
-            yt.streams.get_by_itag(first_choice_itag).download(filename=savepath)
+            ydl_opts = {
+                # <=720p video plus best audio, else the best combined stream
+                'format': 'bestvideo[height<=720]+bestaudio/best',
+                'merge_output_format': 'mp4',
+                'outtmpl': savepath,
+            }
+            # yt-dlp takes a single proxy URL; keep honouring the caller's
+            # protocol -> address mapping instead of silently ignoring it.
+            proxy_url = (my_proxies or {}).get("https") or (my_proxies or {}).get("http")
+            if proxy_url:
+                ydl_opts['proxy'] = proxy_url
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([url])
+
             print("...done!")
         else:
             raise ValueError(f"invalid input url: {url}")
diff --git a/requirements.gradio b/requirements.gradio
new file mode 100644
index 0000000..e04eb08
--- /dev/null
+++ b/requirements.gradio
@@ -0,0 +1,4 @@
+whisper-timestamped
+moviepy
+yt-dlp
+gradio
diff --git a/requirements.streamlit b/requirements.streamlit
new file mode 100644
index 0000000..79cb2e8
--- /dev/null
+++ b/requirements.streamlit
@@ -0,0 +1,4 @@
+whisper-timestamped
+moviepy
+yt-dlp
+streamlit
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index f3abd7b..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-whisper-timestamped
-moviepy
-pydub
-git+https://github.com/neonwatty/pytube.git
-streamlit