Skip to content

Commit

Permalink
Merge pull request #1 from neonwatty/gradio_revision
Browse files Browse the repository at this point in the history
Gradio revision
  • Loading branch information
neonwatty authored Jul 17, 2024
2 parents 7a30682 + a1b2720 commit 110b5ee
Show file tree
Hide file tree
Showing 9 changed files with 231 additions and 29 deletions.
32 changes: 24 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<a href="https://huggingface.co/spaces/neonwatty/bleep_that_sht" target="_parent"><img src="https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg" alt="HuggingFace Space"/></a>
<a href="https://colab.research.google.com/github/jermwatt/bleep_that_sht/blob/main/beep_that_sht_walkthrough.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a> <a href="https://www.youtube.com/watch?v=U8Ki9dD3HF0" target="_parent"><img src="https://badges.aleen42.com/src/youtube.svg" alt="Youtube"/></a> [![Python application](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/badge.svg)](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/python-app.yml)
<a href="https://colab.research.google.com/github/jermwatt/bleep_that_sht/blob/main/beep_that_sht_walkthrough.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a> <a href="https://www.youtube.com/watch?v=U8Ki9dD3HF0" target="_parent"><img src="https://badges.aleen42.com/src/youtube.svg" alt="Youtube"/></a>
[![Python application](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/badge.svg)](https://github.com/neonwatty/bleep_that_sht/actions/workflows/python-app.yml/python-app.yml) <a href="https://www.producthunt.com/posts/bleep-that-sh-t?embed=true&utm_source=badge-featured&utm_medium=badge&utm_souce=badge-bleep&#0045;that&#0045;sh&#0045;t" target="_parent"><img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=470378&theme=light" alt="Bleep&#0032;That&#0032;Sh&#0042;t&#0033; - A&#0032;whisper&#0032;app&#0032;that&#0032;bleeps&#0032;out&#0032;chosen&#0032;words&#0032;in&#0032;YouTube&#0032;videos | Product Hunt" style="width: 250px; height: 45px;" /></a>


# bleep that sh*t
Expand Down Expand Up @@ -30,19 +31,18 @@ https://github.com/neonwatty/bleep_that_sht/assets/16326421/63ebd7a0-46f6-4efd-8
To get set up to run the notebook / bleep your own videos / run the streamlit demo, first install the requirements for this project by pasting the below in your terminal.

```python
pip install -r requirements.txt
pip install -r requirements.streamlit
```

You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well.
To install requirements for the gradio demo use this install

```python
pip install -r requirements.gradio
```

## Instructions for bleeping your own **local** videos
You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well.

Start this streamlit demo locally that lets you drag and drop local video files to bleep

```python
python -m streamlit run bleep_that_sht/app_video_upload.py
```

## Instructions for bleeping **youtube** videos via youtube / shorts url

Expand All @@ -52,4 +52,20 @@ Start this streamlit demo locally that lets you enter in a youtube / shorts url
python -m streamlit run bleep_that_sht/app_url_download.py
```

Alternatively you can start a gradio server with the same functionality

```python
python -m bleep_that_sht.gradio_app_url_download
```

This is the version hosted in the HF space [![HuggingFace Space](https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg)](https://huggingface.co/spaces/neonwatty/bleep_that_sht).



## Instructions for bleeping your own **local** videos

Start this streamlit demo locally that lets you drag and drop local video files to bleep

```python
python -m streamlit run bleep_that_sht/app_video_upload.py
```
1 change: 1 addition & 0 deletions bleep_that_sht/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def bleep_replace(
bleep_words: list,
timestamped_transcript: dict,
) -> None:

# # extract and save audio from original video
# extract_audio(local_file_path=og_video_path, audio_filepath=og_audio_path)

Expand Down
187 changes: 187 additions & 0 deletions bleep_that_sht/gradio_app_url_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
from bleep_that_sht import main_dir
from bleep_that_sht.transcribe import avaliable_models
from bleep_that_sht.transcribe import transcribe
from bleep_that_sht.audio_extractor import extract_audio
from bleep_that_sht.create import bleep_replace
from bleep_that_sht.yt_download import download_video
import tempfile
import uuid
import os
import io
import gradio as gr


# Hugging Face token read from the environment; ``None`` when the variable is
# unset. ``os.environ.get`` does not raise for a missing key, so no
# try/except is needed around the lookup.
HF_TOKEN = os.environ.get("HF_TOKEN")


def _video_component(label, value=None, visible=False):
    """Build a ``gr.Video`` with the settings shared by every video pane.

    Args:
        label: Caption shown above the player.
        value: Path of the video file to display, or ``None`` for an empty player.
        visible: Whether the player is shown.

    Returns:
        The configured ``gr.Video`` component.
    """
    return gr.Video(
        value=value,
        visible=visible,
        show_download_button=True,
        show_label=True,
        label=label,
        format="mp4",
        width="50vw",
        height="50vw",
    )


def _parse_bleep_words(raw):
    """Split a comma separated string into a list of non-empty, stripped words."""
    return [word.strip() for word in (raw or "").split(",") if word.strip()]


def _download_and_transcribe(url, model, workdir):
    """Download a video into *workdir*, extract its audio and transcribe it.

    Args:
        url: Video url handed to ``download_video``.
        model: Whisper model name handed to ``transcribe``.
        workdir: Existing directory that receives the downloaded files.

    Returns:
        Tuple ``(video_path, audio_path, transcript, timestamped_transcript)``.
    """
    # One id per request so concurrent requests never share file names.
    run_id = uuid.uuid4()
    video_path = os.path.join(workdir, f"original_{run_id}.mp4")
    audio_path = os.path.join(workdir, f"original_{run_id}.mp3")

    download_video(url, video_path)
    extract_audio(video_path, audio_path)
    transcript, timestamped_transcript = transcribe(local_file_path=audio_path, model=model)
    return video_path, audio_path, transcript, timestamped_transcript


# The click handlers run long after the UI below has been built, so the working
# directory must outlive UI construction. Holding the TemporaryDirectory object
# at module level (instead of inside a ``with`` block that closes before
# ``demo.launch()``) keeps the directory in place while the app is serving;
# it is removed when the object is finalized.
_workdir = tempfile.TemporaryDirectory()
tmpdirname = _workdir.name

print("Setting up Gradio interface...")
with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Bleep That Sh*t 🙊") as demo:
    with gr.Tabs():
        with gr.TabItem("🎬 Bleep That Sh*t 🙊"):
            with gr.Row():
                with gr.Column(scale=4):
                    url_input = gr.Textbox(
                        value="https://www.youtube.com/shorts/43BhDHYBG0o",
                        label="🔗 Paste YouTube / Shorts URL here",
                        placeholder="e.g., https://www.youtube.com/watch?v=.",
                        max_lines=1,
                    )

            with gr.Row():
                with gr.Column(scale=8):
                    bleep_words = gr.Textbox(
                        placeholder="bleep keywords go here separated by commas",
                        label="bleep-word list",
                        value="treetz, ice, cream, chocolate, syrup, cookie, hooked, threats, treats, trees",
                    )
                with gr.Column(scale=3):
                    model_selection = gr.Dropdown(
                        choices=avaliable_models,
                        value="base",
                        label="whisper model (base only in HF space)",
                        info="whisper model selection",
                        interactive=False,
                    )
                with gr.Column(scale=4):
                    just_transcribe_button = gr.Button("Just Transcribe", variant="primary")
                    transcribe_and_bleep_button = gr.Button("Transcribe & Bleep", variant="primary")

            with gr.Row():
                transcript_output = gr.Textbox(
                    label="Video Transcript",
                    placeholder="",
                    max_lines=5,
                    show_copy_button=True,
                )

            with gr.Row():
                # Both players start hidden and are revealed by the handlers.
                og_video = _video_component("original video")
                bleep_video = _video_component("bleeped video")

            @just_transcribe_button.click(
                inputs=[url_input, model_selection],
                outputs=[og_video, bleep_video, transcript_output],
            )
            def just_transcribe(url_input, model_selection):
                """Download and transcribe the video without bleeping it.

                Returns the original video (visible), a hidden bleeped-video
                player and the plain transcript.
                """
                video_path, _, transcript, _ = _download_and_transcribe(
                    url_input, model_selection, tmpdirname
                )
                return (
                    _video_component("original video", value=video_path, visible=True),
                    _video_component("bleeped video"),
                    transcript,
                )

            @transcribe_and_bleep_button.click(
                inputs=[url_input, model_selection, bleep_words],
                outputs=[og_video, bleep_video, transcript_output],
            )
            def transcribe_and_bleep(url_input, model_selection, bleep_words):
                """Download, transcribe and bleep the requested words.

                Returns the original video, the bleeped video and the plain
                transcript, or ``(None, None, None)`` after a warning when no
                usable bleep word was supplied.
                """
                # Validate before downloading: whitespace-only or comma-only
                # input yields no words and would otherwise trigger a full
                # download + transcription for nothing.
                bleep_word_list = _parse_bleep_words(bleep_words)
                if not bleep_word_list:
                    gr.Warning("bleep words empty!", duration=3)
                    return None, None, None

                video_path, audio_path, transcript, timestamped_transcript = _download_and_transcribe(
                    url_input, model_selection, tmpdirname
                )

                # Derive the output names from the file name only, so that the
                # directory part of the path is never rewritten.
                workdir, video_name = os.path.split(video_path)
                bleep_stem = os.path.splitext(video_name)[0].replace("original", "bleep", 1)
                bleep_video_output = os.path.join(workdir, bleep_stem + ".mp4")
                bleep_audio_output = os.path.join(workdir, bleep_stem + ".mp3")

                bleep_replace(
                    video_path,
                    audio_path,
                    bleep_video_output,
                    bleep_audio_output,
                    bleep_word_list,
                    timestamped_transcript,
                )

                return (
                    _video_component("original video", value=video_path, visible=True),
                    _video_component("bleeped video", value=bleep_video_output, visible=True),
                    transcript,
                )

        with gr.TabItem("💡 About"):
            with gr.Blocks() as about:
                gr.Markdown(
                    (
                        "### Bleep out words of your choice from an input video.  \n"
                        "How it works: \n\n"
                        "1.  Provided a youtube / shorts url \n"
                        "2.  Choose your your desired bleep keywords \n"
                        "3.  (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space) \n"
                        "4.  (optional) Press 'Just Transcribe' to examine / download just the transcription of the video (can help in choosing bleep words) \n"
                        "5.  Press 'Transcribe and bleep' to transcribe and replace all instances of your keywords with *beep* sounds \n\n"
                        "If you want to select your Whisper model / run longer videos pull and run the app locally. \n\n"
                        "Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription.  \n\n"
                        "You do *not* need a GPU to run this locally.  Larger models take more time to process locally, but its doable. \n"
                    )
                )

if __name__ == "__main__":
    print("Launching Gradio interface...")
    demo.launch()
File renamed without changes.
File renamed without changes.
27 changes: 11 additions & 16 deletions bleep_that_sht/yt_download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from pytube import YouTube
# from pytube import YouTube

import yt_dlp
import re


Expand All @@ -15,21 +17,14 @@ def download_video(url: str, savepath: str, my_proxies: dict = {}) -> None:
try:
print("Downloading video from youtube...")
if is_valid_youtube_url(url):
yt = YouTube(url, proxies=my_proxies)
audio_video_streams = (
yt.streams.filter(
file_extension="mp4",
only_audio=False,
only_video=False,
progressive=True,
type="video",
)
.order_by("resolution")
.asc()
)
audio_video_itags = [v.itag for v in audio_video_streams]
first_choice_itag = audio_video_itags[0]
yt.streams.get_by_itag(first_choice_itag).download(filename=savepath)
ydl_opts = {
'format': 'bestvideo[height<=720]+bestaudio/best',
'merge_output_format': 'mp4',
'outtmpl': savepath,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])

print("...done!")
else:
raise ValueError(f"invalid input url: {url}")
Expand Down
4 changes: 4 additions & 0 deletions requirements.gradio
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
whisper-timestamped
moviepy
yt-dlp
gradio
4 changes: 4 additions & 0 deletions requirements.streamlit
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
whisper-timestamped
moviepy
yt-dlp
streamlit
5 changes: 0 additions & 5 deletions requirements.txt

This file was deleted.

0 comments on commit 110b5ee

Please sign in to comment.