Skip to content

Commit

Permalink
test requirements added
Browse files Browse the repository at this point in the history
  • Loading branch information
neonwatty committed Jul 16, 2024
1 parent 376166d commit aed7eb8
Show file tree
Hide file tree
Showing 12 changed files with 210 additions and 133 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application

# Run on pushes and pull requests that target main, skipping changes that
# only touch README.md / CONTRIBUTING.md files.
on:
  push:
    branches: [ "main" ]
    paths-ignore:
      - '**/README.md'
      - '**/CONTRIBUTING.md'
  pull_request:
    branches: [ "main" ]
    paths-ignore:
      - '**/README.md'
      - '**/CONTRIBUTING.md'

jobs:
  # Formatting gate: fails when `ruff format --check` reports unformatted files.
  ruff:
    name: lint with ruff
    runs-on: ubuntu-latest
    timeout-minutes: 3
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Pin the interpreter so this job uses the same Python as the test job.
          python-version: '3.10'
      - uses: chartboost/ruff-action@v1
        with:
          args: 'format --check'
          config: .ruff.toml

  # Test gate: installs test + runtime requirements and runs the Streamlit tests.
  test:
    name: run pytest
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.test
          pip install -r requirements.txt
      - name: Run pytest
        # PYTHONPATH=. makes the repository root importable from the tests.
        run: |
          PYTHONPATH=. python3.10 -m pytest tests/test_streamlit.py
39 changes: 39 additions & 0 deletions .ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Ruff configuration shared by local runs and the CI workflow.

# Maximum line width.
line-length = 150
# Oldest Python version the code is expected to support.
target-version = "py38"
# Also process Jupyter notebooks in addition to the default file types.
extend-include = ["*.ipynb"]
# Paths Ruff should not touch: VCS/tooling caches, virtualenvs, build output,
# and project folders that are intentionally left unchecked.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pyenv",
    ".pytest_cache",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    ".vscode",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "site-packages",
    "venv",
    "tests",
    "scratch_notebooks",
    "release_notes",
    "notebook_tests",
    "demos",
]

[lint]
# Enable the pycodestyle error ("E") and warning ("W") rule families.
select = ["E", "W"]
# Allow automatic fixes for every rule that supports them.
fixable = ["ALL"]
# Rules switched off project-wide.
ignore = ["E501", "E999", "E402"]
78 changes: 35 additions & 43 deletions beep_that_sht_walkthrough.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"outputs": [],
"source": [
"import os\n",
"from IPython.display import HTML \n",
"from IPython.display import HTML\n",
"from base64 import b64encode\n",
"\n",
"# if running in Colab, pull the repo and install requirements\n",
Expand All @@ -23,18 +23,20 @@
" %cd bleep_that_sht\n",
" !pip install -r requirements.txt\n",
"\n",
"\n",
"# make sure video can be played on ubuntu\n",
"def display_video(path): \n",
" mp4 = open(path,'rb').read() \n",
"def display_video(path):\n",
" mp4 = open(path, \"rb\").read()\n",
" data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
" display(\n",
" HTML(\n",
" \"\"\"\n",
" HTML(\n",
" \"\"\"\n",
" <video width=200 controls>\n",
" <source src=\"%s\" type=\"video/mp4\">\n",
" </video>\n",
" \"\"\" % data_url\n",
" ) \n",
" \"\"\"\n",
" % data_url\n",
" )\n",
" )"
]
},
Expand Down Expand Up @@ -77,6 +79,7 @@
"source": [
"# play the *bleep* sound\n",
"from IPython.display import Audio, display\n",
"\n",
"display(Audio(\"bleep_that_sht/bleep.mp3\", autoplay=True))"
]
},
Expand Down Expand Up @@ -152,6 +155,7 @@
"source": [
"from moviepy.editor import VideoFileClip\n",
"\n",
"\n",
"def extract_audio(*, local_file_path: str, audio_filepath: str) -> None:\n",
" try:\n",
" video = VideoFileClip(local_file_path)\n",
Expand Down Expand Up @@ -370,11 +374,11 @@
"source": [
"# simple word cleaner - remove punctuation etc.,\n",
"def word_cleaner(word: str) -> str:\n",
" return ''.join(e for e in word if e.isalnum()).lower().strip()\n",
" return \"\".join(e for e in word if e.isalnum()).lower().strip()\n",
"\n",
"\n",
"# collect all timestamped instances of bleep_word in transcript\n",
"def query_transcript(bleep_words: list,\n",
" timestamped_transcript: list) -> list:\n",
"def query_transcript(bleep_words: list, timestamped_transcript: list) -> list:\n",
" transcript_words = sum([timestamped_transcript[i][\"words\"] for i in range(len(timestamped_transcript))], [])\n",
" detected_bleep_words = []\n",
" for bleep_word in bleep_words:\n",
Expand All @@ -400,14 +404,14 @@
"metadata": {},
"outputs": [],
"source": [
"from pydub import AudioSegment \n",
"from pydub import AudioSegment\n",
"\n",
"bleep_sound = AudioSegment.from_mp3(\"bleep_that_sht/bleep.mp3\")\n",
"bleep_first_sec = bleep_sound[1 * 1000: 2 * 1000] \n",
"bleep_first_sec = bleep_sound[1 * 1000 : 2 * 1000]\n",
"\n",
"\n",
"def splice_audio_with_bleeps(og_audio_path: str,\n",
" bleep_words: list) -> list:\n",
" # input original audio file for splicing \n",
"def splice_audio_with_bleeps(og_audio_path: str, bleep_words: list) -> list:\n",
" # input original audio file for splicing\n",
" test_sound = AudioSegment.from_mp3(og_audio_path)\n",
"\n",
" # find bleep_words in timestamped transcript\n",
Expand All @@ -421,22 +425,22 @@
" prev_end_time = 1\n",
" for instance in bleep_word_instances:\n",
" # unpack bleep_word start / end times - converted to milliseconds\n",
" start_time = int(instance[\"start\"]*1000) - 50\n",
" end_time = int(instance[\"end\"]*1000) + 50\n",
" \n",
" start_time = int(instance[\"start\"] * 1000) - 50\n",
" end_time = int(instance[\"end\"] * 1000) + 50\n",
"\n",
" # collect clip of test starting at previous end time, and leading to start_time of next bleep\n",
" audio_clip = test_sound[prev_end_time:start_time]\n",
" \n",
"\n",
" # create bleep clip for this instance\n",
" bleep_clip = bleep_first_sec[:(end_time - start_time)]\n",
" \n",
" bleep_clip = bleep_first_sec[: (end_time - start_time)]\n",
"\n",
" # store test and bleep clips\n",
" contiguous_audio_clips.append(audio_clip)\n",
" contiguous_audio_clips.append(bleep_clip)\n",
"\n",
" # update prev_end_time\n",
" prev_end_time = end_time\n",
" \n",
"\n",
" # create final clip from test\n",
" audio_clip = test_sound[prev_end_time:]\n",
" contiguous_audio_clips.append(audio_clip)\n",
Expand Down Expand Up @@ -470,31 +474,23 @@
"from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip\n",
"\n",
"\n",
"def bleep_that_sht(og_video_path: str,\n",
" og_audio_path: str,\n",
" final_video_path: str,\n",
" final_audio_path: str,\n",
" bleep_words: list) -> None:\n",
" # input og audio file for splicing \n",
"def bleep_that_sht(og_video_path: str, og_audio_path: str, final_video_path: str, final_audio_path: str, bleep_words: list) -> None:\n",
" # input og audio file for splicing\n",
" test_sound = AudioSegment.from_mp3(og_audio_path)\n",
"\n",
" # create list of new audio clips replacing all bleep words\n",
" contiguous_audio_clips = splice_audio_with_bleeps(og_audio_path, bleep_words)\n",
" \n",
"\n",
" # merge and save bleeped audio\n",
" bleeped_test_clip = sum(contiguous_audio_clips)\n",
" bleeped_test_clip.export(final_audio_path, format=\"mp3\") \n",
" \n",
" bleeped_test_clip.export(final_audio_path, format=\"mp3\")\n",
"\n",
" # load in og video, overlay with bleeped audio\n",
" og_video = VideoFileClip(og_video_path)\n",
" bleep_audio = AudioFileClip(final_audio_path)\n",
" new_audioclip = CompositeAudioClip([bleep_audio])\n",
" og_video.audio = new_audioclip\n",
" og_video.write_videofile(final_video_path,\n",
" codec='libx264', \n",
" audio_codec='aac', \n",
" temp_audiofile='temp-audio.m4a', \n",
" remove_temp=True)"
" og_video.write_videofile(final_video_path, codec=\"libx264\", audio_codec=\"aac\", temp_audiofile=\"temp-audio.m4a\", remove_temp=True)"
]
},
{
Expand Down Expand Up @@ -560,13 +556,9 @@
"# define path to saved bleep audio and video\n",
"final_video_path = \"data/output/bleep_test_1.mp4\"\n",
"final_audio_path = \"data/output/bleep_test_1.mp3\"\n",
" \n",
"# create bleeped audio and video \n",
"bleep_that_sht(og_video_path, \n",
" og_audio_path, \n",
" final_video_path, \n",
" final_audio_path, \n",
" bleep_words)"
"\n",
"# create bleeped audio and video\n",
"bleep_that_sht(og_video_path, og_audio_path, final_video_path, final_audio_path, bleep_words)"
]
},
{
Expand Down
47 changes: 14 additions & 33 deletions bleep_that_sht/app_url_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import tempfile
import uuid
import io
import time

st.set_page_config(page_title="🎬 Bleep That Sh*t 🙊")
st.title("🎬 Bleep That Sh*t 🙊")
Expand All @@ -16,26 +15,16 @@

with tab2:
st.markdown(
"""
### Bleep out words of your choice from an input video.
How it works:
1. Provided a youtube / shorts url
2. Choose your your desired bleep keywords
3. (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space)
4. (optional) Press "Just Transcribe" to examine / download just the transcription of the video (can help in choosing bleep words)
5. Press "Transcribe and bleep" to transcribe and replace all instances of your keywords with *beep* sounds
If you want to select your Whisper model / run longer videos pull and run the app locally.
Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription.
You do *not* need a GPU to run this locally. Larger models take more time to process locally, but its doable.
"""
"### Bleep out words of your choice from an input video. \n"
"How it works: \n\n"
"1. Provided a youtube / shorts url \n"
"2. Choose your your desired bleep keywords \n"
"3. (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space) \n"
"4. (optional) Press 'Just Transcribe' to examine / download just the transcription of the video (can help in choosing bleep words) \n"
"5. Press 'Transcribe and bleep' to transcribe and replace all instances of your keywords with *beep* sounds \n\n"
"If you want to select your Whisper model / run longer videos pull and run the app locally. \n\n"
"Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription. \n\n"
"You do *not* need a GPU to run this locally. Larger models take more time to process locally, but its doable. \n"
)

with tab1:
Expand Down Expand Up @@ -97,9 +86,7 @@ def button_logic(
out.close()

extract_audio(temporary_video_location, temporary_audio_location)
transcript, timestamped_transcript = transcribe(
local_file_path=temporary_audio_location, model=model_selection
)
transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection)

with col0.container(border=True):
st.text_area(
Expand All @@ -121,9 +108,7 @@ def button_logic(
out.close()

extract_audio(temporary_video_location, temporary_audio_location)
transcript, timestamped_transcript = transcribe(
local_file_path=temporary_audio_location, model=model_selection
)
transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection)

with col0.container(border=True):
st.text_area(
Expand All @@ -146,11 +131,7 @@ def button_logic(
st.video(bleep_video_output)

with tempfile.TemporaryDirectory() as tmpdirname:
temporary_video_location = (
tmpdirname + "/original_" + str(uuid.uuid4()) + ".mp4"
)
temporary_video_location = tmpdirname + "/original_" + str(uuid.uuid4()) + ".mp4"
bleep_word_list = bleep_words.split(",")
bleep_words_list = [v.strip() for v in bleep_word_list if len(v.strip()) > 0]
button_logic(
temporary_video_location, model_selection, bleep_words_list, upload_url
)
button_logic(temporary_video_location, model_selection, bleep_words_list, upload_url)
Loading

0 comments on commit aed7eb8

Please sign in to comment.