test requirements added

neonwatty · Jul 16, 2024 · aed7eb8 · aed7eb8
1 parent 376166d
commit aed7eb8
Show file tree

Hide file tree

Showing 12 changed files with 210 additions and 133 deletions.
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -0,0 +1,48 @@
+# This workflow will install Python dependencies, run tests and lint with a single version of Python
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python application
+
+on:
+  push:
+    branches: [ "main"]
+    paths-ignore:
+      - '**/README.md'
+      - '**/CONTRIBUTING.md'
+  pull_request:
+    branches: [ "main" ]
+    paths-ignore:
+      - '**/README.md'
+      - '**/CONTRIBUTING.md'
+
+jobs:
+  ruff:
+    name: lint with ruff
+    runs-on: ubuntu-latest
+    timeout-minutes: 3
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v2
+      - uses: chartboost/ruff-action@v1
+        with:
+          args: 'format --check'
+          config: .ruff.toml
+  test:
+    name: run pytest
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.10'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.test
+        pip install -r requirements.txt
+    - name: Run pytest
+      run: |
+        PYTHONPATH=. python3.10 -m pytest tests/test_streamlit.py
diff --git a/.ruff.toml b/.ruff.toml
@@ -0,0 +1,39 @@
+line-length = 150
+target-version = "py38"
+lint.select = ["E", "W"]
+lint.fixable = ["ALL"]
+lint.ignore = ["E501", "E999", "E402"]
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+    "tests",
+    "scratch_notebooks",
+    "release_notes",
+    "notebook_tests",
+    "demos"
+]
+extend-include = ["*.ipynb"]
diff --git a/beep_that_sht_walkthrough.ipynb b/beep_that_sht_walkthrough.ipynb
@@ -14,7 +14,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from IPython.display import HTML \n",
+    "from IPython.display import HTML\n",
     "from base64 import b64encode\n",
     "\n",
     "# if running in Colab, pull the repo and install requirements\n",
@@ -23,18 +23,20 @@
     "    %cd bleep_that_sht\n",
     "    !pip install -r requirements.txt\n",
     "\n",
+    "\n",
     "# make sure video can be played on ubuntu\n",
-    "def display_video(path):  \n",
-    "    mp4 = open(path,'rb').read()   \n",
+    "def display_video(path):\n",
+    "    mp4 = open(path, \"rb\").read()\n",
     "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
     "    display(\n",
-    "      HTML(\n",
-    "      \"\"\"\n",
+    "        HTML(\n",
+    "            \"\"\"\n",
     "          <video width=200 controls>\n",
     "                <source src=\"%s\" type=\"video/mp4\">\n",
     "          </video>\n",
-    "      \"\"\" % data_url\n",
-    "           )   \n",
+    "      \"\"\"\n",
+    "            % data_url\n",
+    "        )\n",
     "    )"
    ]
   },
@@ -77,6 +79,7 @@
    "source": [
     "# play the *bleep* sound\n",
     "from IPython.display import Audio, display\n",
+    "\n",
     "display(Audio(\"bleep_that_sht/bleep.mp3\", autoplay=True))"
    ]
   },
@@ -152,6 +155,7 @@
    "source": [
     "from moviepy.editor import VideoFileClip\n",
     "\n",
+    "\n",
     "def extract_audio(*, local_file_path: str, audio_filepath: str) -> None:\n",
     "    try:\n",
     "        video = VideoFileClip(local_file_path)\n",
@@ -370,11 +374,11 @@
    "source": [
     "# simple word cleaner - remove punctuation etc.,\n",
     "def word_cleaner(word: str) -> str:\n",
-    "    return ''.join(e for e in word if e.isalnum()).lower().strip()\n",
+    "    return \"\".join(e for e in word if e.isalnum()).lower().strip()\n",
+    "\n",
     "\n",
     "# collect all timestamped instances of bleep_word in transcript\n",
-    "def query_transcript(bleep_words: list,\n",
-    "                     timestamped_transcript: list) -> list:\n",
+    "def query_transcript(bleep_words: list, timestamped_transcript: list) -> list:\n",
     "    transcript_words = sum([timestamped_transcript[i][\"words\"] for i in range(len(timestamped_transcript))], [])\n",
     "    detected_bleep_words = []\n",
     "    for bleep_word in bleep_words:\n",
@@ -400,14 +404,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from pydub import AudioSegment \n",
+    "from pydub import AudioSegment\n",
+    "\n",
     "bleep_sound = AudioSegment.from_mp3(\"bleep_that_sht/bleep.mp3\")\n",
-    "bleep_first_sec = bleep_sound[1 * 1000: 2 * 1000] \n",
+    "bleep_first_sec = bleep_sound[1 * 1000 : 2 * 1000]\n",
     "\n",
     "\n",
-    "def splice_audio_with_bleeps(og_audio_path: str,\n",
-    "                             bleep_words: list) -> list:\n",
-    "    # input original audio file for splicing    \n",
+    "def splice_audio_with_bleeps(og_audio_path: str, bleep_words: list) -> list:\n",
+    "    # input original audio file for splicing\n",
     "    test_sound = AudioSegment.from_mp3(og_audio_path)\n",
     "\n",
     "    # find bleep_words in timestamped transcript\n",
@@ -421,22 +425,22 @@
     "    prev_end_time = 1\n",
     "    for instance in bleep_word_instances:\n",
     "        # unpack bleep_word start / end times - converted to milliseconds\n",
-    "        start_time = int(instance[\"start\"]*1000) - 50\n",
-    "        end_time = int(instance[\"end\"]*1000) + 50\n",
-    "        \n",
+    "        start_time = int(instance[\"start\"] * 1000) - 50\n",
+    "        end_time = int(instance[\"end\"] * 1000) + 50\n",
+    "\n",
     "        # collect clip of test starting at previous end time, and leading to start_time of next bleep\n",
     "        audio_clip = test_sound[prev_end_time:start_time]\n",
-    "        \n",
+    "\n",
     "        # create bleep clip for this instance\n",
-    "        bleep_clip = bleep_first_sec[:(end_time - start_time)]\n",
-    "        \n",
+    "        bleep_clip = bleep_first_sec[: (end_time - start_time)]\n",
+    "\n",
     "        # store test and bleep clips\n",
     "        contiguous_audio_clips.append(audio_clip)\n",
     "        contiguous_audio_clips.append(bleep_clip)\n",
     "\n",
     "        # update prev_end_time\n",
     "        prev_end_time = end_time\n",
-    "        \n",
+    "\n",
     "    # create final clip from test\n",
     "    audio_clip = test_sound[prev_end_time:]\n",
     "    contiguous_audio_clips.append(audio_clip)\n",
@@ -470,31 +474,23 @@
     "from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip\n",
     "\n",
     "\n",
-    "def bleep_that_sht(og_video_path: str,\n",
-    "                   og_audio_path: str,\n",
-    "                   final_video_path: str,\n",
-    "                   final_audio_path: str,\n",
-    "                   bleep_words: list) -> None:\n",
-    "    # input og audio file for splicing    \n",
+    "def bleep_that_sht(og_video_path: str, og_audio_path: str, final_video_path: str, final_audio_path: str, bleep_words: list) -> None:\n",
+    "    # input og audio file for splicing\n",
     "    test_sound = AudioSegment.from_mp3(og_audio_path)\n",
     "\n",
     "    # create list of new audio clips replacing all bleep words\n",
     "    contiguous_audio_clips = splice_audio_with_bleeps(og_audio_path, bleep_words)\n",
-    "    \n",
+    "\n",
     "    # merge and save bleeped audio\n",
     "    bleeped_test_clip = sum(contiguous_audio_clips)\n",
-    "    bleeped_test_clip.export(final_audio_path, format=\"mp3\") \n",
-    "    \n",
+    "    bleeped_test_clip.export(final_audio_path, format=\"mp3\")\n",
+    "\n",
     "    # load in og video, overlay with bleeped audio\n",
     "    og_video = VideoFileClip(og_video_path)\n",
     "    bleep_audio = AudioFileClip(final_audio_path)\n",
     "    new_audioclip = CompositeAudioClip([bleep_audio])\n",
     "    og_video.audio = new_audioclip\n",
-    "    og_video.write_videofile(final_video_path,\n",
-    "                             codec='libx264', \n",
-    "                             audio_codec='aac', \n",
-    "                             temp_audiofile='temp-audio.m4a', \n",
-    "                             remove_temp=True)"
+    "    og_video.write_videofile(final_video_path, codec=\"libx264\", audio_codec=\"aac\", temp_audiofile=\"temp-audio.m4a\", remove_temp=True)"
    ]
   },
   {
@@ -560,13 +556,9 @@
     "# define path to saved bleep audio and video\n",
     "final_video_path = \"data/output/bleep_test_1.mp4\"\n",
     "final_audio_path = \"data/output/bleep_test_1.mp3\"\n",
-    "    \n",
-    "# create bleeped audio and video \n",
-    "bleep_that_sht(og_video_path, \n",
-    "               og_audio_path, \n",
-    "               final_video_path, \n",
-    "               final_audio_path, \n",
-    "               bleep_words)"
+    "\n",
+    "# create bleeped audio and video\n",
+    "bleep_that_sht(og_video_path, og_audio_path, final_video_path, final_audio_path, bleep_words)"
    ]
   },
   {

diff --git a/bleep_that_sht/app_url_download.py b/bleep_that_sht/app_url_download.py
@@ -7,7 +7,6 @@
 import tempfile
 import uuid
 import io
-import time
 
 st.set_page_config(page_title="🎬 Bleep That Sh*t 🙊")
 st.title("🎬 Bleep That Sh*t 🙊")
@@ -16,26 +15,16 @@
 
 with tab2:
     st.markdown(
-        """
-            ### Bleep out words of your choice from an input video. 
-            
-            How it works:
-            
-            1.  Provided a youtube / shorts url
-            
-            2.  Choose your your desired bleep keywords
-            
-            3.  (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space)
-            4.  (optional) Press "Just Transcribe" to examine / download just the transcription of the video (can help in choosing bleep words)
-            5.  Press "Transcribe and bleep" to transcribe and replace all instances of your keywords with *beep* sounds
-                    
-            If you want to select your Whisper model / run longer videos pull and run the app locally. 
-            
-            Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription.
-            
-            You do *not* need a GPU to run this locally.  Larger models take more time to process locally, but its doable.
-        
-            """
+        "### Bleep out words of your choice from an input video.  \n"
+        "How it works: \n\n"
+        "1.  Provided a youtube / shorts url \n"
+        "2.  Choose your your desired bleep keywords \n"
+        "3.  (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space) \n"
+        "4.  (optional) Press 'Just Transcribe' to examine / download just the transcription of the video (can help in choosing bleep words) \n"
+        "5.  Press 'Transcribe and bleep' to transcribe and replace all instances of your keywords with *beep* sounds \n\n"
+        "If you want to select your Whisper model / run longer videos pull and run the app locally. \n\n"
+        "Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription. \n\n"
+        "You do *not* need a GPU to run this locally.  Larger models take more time to process locally, but its doable. \n"
     )
 
 with tab1:
@@ -97,9 +86,7 @@ def button_logic(
                 out.close()
 
             extract_audio(temporary_video_location, temporary_audio_location)
-            transcript, timestamped_transcript = transcribe(
-                local_file_path=temporary_audio_location, model=model_selection
-            )
+            transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection)
 
             with col0.container(border=True):
                 st.text_area(
@@ -121,9 +108,7 @@ def button_logic(
                 out.close()
 
             extract_audio(temporary_video_location, temporary_audio_location)
-            transcript, timestamped_transcript = transcribe(
-                local_file_path=temporary_audio_location, model=model_selection
-            )
+            transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection)
 
             with col0.container(border=True):
                 st.text_area(
@@ -146,11 +131,7 @@ def button_logic(
                 st.video(bleep_video_output)
 
     with tempfile.TemporaryDirectory() as tmpdirname:
-        temporary_video_location = (
-            tmpdirname + "/original_" + str(uuid.uuid4()) + ".mp4"
-        )
+        temporary_video_location = tmpdirname + "/original_" + str(uuid.uuid4()) + ".mp4"
         bleep_word_list = bleep_words.split(",")
         bleep_words_list = [v.strip() for v in bleep_word_list if len(v.strip()) > 0]
-        button_logic(
-            temporary_video_location, model_selection, bleep_words_list, upload_url
-        )
+        button_logic(temporary_video_location, model_selection, bleep_words_list, upload_url)