diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..34584c23 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,26 @@ +name: Static analysis + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + flake8: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install flake8 + - name: Run flake8 + run: | + # ignore errors for long lines and multi-statement lines + python -m flake8 --ignore=E501,E701,W503 speech_recognition tests examples setup.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 704912d9..00000000 --- a/.travis.yml +++ /dev/null @@ -1,35 +0,0 @@ -language: python -python: - - "2.7" - - "3.3" - - "3.4" - - "3.5" - - "3.6" - - "3.6-dev" - - "3.7-dev" - - "nightly" - -addons: - apt: - packages: - - swig - - libpulse-dev - - libasound2-dev -install: - - trap 'sleep 3' ERR - - pip install pocketsphinx monotonic - - pip install flake8 rstcheck - - pip install -e . -script: - - flake8 --ignore=E501,E701,W504 speech_recognition tests examples setup.py # ignore errors for long lines and multi-statement lines - - rstcheck README.rst reference/*.rst # ensure RST is well-formed - - python -m unittest discover --verbose # run unit tests -sudo: false # this allows TravisCI to use the fast Docker build environment rather than the slower VMs -env: - global: - - secure: "jFHi/NK+hkf8Jw/bA06utypMRAzOcpeKPEZz/P2U79c70aIcmeAOGNUG6t5x2hmaeNpaP1STDtOLVdDawLY904rv/2sAhdMExlLUYubVQrJumvfgwyHRep0NLxrWV/Sf7y6FBPsvS0We29sn5HeEUlSzFwLrANyagpZYGeeWI3SGfdseDK/n4SlD436i7n5jM0Vlbmo07JDtdTN5Ov17APtuqy0ZViNhhTG+wvU8RCd/0/1IvstaaOhSa/82jABXNzH12hY4ynSuK75EVdVLj/WstSmH90r+8TS+YHH1D68yFeoub8kjTzZirqDuwb1s0nGOzx3VAC03+Fb48jHNfz2X0LJEj6gOpaaxgXOr4qkb1+Bx4L1bUkMk3ywjKoXFF0BU/haZfPbzG0fFUDubEXYjhC88gM1CR0LrFf4qtIqFcdM4sjasfv7TD2peiuWqVRZeHzjcvQVC8aDxVFFbTF+Cx1xZ1qLxAY5iJ/dUPWpOVcSs0GIJaJw7LQJU5uQbiU0vg17k9QcVYbASJu0cFAt/OsWGDZp/uArSWrMcSoexe8wI8/k5u9XFnOmlEu5kUJXOrZANjniUk5ilFUe+lag2Zl/ZasNtW16qke+vaWfBnpKl7NOoQemWNdYOxgyc/4x9B3x8gryf5XAmfBeqneh7k10O18u6GYpt33r0zuQ=" # encrypted version of "WIT_AI_KEY=(my key)" - - secure: "ZKs+ywhJett8CpA24wR8js3C5B0uzaXMFIaiWBgkQfVhwbwkecCjG2HbLaJ1ncXP5VZnrXF6Ym4pZm87q0mIp/S0dMS7ZC5Jikowc3Bdyph9L49MDubZL0SO98+YR9j0QeKw8wxiVP6kv9cw12uVWn4VNgGcuW6AYZ0AqzdvUfW4+zby+Ua9U8LC0RcDKY3GR4Svq6dUjNFtJmI5uJ129UFO4oujCzuHNZL3KSSUJVt1KelVX+1eUNJ67sN3AvoMfx86jXNtN0kS12lZ+dP4YDo+lCtViG/W1dHCCdBmnUZsPE4Bc+Uyvg/BeKZaL1hgrNb6QHCNWmZC7jGxzkP2akwX5PxmKW7ClXn/79c7e84YUiRHlYQgL0qP+kZ7WDG6nJyKqLNFAtTHAw5F++5cpomNThYoCJeQOmkhi+KLEs9KMmn4d/bOLzW1RCeuf0bhjOOCI89+761aqJ1ky8UHJUZCKjYegHLM/bZ9LkKnUi+d+KYNQB8qpluZSLqObknCczh6ekKt/1FdrC+FBbFmpkTCuru1K9APdz01+ipVV8Av6NB+ax0+KPlKp49TA9uzANKWyLRkW9j6LD67MGF6SH/h8t5OeNZXdmf4DGjqv1erbKZeW+y25Hw7lVbqEo1m4T9wn1lmA1nse0kBrqGF+kQ4mNdfNSmWGWKxj+gFuxA=" # encrypted version of "BING_KEY=(my key)" - - secure: "JEtMaAhDglqRrHdKZapxIaY0zlCohsepgxfRckhuCB3RZljeIKjt15Q/8LzFcx0ZdQV2thOQ/2oA0WpnfTckEnh42X+Ki0AUlezjXXYII2DenCs9q7jXxuOYK5AjxcNzyfeh7NnI2R3jdAyf49FdnoOa/OdEZq7aYRouP0yZtVKK/eMueURfr7JMsTrmuYoy1LXkF/yEyxns9HiaSebn7YqeQ7cb9Q5LcSigM6kCXZrtG1K4MqWGrvnqGeabE6xoZVxkf+az6fMv91oZ4spZRfjjlFpGx050gP4SCpk8XQqVS2HAtzVSFBdnLld4ydRoGVHVMAOmvQY5xbk5y9REVj4EVdfeErOhaEz6CfFqZi9UpAS0Zza/7khGDDWkHmfg4O4CzrVLkfdcPIgIKcz9TT9zP+wPVCYmfN2Qq0XB+PJkewjmgPuWZnUyBb402iPs1hWEze8oK6Yk5K3OnBuSqeE4EtvpT/SUrLtroSNcWJJ7i585cqgNB5KwzDDKNnyn0zteQQTj+fUzrumQ+/FTYjaafOVZ6ZAiZ+xvgge0+foB94GCoV/8LUm5rVTtk8vV3c3oJu9jdzsyiOSargYPSYg7iy1kzkC/eQ12rX89EWLGjoP+mveLGBpUebQNbB8vxaVRd8uaozW/G3Vwgelqg7gzrvmwkaYK3g6a1TAVpcs=" # encrypted version of "HOUNDIFY_CLIENT_ID=(my client ID)" - - secure: "izFPobia0Luga6mL0pXDBmp/V1/kzZdFc09PbYUBNoyx63DPmDwP8dtSFy9ynEERJg4HQ6KeQzsPED3ZhnYO3C3lD3y078+k6Ryl15aONLrou6jzDiYMw6KV1CQ6V1OIz3tLwZoS7wwWdr0ZYdMEklYVVVu8wJOzl6aZ8gtp8Y3woev6qrxFeXhkkNZOybtQ8ugV6a5EypVEVQ2IGTEVvA6A8oSGDd8BDOSYyKPQ3LXPx7imA6freqio/b5HaACkBIidFRykly3xkBib2phhww2D18Zdu5imJtCmHxFQ3V+N5ZzlUkgmR9gyvdblQgJ7sCwpQAC/Mb0KWqUDar59nRA1WmY+onVN/t7sjBBCPjS0Ddu5Ls3X9Qdh3rflQ2Fc7nSi8iVITAHFreUKEW/jgJyBnFuau0Cu5DNcZYy24W+GBzwks1g/uoy4vWVbijaIzSEXu352CqClSJpBTltp3z0KZ/9D9VRB1tFoFmlVWkW39bBBqpJy/49mGVlbrG2J+hyCW+J+BQFpTcjXSd+JS57XXYKcm3QXnNxxnIQ5lw/6t92SbWWP+IeJB9fJENFLteE5XjtQWQ7gHbb7hP0iH9u92mJbehzvdo9KwePlIeWFC1Wyw3ZHrLa56DykfPNg9kYcuJdTwLMRxI4X5aG/e1QBVAwM8tii6Zrjag684iM=" # encrypted version of "HOUNDIFY_CLIENT_KEY=(my client key)" - - secure: "uj5LUKDtf214EZPqsjpy6tk8iXEfydC3z/px98xbXa/H6PVN6wMPTHsF1DuuTWCbLrqNyi9/rMbpJFiNuqMm+q0LarrvvuTKHA9JFe/ZA11R1w3WI2ZMTvub6vzCbmcznIkjq981BjFWz5aCazPXhLt18e0iMit2FR+D6jwZ4al8TIo9i6RjkJ3MimH2/Sgm2BnXZ7qHsmDlG+4VsABiPiH0SPzrxqJJ4WSOb8EnNkNcOujiHuYvDNR+6R566bXjV1x+z2ewKb2nae5LOEl8L+6B/CsNT2cyeds2imYWAw9vTZoTajXf2u21M3pqRINQ67CuWhGFOdUXiEd6E/jTQFcsE4GuB7eMIYcHCmPzhhHn1b6XzNJtf923+YlSnayf63Y5jHjeSWSWs6pjJOUjJquuXS8vQYuJYX4n8sXDeEsZg0yD2jdxFMqMmjZoKKJzWPTPUkDTLawZdZs2q6bOF+xBQysUPozgSnxe3koCMFLeA1cU6fUkXWWIFDuAehR0JqYQHaUovoO0ZYx8Env0Ojhl6IZclONxaLVA41CbzkSUC1pg0k/VeMiv6YB2SQsFxV1riKM/OPDxq7AAuUuNVDCj/SGya4BJEYrxtagtmq0em8Q8SJzLq7IFNBNq5pO8IaqA0JO/tieSIsutrhdRzVMI35apuwbE+5jxoDmiGW0=" # encrypted version of "IBM_USERNAME=(my username)" - - secure: "fqWkYnsx5xrYjDosEkHramkzuuRjAu6NUkSx/yJf78WTDgJ0XAvu7BP9vdfO9g+KvwVZ9uBSClBXiNM7c1i/CpZCJcZJQtQS9PqL3YB9+76J3hPwOsQx0t3oRiYKPDmHX3WFUFuGhI2k90iw4n6nWHUUiU2WxWk/8sibXxyCf99CRMGwpfycd+w8mhsi/MkzbyxWBjzgRKIFu6tg28bs6GcjrMyoq6avD3jpwghGAu1CA3UnuxdOqY9WI2+d9MwmtK6cUQ88o/5MX7GjPZzfkiohru03yn1sBBBivf1V7Vwvd7xsnUZ+6/WiJnzRkaqoGaYSOnI5bhJ/qR21zNMwNEaYrbdyCWau+YLuOheTJzihyeTN9f5zQg/PiBQMLDyKWBw7+v2rQMzTmKoif7fz+SAN5GMXvqgcoMlZ7se9sk0QH6z+GLYbnZNtu0Qpf01gNaJaveQRuurdLtihF8EBTET+hBouiRTUWHvJMgd6PI2pp9BRdnvwwHlhCQLwUjqprLUHX6OdbhFc2ixHwao+Qbg+oCEv+IhCrW1HoTCFIBy/SllRx0l7MfroEiRDRkaZeKA6bOr+3yirVmUOQVLH5rmVUuoNCmI0BZG5GPt5+AhZ36Wlw3/CXkcJAf7VNcya+u4ls+Hdxb9SyFNsZ5IF0ZWNRPfZlG8uEGDy/o05fbY=" # encrypted version of "IBM_PASSWORD=(my password)" diff --git a/README.rst b/README.rst index 50cae061..965bd46a 100644 --- a/README.rst +++ b/README.rst @@ -276,7 +276,7 @@ To hack on this library, first make sure you have all the requirements listed in - Documentation can be found in the ``reference/`` `directory `__. - Third-party libraries, utilities, and reference material are in the ``third-party/`` `directory `__. -To install/reinstall the library locally, run ``python setup.py install`` in the project `root directory `__. +To install/reinstall the library locally, run ``python -m pip install -e .[dev]`` in the project `root directory `__. Before a release, the version number is bumped in ``README.rst`` and ``speech_recognition/__init__.py``. Version tags are then created using ``git config gpg.program gpg2 && git config user.signingkey DB45F6C431DE7C2DCD99FF7904882258A4063489 && git tag -s VERSION_GOES_HERE -m "Version VERSION_GOES_HERE"``. @@ -291,17 +291,19 @@ To run all the tests: python -m unittest discover --verbose -Testing is also done automatically by TravisCI, upon every push. To set up the environment for offline/local Travis-like testing on a Debian-like system: +To run static analysis: .. code:: bash - sudo docker run --volume "$(pwd):/speech_recognition" --interactive --tty quay.io/travisci/travis-python:latest /bin/bash - su - travis && cd /speech_recognition - sudo apt-get update && sudo apt-get install swig libpulse-dev - pip install --user pocketsphinx && pip install --user flake8 rstcheck && pip install --user -e . - python -m unittest discover --verbose # run unit tests - python -m flake8 --ignore=E501,E701 speech_recognition tests examples setup.py # ignore errors for long lines and multi-statement lines - python -m rstcheck README.rst reference/*.rst # ensure RST is well-formed + python -m flake8 --ignore=E501,E701,W503 speech_recognition tests examples setup.py + +To ensure RST is well-formed: + +.. code:: bash + + python -m rstcheck README.rst reference/*.rst + +Testing is also done automatically by GitHub Actions, upon every push. FLAC Executables ~~~~~~~~~~~~~~~~ diff --git a/examples/microphone_recognition.py b/examples/microphone_recognition.py index c46b412a..a5d8a688 100644 --- a/examples/microphone_recognition.py +++ b/examples/microphone_recognition.py @@ -91,11 +91,11 @@ except sr.UnknownValueError: print("Whisper could not understand audio") except sr.RequestError as e: - print("Could not request results from Whisper") + print(f"Could not request results from Whisper; {e}") # recognize speech using Whisper API OPENAI_API_KEY = "INSERT OPENAI API KEY HERE" try: print(f"Whisper API thinks you said {r.recognize_whisper_api(audio, api_key=OPENAI_API_KEY)}") except sr.RequestError as e: - print("Could not request results from Whisper API") + print(f"Could not request results from Whisper API; {e}") diff --git a/setup.cfg b/setup.cfg index d2fa6c02..c5c47909 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,6 +4,9 @@ universal=1 [options.extras_require] +dev = + flake8 + rstcheck whisper-local = openai-whisper soundfile diff --git a/setup.py b/setup.py index 8d717790..158d1373 100644 --- a/setup.py +++ b/setup.py @@ -21,12 +21,15 @@ def run(self): if os.path.basename(output_path) in FILES_TO_MARK_EXECUTABLE: log.info("setting executable permissions on {}".format(output_path)) stat_info = os.stat(output_path) + OWNER_CAN_READ_EXECUTE = stat.S_IRUSR | stat.S_IXUSR + GROUP_CAN_READ_EXECUTE = stat.S_IRGRP | stat.S_IXGRP + OTHERS_CAN_READ_EXECUTE = stat.S_IROTH | stat.S_IXOTH os.chmod( output_path, - stat_info.st_mode | - stat.S_IRUSR | stat.S_IXUSR | # owner can read/execute - stat.S_IRGRP | stat.S_IXGRP | # group can read/execute - stat.S_IROTH | stat.S_IXOTH # everyone else can read/execute + stat_info.st_mode + | OWNER_CAN_READ_EXECUTE + | GROUP_CAN_READ_EXECUTE + | OTHERS_CAN_READ_EXECUTE, ) diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py index cfa3d6d5..8e68c30d 100644 --- a/speech_recognition/__init__.py +++ b/speech_recognition/__init__.py @@ -1087,7 +1087,8 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec aws_secret_access_key=secret_access_key, region_name=region) - s3 = boto3.client('s3', + s3 = boto3.client( + 's3', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key, region_name=region) @@ -1107,7 +1108,6 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec except ClientError as exc: print('Error creating bucket %s: %s' % (bucket_name, exc)) s3res = session.resource('s3') - bucket = s3res.Bucket(bucket_name) if audio_data is not None: print('Uploading audio data...') wav_data = audio_data.get_wav_data() @@ -1124,7 +1124,7 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec try: status = transcribe.get_transcription_job(TranscriptionJobName=job_name) except ClientError as exc: - print('!'*80) + print('!' * 80) print('Error getting job:', exc.response) if exc.response['Error']['Code'] == 'BadRequestException' and "The requested job couldn't be found" in str(exc): # Some error caused the job we recorded to not exist on AWS. @@ -1137,7 +1137,7 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec else: # Some other error happened, so re-raise. raise - + job = status['TranscriptionJob'] if job['TranscriptionJobStatus'] in ['COMPLETED'] and 'TranscriptFileUri' in job['Transcript']: @@ -1152,12 +1152,12 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec confidences.append(float(item['alternatives'][0]['confidence'])) confidence = 0.5 if confidences: - confidence = sum(confidences)/float(len(confidences)) + confidence = sum(confidences) / float(len(confidences)) transcript = d['results']['transcripts'][0]['transcript'] # Delete job. try: - transcribe.delete_transcription_job(TranscriptionJobName=job_name) # cleanup + transcribe.delete_transcription_job(TranscriptionJobName=job_name) # cleanup except Exception as exc: print('Warning, could not clean up transcription: %s' % exc) traceback.print_exc() @@ -1167,17 +1167,17 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec return transcript, confidence elif job['TranscriptionJobStatus'] in ['FAILED']: - + # Delete job. try: - transcribe.delete_transcription_job(TranscriptionJobName=job_name) # cleanup + transcribe.delete_transcription_job(TranscriptionJobName=job_name) # cleanup except Exception as exc: print('Warning, could not clean up transcription: %s' % exc) traceback.print_exc() # Delete S3 file. s3.delete_object(Bucket=bucket_name, Key=filename) - + exc = TranscriptionFailed() exc.job_name = None exc.file_key = None @@ -1193,11 +1193,6 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec else: # Launch the transcription job. - # try: - # transcribe.delete_transcription_job(TranscriptionJobName=job_name) # pre-cleanup - # except: - # # It's ok if this fails because the job hopefully doesn't exist yet. - # pass try: transcribe.start_transcription_job( TranscriptionJobName=job_name, @@ -1210,7 +1205,7 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec exc.file_key = None raise exc except ClientError as exc: - print('!'*80) + print('!' * 80) print('Error starting job:', exc.response) if exc.response['Error']['Code'] == 'LimitExceededException': # Could not start job. Cancel everything. @@ -1277,9 +1272,7 @@ def read_file(filename, chunk_size=5242880): # Queue file for transcription. endpoint = "https://api.assemblyai.com/v2/transcript" - json = { - "audio_url": upload_url - } + json = {"audio_url": upload_url} headers = { "authorization": api_token, "content-type": "application/json" @@ -1436,23 +1429,23 @@ def recognize_whisper(self, audio_data, model="base", show_dict=False, load_opti return result else: return result["text"] - + def recognize_vosk(self, audio_data, language='en'): from vosk import KaldiRecognizer, Model - + assert isinstance(audio_data, AudioData), "Data must be audio data" - + if not hasattr(self, 'vosk_model'): if not os.path.exists("model"): return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder." - exit (1) + exit(1) self.vosk_model = Model("model") - rec = KaldiRecognizer(self.vosk_model, 16000); - - rec.AcceptWaveform(audio_data.get_raw_data(convert_rate=16000, convert_width=2)); + rec = KaldiRecognizer(self.vosk_model, 16000) + + rec.AcceptWaveform(audio_data.get_raw_data(convert_rate=16000, convert_width=2)) finalRecognition = rec.FinalResult() - + return finalRecognition diff --git a/speech_recognition/audio.py b/speech_recognition/audio.py index 732f7e01..4b7a6132 100644 --- a/speech_recognition/audio.py +++ b/speech_recognition/audio.py @@ -113,7 +113,7 @@ def get_raw_data(self, convert_rate=None, convert_width=None): audioop.error ): # this version of audioop doesn't support 24-bit audio (probably Python 3.3 or less) raw_data = b"".join( - raw_data[i + 1 : i + 4] + raw_data[i + 1: i + 4] for i in range(0, len(raw_data), 4) ) # since we're in little endian, we discard the first byte from each 32-bit sample to get a 24-bit sample else: # 24-bit audio fully supported, we don't need to shim anything @@ -188,8 +188,8 @@ def get_aiff_data(self, convert_rate=None, convert_width=None): ): # ``audioop.byteswap`` was only added in Python 3.4 raw_data = audioop.byteswap(raw_data, sample_width) else: # manually reverse the bytes of each sample, which is slower but works well enough as a fallback - raw_data = raw_data[sample_width - 1 :: -1] + b"".join( - raw_data[i + sample_width : i : -1] + raw_data = raw_data[sample_width - 1:: -1] + b"".join( + raw_data[i + sample_width: i: -1] for i in range(sample_width - 1, len(raw_data), sample_width) ) diff --git a/tests/recognizers/test_google.py b/tests/recognizers/test_google.py index aa91eb64..79c58770 100644 --- a/tests/recognizers/test_google.py +++ b/tests/recognizers/test_google.py @@ -79,7 +79,7 @@ def test_parse_without_confidence( @patch(f"{CLASS_UNDER_TEST}.find_best_hypothesis") @patch(f"{CLASS_UNDER_TEST}.convert_to_result") - def test_parse_without_confidence( + def test_parse_with_confidence( self, convert_to_result, find_best_hypothesis ): convert_to_result.return_value = {"alternative": "dummy3"} diff --git a/tests/recognizers/test_whisper.py b/tests/recognizers/test_whisper.py index f2c8e7fd..e84d0503 100644 --- a/tests/recognizers/test_whisper.py +++ b/tests/recognizers/test_whisper.py @@ -32,7 +32,7 @@ def test_recognize_pass_arguments(self, OpenAI, BytesIO, environ): recognizer = MagicMock(spec=Recognizer) audio_data = MagicMock(spec=AudioData) - actual = whisper.recognize_whisper_api( + _ = whisper.recognize_whisper_api( recognizer, audio_data, model="x-whisper", api_key="OPENAI_API_KEY" ) diff --git a/tests/test_recognition.py b/tests/test_recognition.py index 2176023c..bb0ce3a9 100644 --- a/tests/test_recognition.py +++ b/tests/test_recognition.py @@ -1,10 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import http import os import unittest -from unittest.mock import patch, MagicMock import speech_recognition as sr