Skip to content

Commit

Permalink
fix: deleted a line duplicating the call to the recognizer (#83)
Browse files Browse the repository at this point in the history
* I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* docs: I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* chore: I updated the comments on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* fix: resolved conflicts

    pick f510e8f chore: I updated the comments on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* fix: conflicts

* fix: migrated to speech 2.0.0

* fix: fixed lint issues

* fix: deleted a duplicate line that calls the recognizer

* docs: repaired region tag mismatch

* chore: formatting

* chore: added ]
  • Loading branch information
b-loved-dreamer authored and telpirion committed Mar 13, 2023
1 parent 2ce9b5d commit 24e3984
Show file tree
Hide file tree
Showing 9 changed files with 178 additions and 142 deletions.
71 changes: 40 additions & 31 deletions speech/microphone/transcribe_streaming_infinite.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@
SAMPLE_RATE = 16000
CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms

RED = '\033[0;31m'
GREEN = '\033[0;32m'
YELLOW = '\033[0;33m'
RED = "\033[0;31m"
GREEN = "\033[0;32m"
YELLOW = "\033[0;33m"


def get_current_time():
Expand Down Expand Up @@ -123,12 +123,14 @@ def generator(self):
if self.bridging_offset > self.final_request_end_time:
self.bridging_offset = self.final_request_end_time

chunks_from_ms = round((self.final_request_end_time -
self.bridging_offset) / chunk_time)
chunks_from_ms = round(
(self.final_request_end_time - self.bridging_offset)
/ chunk_time
)

self.bridging_offset = (round((
len(self.last_audio_input) - chunks_from_ms)
* chunk_time))
self.bridging_offset = round(
(len(self.last_audio_input) - chunks_from_ms) * chunk_time
)

for i in range(chunks_from_ms, len(self.last_audio_input)):
data.append(self.last_audio_input[i])
Expand Down Expand Up @@ -157,7 +159,7 @@ def generator(self):
except queue.Empty:
break

yield b''.join(data)
yield b"".join(data)


def listen_print_loop(responses, stream):
Expand Down Expand Up @@ -203,32 +205,35 @@ def listen_print_loop(responses, stream):

stream.result_end_time = int((result_seconds * 1000) + (result_micros / 1000))

corrected_time = (stream.result_end_time - stream.bridging_offset
+ (STREAMING_LIMIT * stream.restart_counter))
corrected_time = (
stream.result_end_time
- stream.bridging_offset
+ (STREAMING_LIMIT * stream.restart_counter)
)
# Display interim results, but with a carriage return at the end of the
# line, so subsequent lines will overwrite them.

if result.is_final:

sys.stdout.write(GREEN)
sys.stdout.write('\033[K')
sys.stdout.write(str(corrected_time) + ': ' + transcript + '\n')
sys.stdout.write("\033[K")
sys.stdout.write(str(corrected_time) + ": " + transcript + "\n")

stream.is_final_end_time = stream.result_end_time
stream.last_transcript_was_final = True

# Exit recognition if any of the transcribed phrases could be
# one of our keywords.
if re.search(r'\b(exit|quit)\b', transcript, re.I):
if re.search(r"\b(exit|quit)\b", transcript, re.I):
sys.stdout.write(YELLOW)
sys.stdout.write('Exiting...\n')
sys.stdout.write("Exiting...\n")
stream.closed = True
break

else:
sys.stdout.write(RED)
sys.stdout.write('\033[K')
sys.stdout.write(str(corrected_time) + ': ' + transcript + '\r')
sys.stdout.write("\033[K")
sys.stdout.write(str(corrected_time) + ": " + transcript + "\r")

stream.last_transcript_was_final = False

Expand All @@ -240,34 +245,38 @@ def main():
config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=SAMPLE_RATE,
language_code='en-US',
max_alternatives=1)
language_code="en-US",
max_alternatives=1,
)

streaming_config = speech.StreamingRecognitionConfig(
config=config,
interim_results=True)
config=config, interim_results=True
)

mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
print(mic_manager.chunk_size)
sys.stdout.write(YELLOW)
sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
sys.stdout.write('End (ms) Transcript Results/Status\n')
sys.stdout.write('=====================================================\n')
sys.stdout.write("End (ms) Transcript Results/Status\n")
sys.stdout.write("=====================================================\n")

with mic_manager as stream:

while not stream.closed:
sys.stdout.write(YELLOW)
sys.stdout.write('\n' + str(
STREAMING_LIMIT * stream.restart_counter) + ': NEW REQUEST\n')
sys.stdout.write(
"\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
)

stream.audio_input = []
audio_generator = stream.generator()

requests = (speech.StreamingRecognizeRequest(
audio_content=content)for content in audio_generator)
requests = (
speech.StreamingRecognizeRequest(audio_content=content)
for content in audio_generator
)

responses = client.streaming_recognize(streaming_config,
requests)
responses = client.streaming_recognize(streaming_config, requests)

# Now, put the transcription responses to use.
listen_print_loop(responses, stream)
Expand All @@ -281,11 +290,11 @@ def main():
stream.restart_counter = stream.restart_counter + 1

if not stream.last_transcript_was_final:
sys.stdout.write('\n')
sys.stdout.write("\n")
stream.new_stream = True


if __name__ == '__main__':
if __name__ == "__main__":

main()

Expand Down
35 changes: 21 additions & 14 deletions speech/microphone/transcribe_streaming_mic.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

class MicrophoneStream(object):
"""Opens a recording stream as a generator yielding the audio chunks."""

def __init__(self, rate, chunk):
self._rate = rate
self._chunk = chunk
Expand All @@ -57,8 +58,10 @@ def __enter__(self):
format=pyaudio.paInt16,
# The API currently only supports 1-channel (mono) audio
# https://goo.gl/z757pE
channels=1, rate=self._rate,
input=True, frames_per_buffer=self._chunk,
channels=1,
rate=self._rate,
input=True,
frames_per_buffer=self._chunk,
# Run the audio stream asynchronously to fill the buffer object.
# This is necessary so that the input device's buffer doesn't
# overflow while the calling thread makes network requests, etc.
Expand Down Expand Up @@ -103,7 +106,7 @@ def generator(self):
except queue.Empty:
break

yield b''.join(data)
yield b"".join(data)


def listen_print_loop(responses):
Expand Down Expand Up @@ -141,10 +144,10 @@ def listen_print_loop(responses):
#
# If the previous result was longer than this one, we need to print
# some extra spaces to overwrite the previous result
overwrite_chars = ' ' * (num_chars_printed - len(transcript))
overwrite_chars = " " * (num_chars_printed - len(transcript))

if not result.is_final:
sys.stdout.write(transcript + overwrite_chars + '\r')
sys.stdout.write(transcript + overwrite_chars + "\r")
sys.stdout.flush()

num_chars_printed = len(transcript)
Expand All @@ -154,8 +157,8 @@ def listen_print_loop(responses):

# Exit recognition if any of the transcribed phrases could be
# one of our keywords.
if re.search(r'\b(exit|quit)\b', transcript, re.I):
print('Exiting..')
if re.search(r"\b(exit|quit)\b", transcript, re.I):
print("Exiting..")
break

num_chars_printed = 0
Expand All @@ -164,28 +167,32 @@ def listen_print_loop(responses):
def main():
# See http://g.co/cloud/speech/docs/languages
# for a list of supported languages.
language_code = 'en-US' # a BCP-47 language tag
language_code = "en-US" # a BCP-47 language tag

client = speech.SpeechClient()
config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=RATE,
language_code=language_code)
language_code=language_code,
)

streaming_config = speech.StreamingRecognitionConfig(
config=config,
interim_results=True)
config=config, interim_results=True
)

with MicrophoneStream(RATE, CHUNK) as stream:
audio_generator = stream.generator()
requests = (speech.StreamingRecognizeRequest(audio_content=content)
for content in audio_generator)
requests = (
speech.StreamingRecognizeRequest(audio_content=content)
for content in audio_generator
)

responses = client.streaming_recognize(streaming_config, requests)

# Now, put the transcription responses to use.
listen_print_loop(responses)


if __name__ == '__main__':
if __name__ == "__main__":
main()
# [END speech_transcribe_streaming_mic]
19 changes: 11 additions & 8 deletions speech/microphone/transcribe_streaming_mic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import mock

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


class MockPyAudio(object):
Expand All @@ -32,8 +32,9 @@ def open(self, stream_callback, rate, *args, **kwargs):
self.rate = rate
self.closed = threading.Event()
self.stream_thread = threading.Thread(
target=self.stream_audio, args=(
self.audio_filename, stream_callback, self.closed))
target=self.stream_audio,
args=(self.audio_filename, stream_callback, self.closed),
)
self.stream_thread.start()
return self

Expand All @@ -47,23 +48,25 @@ def terminate(self):
pass

def stream_audio(self, audio_filename, callback, closed, num_frames=512):
with open(audio_filename, 'rb') as audio_file:
with open(audio_filename, "rb") as audio_file:
while not closed.is_set():
# Approximate realtime by sleeping for the appropriate time for
# the requested number of frames
time.sleep(num_frames / float(self.rate))
# audio is 16-bit samples, whereas python byte is 8-bit
num_bytes = 2 * num_frames
chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
chunk = audio_file.read(num_bytes) or b"\0" * num_bytes
callback(chunk, None, None, None)


@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
@mock.patch.dict(
"sys.modules",
pyaudio=mock.MagicMock(PyAudio=MockPyAudio(os.path.join(RESOURCES, "quit.raw"))),
)
def test_main(capsys):
import transcribe_streaming_mic

transcribe_streaming_mic.main()
out, err = capsys.readouterr()

assert re.search(r'quit', out, re.DOTALL | re.I)
assert re.search(r"quit", out, re.DOTALL | re.I)
Loading

0 comments on commit 24e3984

Please sign in to comment.