diff --git a/speech/microphone/transcribe_streaming_infinite.py b/speech/microphone/transcribe_streaming_infinite.py
index dedb0d5bf72e..1866022a6a6f 100644
--- a/speech/microphone/transcribe_streaming_infinite.py
+++ b/speech/microphone/transcribe_streaming_infinite.py
@@ -41,9 +41,9 @@
 SAMPLE_RATE = 16000
 CHUNK_SIZE = int(SAMPLE_RATE / 10)  # 100ms

-RED = '\033[0;31m'
-GREEN = '\033[0;32m'
-YELLOW = '\033[0;33m'
+RED = "\033[0;31m"
+GREEN = "\033[0;32m"
+YELLOW = "\033[0;33m"


 def get_current_time():
@@ -123,12 +123,14 @@ def generator(self):
                     if self.bridging_offset > self.final_request_end_time:
                         self.bridging_offset = self.final_request_end_time

-                    chunks_from_ms = round((self.final_request_end_time -
-                                            self.bridging_offset) / chunk_time)
+                    chunks_from_ms = round(
+                        (self.final_request_end_time - self.bridging_offset)
+                        / chunk_time
+                    )

-                    self.bridging_offset = (round((
-                        len(self.last_audio_input) - chunks_from_ms)
-                        * chunk_time))
+                    self.bridging_offset = round(
+                        (len(self.last_audio_input) - chunks_from_ms) * chunk_time
+                    )

                     for i in range(chunks_from_ms, len(self.last_audio_input)):
                         data.append(self.last_audio_input[i])
@@ -157,7 +159,7 @@ def generator(self):
                 except queue.Empty:
                     break

-            yield b''.join(data)
+            yield b"".join(data)


 def listen_print_loop(responses, stream):
@@ -203,32 +205,35 @@ def listen_print_loop(responses, stream):
         stream.result_end_time = int((result_seconds * 1000) + (result_micros / 1000))

-        corrected_time = (stream.result_end_time - stream.bridging_offset
-                          + (STREAMING_LIMIT * stream.restart_counter))
+        corrected_time = (
+            stream.result_end_time
+            - stream.bridging_offset
+            + (STREAMING_LIMIT * stream.restart_counter)
+        )
         # Display interim results, but with a carriage return at the end of the
         # line, so subsequent lines will overwrite them.

         if result.is_final:

             sys.stdout.write(GREEN)
-            sys.stdout.write('\033[K')
-            sys.stdout.write(str(corrected_time) + ': ' + transcript + '\n')
+            sys.stdout.write("\033[K")
+            sys.stdout.write(str(corrected_time) + ": " + transcript + "\n")

             stream.is_final_end_time = stream.result_end_time
             stream.last_transcript_was_final = True

             # Exit recognition if any of the transcribed phrases could be
             # one of our keywords.
-            if re.search(r'\b(exit|quit)\b', transcript, re.I):
+            if re.search(r"\b(exit|quit)\b", transcript, re.I):
                 sys.stdout.write(YELLOW)
-                sys.stdout.write('Exiting...\n')
+                sys.stdout.write("Exiting...\n")
                 stream.closed = True
                 break

         else:
             sys.stdout.write(RED)
-            sys.stdout.write('\033[K')
-            sys.stdout.write(str(corrected_time) + ': ' + transcript + '\r')
+            sys.stdout.write("\033[K")
+            sys.stdout.write(str(corrected_time) + ": " + transcript + "\r")

             stream.last_transcript_was_final = False
@@ -240,34 +245,38 @@ def main():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=SAMPLE_RATE,
-        language_code='en-US',
-        max_alternatives=1)
+        language_code="en-US",
+        max_alternatives=1,
+    )
+
     streaming_config = speech.StreamingRecognitionConfig(
-        config=config,
-        interim_results=True)
+        config=config, interim_results=True
+    )

     mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
     print(mic_manager.chunk_size)
     sys.stdout.write(YELLOW)
     sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
-    sys.stdout.write('End (ms) Transcript Results/Status\n')
-    sys.stdout.write('=====================================================\n')
+    sys.stdout.write("End (ms) Transcript Results/Status\n")
+    sys.stdout.write("=====================================================\n")

     with mic_manager as stream:

         while not stream.closed:
             sys.stdout.write(YELLOW)
-            sys.stdout.write('\n' + str(
-                STREAMING_LIMIT * stream.restart_counter) + ': NEW REQUEST\n')
+            sys.stdout.write(
+                "\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
+            )

             stream.audio_input = []
             audio_generator = stream.generator()

-            requests = (speech.StreamingRecognizeRequest(
-                audio_content=content)for content in audio_generator)
+            requests = (
+                speech.StreamingRecognizeRequest(audio_content=content)
+                for content in audio_generator
+            )

-            responses = client.streaming_recognize(streaming_config,
-                                                   requests)
+            responses = client.streaming_recognize(streaming_config, requests)

             # Now, put the transcription responses to use.
             listen_print_loop(responses, stream)
@@ -281,11 +290,11 @@ def main():
             stream.restart_counter = stream.restart_counter + 1

             if not stream.last_transcript_was_final:
-                sys.stdout.write('\n')
+                sys.stdout.write("\n")
             stream.new_stream = True


-if __name__ == '__main__':
+if __name__ == "__main__":

     main()
diff --git a/speech/microphone/transcribe_streaming_mic.py b/speech/microphone/transcribe_streaming_mic.py
index 597e1bd7c1a9..5d9776f80439 100644
--- a/speech/microphone/transcribe_streaming_mic.py
+++ b/speech/microphone/transcribe_streaming_mic.py
@@ -43,6 +43,7 @@

 class MicrophoneStream(object):
     """Opens a recording stream as a generator yielding the audio chunks."""
+
     def __init__(self, rate, chunk):
         self._rate = rate
         self._chunk = chunk
@@ -57,8 +58,10 @@ def __enter__(self):
             format=pyaudio.paInt16,
             # The API currently only supports 1-channel (mono) audio
             # https://goo.gl/z757pE
-            channels=1, rate=self._rate,
-            input=True, frames_per_buffer=self._chunk,
+            channels=1,
+            rate=self._rate,
+            input=True,
+            frames_per_buffer=self._chunk,
             # Run the audio stream asynchronously to fill the buffer object.
             # This is necessary so that the input device's buffer doesn't
             # overflow while the calling thread makes network requests, etc.
@@ -103,7 +106,7 @@ def generator(self):
                 except queue.Empty:
                     break

-            yield b''.join(data)
+            yield b"".join(data)


 def listen_print_loop(responses):
@@ -141,10 +144,10 @@ def listen_print_loop(responses):
         #
        # If the previous result was longer than this one, we need to print
         # some extra spaces to overwrite the previous result
-        overwrite_chars = ' ' * (num_chars_printed - len(transcript))
+        overwrite_chars = " " * (num_chars_printed - len(transcript))

         if not result.is_final:
-            sys.stdout.write(transcript + overwrite_chars + '\r')
+            sys.stdout.write(transcript + overwrite_chars + "\r")
             sys.stdout.flush()

             num_chars_printed = len(transcript)
@@ -154,8 +157,8 @@ def listen_print_loop(responses):

             # Exit recognition if any of the transcribed phrases could be
             # one of our keywords.
-            if re.search(r'\b(exit|quit)\b', transcript, re.I):
-                print('Exiting..')
+            if re.search(r"\b(exit|quit)\b", transcript, re.I):
+                print("Exiting..")
                 break

             num_chars_printed = 0
@@ -164,21 +167,25 @@ def main():
     # See http://g.co/cloud/speech/docs/languages
     # for a list of supported languages.
-    language_code = 'en-US'  # a BCP-47 language tag
+    language_code = "en-US"  # a BCP-47 language tag

     client = speech.SpeechClient()
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=RATE,
-        language_code=language_code)
+        language_code=language_code,
+    )
+
     streaming_config = speech.StreamingRecognitionConfig(
-        config=config,
-        interim_results=True)
+        config=config, interim_results=True
+    )

     with MicrophoneStream(RATE, CHUNK) as stream:
         audio_generator = stream.generator()
-        requests = (speech.StreamingRecognizeRequest(audio_content=content)
-                    for content in audio_generator)
+        requests = (
+            speech.StreamingRecognizeRequest(audio_content=content)
+            for content in audio_generator
+        )

         responses = client.streaming_recognize(streaming_config, requests)

@@ -186,6 +193,6 @@ def main():
         listen_print_loop(responses)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
 # [END speech_transcribe_streaming_mic]
diff --git a/speech/microphone/transcribe_streaming_mic_test.py b/speech/microphone/transcribe_streaming_mic_test.py
index dd5e7ea6f5e6..f5e08f5d30b2 100644
--- a/speech/microphone/transcribe_streaming_mic_test.py
+++ b/speech/microphone/transcribe_streaming_mic_test.py
@@ -18,7 +18,7 @@

 import mock

-RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
+RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


 class MockPyAudio(object):
@@ -32,8 +32,9 @@ def open(self, stream_callback, rate, *args, **kwargs):
         self.rate = rate
         self.closed = threading.Event()
         self.stream_thread = threading.Thread(
-            target=self.stream_audio, args=(
-                self.audio_filename, stream_callback, self.closed))
+            target=self.stream_audio,
+            args=(self.audio_filename, stream_callback, self.closed),
+        )
         self.stream_thread.start()
         return self
@@ -47,23 +48,25 @@ def terminate(self):
         pass

     def stream_audio(self, audio_filename, callback, closed, num_frames=512):
-        with open(audio_filename, 'rb') as audio_file:
+        with open(audio_filename, "rb") as audio_file:
             while not closed.is_set():
                 # Approximate realtime by sleeping for the appropriate time for
                 # the requested number of frames
                 time.sleep(num_frames / float(self.rate))
                 # audio is 16-bit samples, whereas python byte is 8-bit
                 num_bytes = 2 * num_frames
-                chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
+                chunk = audio_file.read(num_bytes) or b"\0" * num_bytes
                 callback(chunk, None, None, None)

-@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
-    PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
+@mock.patch.dict(
+    "sys.modules",
+    pyaudio=mock.MagicMock(PyAudio=MockPyAudio(os.path.join(RESOURCES, "quit.raw"))),
+)
 def test_main(capsys):
     import transcribe_streaming_mic

     transcribe_streaming_mic.main()
     out, err = capsys.readouterr()

-    assert re.search(r'quit', out, re.DOTALL | re.I)
+    assert re.search(r"quit", out, re.DOTALL | re.I)
diff --git a/speech/snippets/beta_snippets.py b/speech/snippets/beta_snippets.py
index 8d9571006959..45f661cdadd4 100644
--- a/speech/snippets/beta_snippets.py
+++ b/speech/snippets/beta_snippets.py
@@ -35,29 +35,31 @@ def transcribe_file_with_enhanced_model():
     """Transcribe the given audio file using an enhanced model."""
     # [START speech_transcribe_enhanced_model_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()

-    speech_file = 'resources/commercial_mono.wav'
+    speech_file = "resources/commercial_mono.wav"

-    with io.open(speech_file, 'rb') as audio_file:
+    with io.open(speech_file, "rb") as audio_file:
         content = audio_file.read()

     audio = speech.RecognitionAudio(content=content)
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=8000,
-        language_code='en-US',
+        language_code="en-US",
         use_enhanced=True,
         # A model must be specified to use enhanced model.
-        model='phone_call')
+        model="phone_call",
+    )

     response = client.recognize(config=config, audio=audio)

     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print(u'First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
+        print("-" * 20)
+        print(u"First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
     # [END speech_transcribe_enhanced_model_beta]


@@ -65,23 +67,26 @@ def transcribe_file_with_metadata():
     """Send a request that includes recognition metadata."""
     # [START speech_transcribe_recognition_metadata_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()

-    speech_file = 'resources/commercial_mono.wav'
+    speech_file = "resources/commercial_mono.wav"

-    with io.open(speech_file, 'rb') as audio_file:
+    with io.open(speech_file, "rb") as audio_file:
         content = audio_file.read()

     # Here we construct a recognition metadata object.
     # Most metadata fields are specified as enums that can be found
     # in speech.enums.RecognitionMetadata
     metadata = speech.RecognitionMetadata()
-    metadata.interaction_type = (
-        speech.RecognitionMetadata.InteractionType.DISCUSSION)
+    metadata.interaction_type = speech.RecognitionMetadata.InteractionType.DISCUSSION
     metadata.microphone_distance = (
-        speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD)
+        speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD
+    )
     metadata.recording_device_type = (
-        speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE)
+        speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE
+    )
+
     # Some metadata fields are free form strings
     metadata.recording_device_name = "Pixel 2 XL"
     # And some are integers, for instance the 6 digit NAICS code
@@ -92,17 +97,18 @@ def transcribe_file_with_metadata():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=8000,
-        language_code='en-US',
+        language_code="en-US",
         # Add this in the request to send metadata.
-        metadata=metadata)
+        metadata=metadata,
+    )

     response = client.recognize(config=config, audio=audio)

     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print(u'First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
+        print("-" * 20)
+        print(u"First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
     # [END speech_transcribe_recognition_metadata_beta]


@@ -110,28 +116,30 @@ def transcribe_file_with_auto_punctuation():
     """Transcribe the given audio file with auto punctuation enabled."""
     # [START speech_transcribe_auto_punctuation_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()

-    speech_file = 'resources/commercial_mono.wav'
+    speech_file = "resources/commercial_mono.wav"

-    with io.open(speech_file, 'rb') as audio_file:
+    with io.open(speech_file, "rb") as audio_file:
         content = audio_file.read()

     audio = speech.RecognitionAudio(content=content)
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=8000,
-        language_code='en-US',
+        language_code="en-US",
         # Enable automatic punctuation
-        enable_automatic_punctuation=True)
+        enable_automatic_punctuation=True,
+    )

     response = client.recognize(config=config, audio=audio)

     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print(u'First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
+        print("-" * 20)
+        print(u"First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
     # [END speech_transcribe_auto_punctuation_beta]


@@ -139,11 +147,12 @@ def transcribe_file_with_diarization():
     """Transcribe the given audio file synchronously with diarization."""
     # [START speech_transcribe_diarization_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()

-    speech_file = 'resources/commercial_mono.wav'
+    speech_file = "resources/commercial_mono.wav"

-    with open(speech_file, 'rb') as audio_file:
+    with open(speech_file, "rb") as audio_file:
         content = audio_file.read()

     audio = speech.RecognitionAudio(content=content)
@@ -151,11 +160,12 @@ def transcribe_file_with_diarization():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=8000,
-        language_code='en-US',
+        language_code="en-US",
         enable_speaker_diarization=True,
-        diarization_speaker_count=2)
+        diarization_speaker_count=2,
+    )

-    print('Waiting for operation to complete...')
+    print("Waiting for operation to complete...")

     response = client.recognize(config=config, audio=audio)

     # The transcript within each result is separate and sequential per result.
@@ -168,21 +178,23 @@ def transcribe_file_with_diarization():

     # Printing out the output:
     for word_info in words_info:
-        print(u"word: '{}', speaker_tag: {}".format(
-            word_info.word, word_info.speaker_tag))
+        print(
+            u"word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag)
+        )
     # [END speech_transcribe_diarization_beta]


 def transcribe_file_with_multichannel():
     """Transcribe the given audio file synchronously with
-      multi channel."""
+    multi channel."""
     # [START speech_transcribe_multichannel_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()

-    speech_file = 'resources/Google_Gnome.wav'
+    speech_file = "resources/Google_Gnome.wav"

-    with open(speech_file, 'rb') as audio_file:
+    with open(speech_file, "rb") as audio_file:
         content = audio_file.read()

     audio = speech.RecognitionAudio(content=content)
@@ -190,33 +202,35 @@ def transcribe_file_with_multichannel():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=16000,
-        language_code='en-US',
+        language_code="en-US",
         audio_channel_count=1,
-        enable_separate_recognition_per_channel=True)
+        enable_separate_recognition_per_channel=True,
+    )

     response = client.recognize(config=config, audio=audio)

     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print('First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
-        print(u'Channel Tag: {}'.format(result.channel_tag))
+        print("-" * 20)
+        print("First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
+        print(u"Channel Tag: {}".format(result.channel_tag))
     # [END speech_transcribe_multichannel_beta]


 def transcribe_file_with_multilanguage():
     """Transcribe the given audio file synchronously with
-      multi language."""
+    multi language."""
     # [START speech_transcribe_multilanguage_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()

-    speech_file = 'resources/multi.wav'
-    first_lang = 'en-US'
-    second_lang = 'es'
+    speech_file = "resources/multi.wav"
+    first_lang = "en-US"
+    second_lang = "es"

-    with open(speech_file, 'rb') as audio_file:
+    with open(speech_file, "rb") as audio_file:
         content = audio_file.read()

     audio = speech.RecognitionAudio(content=content)
@@ -226,29 +240,31 @@ def transcribe_file_with_multilanguage():
         sample_rate_hertz=44100,
         audio_channel_count=2,
         language_code=first_lang,
-        alternative_language_codes=[second_lang])
+        alternative_language_codes=[second_lang],
+    )

-    print('Waiting for operation to complete...')
+    print("Waiting for operation to complete...")

     response = client.recognize(config=config, audio=audio)

     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print(u'First alternative of result {}: {}'.format(i, alternative))
-        print(u'Transcript: {}'.format(alternative.transcript))
+        print("-" * 20)
+        print(u"First alternative of result {}: {}".format(i, alternative))
+        print(u"Transcript: {}".format(alternative.transcript))
     # [END speech_transcribe_multilanguage_beta]


 def transcribe_file_with_word_level_confidence():
     """Transcribe the given audio file synchronously with
-      word level confidence."""
+    word level confidence."""
     # [START speech_transcribe_word_level_confidence_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()

-    speech_file = 'resources/Google_Gnome.wav'
+    speech_file = "resources/Google_Gnome.wav"
"resources/Google_Gnome.wav" - with open(speech_file, 'rb') as audio_file: + with open(speech_file, "rb") as audio_file: content = audio_file.read() audio = speech.RecognitionAudio(content=content) @@ -256,40 +272,44 @@ def transcribe_file_with_word_level_confidence(): config = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=16000, - language_code='en-US', - enable_word_confidence=True) + language_code="en-US", + enable_word_confidence=True, + ) response = client.recognize(config=config, audio=audio) for i, result in enumerate(response.results): alternative = result.alternatives[0] - print('-' * 20) - print('First alternative of result {}'.format(i)) - print(u'Transcript: {}'.format(alternative.transcript)) - print(u'First Word and Confidence: ({}, {})'.format( - alternative.words[0].word, alternative.words[0].confidence)) + print("-" * 20) + print("First alternative of result {}".format(i)) + print(u"Transcript: {}".format(alternative.transcript)) + print( + u"First Word and Confidence: ({}, {})".format( + alternative.words[0].word, alternative.words[0].confidence + ) + ) # [END speech_transcribe_word_level_confidence_beta] -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('command') + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("command") args = parser.parse_args() - if args.command == 'enhanced-model': + if args.command == "enhanced-model": transcribe_file_with_enhanced_model() - elif args.command == 'metadata': + elif args.command == "metadata": transcribe_file_with_metadata() - elif args.command == 'punctuation': + elif args.command == "punctuation": transcribe_file_with_auto_punctuation() - elif args.command == 'diarization': + elif args.command == "diarization": transcribe_file_with_diarization() - elif args.command == 'multi-channel': + elif args.command == "multi-channel": transcribe_file_with_multichannel() - elif args.command == 'multi-language': + elif args.command == "multi-language": transcribe_file_with_multilanguage() - elif args.command == 'word-level-conf': + elif args.command == "word-level-conf": transcribe_file_with_word_level_confidence() diff --git a/speech/snippets/transcribe_async.py b/speech/snippets/transcribe_async.py index 56c7fca13d8e..51d5a1328553 100644 --- a/speech/snippets/transcribe_async.py +++ b/speech/snippets/transcribe_async.py @@ -48,11 +48,7 @@ def transcribe_file(speech_file): sample_rate_hertz=16000, language_code="en-US", ) - - # [START speech_python_migration_async_response - operation = client.long_running_recognize( - request={"config": config, "audio": audio} - ) + # [START speech_python_migration_async_response] operation = client.long_running_recognize(config=config, audio=audio) # [END speech_python_migration_async_request] @@ -85,10 +81,6 @@ def transcribe_gcs(gcs_uri): language_code="en-US", ) - operation = client.long_running_recognize( - request={"config": config, "audio": audio} - ) - operation = client.long_running_recognize(config=config, audio=audio) print("Waiting for operation to complete...") diff --git a/speech/snippets/transcribe_context_classes.py b/speech/snippets/transcribe_context_classes.py index 72b40507614f..fc22e18404a9 100644 --- a/speech/snippets/transcribe_context_classes.py +++ b/speech/snippets/transcribe_context_classes.py @@ -28,7 +28,7 @@ def 
     # https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#speechcontext
     # Full list of supported phrases (class tokens) here:
     # https://cloud.google.com/speech-to-text/docs/class-tokens
-    speech_context = speech.SpeechContext(phrases=['$TIME'])
+    speech_context = speech.SpeechContext(phrases=["$TIME"])

     # RecognitionConfig: to configure your encoding and sample_rate_hertz, see:
     # https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#recognitionconfig
diff --git a/speech/snippets/transcribe_multichannel.py b/speech/snippets/transcribe_multichannel.py
index 68fd013cffbd..245738553111 100644
--- a/speech/snippets/transcribe_multichannel.py
+++ b/speech/snippets/transcribe_multichannel.py
@@ -27,7 +27,7 @@

 def transcribe_file_with_multichannel(speech_file):
     """Transcribe the given audio file synchronously with
-      multi channel."""
+    multi channel."""
     # [START speech_transcribe_multichannel]
     from google.cloud import speech

@@ -59,7 +59,7 @@ def transcribe_file_with_multichannel(speech_file):

 def transcribe_gcs_with_multichannel(gcs_uri):
     """Transcribe the given audio file on GCS with
-      multi channel."""
+    multi channel."""
     # [START speech_transcribe_multichannel_gcs]
     from google.cloud import speech
diff --git a/speech/snippets/transcribe_streaming.py b/speech/snippets/transcribe_streaming.py
index 93243171a756..979478a6aae5 100644
--- a/speech/snippets/transcribe_streaming.py
+++ b/speech/snippets/transcribe_streaming.py
@@ -38,20 +38,24 @@ def transcribe_streaming(stream_file):
     # In practice, stream should be a generator yielding chunks of audio data.
     stream = [content]

-    requests = (speech.StreamingRecognizeRequest(audio_content=chunk)
-                for chunk in stream)
+    requests = (
+        speech.StreamingRecognizeRequest(audio_content=chunk) for chunk in stream
+    )

     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=16000,
-
-        language_code='en-US')
+        language_code="en-US",
+    )

     streaming_config = speech.StreamingRecognitionConfig(config=config)

     # streaming_recognize returns a generator.
     # [START speech_python_migration_streaming_response]
-    responses = client.streaming_recognize(config=streaming_config, requests=requests,)
+    responses = client.streaming_recognize(
+        config=streaming_config,
+        requests=requests,
+    )
     # [END speech_python_migration_streaming_request]

     for response in responses:
diff --git a/speech/snippets/transcribe_word_time_offsets.py b/speech/snippets/transcribe_word_time_offsets.py
index ced4baecf629..a0e32c27135b 100644
--- a/speech/snippets/transcribe_word_time_offsets.py
+++ b/speech/snippets/transcribe_word_time_offsets.py
@@ -95,6 +95,7 @@ def transcribe_gcs_with_word_time_offsets(gcs_uri):
             f"Word: {word}, start_time: {start_time.total_seconds()}, end_time: {end_time.total_seconds()}"
         )

+
 # [END speech_transcribe_async_word_time_offsets_gcs]
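
For context, all of the samples reformatted above share one core streaming-recognize pattern: build a RecognitionConfig and StreamingRecognitionConfig, wrap audio chunks in a generator of StreamingRecognizeRequest objects, and iterate over the responses. A minimal sketch of that pattern follows; it is illustrative only and not part of the patch. It assumes the google-cloud-speech package is installed and credentials are configured, and `recognize_stream` and `audio_chunks` are hypothetical names standing in for any caller and any iterable of raw LINEAR16 byte strings (such as the MicrophoneStream generator above).

# Illustrative sketch only, not part of the patch; names and defaults are assumptions.
from google.cloud import speech


def recognize_stream(audio_chunks):
    # "audio_chunks": any iterable of raw LINEAR16 byte strings (hypothetical stand-in).
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )
    # Wrap each audio chunk in a StreamingRecognizeRequest, lazily.
    requests = (
        speech.StreamingRecognizeRequest(audio_content=chunk) for chunk in audio_chunks
    )
    # streaming_recognize returns a generator of responses.
    responses = client.streaming_recognize(streaming_config, requests)
    for response in responses:
        for result in response.results:
            print(result.alternatives[0].transcript)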