Skip to content

Commit

Permalink
Speech sample using microphone [(#1013)](#1013)
Browse files Browse the repository at this point in the history
* Add sample for transcribing from microphone.

* Remove error handling that probably won't work

* Add test.

* Fix lint.

* Increment copyright date
  • Loading branch information
jerjou authored and telpirion committed Mar 13, 2023
1 parent 940cce1 commit 955916b
Show file tree
Hide file tree
Showing 3 changed files with 237 additions and 0 deletions.
Binary file added speech/snippets/resources/quit.raw
Binary file not shown.
168 changes: 168 additions & 0 deletions speech/snippets/transcribe_streaming_mic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/usr/bin/env python

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Speech API sample application using the streaming API.

NOTE: This module requires the additional dependency `pyaudio`. To install
using pip:

    pip install pyaudio

Example usage:

    python transcribe_streaming_mic.py
"""

# [START import_libraries]
from __future__ import division

import re
import sys

from google.cloud import speech
import pyaudio
from six.moves import queue
# [END import_libraries]

# Audio recording parameters
# RATE is the sample rate in hertz; it is used both to open the microphone
# stream and as the `sample_rate_hertz` sent to the Speech API.
RATE = 16000
# CHUNK is the number of frames per audio buffer: one tenth of a second.
CHUNK = int(RATE / 10) # 100ms


class MicAsFile(object):
    """Opens a recording stream as a file-like object.

    Intended to be used as a context manager. Entering opens a PyAudio input
    stream whose callback pushes each captured chunk onto an internal queue;
    `read` drains that queue; exiting stops the stream and wakes up any
    `read` call that is still blocked waiting for audio.
    """

    def __init__(self, rate, chunk):
        """Store the recording parameters; no audio device is opened yet.

        Args:
            rate: Sample rate, passed to PyAudio as `rate`.
            chunk: Buffer size, passed to PyAudio as `frames_per_buffer`.
        """
        self._rate = rate
        self._chunk = chunk

        # Create a thread-safe buffer of audio data
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
        """Open the audio input stream and return this object."""
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16,
            # The API currently only supports 1-channel (mono) audio
            # https://goo.gl/z757pE
            channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            # Run the audio stream asynchronously to fill the buffer object.
            # This is necessary so that the input device's buffer doesn't
            # overflow while the calling thread makes network requests, etc.
            stream_callback=self._fill_buffer,
        )

        self.closed = False

        return self

    def __exit__(self, type, value, traceback):
        """Stop recording and release the audio resources.

        Returns None, so an exception raised inside the `with` body is not
        suppressed.
        """
        try:
            self._audio_stream.stop_stream()
            self._audio_stream.close()
        finally:
            # Runs even if stopping the stream failed, so that a reader
            # blocked in read() is always released and the audio interface
            # is always terminated.
            self.closed = True
            # Wake up a read() that is blocked on an empty queue. `closed`
            # is set first so that reader sees the stream as closed.
            self._buff.put(None)
            self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream, into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def read(self, chunk_size):
        """Return all currently buffered audio as a single bytes object.

        Blocks until at least one chunk of audio is available.

        Args:
            chunk_size: Accepted for file-like compatibility; it is ignored,
                and everything buffered so far is returned.

        Returns:
            The concatenated audio bytes, or None if the stream is closed
            (either on entry or by the time the data has been collected).
        """
        if self.closed:
            return None

        # Use a blocking get() to ensure there's at least one chunk of data.
        chunk = self._buff.get()
        while chunk is None:
            # None is the wake-up marker posted by __exit__, never audio.
            # If the stream is open, the marker is a leftover from an
            # earlier session: skip it and keep waiting for real data.
            if self.closed:
                return None
            chunk = self._buff.get()
        chunks = [chunk]

        # Now consume whatever other data's still buffered.
        while True:
            try:
                chunk = self._buff.get(block=False)
            except queue.Empty:
                break
            # Markers must never reach b''.join(), which only accepts bytes.
            if chunk is not None:
                chunks.append(chunk)

        if self.closed:
            return None
        return b''.join(chunks)
# [END audio_stream]


def listen_print_loop(results_gen):
    """Print each transcription result produced by `results_gen`.

    `results_gen` is iterated until it is exhausted or until a final result
    contains the word "exit" or "quit". Results without alternatives are
    skipped.

    Interim results are written followed by a carriage return, so the next
    result is drawn over them on the same line. Final results are printed
    followed by a newline, so they stay on screen.
    """
    previous_width = 0
    for response in results_gen:
        if not response.alternatives:
            continue

        text = response.transcript

        # When the previous interim line was longer than this one, pad with
        # spaces so that no stale characters remain visible after it.
        padding = ' ' * max(0, previous_width - len(text))
        line = text + padding

        if response.is_final:
            print(line)

            # Stop listening once a finalized phrase contains one of the
            # keywords.
            if re.search(r'\b(exit|quit)\b', text, re.I):
                print('Exiting..')
                return

            previous_width = 0
            continue

        sys.stdout.write(line + '\r')
        sys.stdout.flush()
        previous_width = len(text)


def main():
    """Stream microphone audio to the Speech API and print the results.

    Opens the microphone through `MicAsFile`, requests streaming recognition
    with interim results enabled, and hands the responses to
    `listen_print_loop`.
    """
    client = speech.Client()

    with MicAsFile(RATE, CHUNK) as mic:
        sample = client.sample(
            stream=mic,
            encoding=speech.encoding.Encoding.LINEAR16,
            sample_rate_hertz=RATE)

        # A BCP-47 language tag. See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code = 'en-US'
        responses = sample.streaming_recognize(
            language_code=language_code, interim_results=True)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)


# Run the sample only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
69 changes: 69 additions & 0 deletions speech/snippets/transcribe_streaming_mic_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2017, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import threading
import time

import mock

# Directory next to this test file that holds the audio fixtures it replays.
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')


class MockPyAudio(object):
    """Stand-in for `pyaudio.PyAudio` that replays a raw audio file.

    One instance plays every role the code under test needs: calling it
    returns itself (so it can replace the `PyAudio` class), and `open`
    returns itself again (so it also acts as the opened stream).
    """

    def __init__(self, audio_filename):
        """Remember the path of the raw audio file to replay."""
        self.audio_filename = audio_filename

    def __call__(self, *args):
        """Mimic instantiating `PyAudio` by returning this object."""
        return self

    def open(self, stream_callback, rate, *args, **kwargs):
        """Start feeding the audio file to `stream_callback` on a thread.

        Args:
            stream_callback: Called with each chunk of audio bytes, followed
                by three None placeholders.
            rate: Sample rate, used to pace the playback.
            *args, **kwargs: Accepted and ignored.

        Returns:
            This object, which doubles as the stream.
        """
        self.rate = rate
        self.closed = threading.Event()
        self.stream_thread = threading.Thread(
            target=self.stream_audio, args=(
                self.audio_filename, stream_callback, self.closed))
        # The playback loop only ends when close() is called. Mark the
        # thread as a daemon so that a failure before close() cannot keep
        # the test process alive forever.
        self.stream_thread.daemon = True
        self.stream_thread.start()
        return self

    def close(self):
        """Signal the playback thread to stop."""
        self.closed.set()

    def stop_stream(self):
        """No-op; present because the code under test calls it."""
        pass

    def terminate(self):
        """No-op; present because the code under test calls it."""
        pass

    def stream_audio(self, audio_filename, callback, closed, num_frames=512):
        """Feed the file to `callback` in chunks until `closed` is set.

        Once the file is exhausted, chunks of zero bytes are sent instead,
        so the callback keeps receiving data until the stream is closed.
        """
        with open(audio_filename, 'rb') as audio_file:
            while not closed.is_set():
                # Approximate realtime by sleeping for the appropriate time
                # for the requested number of frames
                time.sleep(num_frames / float(self.rate))
                # audio is 16-bit samples, whereas python byte is 8-bit
                num_bytes = 2 * num_frames
                chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
                callback(chunk, None, None, None)


@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
    PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
def test_main(capsys):
    """Run the sample against the recorded audio and check its output.

    `pyaudio` is replaced in `sys.modules` before the sample is imported, so
    the sample records from the `quit.raw` fixture instead of a microphone.
    """
    # Imported here so the import happens while `pyaudio` is patched.
    import transcribe_streaming_mic

    transcribe_streaming_mic.main()
    captured_out, _ = capsys.readouterr()

    assert re.search(r'quit', captured_out, re.DOTALL | re.I)

0 comments on commit 955916b

Please sign in to comment.