-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add word time offset samples [(#1050)](GoogleCloudPlatform/python-doc…
- Loading branch information
1 parent
3e504dc
commit 648935d
Showing
6 changed files
with
189 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
111 changes: 111 additions & 0 deletions
111
google-cloud-speech/samples/snippets/transcribe_word_time_offsets.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#!/usr/bin/env python | ||
|
||
# Copyright 2017 Google Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Google Cloud Speech API sample that demonstrates word time offsets. | ||
Example usage: | ||
python transcribe_word_time_offsets.py resources/audio.raw | ||
python transcribe_word_time_offsets.py \ | ||
gs://cloud-samples-tests/speech/vr.flac | ||
""" | ||
|
||
import argparse | ||
import io | ||
|
||
|
||
def transcribe_file_with_word_time_offsets(speech_file):
    """Transcribe a local audio file synchronously and print word offsets.

    The file is read fully into memory and sent inline with a synchronous
    ``recognize`` request configured for 16 kHz LINEAR16 audio in en-US,
    with word time offsets enabled.

    For every alternative of every returned result this prints the
    transcript, followed by one line per word giving its start and end
    time in (fractional) seconds.

    Args:
        speech_file: Path to the local audio file to transcribe.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_time_offsets=True)

    response = client.recognize(config, audio)

    # Walk every result rather than indexing ``results[0]``: indexing
    # raises IndexError when nothing was recognized and ignores any
    # results after the first one.
    for result in response.results:
        for alternative in result.alternatives:
            print('Transcript: {}'.format(alternative.transcript))

            for word_info in alternative.words:
                word = word_info.word
                start_time = word_info.start_time
                end_time = word_info.end_time
                # Offsets expose ``seconds`` and ``nanos`` parts; combine
                # them into a single value in seconds.
                print('Word: {}, start_time: {}, end_time: {}'.format(
                    word,
                    start_time.seconds + start_time.nanos * 1e-9,
                    end_time.seconds + end_time.nanos * 1e-9))
|
||
|
||
# [START def_transcribe_gcs] | ||
def transcribe_gcs_with_word_time_offsets(gcs_uri):
    """Transcribe audio stored in GCS asynchronously and print word offsets.

    Starts a ``long_running_recognize`` operation configured for 16 kHz
    FLAC audio in en-US with word time offsets enabled, then blocks for up
    to 90 seconds waiting for it to finish.

    For every alternative of every returned result this prints the
    transcript and confidence, followed by one line per word giving its
    start and end time in (fractional) seconds.

    Args:
        gcs_uri: URI of the audio file to transcribe (the command-line
            entry point passes paths beginning with ``gs://``).
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_time_offsets=True)

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    result = operation.result(timeout=90)

    # Walk every result rather than indexing ``results[0]``: indexing
    # raises IndexError when nothing was recognized and ignores any
    # results after the first one.
    for recognition_result in result.results:
        for alternative in recognition_result.alternatives:
            print('Transcript: {}'.format(alternative.transcript))
            print('Confidence: {}'.format(alternative.confidence))

            for word_info in alternative.words:
                word = word_info.word
                start_time = word_info.start_time
                end_time = word_info.end_time
                # Offsets expose ``seconds`` and ``nanos`` parts; combine
                # them into a single value in seconds.
                print('Word: {}, start_time: {}, end_time: {}'.format(
                    word,
                    start_time.seconds + start_time.nanos * 1e-9,
                    end_time.seconds + end_time.nanos * 1e-9))
# [END def_transcribe_gcs] | ||
|
||
|
||
if __name__ == '__main__':
    # Command-line entry point: takes a single positional audio path and
    # reuses the module docstring verbatim as the help description.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    audio_path = cli.parse_args().path

    # Paths starting with gs:// go through the GCS transcription function;
    # anything else is treated as a local file.
    if not audio_path.startswith('gs://'):
        transcribe_file_with_word_time_offsets(audio_path)
    else:
        transcribe_gcs_with_word_time_offsets(audio_path)
43 changes: 43 additions & 0 deletions
43
google-cloud-speech/samples/snippets/transcribe_word_time_offsets_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Copyright 2016, Google, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
import re | ||
|
||
import transcribe_word_time_offsets | ||
|
||
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') | ||
|
||
|
||
def test_transcribe_file_with_word_time_offsets(capsys):
    """Check that local-file transcription prints an offset for "Bridge".

    Runs the sample against ``resources/audio.raw`` and verifies that the
    captured output contains a word line for "Bridge" (case-insensitive)
    whose start time parses as a positive number.
    """
    transcribe_word_time_offsets.transcribe_file_with_word_time_offsets(
        os.path.join(RESOURCES, 'audio.raw'))
    out, _ = capsys.readouterr()

    # readouterr() consumed the sample's output; print it again so it is
    # available when diagnosing a failure.
    print(out)
    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
    # Fail with a clear message instead of an AttributeError on None when
    # the expected word line is missing.
    assert match is not None, 'no "Bridge" word time offset in output'
    start_time = float(match.group(1))

    assert start_time > 0
|
||
|
||
def test_transcribe_gcs_with_word_time_offsets(capsys):
    """Check that GCS transcription prints an offset for "Bridge".

    Runs the sample against
    ``gs://python-docs-samples-tests/speech/audio.flac`` and verifies that
    the captured output contains a word line for "Bridge"
    (case-insensitive) whose start time parses as a positive number.
    """
    transcribe_word_time_offsets.transcribe_gcs_with_word_time_offsets(
        'gs://python-docs-samples-tests/speech/audio.flac')
    out, _ = capsys.readouterr()

    # readouterr() consumed the sample's output; print it again so it is
    # available when diagnosing a failure.
    print(out)
    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
    # Fail with a clear message instead of an AttributeError on None when
    # the expected word line is missing.
    assert match is not None, 'no "Bridge" word time offset in output'
    start_time = float(match.group(1))

    assert start_time > 0