Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return all results and do not detect encoding/sample rate in the client #449

Merged
merged 6 commits into from
Apr 14, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/contents/cloud-speech.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
}, {
"title": "Operation",
"type": "speech/operation"
}, {
"title": "Result",
"type": "speech/result"
}, {
"title": "v1beta1",
"type": "speech/v1beta1/readme",
Expand Down
21 changes: 12 additions & 9 deletions src/Speech/Operation.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,23 +99,26 @@ public function isComplete(array $options = [])
* ```
*
* @codingStandardsIgnoreStart
* @see https://cloud.google.com/speech/reference/rest/v1/speech/recognize#SpeechRecognitionAlternative SpeechRecognitionAlternative
* @see https://cloud.google.com/speech/reference/rest/v1/speech/recognize#SpeechRecognitionResult SpeechRecognitionResult
* @codingStandardsIgnoreEnd
*
* @param array $options [optional] Configuration Options.
* @return array The transcribed results. Each element of the array contains
* a `transcript` key which holds the transcribed text. Optionally
* a `confidence` key holding the confidence estimate ranging from
* 0.0 to 1.0 may be present. `confidence` is typically provided
* only for the top hypothesis.
* @return Result[]
*/
public function results(array $options = [])
{
    // Look up the operation's info array; the recognition payload, when
    // present, is found under `response.results`.
    $info = $this->info($options);

    // No results in the info array: hand back an empty list rather than
    // failing on a missing key.
    if (!isset($info['response']['results'])) {
        return [];
    }

    // Wrap EVERY result, not just the first one's alternatives, so callers
    // receive the complete set as Result instances.
    $results = [];
    foreach ($info['response']['results'] as $result) {
        $results[] = new Result($result);
    }

    return $results;
}

/**
Expand Down
114 changes: 114 additions & 0 deletions src/Speech/Result.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
<?php
/**
* Copyright 2017 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

namespace Google\Cloud\Speech;

/**
 * Represents a speech recognition result.
 *
 * Example:
 * ```
 * use Google\Cloud\Speech\SpeechClient;
 *
 * $speech = new SpeechClient([
 *     'languageCode' => 'en-US'
 * ]);
 *
 * $results = $speech->recognize(
 *     fopen(__DIR__ . '/audio.flac', 'r')
 * );
 *
 * $result = $results[0];
 * ```
 */
class Result
{
    /**
     * @var array The raw result data this instance was constructed with.
     */
    private $info;

    /**
     * @param array $info Data corresponding to the result.
     */
    public function __construct(array $info)
    {
        $this->info = $info;
    }

    /**
     * Retrieves the alternatives.
     *
     * Example:
     * ```
     * $alternatives = $result->alternatives();
     *
     * foreach ($alternatives as $alternative) {
     *     echo $alternative['transcript'] . PHP_EOL;
     * }
     * ```
     *
     * @codingStandardsIgnoreStart
     * @see https://cloud.google.com/speech/reference/rest/v1/speech/recognize#SpeechRecognitionAlternative SpeechRecognitionAlternative
     * @codingStandardsIgnoreEnd
     *
     * @return array The transcribed results. Each element of the array contains
     *         a `transcript` key which holds the transcribed text. Optionally
     *         a `confidence` key holding the confidence estimate ranging from
     *         0.0 to 1.0 may be present. `confidence` is typically provided
     *         only for the top hypothesis. An empty array is returned when
     *         the result data holds no `alternatives` key.
     */
    public function alternatives()
    {
        // Guard the lookup so a result without alternatives yields an empty
        // list instead of an undefined-index notice and a null return.
        return isset($this->info['alternatives'])
            ? $this->info['alternatives']
            : [];
    }

    /**
     * Retrieves the top alternative. This is typically the most reliable
     * transcription.
     *
     * Example:
     * ```
     * $alternative = $result->topAlternative();
     *
     * echo $alternative['transcript'];
     * ```
     *
     * @return array The top alternative. Contains a `transcript` key which
     *         holds the transcribed text. Optionally a `confidence` key holding
     *         the confidence estimate ranging from 0.0 to 1.0 may be present.
     *         An empty array is returned when there are no alternatives.
     */
    public function topAlternative()
    {
        $alternatives = $this->alternatives();

        // The first entry is the top alternative; fall back to an empty array
        // so the documented `array` return type always holds.
        return isset($alternatives[0])
            ? $alternatives[0]
            : [];
    }

    /**
     * Retrieves all available result data.
     *
     * Example:
     * ```
     * $info = $result->info();
     *
     * echo $info['alternatives'][0]['transcript'];
     * ```
     *
     * @return array The data passed to the constructor, unmodified.
     */
    public function info()
    {
        return $this->info;
    }
}
107 changes: 27 additions & 80 deletions src/Speech/SpeechClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public function __construct(array $config = [])
* );
*
* foreach ($results as $result) {
* echo $result['transcript'];
* echo $result->topAlternative()['transcript'] . PHP_EOL;
* }
* ```
*
Expand All @@ -140,18 +140,18 @@ public function __construct(array $config = [])
* ]);
*
* foreach ($results as $result) {
* echo $result['transcript'];
* echo $result->topAlternative()['transcript'] . PHP_EOL;
* }
* ```
*
* @codingStandardsIgnoreStart
* @see https://cloud.google.com/speech/reference/rest/v1/speech/recognize#SpeechRecognitionAlternative SpeechRecognitionAlternative
* @see https://cloud.google.com/speech/reference/rest/v1/speech/recognize#SpeechRecognitionResult SpeechRecognitionResult
* @see https://cloud.google.com/speech/reference/rest/v1/speech/recognize Recognize API documentation
* @see https://cloud.google.com/speech/reference/rest/v1/RecognitionConfig#AudioEncoding AudioEncoding types
* @see https://cloud.google.com/speech/docs/best-practices Speech API best practices
* @codingStandardsIgnoreEnd
*
* @param resource|string|StorageObject $audio The audio to recognize. May
* @param resource|string|StorageObject $audio The audio to recognize. May
* be a resource, string of bytes, a URI pointing to a
* Google Cloud Storage object in the format of
* `gs://{bucket-name}/{object-name}` or a
Expand All @@ -172,14 +172,13 @@ public function __construct(array $config = [])
* audio. Valid values are: 8000-48000. 16000 is optimal. For best
* results, set the sampling rate of the audio source to 16000 Hz.
* If that's not possible, use the native sample rate of the audio
* source (instead of re-sampling). **Defaults to** `8000` with
* .amr files and `16000` with .awb files. For .flac files the
* source (instead of re-sampling). For .flac and .wav files the
* Speech API will make a best effort to read the sample rate from
* the file's headers.
* @type string $encoding Encoding of the provided audio. May be one of
* `"LINEAR16"`, `"FLAC"`, `"MULAW"`, `"AMR"`, `"AMR_WB"`.
* **Defaults to** `"FLAC"` with .flac files, `"AMR"` with .amr
* files and `"AMR_WB"` with .awb files.
* `"LINEAR16"`, `"FLAC"`, `"MULAW"`, `"AMR"`, `"AMR_WB"`. For
* .flac and .wav files the Speech API will make a best effort to
* determine the encoding type from the file's headers.
* @type int $maxAlternatives Maximum number of alternatives to be
* returned. Valid values are 1-30. **Defaults to** `1`.
* @type bool $profanityFilter If set to `true`, the server will attempt
Expand All @@ -193,20 +192,25 @@ public function __construct(array $config = [])
* [SpeechContext](https://cloud.google.com/speech/reference/rest/v1/RecognitionConfig#SpeechContext)
* for more information.
* }
* @return array The transcribed results. Each element of the array contains
* a `transcript` key which holds the transcribed text. Optionally
* a `confidence` key holding the confidence estimate ranging from
* 0.0 to 1.0 may be present. `confidence` is typically provided
* only for the top hypothesis.
* @return Result[]
* @throws \InvalidArgumentException
*/
public function recognize($audio, array $options = [])
{
    // Build the request from the audio and options, then send it through
    // the connection.
    $response = $this->connection->recognize(
        $this->formatRequest($audio, $options)
    );

    // A response without a `results` key yields an empty list.
    if (!isset($response['results'])) {
        return [];
    }

    // Wrap every returned result (not only the first result's alternatives)
    // so the caller gets the full set as Result instances.
    $results = [];
    foreach ($response['results'] as $result) {
        $results[] = new Result($result);
    }

    return $results;
}

/**
Expand Down Expand Up @@ -237,7 +241,8 @@ public function recognize($audio, array $options = [])
* $isComplete = $operation->isComplete();
* }
*
* print_r($operation->results());
* $result = $operation->results()[0];
* print_r($result->topAlternative());
* ```
*
* ```
Expand All @@ -264,7 +269,8 @@ public function recognize($audio, array $options = [])
* $isComplete = $operation->isComplete();
* }
*
* print_r($operation->results());
* $result = $operation->results()[0];
* print_r($result->topAlternative());
* ```
*
* @codingStandardsIgnoreStart
Expand Down Expand Up @@ -295,14 +301,13 @@ public function recognize($audio, array $options = [])
* audio. Valid values are: 8000-48000. 16000 is optimal. For best
* results, set the sampling rate of the audio source to 16000 Hz.
* If that's not possible, use the native sample rate of the audio
* source (instead of re-sampling). **Defaults to** `8000` with
* .amr files and `16000` with .awb files. For .flac files the
* source (instead of re-sampling). For .flac and .wav files the
* Speech API will make a best effort to read the sample rate from
* the file's headers.
* @type string $encoding Encoding of the provided audio. May be one of
* `"LINEAR16"`, `"FLAC"`, `"MULAW"`, `"AMR"`, `"AMR_WB"`.
* **Defaults to** `"FLAC"` with .flac files, `"AMR"` with .amr
* files and `"AMR_WB"` with .awb files.
* `"LINEAR16"`, `"FLAC"`, `"MULAW"`, `"AMR"`, `"AMR_WB"`. For
* .flac and .wav files the Speech API will make a best effort to
* determine the encoding type from the file's headers.
* @type int $maxAlternatives Maximum number of alternatives to be
* returned. Valid values are 1-30. **Defaults to** `1`.
* @type bool $profanityFilter If set to `true`, the server will attempt
Expand Down Expand Up @@ -391,18 +396,6 @@ private function formatRequest($audio, array $options)
? $options['languageCode']
: $this->languageCode;

$options['encoding'] = isset($options['encoding'])
? $options['encoding']
: $this->determineEncoding($fileFormat);

$options['sampleRateHertz'] = isset($options['sampleRateHertz'])
? $options['sampleRateHertz']
: $this->determineSampleRate($options['encoding']);

if (!$options['sampleRateHertz']) {
unset($options['sampleRateHertz']);
}

foreach ($options as $option => $value) {
if (in_array($option, $recognizeOptions)) {
$options['config'][$option] = $value;
Expand All @@ -412,50 +405,4 @@ private function formatRequest($audio, array $options)

return $options;
}

/**
 * Maps a file format to the audio encoding name used for it.
 *
 * Recognised formats are `flac`, `amr` and `awb`; anything else is rejected.
 *
 * @param string $fileFormat
 * @return string
 * @throws \InvalidArgumentException If the format is not one of the known ones.
 */
private function determineEncoding($fileFormat)
{
    // Checked in this order; the first matching format wins.
    $encodingsByFormat = [
        'flac' => 'FLAC',
        'amr' => 'AMR',
        'awb' => 'AMR_WB',
    ];

    foreach ($encodingsByFormat as $knownFormat => $encodingName) {
        // Loose comparison is intentional: it mirrors `switch` semantics.
        if ($fileFormat == $knownFormat) {
            return $encodingName;
        }
    }

    throw new \InvalidArgumentException(
        'Unable to determine encoding. Please provide the value manually.'
    );
}

/**
 * Maps an encoding name to its sample rate.
 *
 * `AMR` maps to 8000 and `AMR_WB` to 16000. `FLAC` maps to null, meaning no
 * sample rate is supplied for it. Any other encoding is rejected.
 *
 * @param string $encoding
 * @return int|null
 * @throws \InvalidArgumentException If the encoding is not one of the known ones.
 */
private function determineSampleRate($encoding)
{
    // Checked in this order; the first matching encoding wins.
    $ratesByEncoding = [
        'AMR' => 8000,
        'AMR_WB' => 16000,
        'FLAC' => null,
    ];

    foreach ($ratesByEncoding as $knownEncoding => $sampleRate) {
        // Loose comparison is intentional: it mirrors `switch` semantics.
        if ($encoding == $knownEncoding) {
            return $sampleRate;
        }
    }

    throw new \InvalidArgumentException(
        'Unable to determine sample rate. Please provide the value manually.'
    );
}
}
12 changes: 8 additions & 4 deletions tests/snippets/Speech/OperationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use Google\Cloud\Dev\Snippet\SnippetTestCase;
use Google\Cloud\Speech\Connection\ConnectionInterface;
use Google\Cloud\Speech\Operation;
use Google\Cloud\Speech\Result;
use Google\Cloud\Speech\SpeechClient;
use Prophecy\Argument;

Expand Down Expand Up @@ -50,9 +51,6 @@ public function setUp()
$this->operation = new \SpeechOperationStub($this->connection->reveal(), $this->opData['name'], $this->opData);
}

/**
* @expectedException InvalidArgumentException
*/
public function testClass()
{
$snippet = $this->snippetFromClass(Operation::class);
Expand All @@ -63,6 +61,12 @@ public function testClass()
->willReturn(['name' => 'foo']);

$snippet->addLocal('connectionStub', $connectionStub->reveal());
$snippet->insertAfterLine(4, '$reflection = new \ReflectionClass($speech);
$property = $reflection->getProperty(\'connection\');
$property->setAccessible(true);
$property->setValue($speech, $connectionStub);
$property->setAccessible(false);'
);

$snippet->replace("__DIR__ . '/audio.flac'", '"php://temp"');

Expand All @@ -84,7 +88,7 @@ public function testResults()
$snippet->addLocal('operation', $this->operation);

$res = $snippet->invoke('results');
$this->assertEquals($this->opData['response']['results'][0]['alternatives'], $res->returnVal());
$this->assertContainsOnlyInstancesOf(Result::class, $res->returnVal());
}

public function testExists()
Expand Down
Loading