Skip to content

Commit

Permalink
Improve telemetry sent to Speech Service. (#41)
Browse files Browse the repository at this point in the history
* Improve telemetry sent to Speech Service.

Send user latency telemetry measuring the time from the SDK's acquisition of audio frames until the first hypothesis is returned, and until the final phrase is returned.
Add telemetry for the type of audio source, including the microphone label when available.

Add more telemetry tests.
Add specific test for replay buffer.

* Spelling fixes
  • Loading branch information
rhurey authored Mar 21, 2019
1 parent 0667816 commit 87c57b2
Show file tree
Hide file tree
Showing 20 changed files with 836 additions and 177 deletions.
25 changes: 22 additions & 3 deletions src/common.browser/FileAudioSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
// Licensed under the MIT license.

import { AudioStreamFormat, AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import {
connectivity,
ISpeechConfigAudioDevice,
type,
} from "../common.speech/Exports";
import {
AudioSourceErrorEvent,
AudioSourceEvent,
Expand Down Expand Up @@ -33,8 +38,6 @@ export class FileAudioSource implements IAudioSource {
// per second, with the chunk size == sample rate in bytes per second * 2 / 5).
private static readonly CHUNK_SIZE: number = FileAudioSource.SAMPLE_RATE * 2 / 5;

private static readonly UPLOAD_INTERVAL: number = 200; // milliseconds

// 10 seconds of audio in bytes =
// sample rate (bytes/second) * 600 (seconds) + 44 (size of the wave header).
private static readonly MAX_SIZE: number = FileAudioSource.SAMPLE_RATE * 600 + 44;
Expand Down Expand Up @@ -132,6 +135,18 @@ export class FileAudioSource implements IAudioSource {
return this.privEvents;
}

// Describes this audio source for service telemetry: a file read by the
// Speech SDK, reported with the source's fixed wave-format constants.
public get deviceInfo(): Promise<ISpeechConfigAudioDevice> {
    const fileDevice: ISpeechConfigAudioDevice = {
        bitspersample: FileAudioSource.FILEFORMAT.bitsPerSample,
        channelcount: FileAudioSource.FILEFORMAT.channels,
        connectivity: connectivity.Unknown,
        manufacturer: "Speech SDK",
        model: "File",
        samplerate: FileAudioSource.FILEFORMAT.samplesPerSec,
        type: type.Unknown,
    };

    return PromiseHelper.fromResult(fileDevice);
}

private upload = (audioNodeId: string): Promise<StreamReader<ArrayBuffer>> => {
return this.turnOn()
.onSuccessContinueWith<StreamReader<ArrayBuffer>>((_: boolean) => {
Expand All @@ -149,7 +164,11 @@ export class FileAudioSource implements IAudioSource {
return; // output stream was closed (somebody called TurnOff). We're done here.
}

stream.write(reader.result as ArrayBuffer);
stream.writeStreamChunk({
buffer: reader.result as ArrayBuffer,
isEnd: false,
timeReceived: Date.now(),
});

if (endOffset < this.privFile.size) {
startOffset = endOffset;
Expand Down
64 changes: 63 additions & 1 deletion src/common.browser/MicAudioSource.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { AudioStreamFormat, AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import {
AudioStreamFormat,
AudioStreamFormatImpl,
} from "../../src/sdk/Audio/AudioStreamFormat";
import {
connectivity,
ISpeechConfigAudioDevice,
type
} from "../common.speech/Exports";
import {
AudioSourceErrorEvent,
AudioSourceEvent,
Expand Down Expand Up @@ -49,6 +57,8 @@ export class MicAudioSource implements IAudioSource {

private privContext: AudioContext;

private privMicrophoneLabel: string;

public constructor(private readonly privRecorder: IRecorder, audioSourceId?: string, private readonly deviceId?: string) {
this.privId = audioSourceId ? audioSourceId : createNoDashGuid();
this.privEvents = new EventSource<AudioSourceEvent>();
Expand Down Expand Up @@ -184,6 +194,58 @@ export class MicAudioSource implements IAudioSource {
return this.privEvents;
}

// Builds the audio-device description sent in service telemetry, using the
// microphone's label (when it can be obtained) as the device model.
public get deviceInfo(): Promise<ISpeechConfigAudioDevice> {
    return this.getMicrophoneLabel().onSuccessContinueWith((micLabel: string): ISpeechConfigAudioDevice => ({
        bitspersample: MicAudioSource.AUDIOFORMAT.bitsPerSample,
        channelcount: MicAudioSource.AUDIOFORMAT.channels,
        connectivity: connectivity.Unknown,
        manufacturer: "Speech SDK",
        model: micLabel,
        samplerate: MicAudioSource.AUDIOFORMAT.samplesPerSec,
        type: type.Microphones,
    }));
}

// Resolves a human-readable label for the active microphone, caching the
// result after the first successful lookup. Falls back to a generic name
// when the stream is not active, the browser withholds the device id, the
// device is absent from enumeration, or enumeration itself fails.
// NOTE(review): the original never resolved the deferred when no device
// matched (or enumerateDevices rejected), leaving deviceInfo hanging forever.
private getMicrophoneLabel(): Promise<string> {
    const defaultMicrophoneName: string = "microphone";

    // If we did this already, return the value.
    if (this.privMicrophoneLabel !== undefined) {
        return PromiseHelper.fromResult(this.privMicrophoneLabel);
    }

    // If the stream isn't currently running, we can't query devices because security.
    if (this.privMediaStream === undefined || !this.privMediaStream.active) {
        return PromiseHelper.fromResult(defaultMicrophoneName);
    }

    // Get the id of the device running the audio track.
    const microphoneDeviceId: string = this.privMediaStream.getTracks()[0].getSettings().deviceId;

    // If the browser doesn't support getting the device ID, set a default and return.
    if (undefined === microphoneDeviceId) {
        this.privMicrophoneLabel = defaultMicrophoneName;
        return PromiseHelper.fromResult(this.privMicrophoneLabel);
    }

    const deferred: Deferred<string> = new Deferred<string>();

    // Enumerate the media devices.
    navigator.mediaDevices.enumerateDevices().then(
        (devices: MediaDeviceInfo[]) => {
            // Default to the generic name so the promise always resolves,
            // even when the device id is not present in the enumeration.
            this.privMicrophoneLabel = defaultMicrophoneName;
            for (const device of devices) {
                if (device.deviceId === microphoneDeviceId) {
                    // Found the device
                    this.privMicrophoneLabel = device.label;
                    break;
                }
            }
            deferred.resolve(this.privMicrophoneLabel);
        },
        () => {
            // Enumeration failed (e.g. permission revoked); report the generic
            // name rather than leaving the promise unresolved. Not cached, so
            // a later call may retry.
            deferred.resolve(defaultMicrophoneName);
        });

    return deferred.promise();
}

private listen = (audioNodeId: string): Promise<StreamReader<ArrayBuffer>> => {
return this.turnOn()
.onSuccessContinueWith<StreamReader<ArrayBuffer>>((_: boolean) => {
Expand Down
6 changes: 5 additions & 1 deletion src/common.browser/OpusRecorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ export class OpusRecorder implements IRecorder {
const reader = new FileReader();
reader.readAsArrayBuffer(dataAvailableEvent.data);
reader.onloadend = (event: ProgressEvent) => {
outputStream.write(reader.result as ArrayBuffer);
outputStream.writeStreamChunk({
buffer: reader.result as ArrayBuffer,
isEnd: false,
timeReceived: Date.now(),
});
};
}
};
Expand Down
6 changes: 5 additions & 1 deletion src/common.browser/PCMRecorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ export class PcmRecorder implements IRecorder {
if (outputStream && !outputStream.isClosed) {
const waveFrame = waveStreamEncoder.encode(needHeader, inputFrame);
if (!!waveFrame) {
outputStream.write(waveFrame);
outputStream.writeStreamChunk({
buffer: waveFrame,
isEnd: false,
timeReceived: Date.now(),
});
needHeader = false;
}
}
Expand Down
42 changes: 30 additions & 12 deletions src/common.browser/ReplayableAudioNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {
private privBufferSerial: number = 0;
private privBufferedBytes: number = 0;
private privReplay: boolean = false;
private privLastChunkAcquiredTime: number = 0;

public constructor(audioSource: IAudioStreamNode, format: AudioStreamFormatImpl) {
this.privAudioNode = audioSource;
Expand Down Expand Up @@ -48,11 +49,11 @@ export class ReplayableAudioNode implements IAudioStreamNode {

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].buffer.byteLength;
while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}

const retVal: ArrayBuffer = this.privBuffers[i].buffer.slice(bytesToSeek);
const retVal: ArrayBuffer = this.privBuffers[i].chunk.buffer.slice(bytesToSeek);

this.privReplayOffset += (retVal.byteLength / this.privFormat.avgBytesPerSec) * 1e+7;

Expand All @@ -64,14 +65,14 @@ export class ReplayableAudioNode implements IAudioStreamNode {
return PromiseHelper.fromResult<IStreamChunk<ArrayBuffer>>({
buffer: retVal,
isEnd: false,
timeReceived: this.privBuffers[i].chunk.timeReceived,
});
}

return this.privAudioNode.read()
.onSuccessContinueWith((result: IStreamChunk<ArrayBuffer>) => {
if (result.buffer) {

this.privBuffers.push(new BufferEntry(result.buffer, this.privBufferSerial++, this.privBufferedBytes));
this.privBuffers.push(new BufferEntry(result, this.privBufferSerial++, this.privBufferedBytes));
this.privBufferedBytes += result.buffer.byteLength;
}
return result;
Expand All @@ -91,7 +92,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {
}

// Shrinks the existing audio buffers to start at the new offset, or at the
// beginnign of the buffer closest to the requested offset.
// beginning of the buffer closest to the requested offset.
// A replay request will start from the last shrink point.
public shrinkBuffers(offset: number): void {
this.privLastShrinkOffset = offset;
Expand All @@ -105,26 +106,43 @@ export class ReplayableAudioNode implements IAudioStreamNode {

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].buffer.byteLength;
while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}
this.privBufferStartOffset = Math.round(offset - ((bytesToSeek / this.privFormat.avgBytesPerSec) * 1e+7));

this.privBuffers = this.privBuffers.slice(i);
}

// Finds the time a buffer of audio was first seen by offset.
public findTimeAtOffset(offset: number): number {
    // Offsets older than the last shrink point are no longer buffered.
    if (offset < this.privBufferStartOffset) {
        return 0;
    }

    for (const entry of this.privBuffers) {
        const chunkStart: number = (entry.byteOffset / this.privFormat.avgBytesPerSec) * 1e7;
        const chunkEnd: number = chunkStart + ((entry.chunk.buffer.byteLength / this.privFormat.avgBytesPerSec) * 1e7);

        if (chunkStart <= offset && offset <= chunkEnd) {
            return entry.chunk.timeReceived;
        }
    }

    // Offset lies beyond everything currently buffered.
    return 0;
}
}

// Primary use of this class is to help debugging problems with the replay
// code. If the memory cost of alloc / dealloc gets too much, drop it and just use
// the ArrayBuffer directly.
// tslint:disable-next-line:max-classes-per-file
class BufferEntry {
public buffer: ArrayBuffer;
public chunk: IStreamChunk<ArrayBuffer>;
public serial: number;
public byteOffset: number;

public constructor(buffer: ArrayBuffer, serial: number, byteOffset: number) {
this.buffer = buffer;
public constructor(chunk: IStreamChunk<ArrayBuffer>, serial: number, byteOffset: number) {
this.chunk = chunk;
this.serial = serial;
this.byteOffset = byteOffset;
}
Expand Down
21 changes: 8 additions & 13 deletions src/common.speech/IntentServiceRecognizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
connectionMessage.textBody,
resultProps);

this.privRequestSession.onHypothesis(result.offset);

ev = new IntentRecognitionEventArgs(result, speechHypothesis.Offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);

if (!!this.privIntentRecognizer.recognizing) {
Expand All @@ -110,14 +112,9 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
connectionMessage.textBody,
resultProps);

ev = new IntentRecognitionEventArgs(result, result.offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);
ev = new IntentRecognitionEventArgs(result, result.offset, this.privRequestSession.sessionId);

const sendEvent: () => void = () => {
if (this.privRecognizerConfig.isContinuousRecognition) {
// For continuous recognition telemetry has to be sent for every phrase as per spec.
this.sendTelemetryData();
}

if (!!this.privIntentRecognizer.recognized) {
try {
this.privIntentRecognizer.recognized(this.privIntentRecognizer, ev);
Expand Down Expand Up @@ -148,6 +145,8 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
// If intent data was sent, the terminal result for this recognizer is an intent being found.
// If no intent data was sent, the terminal event is speech recognition being successful.
if (false === this.privIntentDataSent || ResultReason.NoMatch === ev.result.reason) {
// Advance the buffers.
this.privRequestSession.onPhraseRecognized(ev.offset + ev.result.duration);
sendEvent();
} else {
// Squirrel away the args, when the response event arrives it will build upon them
Expand All @@ -158,11 +157,6 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
break;
case "response":
// Response from LUIS
if (this.privRecognizerConfig.isContinuousRecognition) {
// For continuous recognition telemetry has to be sent for every phrase as per spec.
this.sendTelemetryData();
}

ev = this.privPendingIntentArgs;
this.privPendingIntentArgs = undefined;

Expand Down Expand Up @@ -209,13 +203,14 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
reason,
ev.result.text,
ev.result.duration,
ev.result.offset + this.privRequestSession.currentTurnAudioOffset,
ev.result.offset,
ev.result.errorDetails,
ev.result.json,
properties),
ev.offset + this.privRequestSession.currentTurnAudioOffset,
ev.offset,
ev.sessionId);
}
this.privRequestSession.onPhraseRecognized(ev.offset + ev.result.duration);

if (!!this.privIntentRecognizer.recognized) {
try {
Expand Down
37 changes: 37 additions & 0 deletions src/common.speech/RecognizerConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ export class SpeechServiceConfig {
export class Context {
public system: System;
public os: OS;
public audio: ISpeechConfigAudio;

constructor(os: OS) {
this.system = new System();
Expand Down Expand Up @@ -146,3 +147,39 @@ export class Device {
this.version = version;
}
}

// Audio section of the speech.config context sent to the service,
// describing the capture source and (optionally) the playback device.
export interface ISpeechConfigAudio {
    source?: ISpeechConfigAudioDevice;
    playback?: ISpeechConfigAudioDevice;
}

// Telemetry description of a single audio device. Property names are
// lower-cased to match the service's speech.config JSON schema — do not
// rename them to camelCase.
export interface ISpeechConfigAudioDevice {
    manufacturer: string;
    model: string;
    connectivity: connectivity;
    type: type;
    samplerate: number;
    bitspersample: number;
    channelcount: number;
}

// How the audio device is connected to the host, as reported in
// speech.config telemetry. String values are sent verbatim to the service.
export enum connectivity {
    Bluetooth = "Bluetooth",
    Wired = "Wired",
    WiFi = "WiFi",
    Cellular = "Cellular",
    InBuilt = "InBuilt",
    Unknown = "Unknown",
}

// Broad category of the audio device, as reported in speech.config
// telemetry. String values are sent verbatim to the service.
export enum type {
    Phone = "Phone",
    Speaker = "Speaker",
    Car = "Car",
    Headset = "Headset",
    Thermostat = "Thermostat",
    Microphones = "Microphones",
    Deskphone = "Deskphone",
    RemoteControl = "RemoteControl",
    // Trailing comma added for consistency with the connectivity enum.
    Unknown = "Unknown",
}
Loading

0 comments on commit 87c57b2

Please sign in to comment.