Skip to content

Commit

Permalink
Improve telemetry sent to Speech Service. (#41)
Browse files Browse the repository at this point in the history
* Improve telemetry sent to Speech Service.

Send user latency telemetry measuring the time from the SDK's acquisition of audio frames until the first hypothesis is returned, and until the final phrase is returned.
Add telemetry for the type of audio source, including the microphone label when available.

Add more telemetry tests.
Add specific test for replay buffer.

* Spelling fixes
  • Loading branch information
rhurey authored Mar 21, 2019
1 parent 0667816 commit 87c57b2
Show file tree
Hide file tree
Showing 20 changed files with 836 additions and 177 deletions.
25 changes: 22 additions & 3 deletions src/common.browser/FileAudioSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
// Licensed under the MIT license.

import { AudioStreamFormat, AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import {
connectivity,
ISpeechConfigAudioDevice,
type,
} from "../common.speech/Exports";
import {
AudioSourceErrorEvent,
AudioSourceEvent,
Expand Down Expand Up @@ -33,8 +38,6 @@ export class FileAudioSource implements IAudioSource {
// per second, with the chunk size == sample rate in bytes per second * 2 / 5).
private static readonly CHUNK_SIZE: number = FileAudioSource.SAMPLE_RATE * 2 / 5;

private static readonly UPLOAD_INTERVAL: number = 200; // milliseconds

// 10 seconds of audio in bytes =
// sample rate (bytes/second) * 600 (seconds) + 44 (size of the wave header).
private static readonly MAX_SIZE: number = FileAudioSource.SAMPLE_RATE * 600 + 44;
Expand Down Expand Up @@ -132,6 +135,18 @@ export class FileAudioSource implements IAudioSource {
return this.privEvents;
}

// Describes this audio source for service telemetry: a file read by the
// Speech SDK, reported with the source's fixed wave-format constants.
public get deviceInfo(): Promise<ISpeechConfigAudioDevice> {
    const fileDevice: ISpeechConfigAudioDevice = {
        bitspersample: FileAudioSource.FILEFORMAT.bitsPerSample,
        channelcount: FileAudioSource.FILEFORMAT.channels,
        connectivity: connectivity.Unknown,
        manufacturer: "Speech SDK",
        model: "File",
        samplerate: FileAudioSource.FILEFORMAT.samplesPerSec,
        type: type.Unknown,
    };

    return PromiseHelper.fromResult(fileDevice);
}

private upload = (audioNodeId: string): Promise<StreamReader<ArrayBuffer>> => {
return this.turnOn()
.onSuccessContinueWith<StreamReader<ArrayBuffer>>((_: boolean) => {
Expand All @@ -149,7 +164,11 @@ export class FileAudioSource implements IAudioSource {
return; // output stream was closed (somebody called TurnOff). We're done here.
}

stream.write(reader.result as ArrayBuffer);
stream.writeStreamChunk({
buffer: reader.result as ArrayBuffer,
isEnd: false,
timeReceived: Date.now(),
});

if (endOffset < this.privFile.size) {
startOffset = endOffset;
Expand Down
64 changes: 63 additions & 1 deletion src/common.browser/MicAudioSource.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { AudioStreamFormat, AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import {
AudioStreamFormat,
AudioStreamFormatImpl,
} from "../../src/sdk/Audio/AudioStreamFormat";
import {
connectivity,
ISpeechConfigAudioDevice,
type
} from "../common.speech/Exports";
import {
AudioSourceErrorEvent,
AudioSourceEvent,
Expand Down Expand Up @@ -49,6 +57,8 @@ export class MicAudioSource implements IAudioSource {

private privContext: AudioContext;

private privMicrophoneLabel: string;

public constructor(private readonly privRecorder: IRecorder, audioSourceId?: string, private readonly deviceId?: string) {
this.privId = audioSourceId ? audioSourceId : createNoDashGuid();
this.privEvents = new EventSource<AudioSourceEvent>();
Expand Down Expand Up @@ -184,6 +194,58 @@ export class MicAudioSource implements IAudioSource {
return this.privEvents;
}

// Builds the audio-device description sent in service telemetry, using the
// microphone's label (when it can be obtained) as the device model.
public get deviceInfo(): Promise<ISpeechConfigAudioDevice> {
    return this.getMicrophoneLabel().onSuccessContinueWith((micLabel: string): ISpeechConfigAudioDevice => ({
        bitspersample: MicAudioSource.AUDIOFORMAT.bitsPerSample,
        channelcount: MicAudioSource.AUDIOFORMAT.channels,
        connectivity: connectivity.Unknown,
        manufacturer: "Speech SDK",
        model: micLabel,
        samplerate: MicAudioSource.AUDIOFORMAT.samplesPerSec,
        type: type.Microphones,
    }));
}

// Resolves a human-readable label for the active microphone, caching the
// result after the first successful lookup. Falls back to a generic name
// when the stream is not active, the browser withholds the device id, the
// device is absent from enumeration, or enumeration itself fails.
// NOTE(review): the original never resolved the deferred when no device
// matched (or enumerateDevices rejected), leaving deviceInfo hanging forever.
private getMicrophoneLabel(): Promise<string> {
    const defaultMicrophoneName: string = "microphone";

    // If we did this already, return the value.
    if (this.privMicrophoneLabel !== undefined) {
        return PromiseHelper.fromResult(this.privMicrophoneLabel);
    }

    // If the stream isn't currently running, we can't query devices because security.
    if (this.privMediaStream === undefined || !this.privMediaStream.active) {
        return PromiseHelper.fromResult(defaultMicrophoneName);
    }

    // Get the id of the device running the audio track.
    const microphoneDeviceId: string = this.privMediaStream.getTracks()[0].getSettings().deviceId;

    // If the browser doesn't support getting the device ID, set a default and return.
    if (undefined === microphoneDeviceId) {
        this.privMicrophoneLabel = defaultMicrophoneName;
        return PromiseHelper.fromResult(this.privMicrophoneLabel);
    }

    const deferred: Deferred<string> = new Deferred<string>();

    // Enumerate the media devices.
    navigator.mediaDevices.enumerateDevices().then(
        (devices: MediaDeviceInfo[]) => {
            // Default to the generic name so the promise always resolves,
            // even when the device id is not present in the enumeration.
            this.privMicrophoneLabel = defaultMicrophoneName;
            for (const device of devices) {
                if (device.deviceId === microphoneDeviceId) {
                    // Found the device
                    this.privMicrophoneLabel = device.label;
                    break;
                }
            }
            deferred.resolve(this.privMicrophoneLabel);
        },
        () => {
            // Enumeration failed (e.g. permission revoked); report the generic
            // name rather than leaving the promise unresolved. Not cached, so
            // a later call may retry.
            deferred.resolve(defaultMicrophoneName);
        });

    return deferred.promise();
}

private listen = (audioNodeId: string): Promise<StreamReader<ArrayBuffer>> => {
return this.turnOn()
.onSuccessContinueWith<StreamReader<ArrayBuffer>>((_: boolean) => {
Expand Down
6 changes: 5 additions & 1 deletion src/common.browser/OpusRecorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ export class OpusRecorder implements IRecorder {
const reader = new FileReader();
reader.readAsArrayBuffer(dataAvailableEvent.data);
reader.onloadend = (event: ProgressEvent) => {
outputStream.write(reader.result as ArrayBuffer);
outputStream.writeStreamChunk({
buffer: reader.result as ArrayBuffer,
isEnd: false,
timeReceived: Date.now(),
});
};
}
};
Expand Down
6 changes: 5 additions & 1 deletion src/common.browser/PCMRecorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ export class PcmRecorder implements IRecorder {
if (outputStream && !outputStream.isClosed) {
const waveFrame = waveStreamEncoder.encode(needHeader, inputFrame);
if (!!waveFrame) {
outputStream.write(waveFrame);
outputStream.writeStreamChunk({
buffer: waveFrame,
isEnd: false,
timeReceived: Date.now(),
});
needHeader = false;
}
}
Expand Down
42 changes: 30 additions & 12 deletions src/common.browser/ReplayableAudioNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {
private privBufferSerial: number = 0;
private privBufferedBytes: number = 0;
private privReplay: boolean = false;
private privLastChunkAcquiredTime: number = 0;

public constructor(audioSource: IAudioStreamNode, format: AudioStreamFormatImpl) {
this.privAudioNode = audioSource;
Expand Down Expand Up @@ -48,11 +49,11 @@ export class ReplayableAudioNode implements IAudioStreamNode {

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].buffer.byteLength;
while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}

const retVal: ArrayBuffer = this.privBuffers[i].buffer.slice(bytesToSeek);
const retVal: ArrayBuffer = this.privBuffers[i].chunk.buffer.slice(bytesToSeek);

this.privReplayOffset += (retVal.byteLength / this.privFormat.avgBytesPerSec) * 1e+7;

Expand All @@ -64,14 +65,14 @@ export class ReplayableAudioNode implements IAudioStreamNode {
return PromiseHelper.fromResult<IStreamChunk<ArrayBuffer>>({
buffer: retVal,
isEnd: false,
timeReceived: this.privBuffers[i].chunk.timeReceived,
});
}

return this.privAudioNode.read()
.onSuccessContinueWith((result: IStreamChunk<ArrayBuffer>) => {
if (result.buffer) {

this.privBuffers.push(new BufferEntry(result.buffer, this.privBufferSerial++, this.privBufferedBytes));
this.privBuffers.push(new BufferEntry(result, this.privBufferSerial++, this.privBufferedBytes));
this.privBufferedBytes += result.buffer.byteLength;
}
return result;
Expand All @@ -91,7 +92,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {
}

// Shrinks the existing audio buffers to start at the new offset, or at the
// beginnign of the buffer closest to the requested offset.
// beginning of the buffer closest to the requested offset.
// A replay request will start from the last shrink point.
public shrinkBuffers(offset: number): void {
this.privLastShrinkOffset = offset;
Expand All @@ -105,26 +106,43 @@ export class ReplayableAudioNode implements IAudioStreamNode {

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].buffer.byteLength;
while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}
this.privBufferStartOffset = Math.round(offset - ((bytesToSeek / this.privFormat.avgBytesPerSec) * 1e+7));

this.privBuffers = this.privBuffers.slice(i);
}

// Finds the time a buffer of audio was first seen by offset.
public findTimeAtOffset(offset: number): number {
    // Offsets older than the last shrink point are no longer buffered.
    if (offset < this.privBufferStartOffset) {
        return 0;
    }

    for (const entry of this.privBuffers) {
        const chunkStart: number = (entry.byteOffset / this.privFormat.avgBytesPerSec) * 1e7;
        const chunkEnd: number = chunkStart + ((entry.chunk.buffer.byteLength / this.privFormat.avgBytesPerSec) * 1e7);

        if (chunkStart <= offset && offset <= chunkEnd) {
            return entry.chunk.timeReceived;
        }
    }

    // Offset lies beyond everything currently buffered.
    return 0;
}
}

// Primary use of this class is to help debugging problems with the replay
// code. If the memory cost of alloc / dealloc gets too much, drop it and just use
// the ArrayBuffer directly.
// tslint:disable-next-line:max-classes-per-file
class BufferEntry {
public buffer: ArrayBuffer;
public chunk: IStreamChunk<ArrayBuffer>;
public serial: number;
public byteOffset: number;

public constructor(buffer: ArrayBuffer, serial: number, byteOffset: number) {
this.buffer = buffer;
public constructor(chunk: IStreamChunk<ArrayBuffer>, serial: number, byteOffset: number) {
this.chunk = chunk;
this.serial = serial;
this.byteOffset = byteOffset;
}
Expand Down
21 changes: 8 additions & 13 deletions src/common.speech/IntentServiceRecognizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
connectionMessage.textBody,
resultProps);

this.privRequestSession.onHypothesis(result.offset);

ev = new IntentRecognitionEventArgs(result, speechHypothesis.Offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);

if (!!this.privIntentRecognizer.recognizing) {
Expand All @@ -110,14 +112,9 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
connectionMessage.textBody,
resultProps);

ev = new IntentRecognitionEventArgs(result, result.offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);
ev = new IntentRecognitionEventArgs(result, result.offset, this.privRequestSession.sessionId);

const sendEvent: () => void = () => {
if (this.privRecognizerConfig.isContinuousRecognition) {
// For continuous recognition telemetry has to be sent for every phrase as per spec.
this.sendTelemetryData();
}

if (!!this.privIntentRecognizer.recognized) {
try {
this.privIntentRecognizer.recognized(this.privIntentRecognizer, ev);
Expand Down Expand Up @@ -148,6 +145,8 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
// If intent data was sent, the terminal result for this recognizer is an intent being found.
// If no intent data was sent, the terminal event is speech recognition being successful.
if (false === this.privIntentDataSent || ResultReason.NoMatch === ev.result.reason) {
// Advance the buffers.
this.privRequestSession.onPhraseRecognized(ev.offset + ev.result.duration);
sendEvent();
} else {
// Squirrel away the args, when the response event arrives it will build upon them
Expand All @@ -158,11 +157,6 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
break;
case "response":
// Response from LUIS
if (this.privRecognizerConfig.isContinuousRecognition) {
// For continuous recognition telemetry has to be sent for every phrase as per spec.
this.sendTelemetryData();
}

ev = this.privPendingIntentArgs;
this.privPendingIntentArgs = undefined;

Expand Down Expand Up @@ -209,13 +203,14 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
reason,
ev.result.text,
ev.result.duration,
ev.result.offset + this.privRequestSession.currentTurnAudioOffset,
ev.result.offset,
ev.result.errorDetails,
ev.result.json,
properties),
ev.offset + this.privRequestSession.currentTurnAudioOffset,
ev.offset,
ev.sessionId);
}
this.privRequestSession.onPhraseRecognized(ev.offset + ev.result.duration);

if (!!this.privIntentRecognizer.recognized) {
try {
Expand Down
37 changes: 37 additions & 0 deletions src/common.speech/RecognizerConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ export class SpeechServiceConfig {
export class Context {
public system: System;
public os: OS;
public audio: ISpeechConfigAudio;

constructor(os: OS) {
this.system = new System();
Expand Down Expand Up @@ -146,3 +147,39 @@ export class Device {
this.version = version;
}
}

// Audio section of the speech.config context sent to the service,
// describing the capture source and (optionally) the playback device.
export interface ISpeechConfigAudio {
    source?: ISpeechConfigAudioDevice;
    playback?: ISpeechConfigAudioDevice;
}

// Telemetry description of a single audio device. Property names are
// lower-cased to match the service's speech.config JSON schema — do not
// rename them to camelCase.
export interface ISpeechConfigAudioDevice {
    manufacturer: string;
    model: string;
    connectivity: connectivity;
    type: type;
    samplerate: number;
    bitspersample: number;
    channelcount: number;
}

// How the audio device is connected to the host, as reported in
// speech.config telemetry. String values are sent verbatim to the service.
export enum connectivity {
    Bluetooth = "Bluetooth",
    Wired = "Wired",
    WiFi = "WiFi",
    Cellular = "Cellular",
    InBuilt = "InBuilt",
    Unknown = "Unknown",
}

// Broad category of the audio device, as reported in speech.config
// telemetry. String values are sent verbatim to the service.
export enum type {
    Phone = "Phone",
    Speaker = "Speaker",
    Car = "Car",
    Headset = "Headset",
    Thermostat = "Thermostat",
    Microphones = "Microphones",
    Deskphone = "Deskphone",
    RemoteControl = "RemoteControl",
    // Trailing comma added for consistency with the connectivity enum.
    Unknown = "Unknown",
}
Loading

0 comments on commit 87c57b2

Please sign in to comment.