Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve telemetry sent to Speech Service. #41

Merged
merged 2 commits into from
Mar 21, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions src/common.browser/FileAudioSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
// Licensed under the MIT license.

import { AudioStreamFormat, AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import {
connectivity,
ISpeechConfigAudioDevice,
type,
} from "../common.speech/Exports";
import {
AudioSourceErrorEvent,
AudioSourceEvent,
Expand Down Expand Up @@ -33,8 +38,6 @@ export class FileAudioSource implements IAudioSource {
// per second, with the chunk size == sample rate in bytes per second * 2 / 5).
private static readonly CHUNK_SIZE: number = FileAudioSource.SAMPLE_RATE * 2 / 5;

private static readonly UPLOAD_INTERVAL: number = 200; // milliseconds

// 10 seconds of audio in bytes =
// sample rate (bytes/second) * 600 (seconds) + 44 (size of the wave header).
private static readonly MAX_SIZE: number = FileAudioSource.SAMPLE_RATE * 600 + 44;
Expand Down Expand Up @@ -132,6 +135,18 @@ export class FileAudioSource implements IAudioSource {
return this.privEvents;
}

public get deviceInfo(): Promise<ISpeechConfigAudioDevice> {
    // Telemetry description of this audio source. A file has no meaningful
    // connectivity or device type, so both are reported as Unknown; the
    // format fields come straight from the fixed wave file format.
    const info: ISpeechConfigAudioDevice = {
        bitspersample: FileAudioSource.FILEFORMAT.bitsPerSample,
        channelcount: FileAudioSource.FILEFORMAT.channels,
        connectivity: connectivity.Unknown,
        manufacturer: "Speech SDK",
        model: "File",
        samplerate: FileAudioSource.FILEFORMAT.samplesPerSec,
        type: type.Unknown,
    };
    return PromiseHelper.fromResult(info);
}

private upload = (audioNodeId: string): Promise<StreamReader<ArrayBuffer>> => {
return this.turnOn()
.onSuccessContinueWith<StreamReader<ArrayBuffer>>((_: boolean) => {
Expand All @@ -149,7 +164,11 @@ export class FileAudioSource implements IAudioSource {
return; // output stream was closed (somebody called TurnOff). We're done here.
}

stream.write(reader.result as ArrayBuffer);
stream.writeStreamChunk({
buffer: reader.result as ArrayBuffer,
isEnd: false,
timeRecieved: Date.now(),
});

if (endOffset < this.privFile.size) {
startOffset = endOffset;
Expand Down
64 changes: 63 additions & 1 deletion src/common.browser/MicAudioSource.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { AudioStreamFormat, AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import {
AudioStreamFormat,
AudioStreamFormatImpl,
} from "../../src/sdk/Audio/AudioStreamFormat";
import {
connectivity,
ISpeechConfigAudioDevice,
type
} from "../common.speech/Exports";
import {
AudioSourceErrorEvent,
AudioSourceEvent,
Expand Down Expand Up @@ -49,6 +57,8 @@ export class MicAudioSource implements IAudioSource {

private privContext: AudioContext;

private privMicrophoneLabel: string;

public constructor(private readonly privRecorder: IRecorder, audioSourceId?: string, private readonly deviceId?: string) {
this.privId = audioSourceId ? audioSourceId : createNoDashGuid();
this.privEvents = new EventSource<AudioSourceEvent>();
Expand Down Expand Up @@ -184,6 +194,58 @@ export class MicAudioSource implements IAudioSource {
return this.privEvents;
}

public get deviceInfo(): Promise<ISpeechConfigAudioDevice> {
    // Resolve the human-readable microphone label first, then build the
    // telemetry payload around it; the audio format fields come from the
    // fixed capture format.
    return this.getMicrophoneLabel().onSuccessContinueWith((label: string): ISpeechConfigAudioDevice => ({
        bitspersample: MicAudioSource.AUDIOFORMAT.bitsPerSample,
        channelcount: MicAudioSource.AUDIOFORMAT.channels,
        connectivity: connectivity.Unknown,
        manufacturer: "Speech SDK",
        model: label,
        samplerate: MicAudioSource.AUDIOFORMAT.samplesPerSec,
        type: type.Microphones,
    }));
}

// Looks up the human-readable label of the microphone backing the active
// media stream. Falls back to "microphone" when the label cannot be
// determined. The result is cached in privMicrophoneLabel after the first
// successful lookup.
private getMicrophoneLabel(): Promise<string> {
    const defaultMicrophoneName: string = "microphone";

    // If we did this already, return the cached value.
    if (this.privMicrophoneLabel !== undefined) {
        return PromiseHelper.fromResult(this.privMicrophoneLabel);
    }

    // If the stream isn't currently running, we can't query devices because security.
    if (this.privMediaStream === undefined || !this.privMediaStream.active) {
        return PromiseHelper.fromResult(defaultMicrophoneName);
    }

    // Get the id of the device running the audio track.
    const microphoneDeviceId: string = this.privMediaStream.getTracks()[0].getSettings().deviceId;

    // If the browser doesn't support getting the device ID, set a default and return.
    if (undefined === microphoneDeviceId) {
        this.privMicrophoneLabel = defaultMicrophoneName;
        return PromiseHelper.fromResult(this.privMicrophoneLabel);
    }

    const deferred: Deferred<string> = new Deferred<string>();

    // Enumerate the media devices and find the label of the active device.
    navigator.mediaDevices.enumerateDevices().then((devices: MediaDeviceInfo[]) => {
        let label: string = defaultMicrophoneName;
        for (const device of devices) {
            if (device.deviceId === microphoneDeviceId) {
                // Found the device. The label is the empty string until the
                // user grants media permission, so fall back to the default.
                label = device.label || defaultMicrophoneName;
                break;
            }
        }
        // Always resolve: the previous implementation left the deferred
        // pending when no device matched, hanging deviceInfo() forever.
        this.privMicrophoneLabel = label;
        deferred.resolve(this.privMicrophoneLabel);
    }, () => {
        // enumerateDevices() rejected (unsupported or blocked); resolve with
        // the default rather than leaving the promise pending.
        deferred.resolve(defaultMicrophoneName);
    });

    return deferred.promise();
}

private listen = (audioNodeId: string): Promise<StreamReader<ArrayBuffer>> => {
return this.turnOn()
.onSuccessContinueWith<StreamReader<ArrayBuffer>>((_: boolean) => {
Expand Down
6 changes: 5 additions & 1 deletion src/common.browser/OpusRecorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ export class OpusRecorder implements IRecorder {
const reader = new FileReader();
reader.readAsArrayBuffer(dataAvailableEvent.data);
reader.onloadend = (event: ProgressEvent) => {
outputStream.write(reader.result as ArrayBuffer);
outputStream.writeStreamChunk({
buffer: reader.result as ArrayBuffer,
isEnd: false,
timeRecieved: Date.now(),
});
};
}
};
Expand Down
6 changes: 5 additions & 1 deletion src/common.browser/PCMRecorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ export class PcmRecorder implements IRecorder {
if (outputStream && !outputStream.isClosed) {
const waveFrame = waveStreamEncoder.encode(needHeader, inputFrame);
if (!!waveFrame) {
outputStream.write(waveFrame);
outputStream.writeStreamChunk({
buffer: waveFrame,
isEnd: false,
timeRecieved: Date.now(),
});
needHeader = false;
}
}
Expand Down
42 changes: 30 additions & 12 deletions src/common.browser/ReplayableAudioNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {
private privBufferSerial: number = 0;
private privBufferedBytes: number = 0;
private privReplay: boolean = false;
private privLastChunkAcquiredTime: number = 0;

public constructor(audioSource: IAudioStreamNode, format: AudioStreamFormatImpl) {
this.privAudioNode = audioSource;
Expand Down Expand Up @@ -48,11 +49,11 @@ export class ReplayableAudioNode implements IAudioStreamNode {

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].buffer.byteLength;
while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}

const retVal: ArrayBuffer = this.privBuffers[i].buffer.slice(bytesToSeek);
const retVal: ArrayBuffer = this.privBuffers[i].chunk.buffer.slice(bytesToSeek);

this.privReplayOffset += (retVal.byteLength / this.privFormat.avgBytesPerSec) * 1e+7;

Expand All @@ -64,14 +65,14 @@ export class ReplayableAudioNode implements IAudioStreamNode {
return PromiseHelper.fromResult<IStreamChunk<ArrayBuffer>>({
buffer: retVal,
isEnd: false,
timeRecieved: this.privBuffers[i].chunk.timeRecieved,
});
}

return this.privAudioNode.read()
.onSuccessContinueWith((result: IStreamChunk<ArrayBuffer>) => {
if (result.buffer) {

this.privBuffers.push(new BufferEntry(result.buffer, this.privBufferSerial++, this.privBufferedBytes));
this.privBuffers.push(new BufferEntry(result, this.privBufferSerial++, this.privBufferedBytes));
this.privBufferedBytes += result.buffer.byteLength;
}
return result;
Expand All @@ -91,7 +92,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {
}

// Shrinks the existing audio buffers to start at the new offset, or at the
// beginnign of the buffer closest to the requested offset.
// beginning of the buffer closest to the requested offset.
// A replay request will start from the last shrink point.
public shrinkBuffers(offset: number): void {
this.privLastShrinkOffset = offset;
Expand All @@ -105,26 +106,43 @@ export class ReplayableAudioNode implements IAudioStreamNode {

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].buffer.byteLength;
while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}
this.privBufferStartOffset = Math.round(offset - ((bytesToSeek / this.privFormat.avgBytesPerSec) * 1e+7));

this.privBuffers = this.privBuffers.slice(i);
}

// Maps a stream offset (100-ns ticks) back to the wall-clock time the
// chunk covering that offset was first received. Returns 0 when the offset
// precedes the buffered window or no buffered chunk covers it.
public findTimeAtOffset(offset: number): number {
    if (offset >= this.privBufferStartOffset) {
        for (const entry of this.privBuffers) {
            const begin: number = (entry.byteOffset / this.privFormat.avgBytesPerSec) * 1e7;
            const end: number = begin + ((entry.chunk.buffer.byteLength / this.privFormat.avgBytesPerSec) * 1e7);

            if (begin <= offset && offset <= end) {
                return entry.chunk.timeRecieved;
            }
        }
    }

    return 0;
}
}

// Primary use of this class is to help debugging problems with the replay
// code. If the memory cost of alloc / dealloc gets too much, drop it and just use
// the ArrayBuffer directly.
// tslint:disable-next-line:max-classes-per-file
class BufferEntry {
public buffer: ArrayBuffer;
public chunk: IStreamChunk<ArrayBuffer>;
public serial: number;
public byteOffset: number;

public constructor(buffer: ArrayBuffer, serial: number, byteOffset: number) {
this.buffer = buffer;
public constructor(chunk: IStreamChunk<ArrayBuffer>, serial: number, byteOffset: number) {
this.chunk = chunk;
this.serial = serial;
this.byteOffset = byteOffset;
}
Expand Down
21 changes: 8 additions & 13 deletions src/common.speech/IntentServiceRecognizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
connectionMessage.textBody,
resultProps);

this.privRequestSession.onHypothesis(result.offset);

ev = new IntentRecognitionEventArgs(result, speechHypothesis.Offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);

if (!!this.privIntentRecognizer.recognizing) {
Expand All @@ -110,14 +112,9 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
connectionMessage.textBody,
resultProps);

ev = new IntentRecognitionEventArgs(result, result.offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);
ev = new IntentRecognitionEventArgs(result, result.offset, this.privRequestSession.sessionId);

const sendEvent: () => void = () => {
if (this.privRecognizerConfig.isContinuousRecognition) {
// For continuous recognition telemetry has to be sent for every phrase as per spec.
this.sendTelemetryData();
}

if (!!this.privIntentRecognizer.recognized) {
try {
this.privIntentRecognizer.recognized(this.privIntentRecognizer, ev);
Expand Down Expand Up @@ -148,6 +145,8 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
// If intent data was sent, the terminal result for this recognizer is an intent being found.
// If no intent data was sent, the terminal event is speech recognition being successful.
if (false === this.privIntentDataSent || ResultReason.NoMatch === ev.result.reason) {
// Advance the buffers.
this.privRequestSession.onPhraseRecognized(ev.offset + ev.result.duration);
sendEvent();
} else {
// Squirrel away the args, when the response event arrives it will build upon them
Expand All @@ -158,11 +157,6 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
break;
case "response":
// Response from LUIS
if (this.privRecognizerConfig.isContinuousRecognition) {
// For continuous recognition telemetry has to be sent for every phrase as per spec.
this.sendTelemetryData();
}

ev = this.privPendingIntentArgs;
this.privPendingIntentArgs = undefined;

Expand Down Expand Up @@ -209,13 +203,14 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
reason,
ev.result.text,
ev.result.duration,
ev.result.offset + this.privRequestSession.currentTurnAudioOffset,
ev.result.offset,
ev.result.errorDetails,
ev.result.json,
properties),
ev.offset + this.privRequestSession.currentTurnAudioOffset,
ev.offset,
ev.sessionId);
}
this.privRequestSession.onPhraseRecognized(ev.offset + ev.result.duration);

if (!!this.privIntentRecognizer.recognized) {
try {
Expand Down
37 changes: 37 additions & 0 deletions src/common.speech/RecognizerConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ export class SpeechServiceConfig {
export class Context {
public system: System;
public os: OS;
public audio: ISpeechConfigAudio;

constructor(os: OS) {
this.system = new System();
Expand Down Expand Up @@ -146,3 +147,39 @@ export class Device {
this.version = version;
}
}

// Audio section of the speech.config telemetry message: describes the
// capture (source) and, optionally, playback device in use.
export interface ISpeechConfigAudio {
    // Device audio is captured from (microphone, file, stream).
    source?: ISpeechConfigAudioDevice;
    // Device audio is rendered to, when applicable.
    playback?: ISpeechConfigAudioDevice;
}

// Description of a single audio device reported in telemetry.
// NOTE(review): the all-lowercase property names presumably mirror the
// service's JSON payload schema — confirm before renaming.
export interface ISpeechConfigAudioDevice {
    // Device manufacturer, e.g. "Speech SDK".
    manufacturer: string;
    // Device model or label, e.g. "File" or a microphone name.
    model: string;
    // How the device is connected; Unknown when not determinable.
    connectivity: connectivity;
    // Kind of device (microphone, speaker, ...); Unknown when not determinable.
    type: type;
    // Sample rate in samples per second.
    samplerate: number;
    // Bits per audio sample.
    bitspersample: number;
    // Number of audio channels.
    channelcount: number;
}

// How an audio device is connected to the host, as reported in telemetry.
// NOTE(review): lowercase enum name appears intentional to match the wire
// format — confirm before changing to PascalCase.
export enum connectivity {
    Bluetooth = "Bluetooth",
    Wired = "Wired",
    WiFi = "WiFi",
    Cellular = "Cellular",
    InBuilt = "InBuilt",
    Unknown = "Unknown",
}

// Category of audio device, as reported in telemetry.
// NOTE(review): lowercase enum name appears intentional to match the wire
// format — confirm before changing to PascalCase.
export enum type {
    Phone = "Phone",
    Speaker = "Speaker",
    Car = "Car",
    Headset = "Headset",
    Thermostat = "Thermostat",
    Microphones = "Microphones",
    Deskphone = "Deskphone",
    RemoteControl = "RemoteControl",
    Unknown = "Unknown"
}
Loading