Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve telemetry sent to Speech Service. #41

Merged
merged 2 commits into from
Mar 21, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions src/common.browser/FileAudioSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
// Licensed under the MIT license.

import { AudioStreamFormat, AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import {
connectivity,
ISpeechConfigAudioDevice,
type,
} from "../common.speech/Exports";
import {
AudioSourceErrorEvent,
AudioSourceEvent,
Expand Down Expand Up @@ -33,8 +38,6 @@ export class FileAudioSource implements IAudioSource {
// per second, with the chunk size == sample rate in bytes per second * 2 / 5).
private static readonly CHUNK_SIZE: number = FileAudioSource.SAMPLE_RATE * 2 / 5;

private static readonly UPLOAD_INTERVAL: number = 200; // milliseconds

// 10 seconds of audio in bytes =
// sample rate (bytes/second) * 600 (seconds) + 44 (size of the wave header).
private static readonly MAX_SIZE: number = FileAudioSource.SAMPLE_RATE * 600 + 44;
Expand Down Expand Up @@ -132,6 +135,18 @@ export class FileAudioSource implements IAudioSource {
return this.privEvents;
}

public get deviceInfo(): Promise<ISpeechConfigAudioDevice> {
    // Telemetry description of this audio source. A file has no meaningful
    // connectivity or device type, so both are reported as Unknown; the
    // format fields come straight from the fixed wave file format.
    const info: ISpeechConfigAudioDevice = {
        bitspersample: FileAudioSource.FILEFORMAT.bitsPerSample,
        channelcount: FileAudioSource.FILEFORMAT.channels,
        connectivity: connectivity.Unknown,
        manufacturer: "Speech SDK",
        model: "File",
        samplerate: FileAudioSource.FILEFORMAT.samplesPerSec,
        type: type.Unknown,
    };
    return PromiseHelper.fromResult(info);
}

private upload = (audioNodeId: string): Promise<StreamReader<ArrayBuffer>> => {
return this.turnOn()
.onSuccessContinueWith<StreamReader<ArrayBuffer>>((_: boolean) => {
Expand All @@ -149,7 +164,11 @@ export class FileAudioSource implements IAudioSource {
return; // output stream was closed (somebody called TurnOff). We're done here.
}

stream.write(reader.result as ArrayBuffer);
stream.writeStreamChunk({
buffer: reader.result as ArrayBuffer,
isEnd: false,
timeRecieved: Date.now(),
});

if (endOffset < this.privFile.size) {
startOffset = endOffset;
Expand Down
64 changes: 63 additions & 1 deletion src/common.browser/MicAudioSource.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { AudioStreamFormat, AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import {
AudioStreamFormat,
AudioStreamFormatImpl,
} from "../../src/sdk/Audio/AudioStreamFormat";
import {
connectivity,
ISpeechConfigAudioDevice,
type
} from "../common.speech/Exports";
import {
AudioSourceErrorEvent,
AudioSourceEvent,
Expand Down Expand Up @@ -49,6 +57,8 @@ export class MicAudioSource implements IAudioSource {

private privContext: AudioContext;

private privMicrophoneLabel: string;

public constructor(private readonly privRecorder: IRecorder, audioSourceId?: string, private readonly deviceId?: string) {
this.privId = audioSourceId ? audioSourceId : createNoDashGuid();
this.privEvents = new EventSource<AudioSourceEvent>();
Expand Down Expand Up @@ -184,6 +194,58 @@ export class MicAudioSource implements IAudioSource {
return this.privEvents;
}

public get deviceInfo(): Promise<ISpeechConfigAudioDevice> {
    // Resolve the human-readable microphone label first, then build the
    // telemetry payload around it; the audio format fields come from the
    // fixed capture format.
    return this.getMicrophoneLabel().onSuccessContinueWith((label: string): ISpeechConfigAudioDevice => ({
        bitspersample: MicAudioSource.AUDIOFORMAT.bitsPerSample,
        channelcount: MicAudioSource.AUDIOFORMAT.channels,
        connectivity: connectivity.Unknown,
        manufacturer: "Speech SDK",
        model: label,
        samplerate: MicAudioSource.AUDIOFORMAT.samplesPerSec,
        type: type.Microphones,
    }));
}

// Looks up the human-readable label of the microphone backing the active
// media stream. Falls back to "microphone" when the label cannot be
// determined. The result is cached in privMicrophoneLabel after the first
// successful lookup.
private getMicrophoneLabel(): Promise<string> {
    const defaultMicrophoneName: string = "microphone";

    // If we did this already, return the cached value.
    if (this.privMicrophoneLabel !== undefined) {
        return PromiseHelper.fromResult(this.privMicrophoneLabel);
    }

    // If the stream isn't currently running, we can't query devices because security.
    if (this.privMediaStream === undefined || !this.privMediaStream.active) {
        return PromiseHelper.fromResult(defaultMicrophoneName);
    }

    // Get the id of the device running the audio track.
    const microphoneDeviceId: string = this.privMediaStream.getTracks()[0].getSettings().deviceId;

    // If the browser doesn't support getting the device ID, set a default and return.
    if (undefined === microphoneDeviceId) {
        this.privMicrophoneLabel = defaultMicrophoneName;
        return PromiseHelper.fromResult(this.privMicrophoneLabel);
    }

    const deferred: Deferred<string> = new Deferred<string>();

    // Enumerate the media devices and find the label of the active device.
    navigator.mediaDevices.enumerateDevices().then((devices: MediaDeviceInfo[]) => {
        let label: string = defaultMicrophoneName;
        for (const device of devices) {
            if (device.deviceId === microphoneDeviceId) {
                // Found the device. The label is the empty string until the
                // user grants media permission, so fall back to the default.
                label = device.label || defaultMicrophoneName;
                break;
            }
        }
        // Always resolve: the previous implementation left the deferred
        // pending when no device matched, hanging deviceInfo() forever.
        this.privMicrophoneLabel = label;
        deferred.resolve(this.privMicrophoneLabel);
    }, () => {
        // enumerateDevices() rejected (unsupported or blocked); resolve with
        // the default rather than leaving the promise pending.
        deferred.resolve(defaultMicrophoneName);
    });

    return deferred.promise();
}

private listen = (audioNodeId: string): Promise<StreamReader<ArrayBuffer>> => {
return this.turnOn()
.onSuccessContinueWith<StreamReader<ArrayBuffer>>((_: boolean) => {
Expand Down
6 changes: 5 additions & 1 deletion src/common.browser/OpusRecorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ export class OpusRecorder implements IRecorder {
const reader = new FileReader();
reader.readAsArrayBuffer(dataAvailableEvent.data);
reader.onloadend = (event: ProgressEvent) => {
outputStream.write(reader.result as ArrayBuffer);
outputStream.writeStreamChunk({
buffer: reader.result as ArrayBuffer,
isEnd: false,
timeRecieved: Date.now(),
});
};
}
};
Expand Down
6 changes: 5 additions & 1 deletion src/common.browser/PCMRecorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ export class PcmRecorder implements IRecorder {
if (outputStream && !outputStream.isClosed) {
const waveFrame = waveStreamEncoder.encode(needHeader, inputFrame);
if (!!waveFrame) {
outputStream.write(waveFrame);
outputStream.writeStreamChunk({
buffer: waveFrame,
isEnd: false,
timeRecieved: Date.now(),
});
needHeader = false;
}
}
Expand Down
42 changes: 30 additions & 12 deletions src/common.browser/ReplayableAudioNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {
private privBufferSerial: number = 0;
private privBufferedBytes: number = 0;
private privReplay: boolean = false;
private privLastChunkAcquiredTime: number = 0;

public constructor(audioSource: IAudioStreamNode, format: AudioStreamFormatImpl) {
this.privAudioNode = audioSource;
Expand Down Expand Up @@ -48,11 +49,11 @@ export class ReplayableAudioNode implements IAudioStreamNode {

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].buffer.byteLength;
while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}

const retVal: ArrayBuffer = this.privBuffers[i].buffer.slice(bytesToSeek);
const retVal: ArrayBuffer = this.privBuffers[i].chunk.buffer.slice(bytesToSeek);

this.privReplayOffset += (retVal.byteLength / this.privFormat.avgBytesPerSec) * 1e+7;

Expand All @@ -64,14 +65,14 @@ export class ReplayableAudioNode implements IAudioStreamNode {
return PromiseHelper.fromResult<IStreamChunk<ArrayBuffer>>({
buffer: retVal,
isEnd: false,
timeRecieved: this.privBuffers[i].chunk.timeRecieved,
});
}

return this.privAudioNode.read()
.onSuccessContinueWith((result: IStreamChunk<ArrayBuffer>) => {
if (result.buffer) {

this.privBuffers.push(new BufferEntry(result.buffer, this.privBufferSerial++, this.privBufferedBytes));
this.privBuffers.push(new BufferEntry(result, this.privBufferSerial++, this.privBufferedBytes));
this.privBufferedBytes += result.buffer.byteLength;
}
return result;
Expand All @@ -91,7 +92,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {
}

// Shrinks the existing audio buffers to start at the new offset, or at the
// beginnign of the buffer closest to the requested offset.
// beginning of the buffer closest to the requested offset.
// A replay request will start from the last shrink point.
public shrinkBuffers(offset: number): void {
this.privLastShrinkOffset = offset;
Expand All @@ -105,26 +106,43 @@ export class ReplayableAudioNode implements IAudioStreamNode {

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].buffer.byteLength;
while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}
this.privBufferStartOffset = Math.round(offset - ((bytesToSeek / this.privFormat.avgBytesPerSec) * 1e+7));

this.privBuffers = this.privBuffers.slice(i);
}

// Maps a stream offset (100-ns ticks) back to the wall-clock time the
// chunk covering that offset was first received. Returns 0 when the offset
// precedes the buffered window or no buffered chunk covers it.
public findTimeAtOffset(offset: number): number {
    if (offset >= this.privBufferStartOffset) {
        for (const entry of this.privBuffers) {
            const begin: number = (entry.byteOffset / this.privFormat.avgBytesPerSec) * 1e7;
            const end: number = begin + ((entry.chunk.buffer.byteLength / this.privFormat.avgBytesPerSec) * 1e7);

            if (begin <= offset && offset <= end) {
                return entry.chunk.timeRecieved;
            }
        }
    }

    return 0;
}
}

// Primary use of this class is to help debugging problems with the replay
// code. If the memory cost of alloc / dealloc gets too much, drop it and just use
// the ArrayBuffer directly.
// tslint:disable-next-line:max-classes-per-file
class BufferEntry {
public buffer: ArrayBuffer;
public chunk: IStreamChunk<ArrayBuffer>;
public serial: number;
public byteOffset: number;

public constructor(buffer: ArrayBuffer, serial: number, byteOffset: number) {
this.buffer = buffer;
public constructor(chunk: IStreamChunk<ArrayBuffer>, serial: number, byteOffset: number) {
this.chunk = chunk;
this.serial = serial;
this.byteOffset = byteOffset;
}
Expand Down
21 changes: 8 additions & 13 deletions src/common.speech/IntentServiceRecognizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
connectionMessage.textBody,
resultProps);

this.privRequestSession.onHypothesis(result.offset);

ev = new IntentRecognitionEventArgs(result, speechHypothesis.Offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);

if (!!this.privIntentRecognizer.recognizing) {
Expand All @@ -110,14 +112,9 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
connectionMessage.textBody,
resultProps);

ev = new IntentRecognitionEventArgs(result, result.offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);
ev = new IntentRecognitionEventArgs(result, result.offset, this.privRequestSession.sessionId);

const sendEvent: () => void = () => {
if (this.privRecognizerConfig.isContinuousRecognition) {
// For continuous recognition telemetry has to be sent for every phrase as per spec.
this.sendTelemetryData();
}

if (!!this.privIntentRecognizer.recognized) {
try {
this.privIntentRecognizer.recognized(this.privIntentRecognizer, ev);
Expand Down Expand Up @@ -148,6 +145,8 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
// If intent data was sent, the terminal result for this recognizer is an intent being found.
// If no intent data was sent, the terminal event is speech recognition being successful.
if (false === this.privIntentDataSent || ResultReason.NoMatch === ev.result.reason) {
// Advance the buffers.
this.privRequestSession.onPhraseRecognized(ev.offset + ev.result.duration);
sendEvent();
} else {
// Squirrel away the args, when the response event arrives it will build upon them
Expand All @@ -158,11 +157,6 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
break;
case "response":
// Response from LUIS
if (this.privRecognizerConfig.isContinuousRecognition) {
// For continuous recognition telemetry has to be sent for every phrase as per spec.
this.sendTelemetryData();
}

ev = this.privPendingIntentArgs;
this.privPendingIntentArgs = undefined;

Expand Down Expand Up @@ -209,13 +203,14 @@ export class IntentServiceRecognizer extends ServiceRecognizerBase {
reason,
ev.result.text,
ev.result.duration,
ev.result.offset + this.privRequestSession.currentTurnAudioOffset,
ev.result.offset,
ev.result.errorDetails,
ev.result.json,
properties),
ev.offset + this.privRequestSession.currentTurnAudioOffset,
ev.offset,
ev.sessionId);
}
this.privRequestSession.onPhraseRecognized(ev.offset + ev.result.duration);

if (!!this.privIntentRecognizer.recognized) {
try {
Expand Down
37 changes: 37 additions & 0 deletions src/common.speech/RecognizerConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ export class SpeechServiceConfig {
export class Context {
public system: System;
public os: OS;
public audio: ISpeechConfigAudio;

constructor(os: OS) {
this.system = new System();
Expand Down Expand Up @@ -146,3 +147,39 @@ export class Device {
this.version = version;
}
}

// Audio section of the speech.config telemetry message: describes the
// capture (source) and, optionally, playback device in use.
export interface ISpeechConfigAudio {
    // Device audio is captured from (microphone, file, stream).
    source?: ISpeechConfigAudioDevice;
    // Device audio is rendered to, when applicable.
    playback?: ISpeechConfigAudioDevice;
}

// Description of a single audio device reported in telemetry.
// NOTE(review): the all-lowercase property names presumably mirror the
// service's JSON payload schema — confirm before renaming.
export interface ISpeechConfigAudioDevice {
    // Device manufacturer, e.g. "Speech SDK".
    manufacturer: string;
    // Device model or label, e.g. "File" or a microphone name.
    model: string;
    // How the device is connected; Unknown when not determinable.
    connectivity: connectivity;
    // Kind of device (microphone, speaker, ...); Unknown when not determinable.
    type: type;
    // Sample rate in samples per second.
    samplerate: number;
    // Bits per audio sample.
    bitspersample: number;
    // Number of audio channels.
    channelcount: number;
}

// How an audio device is connected to the host, as reported in telemetry.
// NOTE(review): lowercase enum name appears intentional to match the wire
// format — confirm before changing to PascalCase.
export enum connectivity {
    Bluetooth = "Bluetooth",
    Wired = "Wired",
    WiFi = "WiFi",
    Cellular = "Cellular",
    InBuilt = "InBuilt",
    Unknown = "Unknown",
}

// Category of audio device, as reported in telemetry.
// NOTE(review): lowercase enum name appears intentional to match the wire
// format — confirm before changing to PascalCase.
export enum type {
    Phone = "Phone",
    Speaker = "Speaker",
    Car = "Car",
    Headset = "Headset",
    Thermostat = "Thermostat",
    Microphones = "Microphones",
    Deskphone = "Deskphone",
    RemoteControl = "RemoteControl",
    Unknown = "Unknown"
}
Loading