add test for trailing audio after silence timeout #678

Draft · wants to merge 5 commits into base: master
15 changes: 10 additions & 5 deletions src/common.browser/ReplayableAudioNode.ts
@@ -30,7 +30,7 @@ export class ReplayableAudioNode implements IAudioStreamNode {

// Reads and returns the next chunk of audio buffer.
// If replay of existing buffers are needed, read() will first seek and replay
-    // existing content, and upoin completion it will read new content from the underlying
+    // existing content, and upon completion it will read new content from the underlying
// audio node, saving that content into the replayable buffers.
public read(): Promise<IStreamChunk<ArrayBuffer>> {
// if there is a replay request to honor.
@@ -46,13 +46,14 @@ export class ReplayableAudioNode implements IAudioStreamNode {
}

let i: number = 0;

while (i < this.privBuffers.length && bytesToSeek >= this.privBuffers[i].chunk.buffer.byteLength) {
bytesToSeek -= this.privBuffers[i++].chunk.buffer.byteLength;
}

if (i < this.privBuffers.length) {
const retVal: ArrayBuffer = this.privBuffers[i].chunk.buffer.slice(bytesToSeek);
+                const timeReceived = this.privBuffers[i].chunk.timeReceived;
+                const isEnd: boolean = false;

this.privReplayOffset += (retVal.byteLength / this.privBytesPerSecond) * 1e+7;
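                // (byteLength / privBytesPerSecond) is seconds of audio; multiplying by 1e7 converts to 100-ns ticks, the SDK's offset unit.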

@@ -63,8 +64,8 @@ export class ReplayableAudioNode implements IAudioStreamNode {

return Promise.resolve<IStreamChunk<ArrayBuffer>>({
buffer: retVal,
-                    isEnd: false,
-                    timeReceived: this.privBuffers[i].chunk.timeReceived,
+                    isEnd,
+                    timeReceived,
});
}
}
@@ -85,12 +86,16 @@ export class ReplayableAudioNode implements IAudioStreamNode {
}

public replay(): void {
-        if (this.privBuffers && 0 !== this.privBuffers.length) {
+        if (!this.isEmpty()) {
this.privReplay = true;
this.privReplayOffset = this.privLastShrinkOffset;
}
}

+    public isEmpty(): boolean {
+        return !this.privBuffers || this.privBuffers.length === 0;
+    }

// Shrinks the existing audio buffers to start at the new offset, or at the
// beginning of the buffer closest to the requested offset.
// A replay request will start from the last shrink point.
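For context, a minimal sketch of how `replay()` and the new `isEmpty()` guard are meant to interact — hypothetical driver code assuming the `ReplayableAudioNode` class above, not part of this diff:

```typescript
// Illustration only: drain a ReplayableAudioNode, rewinding first when a
// previous turn left audio behind. replay() seeks back to the last shrink
// point, so read() re-delivers buffered chunks before pulling fresh ones
// from the underlying audio node.
async function drainWithReplay(node: ReplayableAudioNode): Promise<void> {
    if (!node.isEmpty()) {
        node.replay();
    }
    let chunk = await node.read();
    while (!chunk.isEnd) {
        // ...upload chunk.buffer to the service...
        chunk = await node.read();
    }
}
```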
10 changes: 9 additions & 1 deletion src/common.speech/RequestSession.ts
@@ -149,16 +149,24 @@ export class RequestSession {
public async onServiceTurnEndResponse(continuousRecognition: boolean): Promise<void> {
this.privTurnDeferral.resolve();

-        if (!continuousRecognition || this.isSpeechEnded) {
+        if (!continuousRecognition || this.privAudioNode.isEmpty()) {
await this.onComplete();
this.privInTurn = false;
} else {
+            // Trailing audio issue: restart ServiceRecognizerBase.sendAudio, then do the steps below.
// Start a new request set.
this.privTurnStartAudioOffset = this.privLastRecoOffset;
this.privAudioNode.replay();
+            if (this.isSpeechEnded) {
+                this.privIsSpeechEnded = false;
+            }
}
}

+    public get audioNode(): ReplayableAudioNode {
+        return this.privAudioNode;
+    }

public onSpeechContext(): void {
this.privRequestId = createNoDashGuid();
}
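In short, the turn-end branch now keys off whether audio is still buffered rather than the speech-ended flag alone. A runnable toy model of that decision — names and return values are illustrative, not SDK API:

```typescript
// Toy model of the new turn-end decision (illustration only).
interface TurnAudio { isEmpty(): boolean; replay(): void; }

function endOfTurnAction(continuousRecognition: boolean, audio: TurnAudio): "complete" | "replay" {
    if (!continuousRecognition || audio.isEmpty()) {
        return "complete"; // nothing buffered: finish the turn and the session
    }
    audio.replay();        // rewind to the last shrink point for the next turn
    return "replay";       // speech-ended flag is cleared so trailing audio resends
}
```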
29 changes: 25 additions & 4 deletions src/common.speech/ServiceRecognizerBase.ts
@@ -338,7 +338,12 @@ export abstract class ServiceRecognizerBase implements IDisposable {
}

void this.receiveMessage();
-        const audioSendPromise = this.sendAudio(audioNode);
+        void this.startSendingAudio(audioNode);
+        return;
+    }
+
+    public startSendingAudio(audioStreamNode: IAudioStreamNode): Promise<void> {
+        const audioSendPromise = this.sendAudio(audioStreamNode);

audioSendPromise.catch(async (error: string): Promise<void> => {
await this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.RuntimeError, error);
@@ -550,7 +555,7 @@

case "turn.end":
await this.sendTelemetryData();
-                    if (this.privRequestSession.isSpeechEnded && this.privMustReportEndOfStream) {
+                    if (this.privRequestSession.isSpeechEnded && this.privMustReportEndOfStream && this.privRequestSession.audioNode.isEmpty()) {
this.privMustReportEndOfStream = false;
await this.cancelRecognitionLocal(CancellationReason.EndOfStream, CancellationErrorCode.NoError, undefined);
}
@@ -563,7 +568,12 @@ export abstract class ServiceRecognizerBase implements IDisposable {
return;
} else {
connection = await this.fetchConnection();
-                        await this.sendPrePayloadJSON(connection);
+                        const restartAudio = !this.privRequestSession.audioNode.isEmpty();
+                        await this.sendSpeechServiceConfig(connection, this.privRequestSession, this.privRecognizerConfig.SpeechServiceConfig.serialize());
+                        await this.sendPrePayloadJSON(connection, restartAudio);
+                        if (restartAudio) {
+                            void this.startSendingAudio(this.privRequestSession.audioNode);
+                        }
}
break;

@@ -743,7 +753,18 @@
this.privRequestSession.recogNumber === startRecogNumber) {

const connection: IConnection = await this.fetchConnection();
-            const audioStreamChunk: IStreamChunk<ArrayBuffer> = await audioStreamNode.read();
+            let audioStreamChunk: IStreamChunk<ArrayBuffer> = {
+                buffer: null,
+                isEnd: true,
+                timeReceived: Date.now()};
+            try {
+                audioStreamChunk = await audioStreamNode.read();
+            } catch (error) {
+                if (!this.privIsLiveAudio) {
+                    this.privRequestSession.onSpeechEnded();
+                }
+                return;
+            }
// we have a new audio chunk to upload.
if (this.privRequestSession.isSpeechEnded) {
// If service already recognized audio end then don't send any more audio
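Taken together, these changes keep the session alive on `turn.end` while replayable audio is pending and restart the audio pump against the same node. A toy model of the guarded read loop — illustrative only; that `read()` rejects once a closed stream is fully drained is an assumption inferred from the catch branch above:

```typescript
// Toy model of the guarded audio pump (not the SDK's actual API).
interface Chunk { buffer: ArrayBuffer | null; isEnd: boolean; }
interface PumpSource { read(): Promise<Chunk>; }

async function pumpAudio(
    source: PumpSource,
    isLiveAudio: boolean,
    onSpeechEnded: () => void,
): Promise<void> {
    for (;;) {
        let chunk: Chunk;
        try {
            chunk = await source.read();
        } catch {
            // A rejected read() on file/push input now marks end-of-speech
            // instead of surfacing as a runtime error.
            if (!isLiveAudio) { onSpeechEnded(); }
            return;
        }
        if (chunk.isEnd) { return; }
        // ...send chunk.buffer over the websocket connection...
    }
}
```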
67 changes: 50 additions & 17 deletions tests/SpeechRecognizerSilenceTests.ts
@@ -88,30 +88,29 @@ const BuildSpeechConfig: () => sdk.SpeechConfig = (): sdk.SpeechConfig => {
return s;
};

-describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {
+describe.each([true])("Service based tests", (forceNodeWebSocket: boolean): void => {

-    beforeAll(() => {
+    beforeAll((): void => {
WebsocketMessageAdapter.forceNpmWebSocket = forceNodeWebSocket;
});

-    afterAll(() => {
+    afterAll((): void => {
WebsocketMessageAdapter.forceNpmWebSocket = false;
});

describe("Intiial Silence Tests", () => {
test("InitialSilenceTimeout (pull)", (done: jest.DoneCallback) => {
describe("Intiial Silence Tests", (): void => {
test("InitialSilenceTimeout (pull)", (done: jest.DoneCallback): void => {
// eslint-disable-next-line no-console
console.info("Name: InitialSilenceTimeout (pull)");
-            let p: sdk.PullAudioInputStream;
let bytesSent: number = 0;

// To make sure we don't send a ton of extra data.
// For reference, before the throttling was implemented, we sent 6-10x the required data.
const startTime: number = Date.now();

-            p = sdk.AudioInputStream.createPullStream(
+            const p = sdk.AudioInputStream.createPullStream(
{
-                    close: () => { return; },
+                    close: (): void => { return; },
read: (buffer: ArrayBuffer): number => {
bytesSent += buffer.byteLength;
return buffer.byteLength;
@@ -120,7 +119,7 @@ describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {

const config: sdk.AudioConfig = sdk.AudioConfig.fromStreamInput(p);

-            testInitialSilenceTimeout(config, done, (): void => {
+            testInitialSilenceTimeout(config, done, 2, (): void => {
const elapsed: number = Date.now() - startTime;

// We should have sent 5 seconds of audio unthrottled and then 2x the time reco took until we got a response.
@@ -130,7 +129,7 @@ describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {
});
}, 15000);

test("InitialSilenceTimeout (push)", (done: jest.DoneCallback) => {
test("InitialSilenceTimeout (push)", (done: jest.DoneCallback): void => {
// eslint-disable-next-line no-console
console.info("Name: InitialSilenceTimeout (push)");
const p: sdk.PushAudioInputStream = sdk.AudioInputStream.createPushStream();
@@ -143,7 +142,41 @@ describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {
testInitialSilenceTimeout(config, done);
}, 15000);

-        Settings.testIfDOMCondition("InitialSilenceTimeout (File)", (done: jest.DoneCallback) => {
+        test.only("Multi-turn silence test", (done: jest.DoneCallback) => {
+            // eslint-disable-next-line no-console
+            console.info("Name: Multi-turn silence test");
+            const s: sdk.SpeechConfig = BuildSpeechConfig();
+            objsToClose.push(s);
+
+            s.setProperty(sdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "3000");
+
+            const p: sdk.PushAudioInputStream = sdk.AudioInputStream.createPushStream();
+            objsToClose.push(p);
+            p.write(new ArrayBuffer(2 * 16000 * 5)); // 5 seconds of silence
+            p.close();
+
+            const r: sdk.SpeechRecognizer = new sdk.SpeechRecognizer(s, sdk.AudioConfig.fromStreamInput(p));
+            objsToClose.push(r);
+
+            let lastOffset: number = 0;
+
+            r.speechEndDetected = (r: sdk.Recognizer, e: sdk.RecognitionEventArgs): void => {
+                lastOffset = e.offset;
+            };
+
+            r.canceled = (r: sdk.Recognizer, e: sdk.SpeechRecognitionCanceledEventArgs): void => {
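+                // 5 s of pushed silence (2 bytes × 16000 Hz × 5 s) at the SDK's 100-ns offset resolution: 5 × 1e7 = 50,000,000 ticks.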
+                if (lastOffset !== 50000000) {
+                    done("Got unexpected offset: " + lastOffset.toString());
+                } else {
+                    done();
+                }
+            };
+
+            r.startContinuousRecognitionAsync();
+
+        }, 25000);

+        Settings.testIfDOMCondition("InitialSilenceTimeout (File)", (done: jest.DoneCallback): void => {
// eslint-disable-next-line no-console
console.info("Name: InitialSilenceTimeout (File)");
const audioFormat: AudioStreamFormatImpl = sdk.AudioStreamFormat.getDefaultInputFormat() as AudioStreamFormatImpl;
@@ -155,7 +188,7 @@ describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {
testInitialSilenceTimeout(config, done);
}, 15000);

-    const testInitialSilenceTimeout = (config: sdk.AudioConfig, done: jest.DoneCallback, addedChecks?: () => void): void => {
+    const testInitialSilenceTimeout = (config: sdk.AudioConfig, done: jest.DoneCallback, expectedNumReports: number = 2, addedChecks?: () => void): void => {
const s: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(s);

Expand All @@ -169,11 +202,11 @@ describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {

let numReports: number = 0;

-        r.canceled = (o: sdk.Recognizer, e: sdk.SpeechRecognitionCanceledEventArgs) => {
+        r.canceled = (o: sdk.Recognizer, e: sdk.SpeechRecognitionCanceledEventArgs): void => {
done(e.errorDetails);
};

-        r.recognized = (o: sdk.Recognizer, e: sdk.SpeechRecognitionEventArgs) => {
+        r.recognized = (o: sdk.Recognizer, e: sdk.SpeechRecognitionEventArgs): void => {
try {
const res: sdk.SpeechRecognitionResult = e.result;
expect(res).not.toBeUndefined();
@@ -193,7 +226,7 @@ describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {
};

r.recognizeOnceAsync(
-            (p2: sdk.SpeechRecognitionResult) => {
+            (p2: sdk.SpeechRecognitionResult): void => {
try {
const res: sdk.SpeechRecognitionResult = p2;
numReports++;
@@ -211,11 +244,11 @@ describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {
done(error);
}
},
-            (error: string) => {
+            (error: string): void => {
fail(error);
});

-        WaitForCondition(() => (numReports === 2), () => {
+        WaitForCondition((): boolean => (numReports === expectedNumReports), (): void => {
try {
if (!!addedChecks) {
addedChecks();