From 8d06a545d8c0a19b12f0d916c185b27725b87807 Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 31 Oct 2019 07:42:14 +0800 Subject: [PATCH] Keep AudioContext object and add WILL_START dictate state (#2520) * Keep AudioContext object and add WILL_START dictate state * Add entries --- CHANGELOG.md | 4 +- packages/bundle/package.json | 3 +- ...veServicesSpeechServicesPonyfillFactory.js | 22 ++++++++++ packages/component/src/Composer.js | 8 +++- packages/component/src/Dictation.js | 40 +------------------ .../component/src/SendBox/MicrophoneButton.js | 6 ++- .../src/Styles/StyleSet/MicrophoneButton.js | 8 +++- packages/core/src/constants/DictateState.js | 9 +++-- packages/core/src/reducers/dictateState.js | 6 ++- packages/core/src/sagas.js | 2 + ...DictateOnIncomingActivityFromOthersSaga.js | 5 ++- .../sagas/startDictateOnSpeakCompleteSaga.js | 23 +++++++++++ packages/core/src/selectors/dictateState.js | 1 + 13 files changed, 83 insertions(+), 54 deletions(-) create mode 100644 packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js create mode 100644 packages/core/src/selectors/dictateState.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c156d7786..783ad11617 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -97,7 +97,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Fixes [#2473](https://github.com/microsoft/BotFramework-WebChat/issues/2473). Fix samples 13 using wrong region for Speech Services credentials, by [@compulim](https://github.com/compulim) in PR [#2482](https://github.com/microsoft/BotFramework-WebChat/pull/2482) - Fixes [#2420](https://github.com/microsoft/BotFramework-WebChat/issues/2420). Fix saga error should not result in an unhandled exception, by [@compulim](https://github.com/compulim) in PR [#2421](https://github.com/microsoft/BotFramework-WebChat/pull/2421) - Fixes [#2513](https://github.com/microsoft/BotFramework-WebChat/issues/2513). 
Fix `core-js` not loading properly, by [@compulim](https://github.com/compulim) in PR [#2514](https://github.com/microsoft/BotFramework-WebChat/pull/2514) -- Fixes [#2516](https://github.com/microsoft/BotFramework-WebChat/issues/2516). Disable microphone input for `expecting` input hint on Safari, by [@compulim](https://github.com/compulim) in PR [#2517](https://github.com/microsoft/BotFramework-WebChat/pull/2517) +- Fixes [#2516](https://github.com/microsoft/BotFramework-WebChat/issues/2516). Disable microphone input for `expecting` input hint on Safari, by [@compulim](https://github.com/compulim) in PR [#2517](https://github.com/microsoft/BotFramework-WebChat/pull/2517) and PR [#2520](https://github.com/microsoft/BotFramework-WebChat/pull/2520) +- Fixes [#2518](https://github.com/microsoft/BotFramework-WebChat/issues/2518). Synthesis of bot activities with input hint expecting, should be interruptible, by [@compulim](https://github.com/compulim) in PR [#2520](https://github.com/microsoft/BotFramework-WebChat/pull/2520) +- Fixes [#2519](https://github.com/microsoft/BotFramework-WebChat/issues/2519). 
On Safari, microphone should turn on after synthesis of bot activities with input hint expecting, by [@compulim](https://github.com/compulim) in PR [#2520](https://github.com/microsoft/BotFramework-WebChat/pull/2520) ### Added diff --git a/packages/bundle/package.json b/packages/bundle/package.json index a8ecb035d1..6e8474aa18 100644 --- a/packages/bundle/package.json +++ b/packages/bundle/package.json @@ -42,11 +42,12 @@ "markdown-it": "^8.4.2", "markdown-it-for-inline": "^0.1.1", "memoize-one": "^5.0.2", + "microsoft-cognitiveservices-speech-sdk": "1.6.0", "microsoft-speech-browser-sdk": "^0.0.12", "prop-types": "^15.7.2", "sanitize-html": "^1.19.0", "url-search-params-polyfill": "^5.0.0", - "web-speech-cognitive-services": "^5.0.1", + "web-speech-cognitive-services": "5.0.1", "whatwg-fetch": "^3.0.0" }, "devDependencies": { diff --git a/packages/bundle/src/createCognitiveServicesSpeechServicesPonyfillFactory.js b/packages/bundle/src/createCognitiveServicesSpeechServicesPonyfillFactory.js index 3820d3343e..8dd0daa7e6 100644 --- a/packages/bundle/src/createCognitiveServicesSpeechServicesPonyfillFactory.js +++ b/packages/bundle/src/createCognitiveServicesSpeechServicesPonyfillFactory.js @@ -1,3 +1,4 @@ +import { AudioConfig } from 'microsoft-cognitiveservices-speech-sdk'; import createPonyfill from 'web-speech-cognitive-services/lib/SpeechServices'; export default function createCognitiveServicesSpeechServicesPonyfillFactory({ @@ -15,6 +16,27 @@ export default function createCognitiveServicesSpeechServicesPonyfillFactory({ 'Web Chat: Cognitive Services Speech Services support is currently in preview. If you encounter any problems, please file us an issue at https://github.com/microsoft/BotFramework-WebChat/issues/.' ); + // HACK: We should prevent AudioContext object from being recreated because they may be blessed and UX-wise expensive to recreate. 
+ // In Cognitive Services SDK, if they detect the "end" function is falsy, they will not call "end" but "suspend" instead. + // And on next recognition, they will re-use the AudioContext object. + if (!audioConfig) { + audioConfig = AudioConfig.fromDefaultMicrophoneInput(); + // audioConfig.privSource.privContext = new (window.AudioContext || window.webkitAudioContext)(); + + const source = audioConfig.privSource; + + // This piece of code is adopted from microsoft-cognitiveservices-speech-sdk/common.browser/MicAudioSource.ts. + // Instead of closing the AudioContext, it will just suspend it. And the next time it is needed, it will be resumed (by the original code). + source.destroyAudioContext = () => { + if (!source.privContext) { + return; + } + + source.privRecorder.releaseMediaResources(source.privContext); + source.privContext.state === 'running' && source.privContext.suspend(); + }; + } + return ({ referenceGrammarID }) => { const ponyfill = createPonyfill({ audioConfig, diff --git a/packages/component/src/Composer.js b/packages/component/src/Composer.js index 28b5c33fc4..003cee47bf 100644 --- a/packages/component/src/Composer.js +++ b/packages/component/src/Composer.js @@ -198,7 +198,13 @@ const Composer = ({ }, [dispatch, patchedSendTypingIndicator]); useEffect(() => { - dispatch(createConnectAction({ directLine, userID, username })); + dispatch( + createConnectAction({ + directLine, + userID, + username + }) + ); return () => { // TODO: [P3] disconnect() is an async call (pending -> fulfilled), we need to wait, or change it to reconnect() diff --git a/packages/component/src/Dictation.js b/packages/component/src/Dictation.js index 32ba29c46a..238eb73ac9 100644 --- a/packages/component/src/Dictation.js +++ b/packages/component/src/Dictation.js @@ -9,26 +9,6 @@ const { DictateState: { DICTATING, IDLE, STARTING } } = Constants; -const PrefixedAudioContext = window.AudioContext || window.webkitAudioContext; - -// The result of this check is asynchronous and 
it will fail on user interaction requirement. -async function canOpenMicrophone() { - const audioContext = new PrefixedAudioContext(); - - try { - if (audioContext.state === 'suspended') { - return await Promise.race([ - audioContext.resume().then(() => true), - new Promise(resolve => setImmediate(resolve)).then(() => false) - ]); - } - - return true; - } finally { - await audioContext.close(); - } -} - const Dictation = ({ dictateState, disabled, @@ -82,24 +62,6 @@ const Dictation = ({ onError && onError(event); }, [dictateState, onError, setDictateState, stopDictate]); - const shouldStart = !disabled && (dictateState === STARTING || dictateState === DICTATING) && !numSpeakingActivities; - - // We need to check if the browser allow us to do open microphone. - // In Safari, it block microphone access if the code was not executed based on user interaction. - - // Since the check call is asynchronous, the result will always fail the user interaction requirement. - // Thus, we can never open microphone after we receive the check result. - // Instead, we will both open microphone and check the result. If the result is negative, we will close the microphone. - - // TODO: [P3] Investigate if a resumed AudioContext instance is kept across multiple session, can we workaround Safari's restrictions. 
- useMemo(async () => { - if (shouldStart) { - const canStart = await canOpenMicrophone(); - - !canStart && stopDictate(); - } - }, [shouldStart, stopDictate]); - return ( ); }; diff --git a/packages/component/src/SendBox/MicrophoneButton.js b/packages/component/src/SendBox/MicrophoneButton.js index 97de6addca..782b8cf9c9 100644 --- a/packages/component/src/SendBox/MicrophoneButton.js +++ b/packages/component/src/SendBox/MicrophoneButton.js @@ -56,7 +56,9 @@ const connectMicrophoneButton = (...selectors) => { webSpeechPonyfill: { speechSynthesis, SpeechSynthesisUtterance } = {} }) => ({ click: () => { - if (dictateState === DictateState.STARTING || dictateState === DictateState.DICTATING) { + if (dictateState === DictateState.WILL_START) { + stopSpeakingActivity(); + } else if (dictateState === DictateState.DICTATING) { stopDictate(); setSendBox(dictateInterims.join(' ')); } else { @@ -67,7 +69,7 @@ const connectMicrophoneButton = (...selectors) => { primeSpeechSynthesis(speechSynthesis, SpeechSynthesisUtterance); }, dictating: dictateState === DictateState.DICTATING, - disabled: disabled || (dictateState === DictateState.STARTING || dictateState === DictateState.STOPPING), + disabled: disabled || (dictateState === DictateState.STARTING || dictateState === DictateState.STOPPING), language }), ...selectors diff --git a/packages/component/src/Styles/StyleSet/MicrophoneButton.js b/packages/component/src/Styles/StyleSet/MicrophoneButton.js index fb6b5b3473..d432c7a510 100644 --- a/packages/component/src/Styles/StyleSet/MicrophoneButton.js +++ b/packages/component/src/Styles/StyleSet/MicrophoneButton.js @@ -1,8 +1,12 @@ export default function createMicrophoneButtonStyle({ microphoneButtonColorOnDictate }) { return { // TODO: [P3] This path should not know anything about the DOM tree of - '&.dictating > button svg': { - fill: microphoneButtonColorOnDictate + '&.dictating > button': { + '&, &:focus, &:hover': { + '& svg': { + fill: microphoneButtonColorOnDictate + } + } 
} }; } diff --git a/packages/core/src/constants/DictateState.js b/packages/core/src/constants/DictateState.js index d037088573..7aef0394c4 100644 --- a/packages/core/src/constants/DictateState.js +++ b/packages/core/src/constants/DictateState.js @@ -1,6 +1,7 @@ const IDLE = 0; -const STARTING = 1; -const DICTATING = 2; -const STOPPING = 3; +const WILL_START = 1; +const STARTING = 2; +const DICTATING = 3; +const STOPPING = 4; -export { DICTATING, IDLE, STARTING, STOPPING }; +export { DICTATING, IDLE, STARTING, STOPPING, WILL_START }; diff --git a/packages/core/src/reducers/dictateState.js b/packages/core/src/reducers/dictateState.js index 2061f0728c..2b94ae8736 100644 --- a/packages/core/src/reducers/dictateState.js +++ b/packages/core/src/reducers/dictateState.js @@ -1,4 +1,4 @@ -import { DICTATING, IDLE, STARTING, STOPPING } from '../constants/DictateState'; +import { DICTATING, IDLE, STARTING, STOPPING, WILL_START } from '../constants/DictateState'; import { SET_DICTATE_STATE } from '../actions/setDictateState'; import { START_DICTATE } from '../actions/startDictate'; @@ -13,7 +13,7 @@ export default function dictateState(state = DEFAULT_STATE, { payload, type }) { break; case START_DICTATE: - if (state === IDLE || state === STOPPING) { + if (state === IDLE || state === STOPPING || state === WILL_START) { state = STARTING; } @@ -22,6 +22,8 @@ export default function dictateState(state = DEFAULT_STATE, { payload, type }) { case STOP_DICTATE: if (state === STARTING || state === DICTATING) { state = STOPPING; + } else if (state === WILL_START) { + state = IDLE; } break; diff --git a/packages/core/src/sagas.js b/packages/core/src/sagas.js index 2fb8e2237f..2340d205c2 100644 --- a/packages/core/src/sagas.js +++ b/packages/core/src/sagas.js @@ -15,6 +15,7 @@ import sendMessageToPostActivitySaga from './sagas/sendMessageToPostActivitySaga import sendPostBackToPostActivitySaga from './sagas/sendPostBackToPostActivitySaga'; import sendTypingIndicatorOnSetSendBoxSaga from 
'./sagas/sendTypingIndicatorOnSetSendBoxSaga'; import speakActivityAndStartDictateOnIncomingActivityFromOthersSaga from './sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga'; +import startDictateOnSpeakCompleteSaga from './sagas/startDictateOnSpeakCompleteSaga'; import startSpeakActivityOnPostActivitySaga from './sagas/startSpeakActivityOnPostActivitySaga'; import stopDictateOnCardActionSaga from './sagas/stopDictateOnCardActionSaga'; import stopSpeakingActivityOnInputSaga from './sagas/stopSpeakingActivityOnInputSaga'; @@ -38,6 +39,7 @@ export default function* sagas() { yield fork(sendPostBackToPostActivitySaga); yield fork(sendTypingIndicatorOnSetSendBoxSaga); yield fork(speakActivityAndStartDictateOnIncomingActivityFromOthersSaga); + yield fork(startDictateOnSpeakCompleteSaga); yield fork(startSpeakActivityOnPostActivitySaga); yield fork(stopDictateOnCardActionSaga); yield fork(stopSpeakingActivityOnInputSaga); diff --git a/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js b/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js index c59e954ca8..529b88ecb6 100644 --- a/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js +++ b/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js @@ -1,10 +1,11 @@ import { put, select, takeEvery } from 'redux-saga/effects'; import { INCOMING_ACTIVITY } from '../actions/incomingActivity'; +import { WILL_START } from '../constants/DictateState'; import markActivity from '../actions/markActivity'; +import setDictateState from '../actions/setDictateState'; import shouldSpeakIncomingActivitySelector from '../selectors/shouldSpeakIncomingActivity'; import speakableActivity from '../definitions/speakableActivity'; -import startDictate from '../actions/startDictate'; import stopDictate from '../actions/stopDictate'; import whileConnected from './effects/whileConnected'; @@ -25,7 
+26,7 @@ function* speakActivityAndStartDictateOnIncomingActivityFromOthers({ userID }) { } if (shouldSpeak && activity.inputHint === 'expectingInput') { - yield put(startDictate()); + yield put(setDictateState(WILL_START)); } else if (activity.inputHint === 'ignoringInput') { yield put(stopDictate()); } diff --git a/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js b/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js new file mode 100644 index 0000000000..0d8945b52e --- /dev/null +++ b/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js @@ -0,0 +1,23 @@ +import { put, select, takeEvery } from 'redux-saga/effects'; + +import { MARK_ACTIVITY } from '../actions/markActivity'; +import { of as activitiesOf } from '../selectors/activities'; +import { SET_DICTATE_STATE } from '../actions/setDictateState'; +import { WILL_START } from '../constants/DictateState'; +import dictateStateSelector from '../selectors/dictateState'; +import speakingActivity from '../definitions/speakingActivity'; +import startDictate from '../actions/startDictate'; + +function* startDictateOnSpeakComplete() { + const speakingActivities = yield select(activitiesOf(speakingActivity)); + const dictateState = yield select(dictateStateSelector); + + if (dictateState === WILL_START && !speakingActivities.length) { + yield put(startDictate()); + } +} + +// TODO: [P4] We should turn this into a reducer instead +export default function* startDictateOnSpeakCompleteSaga() { + yield takeEvery(({ type }) => type === MARK_ACTIVITY || type === SET_DICTATE_STATE, startDictateOnSpeakComplete); +} diff --git a/packages/core/src/selectors/dictateState.js b/packages/core/src/selectors/dictateState.js new file mode 100644 index 0000000000..1d3bc58776 --- /dev/null +++ b/packages/core/src/selectors/dictateState.js @@ -0,0 +1 @@ +export default ({ dictateState }) => dictateState;