From 58f9106f41c60af02856cc9a2c13a8b16d819503 Mon Sep 17 00:00:00 2001 From: dessant Date: Sat, 8 Dec 2018 02:47:49 +0200 Subject: [PATCH] feat: add IBM Speech to Text --- src/_locales/en/messages.json | 16 ++++++ src/options/App.vue | 29 ++++++++++- src/solve/main.js | 63 ++++++++++++++++++++---- src/storage/versions/local/ONiJBs00o.js | 28 +++++++++++ src/storage/versions/local/versions.json | 2 +- src/storage/versions/sync/ONiJBs00o.js | 28 +++++++++++ src/storage/versions/sync/versions.json | 2 +- src/utils/data.js | 34 ++++++++++++- 8 files changed, 187 insertions(+), 15 deletions(-) create mode 100644 src/storage/versions/local/ONiJBs00o.js create mode 100644 src/storage/versions/sync/ONiJBs00o.js diff --git a/src/_locales/en/messages.json b/src/_locales/en/messages.json index 03c06b8..eb32f85 100644 --- a/src/_locales/en/messages.json +++ b/src/_locales/en/messages.json @@ -29,6 +29,16 @@ "description": "Value of the option." }, + "optionValue_speechService_ibmSpeechApi": { + "message": "IBM Speech to Text", + "description": "Value of the option." + }, + + "inputLabel_url": { + "message": "URL", + "description": "Placeholder of the input." + }, + "inputLabel_apiKey": { "message": "API key", "description": "Placeholder of the input." @@ -70,6 +80,12 @@ "description": "Error message." }, + "error_missingApiUrl": { + "message": + "API URL missing. Visit the options page to configure the service.", + "description": "Error message." + }, + "error_missingApiKey": { "message": "API key missing. Visit the options page to configure the service.", diff --git a/src/options/App.vue b/src/options/App.vue index b92c5ed..2b802cb 100644 --- a/src/options/App.vue +++ b/src/options/App.vue @@ -17,6 +17,18 @@ :label="getText('inputLabel_apiKey')"> +
+ + +
+
+ + +
@@ -42,12 +54,18 @@ export default { dataLoaded: false, selectOptions: getOptionLabels({ - speechService: ['googleSpeechApiDemo', 'googleSpeechApi'] + speechService: [ + 'googleSpeechApiDemo', + 'googleSpeechApi', + 'ibmSpeechApi' + ] }), options: { speechService: '', - googleSpeechApiKey: '' + googleSpeechApiKey: '', + ibmSpeechApiUrl: '', + ibmSpeechApiKey: '' } }; }, @@ -62,6 +80,13 @@ export default { for (const option of Object.keys(this.options)) { this.options[option] = options[option]; this.$watch(`options.${option}`, async function(value) { + if ( + ['googleSpeechApiKey', 'ibmSpeechApiUrl', 'ibmSpeechApiKey'].includes( + option + ) + ) { + value = value.trim(); + } await storage.set({[option]: value}, 'sync'); }); } diff --git a/src/solve/main.js b/src/solve/main.js index aa1c750..73fc87c 100644 --- a/src/solve/main.js +++ b/src/solve/main.js @@ -3,7 +3,10 @@ import audioBufferToWav from 'audiobuffer-to-wav'; import storage from 'storage/storage'; import {getText, waitForElement, arrayBufferToBase64} from 'utils/common'; -import {captchaGoogleSpeechApiLangCodes} from 'utils/data'; +import { + captchaGoogleSpeechApiLangCodes, + captchaIbmSpeechApiLangCodes +} from 'utils/data'; let solverRunning = false; @@ -61,9 +64,7 @@ async function prepareAudio(audio) { // discard 1 second noise from beginning/end source.start(0, 1, data.duration - 2); - return arrayBufferToBase64( - audioBufferToWav(await offlineCtx.startRendering()) - ); + return audioBufferToWav(await offlineCtx.startRendering()); } function dispatchEnter(node) { @@ -116,6 +117,7 @@ async function solve() { audioUrl = result.audioUrl; } + const lang = document.documentElement.lang; const audioRsp = await fetch(audioUrl, {referrer: ''}); const audioContent = await prepareAudio(await audioRsp.arrayBuffer()); @@ -143,13 +145,11 @@ async function solve() { const data = { audio: { - content: audioContent + content: arrayBufferToBase64(audioContent) }, config: { encoding: 'LINEAR16', - languageCode: - captchaGoogleSpeechApiLangCodes[document.documentElement.lang] || - 'en-US', + languageCode: captchaGoogleSpeechApiLangCodes[lang] || 'en-US', model: 'default', sampleRateHertz: 16000 } @@ -161,9 +161,54 @@ async function solve() { body: JSON.stringify(data) }); + if (rsp.status !== 200) { + throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); + } + const results = (await rsp.json()).results; if (results) { - solution = results[0].alternatives[0].transcript; + solution = results[0].alternatives[0].transcript.trim(); + } + } + + if (speechService === 'ibmSpeechApi') { + const { + ibmSpeechApiUrl: apiUrl, + ibmSpeechApiKey: apiKey + } = await storage.get(['ibmSpeechApiUrl', 'ibmSpeechApiKey'], 'sync'); + if (!apiUrl) { + browser.runtime.sendMessage({ + id: 'notification', + messageId: 'error_missingApiUrl' + }); + return; + } + if (!apiKey) { + browser.runtime.sendMessage({ + id: 'notification', + messageId: 'error_missingApiKey' + }); + return; + } + const model = captchaIbmSpeechApiLangCodes[lang] || 'en-US_BroadbandModel'; + + const rsp = await fetch(`${apiUrl}?model=${model}&profanity_filter=false`, { + referrer: '', + mode: 'cors', + method: 'POST', + headers: { + Authorization: 'Basic ' + window.btoa('apiKey:' + apiKey) + }, + body: new Blob([audioContent], {type: 'audio/wav'}) + }); + + if (rsp.status !== 200) { + throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); + } + + const results = (await rsp.json()).results; + if (results && results.length) { + solution = results[0].alternatives[0].transcript.trim(); } } diff --git a/src/storage/versions/local/ONiJBs00o.js b/src/storage/versions/local/ONiJBs00o.js new file mode 100644 index 0000000..f5ad1eb --- /dev/null +++ b/src/storage/versions/local/ONiJBs00o.js @@ -0,0 +1,28 @@ +import browser from 'webextension-polyfill'; + +const message = 'Add IBM Speech to Text'; + +const revision = 'ONiJBs00o'; +const downRevision = 'UoT3kGyBH'; + +const storage = browser.storage.local; + +async function upgrade() { + const changes = { + ibmSpeechApiUrl: '', + ibmSpeechApiKey: '' + }; + + changes.storageVersion = revision; + return storage.set(changes); +} + +async function downgrade() { + const changes = {}; + await storage.remove(['ibmSpeechApiUrl', 'ibmSpeechApiKey']); + + changes.storageVersion = downRevision; + return storage.set(changes); +} + +export {message, revision, upgrade, downgrade}; diff --git a/src/storage/versions/local/versions.json b/src/storage/versions/local/versions.json index 291e517..a96c132 100644 --- a/src/storage/versions/local/versions.json +++ b/src/storage/versions/local/versions.json @@ -1 +1 @@ -{"versions": ["UoT3kGyBH"]} +{"versions": ["UoT3kGyBH", "ONiJBs00o"]} diff --git a/src/storage/versions/sync/ONiJBs00o.js b/src/storage/versions/sync/ONiJBs00o.js new file mode 100644 index 0000000..b667b61 --- /dev/null +++ b/src/storage/versions/sync/ONiJBs00o.js @@ -0,0 +1,28 @@ +import browser from 'webextension-polyfill'; + +const message = 'Add IBM Speech to Text'; + +const revision = 'ONiJBs00o'; +const downRevision = 'UoT3kGyBH'; + +const storage = browser.storage.sync; + +async function upgrade() { + const changes = { + ibmSpeechApiUrl: '', + ibmSpeechApiKey: '' + }; + + changes.storageVersion = revision; + return storage.set(changes); +} + +async function downgrade() { + const changes = {}; + await storage.remove(['ibmSpeechApiUrl', 'ibmSpeechApiKey']); + + changes.storageVersion = downRevision; + return storage.set(changes); +} + +export {message, revision, upgrade, downgrade}; diff --git a/src/storage/versions/sync/versions.json b/src/storage/versions/sync/versions.json index 291e517..a96c132 100644 --- a/src/storage/versions/sync/versions.json +++ b/src/storage/versions/sync/versions.json @@ -1 +1 @@ -{"versions": ["UoT3kGyBH"]} +{"versions": ["UoT3kGyBH", "ONiJBs00o"]} diff --git a/src/utils/data.js b/src/utils/data.js index c50b561..28d2b5a 100755 --- a/src/utils/data.js +++ b/src/utils/data.js @@ -1,6 +1,11 @@ import browser from 'webextension-polyfill'; -const optionKeys = ['speechService', 'googleSpeechApiKey']; +const optionKeys = [ + 'speechService', + 'googleSpeechApiKey', + 'ibmSpeechApiUrl', + 'ibmSpeechApiKey' +]; // https://developers.google.com/recaptcha/docs/language // https://cloud.google.com/speech-to-text/docs/languages @@ -77,4 +82,29 @@ const captchaGoogleSpeechApiLangCodes = { zu: 'zu-ZA' }; -export {optionKeys, captchaGoogleSpeechApiLangCodes}; +// https://cloud.ibm.com/apidocs/speech-to-text#recognize-audio +const captchaIbmSpeechApiLangCodes = { + ar: 'ar-AR_BroadbandModel', + 'zh-CN': 'zh-CN_BroadbandModel', + 'zh-TW': 'zh-CN_BroadbandModel', + 'en-GB': 'en-GB_BroadbandModel', + en: 'en-US_BroadbandModel', + fr: 'fr-FR_BroadbandModel', + 'fr-CA': 'fr-FR_BroadbandModel', + de: 'de-DE_BroadbandModel', + 'de-AT': 'de-DE_BroadbandModel', + 'de-CH': 'de-DE_BroadbandModel', + ja: 'ja-JP_BroadbandModel', + ko: 'ko-KR_BroadbandModel', + pt: 'pt-BR_BroadbandModel', + 'pt-BR': 'pt-BR_BroadbandModel', + 'pt-PT': 'pt-BR_BroadbandModel', + es: 'es-ES_BroadbandModel', + 'es-419': 'es-ES_BroadbandModel' +}; + +export { + optionKeys, + captchaGoogleSpeechApiLangCodes, + captchaIbmSpeechApiLangCodes +};