From f8c1dde068bc741d53c9ca75f7840659e03bf8bd Mon Sep 17 00:00:00 2001 From: dessant Date: Sun, 9 Dec 2018 00:41:33 +0200 Subject: [PATCH] feat: add Microsoft Azure Speech to Text API --- src/_locales/en/messages.json | 52 +++++++++++++++++++- src/options/App.vue | 30 +++++++++++- src/solve/main.js | 54 ++++++++++++++++++--- src/storage/versions/local/ONiJBs00o.js | 4 +- src/storage/versions/local/UidMDYaYA.js | 28 +++++++++++ src/storage/versions/local/UoT3kGyBH.js | 2 +- src/storage/versions/local/versions.json | 2 +- src/storage/versions/sync/ONiJBs00o.js | 4 +- src/storage/versions/sync/UidMDYaYA.js | 28 +++++++++++ src/storage/versions/sync/UoT3kGyBH.js | 2 +- src/storage/versions/sync/versions.json | 2 +- src/utils/data.js | 61 +++++++++++++++++++++++- 12 files changed, 250 insertions(+), 19 deletions(-) create mode 100644 src/storage/versions/local/UidMDYaYA.js create mode 100644 src/storage/versions/sync/UidMDYaYA.js diff --git a/src/_locales/en/messages.json b/src/_locales/en/messages.json index 5081797..6b468d2 100644 --- a/src/_locales/en/messages.json +++ b/src/_locales/en/messages.json @@ -30,7 +30,12 @@ }, "optionValue_speechService_ibmSpeechApi": { - "message": "IBM Speech to Text", + "message": "IBM Watson Speech to Text API", + "description": "Value of the option." + }, + + "optionValue_speechService_microsoftSpeechApi": { + "message": "Microsoft Azure Speech to Text API", "description": "Value of the option." }, @@ -64,6 +69,51 @@ "description": "Value of the option." }, + "optionTitle_microsoftSpeechApiLoc": { + "message": "API location", + "description": "Title of the option." + }, + + "optionValue_microsoftSpeechApiLoc_eastUs": { + "message": "East US", + "description": "Value of the option." + }, + + "optionValue_microsoftSpeechApiLoc_eastUs2": { + "message": "East US 2", + "description": "Value of the option." + }, + + "optionValue_microsoftSpeechApiLoc_westUs": { + "message": "West US", + "description": "Value of the option." 
+ }, + + "optionValue_microsoftSpeechApiLoc_westUs2": { + "message": "West US 2", + "description": "Value of the option." + }, + + "optionValue_microsoftSpeechApiLoc_eastAsia": { + "message": "East Asia", + "description": "Value of the option." + }, + + "optionValue_microsoftSpeechApiLoc_southeastAsia": { + "message": "Southeast Asia", + "description": "Value of the option." + }, + + "optionValue_microsoftSpeechApiLoc_westEu": { + "message": "West Europe", + "description": "Value of the option." + }, + + "optionValue_microsoftSpeechApiLoc_northEu": { + "message": "North Europe", + "description": "Value of the option." + }, + "inputLabel_apiKey": { "message": "API key", "description": "Placeholder of the input." diff --git a/src/options/App.vue b/src/options/App.vue index b2e7942..a130384 100644 --- a/src/options/App.vue +++ b/src/options/App.vue @@ -30,6 +30,19 @@ :label="getText('inputLabel_apiKey')"> +
+ + +
+
+ + +
@@ -58,7 +71,8 @@ export default { speechService: [ 'googleSpeechApiDemo', 'googleSpeechApi', - 'ibmSpeechApi' + 'ibmSpeechApi', + 'microsoftSpeechApi' ], ibmSpeechApiLoc: [ 'frankfurt', @@ -66,6 +80,16 @@ export default { 'washington', 'sydney', 'tokyo' + ], + microsoftSpeechApiLoc: [ + 'eastUs', + 'eastUs2', + 'westUs', + 'westUs2', + 'eastAsia', + 'southeastAsia', + 'westEu', + 'northEu' ] }), @@ -73,7 +97,9 @@ export default { speechService: '', googleSpeechApiKey: '', ibmSpeechApiLoc: '', - ibmSpeechApiKey: '' + ibmSpeechApiKey: '', + microsoftSpeechApiLoc: '', + microsoftSpeechApiKey: '' } }; }, diff --git a/src/solve/main.js b/src/solve/main.js index 7a4eec9..c915775 100644 --- a/src/solve/main.js +++ b/src/solve/main.js @@ -6,7 +6,9 @@ import {getText, waitForElement, arrayBufferToBase64} from 'utils/common'; import { captchaGoogleSpeechApiLangCodes, captchaIbmSpeechApiLangCodes, - ibmSpeechApiUrls + captchaMicrosoftSpeechApiLangCodes, + ibmSpeechApiUrls, + microsoftSpeechApiUrls } from 'utils/data'; let solverWorking = false; @@ -199,16 +201,56 @@ async function solve() { }); return; } + const apiUrl = ibmSpeechApiUrls[apiLoc]; const model = captchaIbmSpeechApiLangCodes[lang] || 'en-US_BroadbandModel'; + const rsp = await fetch(`${apiUrl}?model=${model}&profanity_filter=false`, { + referrer: '', + mode: 'cors', + method: 'POST', + headers: { + Authorization: 'Basic ' + window.btoa('apiKey:' + apiKey) + }, + body: new Blob([audioContent], {type: 'audio/wav'}) + }); + + if (rsp.status !== 200) { + throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); + } + + const results = (await rsp.json()).results; + if (results && results.length) { + solution = results[0].alternatives[0].transcript.trim(); + } + } + + if (speechService === 'microsoftSpeechApi') { + const { + microsoftSpeechApiLoc: apiLoc, + microsoftSpeechApiKey: apiKey + } = await storage.get( + ['microsoftSpeechApiLoc', 'microsoftSpeechApiKey'], + 'sync' + ); + if (!apiKey) { + 
browser.runtime.sendMessage({ + id: 'notification', + messageId: 'error_missingApiKey' + }); + return; + } + const apiUrl = microsoftSpeechApiUrls[apiLoc]; + const language = captchaMicrosoftSpeechApiLangCodes[lang] || 'en-US'; + const rsp = await fetch( - `${ibmSpeechApiUrls[apiLoc]}?model=${model}&profanity_filter=false`, + `${apiUrl}?language=${language}&format=detailed&profanity=raw`, { referrer: '', mode: 'cors', method: 'POST', headers: { - Authorization: 'Basic ' + window.btoa('apiKey:' + apiKey) + 'Ocp-Apim-Subscription-Key': apiKey, + 'Content-type': 'audio/wav; codec=audio/pcm; samplerate=16000' }, body: new Blob([audioContent], {type: 'audio/wav'}) } @@ -218,9 +260,9 @@ async function solve() { throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); } - const results = (await rsp.json()).results; - if (results && results.length) { - solution = results[0].alternatives[0].transcript.trim(); + const results = (await rsp.json()).NBest; + if (results && results.length) { + solution = results[0].Lexical.trim(); } } diff --git a/src/storage/versions/local/ONiJBs00o.js b/src/storage/versions/local/ONiJBs00o.js index 2897fa4..70a5fe9 100644 --- a/src/storage/versions/local/ONiJBs00o.js +++ b/src/storage/versions/local/ONiJBs00o.js @@ -1,6 +1,6 @@ import browser from 'webextension-polyfill'; -const message = 'Add IBM Speech to Text'; +const message = 'Add IBM Watson Speech to Text API'; const revision = 'ONiJBs00o'; const downRevision = 'UoT3kGyBH'; @@ -9,7 +9,7 @@ const storage = browser.storage.local; async function upgrade() { const changes = { - ibmSpeechApiLoc: 'frankfurt', // frankfurt, dallas, washington, sydney, tokyo + ibmSpeechApiLoc: 'frankfurt', // 'frankfurt', 'dallas', 'washington', 'sydney', 'tokyo' ibmSpeechApiKey: '' }; diff --git a/src/storage/versions/local/UidMDYaYA.js b/src/storage/versions/local/UidMDYaYA.js new file mode 100644 index 0000000..e8a9e47 --- /dev/null +++ b/src/storage/versions/local/UidMDYaYA.js @@ -0,0 +1,28 @@ +import browser from
'webextension-polyfill'; + +const message = 'Add Microsoft Azure Speech to Text API'; + +const revision = 'UidMDYaYA'; +const downRevision = 'ONiJBs00o'; + +const storage = browser.storage.local; + +async function upgrade() { + const changes = { + microsoftSpeechApiLoc: 'eastUs', // 'eastUs', 'eastUs2', 'westUs', 'westUs2', 'eastAsia', 'southeastAsia', 'westEu', 'northEu' + microsoftSpeechApiKey: '' + }; + + changes.storageVersion = revision; + return storage.set(changes); +} + +async function downgrade() { + const changes = {}; + await storage.remove(['microsoftSpeechApiLoc', 'microsoftSpeechApiKey']); + + changes.storageVersion = downRevision; + return storage.set(changes); +} + +export {message, revision, upgrade, downgrade}; diff --git a/src/storage/versions/local/UoT3kGyBH.js b/src/storage/versions/local/UoT3kGyBH.js index 2d363ba..6e3378c 100644 --- a/src/storage/versions/local/UoT3kGyBH.js +++ b/src/storage/versions/local/UoT3kGyBH.js @@ -9,7 +9,7 @@ const storage = browser.storage.local; async function upgrade() { const changes = { - speechService: 'googleSpeechApiDemo', // 'googleSpeechApiDemo', 'googleSpeechApi' + speechService: 'googleSpeechApiDemo', // 'googleSpeechApiDemo', 'googleSpeechApi', 'ibmSpeechApi', 'microsoftSpeechApi' googleSpeechApiKey: '', installTime: new Date().getTime(), useCount: 0 diff --git a/src/storage/versions/local/versions.json b/src/storage/versions/local/versions.json index a96c132..d6cdb8d 100644 --- a/src/storage/versions/local/versions.json +++ b/src/storage/versions/local/versions.json @@ -1 +1 @@ -{"versions": ["UoT3kGyBH", "ONiJBs00o"]} +{"versions": ["UoT3kGyBH", "ONiJBs00o", "UidMDYaYA"]} diff --git a/src/storage/versions/sync/ONiJBs00o.js b/src/storage/versions/sync/ONiJBs00o.js index 758f989..3f442cb 100644 --- a/src/storage/versions/sync/ONiJBs00o.js +++ b/src/storage/versions/sync/ONiJBs00o.js @@ -1,6 +1,6 @@ import browser from 'webextension-polyfill'; -const message = 'Add IBM Speech to Text'; +const message = 
'Add IBM Watson Speech to Text API'; const revision = 'ONiJBs00o'; const downRevision = 'UoT3kGyBH'; @@ -9,7 +9,7 @@ const storage = browser.storage.sync; async function upgrade() { const changes = { - ibmSpeechApiLoc: 'frankfurt', // frankfurt, dallas, washington, sydney, tokyo + ibmSpeechApiLoc: 'frankfurt', // 'frankfurt', 'dallas', 'washington', 'sydney', 'tokyo' ibmSpeechApiKey: '' }; diff --git a/src/storage/versions/sync/UidMDYaYA.js b/src/storage/versions/sync/UidMDYaYA.js new file mode 100644 index 0000000..50b2ff8 --- /dev/null +++ b/src/storage/versions/sync/UidMDYaYA.js @@ -0,0 +1,28 @@ +import browser from 'webextension-polyfill'; + +const message = 'Add Microsoft Azure Speech to Text API'; + +const revision = 'UidMDYaYA'; +const downRevision = 'ONiJBs00o'; + +const storage = browser.storage.sync; + +async function upgrade() { + const changes = { + microsoftSpeechApiLoc: 'eastUs', // 'eastUs', 'eastUs2', 'westUs', 'westUs2', 'eastAsia', 'southeastAsia', 'westEu', 'northEu' + microsoftSpeechApiKey: '' + }; + + changes.storageVersion = revision; + return storage.set(changes); +} + +async function downgrade() { + const changes = {}; + await storage.remove(['microsoftSpeechApiLoc', 'microsoftSpeechApiKey']); + + changes.storageVersion = downRevision; + return storage.set(changes); +} + +export {message, revision, upgrade, downgrade}; diff --git a/src/storage/versions/sync/UoT3kGyBH.js b/src/storage/versions/sync/UoT3kGyBH.js index fe66f5c..2cd125e 100644 --- a/src/storage/versions/sync/UoT3kGyBH.js +++ b/src/storage/versions/sync/UoT3kGyBH.js @@ -9,7 +9,7 @@ const storage = browser.storage.sync; async function upgrade() { const changes = { - speechService: 'googleSpeechApiDemo', // 'googleSpeechApiDemo', 'googleSpeechApi' + speechService: 'googleSpeechApiDemo', // 'googleSpeechApiDemo', 'googleSpeechApi', 'ibmSpeechApi', 'microsoftSpeechApi' googleSpeechApiKey: '', installTime: new Date().getTime(), useCount: 0 diff --git 
a/src/storage/versions/sync/versions.json b/src/storage/versions/sync/versions.json index a96c132..d6cdb8d 100644 --- a/src/storage/versions/sync/versions.json +++ b/src/storage/versions/sync/versions.json @@ -1 +1 @@ -{"versions": ["UoT3kGyBH", "ONiJBs00o"]} +{"versions": ["UoT3kGyBH", "ONiJBs00o", "UidMDYaYA"]} diff --git a/src/utils/data.js b/src/utils/data.js index fdffe1c..1f6ade1 100755 --- a/src/utils/data.js +++ b/src/utils/data.js @@ -4,7 +4,9 @@ const optionKeys = [ 'speechService', 'googleSpeechApiKey', 'ibmSpeechApiLoc', - 'ibmSpeechApiKey' + 'ibmSpeechApiKey', + 'microsoftSpeechApiLoc', + 'microsoftSpeechApiKey' ]; // https://developers.google.com/recaptcha/docs/language @@ -103,6 +105,39 @@ const captchaIbmSpeechApiLangCodes = { 'es-419': 'es-ES_BroadbandModel' }; +// https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#speech-to-text +const captchaMicrosoftSpeechApiLangCodes = { + ar: 'ar-EG', + ca: 'ca-ES', + 'zh-HK': 'zh-HK', + 'zh-CN': 'zh-CN', + 'zh-TW': 'zh-TW', + da: 'da-DK', + nl: 'nl-NL', + 'en-GB': 'en-GB', + en: 'en-US', + fi: 'fi-FI', + fr: 'fr-FR', + 'fr-CA': 'fr-CA', + de: 'de-DE', + 'de-AT': 'de-DE', + 'de-CH': 'de-DE', + hi: 'hi-IN', + it: 'it-IT', + ja: 'ja-JP', + ko: 'ko-KR', + no: 'nb-NO', + pl: 'pl-PL', + pt: 'pt-PT', + 'pt-BR': 'pt-BR', + 'pt-PT': 'pt-PT', + ru: 'ru-RU', + es: 'es-ES', + 'es-419': 'es-MX', + sv: 'sv-SE', + th: 'th-TH' +}; + // https://cloud.ibm.com/apidocs/speech-to-text#service-endpoint const ibmSpeechApiUrls = { frankfurt: @@ -116,9 +151,31 @@ const ibmSpeechApiUrls = { 'https://gateway-tok.watsonplatform.net/speech-to-text/api/v1/recognize' }; +// https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-apis#regions-and-endpoints +const microsoftSpeechApiUrls = { + eastUs: + 'https://eastus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1', + eastUs2: + 
'https://eastus2.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1', + westUs: + 'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1', + westUs2: + 'https://westus2.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1', + eastAsia: + 'https://eastasia.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1', + southeastAsia: + 'https://southeastasia.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1', + westEu: + 'https://westeurope.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1', + northEu: + 'https://northeurope.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1' +}; + export { optionKeys, captchaGoogleSpeechApiLangCodes, captchaIbmSpeechApiLangCodes, - ibmSpeechApiUrls + captchaMicrosoftSpeechApiLangCodes, + ibmSpeechApiUrls, + microsoftSpeechApiUrls };