Skip to content

Commit

Permalink
feat: add IBM Speech to Text
Browse files Browse the repository at this point in the history
  • Loading branch information
dessant committed Dec 8, 2018
1 parent f51c3b8 commit 58f9106
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 15 deletions.
16 changes: 16 additions & 0 deletions src/_locales/en/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,16 @@
"description": "Value of the option."
},

"optionValue_speechService_ibmSpeechApi": {
"message": "IBM Speech to Text",
"description": "Value of the option."
},

"inputLabel_url": {
"message": "URL",
"description": "Placeholder of the input."
},

"inputLabel_apiKey": {
"message": "API key",
"description": "Placeholder of the input."
Expand Down Expand Up @@ -70,6 +80,12 @@
"description": "Error message."
},

"error_missingApiUrl": {
"message":
"API URL missing. Visit the options page to configure the service.",
"description": "Error message."
},

"error_missingApiKey": {
"message":
"API key missing. Visit the options page to configure the service.",
Expand Down
29 changes: 27 additions & 2 deletions src/options/App.vue
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@
:label="getText('inputLabel_apiKey')">
</v-textfield>
</div>
<div class="option text-field"
v-if="options.speechService === 'ibmSpeechApi'">
<v-textfield v-model="options.ibmSpeechApiUrl"
:label="getText('inputLabel_url')">
</v-textfield>
</div>
<div class="option text-field"
v-if="options.speechService === 'ibmSpeechApi'">
<v-textfield v-model="options.ibmSpeechApiKey"
:label="getText('inputLabel_apiKey')">
</v-textfield>
</div>
</div>
</div>
</div>
Expand All @@ -42,12 +54,18 @@ export default {
dataLoaded: false,
selectOptions: getOptionLabels({
speechService: ['googleSpeechApiDemo', 'googleSpeechApi']
speechService: [
'googleSpeechApiDemo',
'googleSpeechApi',
'ibmSpeechApi'
]
}),
options: {
speechService: '',
googleSpeechApiKey: ''
googleSpeechApiKey: '',
ibmSpeechApiUrl: '',
ibmSpeechApiKey: ''
}
};
},
Expand All @@ -62,6 +80,13 @@ export default {
for (const option of Object.keys(this.options)) {
this.options[option] = options[option];
this.$watch(`options.${option}`, async function(value) {
if (
['googleSpeechApiKey', 'ibmSpeechApiUrl', 'ibmSpeechApiKey'].includes(
option
)
) {
value = value.trim();
}
await storage.set({[option]: value}, 'sync');
});
}
Expand Down
63 changes: 54 additions & 9 deletions src/solve/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ import audioBufferToWav from 'audiobuffer-to-wav';

import storage from 'storage/storage';
import {getText, waitForElement, arrayBufferToBase64} from 'utils/common';
import {captchaGoogleSpeechApiLangCodes} from 'utils/data';
import {
captchaGoogleSpeechApiLangCodes,
captchaIbmSpeechApiLangCodes
} from 'utils/data';

let solverRunning = false;

Expand Down Expand Up @@ -61,9 +64,7 @@ async function prepareAudio(audio) {
// discard 1 second noise from beginning/end
source.start(0, 1, data.duration - 2);

return arrayBufferToBase64(
audioBufferToWav(await offlineCtx.startRendering())
);
return audioBufferToWav(await offlineCtx.startRendering());
}

function dispatchEnter(node) {
Expand Down Expand Up @@ -116,6 +117,7 @@ async function solve() {
audioUrl = result.audioUrl;
}

const lang = document.documentElement.lang;
const audioRsp = await fetch(audioUrl, {referrer: ''});
const audioContent = await prepareAudio(await audioRsp.arrayBuffer());

Expand Down Expand Up @@ -143,13 +145,11 @@ async function solve() {

const data = {
audio: {
content: audioContent
content: arrayBufferToBase64(audioContent)
},
config: {
encoding: 'LINEAR16',
languageCode:
captchaGoogleSpeechApiLangCodes[document.documentElement.lang] ||
'en-US',
languageCode: captchaGoogleSpeechApiLangCodes[lang] || 'en-US',
model: 'default',
sampleRateHertz: 16000
}
Expand All @@ -161,9 +161,54 @@ async function solve() {
body: JSON.stringify(data)
});

if (rsp.status !== 200) {
throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
}

const results = (await rsp.json()).results;
if (results) {
solution = results[0].alternatives[0].transcript;
solution = results[0].alternatives[0].transcript.trim();
}
}

if (speechService === 'ibmSpeechApi') {
const {
ibmSpeechApiUrl: apiUrl,
ibmSpeechApiKey: apiKey
} = await storage.get(['ibmSpeechApiUrl', 'ibmSpeechApiKey'], 'sync');
if (!apiUrl) {
browser.runtime.sendMessage({
id: 'notification',
messageId: 'error_missingApiUrl'
});
return;
}
if (!apiKey) {
browser.runtime.sendMessage({
id: 'notification',
messageId: 'error_missingApiKey'
});
return;
}
const model = captchaIbmSpeechApiLangCodes[lang] || 'en-US_BroadbandModel';

const rsp = await fetch(`${apiUrl}?model=${model}&profanity_filter=false`, {
referrer: '',
mode: 'cors',
method: 'POST',
headers: {
Authorization: 'Basic ' + window.btoa('apiKey:' + apiKey)
},
body: new Blob([audioContent], {type: 'audio/wav'})
});

if (rsp.status !== 200) {
throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
}

const results = (await rsp.json()).results;
if (results && results.length) {
solution = results[0].alternatives[0].transcript.trim();
}
}

Expand Down
28 changes: 28 additions & 0 deletions src/storage/versions/local/ONiJBs00o.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import browser from 'webextension-polyfill';

// Human-readable summary of this storage revision (exported with the module).
const message = 'Add IBM Speech to Text';

// Identifier written to `storageVersion` once upgrade() has run.
const revision = 'ONiJBs00o';
// Identifier written to `storageVersion` when downgrade() reverts this revision.
const downRevision = 'UoT3kGyBH';

// This migration operates on the extension's local storage area.
const storage = browser.storage.local;

/**
 * Applies this revision to the local storage area.
 *
 * Writes empty-string defaults for the IBM Speech to Text options
 * (`ibmSpeechApiUrl`, `ibmSpeechApiKey`) and records `revision` as the
 * current `storageVersion`, all in a single `storage.set` call.
 *
 * @returns {Promise} The promise returned by `storage.set`.
 */
async function upgrade() {
  return storage.set({
    ibmSpeechApiUrl: '',
    ibmSpeechApiKey: '',
    storageVersion: revision
  });
}

/**
 * Reverts this revision in the local storage area.
 *
 * First removes the IBM Speech to Text option keys, then — only after the
 * removal has completed — sets `storageVersion` back to `downRevision`.
 *
 * @returns {Promise} The promise returned by `storage.set`.
 */
async function downgrade() {
  const obsoleteKeys = ['ibmSpeechApiUrl', 'ibmSpeechApiKey'];
  await storage.remove(obsoleteKeys);

  return storage.set({storageVersion: downRevision});
}

export {message, revision, upgrade, downgrade};
2 changes: 1 addition & 1 deletion src/storage/versions/local/versions.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"versions": ["UoT3kGyBH"]}
{"versions": ["UoT3kGyBH", "ONiJBs00o"]}
28 changes: 28 additions & 0 deletions src/storage/versions/sync/ONiJBs00o.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import browser from 'webextension-polyfill';

// Human-readable summary of this storage revision (exported with the module).
const message = 'Add IBM Speech to Text';

// Identifier written to `storageVersion` once upgrade() has run.
const revision = 'ONiJBs00o';
// Identifier written to `storageVersion` when downgrade() reverts this revision.
const downRevision = 'UoT3kGyBH';

// This migration operates on the extension's sync storage area.
const storage = browser.storage.sync;

/**
 * Applies this revision to the sync storage area.
 *
 * Writes empty-string defaults for the IBM Speech to Text options
 * (`ibmSpeechApiUrl`, `ibmSpeechApiKey`) and records `revision` as the
 * current `storageVersion`, all in a single `storage.set` call.
 *
 * @returns {Promise} The promise returned by `storage.set`.
 */
async function upgrade() {
  return storage.set({
    ibmSpeechApiUrl: '',
    ibmSpeechApiKey: '',
    storageVersion: revision
  });
}

/**
 * Reverts this revision in the sync storage area.
 *
 * First removes the IBM Speech to Text option keys, then — only after the
 * removal has completed — sets `storageVersion` back to `downRevision`.
 *
 * @returns {Promise} The promise returned by `storage.set`.
 */
async function downgrade() {
  const obsoleteKeys = ['ibmSpeechApiUrl', 'ibmSpeechApiKey'];
  await storage.remove(obsoleteKeys);

  return storage.set({storageVersion: downRevision});
}

export {message, revision, upgrade, downgrade};
2 changes: 1 addition & 1 deletion src/storage/versions/sync/versions.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"versions": ["UoT3kGyBH"]}
{"versions": ["UoT3kGyBH", "ONiJBs00o"]}
34 changes: 32 additions & 2 deletions src/utils/data.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import browser from 'webextension-polyfill';

const optionKeys = ['speechService', 'googleSpeechApiKey'];
const optionKeys = [
'speechService',
'googleSpeechApiKey',
'ibmSpeechApiUrl',
'ibmSpeechApiKey'
];

// https://developers.google.com/recaptcha/docs/language
// https://cloud.google.com/speech-to-text/docs/languages
Expand Down Expand Up @@ -77,4 +82,29 @@ const captchaGoogleSpeechApiLangCodes = {
zu: 'zu-ZA'
};

export {optionKeys, captchaGoogleSpeechApiLangCodes};
// Lookup table from a language code to the name of an IBM Speech to Text
// broadband model. Regional variants that have no entry of their own in the
// model list below reuse the model of a related locale (e.g. zh-TW -> zh-CN,
// fr-CA -> fr-FR, pt-PT -> pt-BR).
// Model list: https://cloud.ibm.com/apidocs/speech-to-text#recognize-audio
const captchaIbmSpeechApiLangCodes = {
  'ar': 'ar-AR_BroadbandModel',
  'zh-CN': 'zh-CN_BroadbandModel',
  'zh-TW': 'zh-CN_BroadbandModel',
  'en-GB': 'en-GB_BroadbandModel',
  'en': 'en-US_BroadbandModel',
  'fr': 'fr-FR_BroadbandModel',
  'fr-CA': 'fr-FR_BroadbandModel',
  'de': 'de-DE_BroadbandModel',
  'de-AT': 'de-DE_BroadbandModel',
  'de-CH': 'de-DE_BroadbandModel',
  'ja': 'ja-JP_BroadbandModel',
  'ko': 'ko-KR_BroadbandModel',
  'pt': 'pt-BR_BroadbandModel',
  'pt-BR': 'pt-BR_BroadbandModel',
  'pt-PT': 'pt-BR_BroadbandModel',
  'es': 'es-ES_BroadbandModel',
  'es-419': 'es-ES_BroadbandModel'
};

export {
optionKeys,
captchaGoogleSpeechApiLangCodes,
captchaIbmSpeechApiLangCodes
};

0 comments on commit 58f9106

Please sign in to comment.