Skip to content

Commit

Permalink
feat: transcribe audio in background script
Browse files Browse the repository at this point in the history
Chrome 73 no longer allows bypassing CORS from content scripts,
so speech service APIs are now called from the background script.

The origin of background requests is removed for privacy.

Closes #81.
  • Loading branch information
dessant committed May 28, 2019
1 parent 43f9ce5 commit 2c89926
Show file tree
Hide file tree
Showing 3 changed files with 327 additions and 278 deletions.
320 changes: 318 additions & 2 deletions src/background/main.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import browser from 'webextension-polyfill';
import audioBufferToWav from 'audiobuffer-to-wav';

import {initStorage} from 'storage/init';
import storage from 'storage/storage';
Expand All @@ -13,9 +14,18 @@ import {
scriptsAllowed,
functionInContext,
getBrowser,
getPlatform
getPlatform,
arrayBufferToBase64
} from 'utils/common';
import {clientAppVersion} from 'utils/config';
import {
captchaGoogleSpeechApiLangCodes,
captchaIbmSpeechApiLangCodes,
captchaMicrosoftSpeechApiLangCodes,
captchaWitSpeechApiLangCodes,
ibmSpeechApiUrls,
microsoftSpeechApiUrls
} from 'utils/data';
import {clientAppVersion, witApiKeys} from 'utils/config';

let nativePort;

Expand Down Expand Up @@ -132,6 +142,305 @@ async function setChallengeLocale() {
}
}

// webRequest.onBeforeSendHeaders handler: strips the extension's own
// Origin header from outgoing speech API requests so the services do not
// learn which extension issued the call.
function removeRequestOrigin(details) {
  const ownOrigin = window.location.origin;
  const headers = details.requestHeaders;

  const index = headers.findIndex(
    header =>
      header.name.toLowerCase() === 'origin' && header.value === ownOrigin
  );
  if (index !== -1) {
    headers.splice(index, 1);
  }

  return {requestHeaders: headers};
}

// Installs the Origin-stripping listener for every supported speech
// service endpoint, unless it is already registered.
function addBackgroundRequestLitener() {
  if (browser.webRequest.onBeforeSendHeaders.hasListener(removeRequestOrigin)) {
    return;
  }

  const urls = [
    'https://www.google.com/*',
    'https://api.wit.ai/*',
    'https://speech.googleapis.com/*',
    'https://stream-fra.watsonplatform.net/*',
    'https://stream.watsonplatform.net/*',
    'https://gateway-wdc.watsonplatform.net/*',
    'https://gateway-syd.watsonplatform.net/*',
    'https://gateway-tok.watsonplatform.net/*',
    'https://eastus.stt.speech.microsoft.com/*',
    'https://westus.stt.speech.microsoft.com/*',
    'https://westus2.stt.speech.microsoft.com/*',
    'https://eastasia.stt.speech.microsoft.com/*',
    'https://southeastasia.stt.speech.microsoft.com/*',
    'https://westeurope.stt.speech.microsoft.com/*',
    'https://northeurope.stt.speech.microsoft.com/*'
  ];

  browser.webRequest.onBeforeSendHeaders.addListener(
    removeRequestOrigin,
    {urls, types: ['xmlhttprequest']},
    ['blocking', 'requestHeaders']
  );
}

// Detaches the Origin-stripping listener if it is currently installed.
function removeBackgroundRequestLitener() {
  const event = browser.webRequest.onBeforeSendHeaders;
  if (event.hasListener(removeRequestOrigin)) {
    event.removeListener(removeRequestOrigin);
  }
}

// Decodes the captcha audio (ArrayBuffer) and re-renders it as 16 kHz
// mono PCM, returning the result encoded as a WAV ArrayBuffer.
async function prepareAudio(audio) {
  const decodeCtx = new AudioContext();
  const decoded = await decodeCtx.decodeAudioData(audio);
  await decodeCtx.close();

  const sampleRate = 16000;
  // a single output channel forces a mono downmix
  const renderCtx = new OfflineAudioContext(
    1,
    sampleRate * decoded.duration,
    sampleRate
  );

  const source = renderCtx.createBufferSource();
  source.buffer = decoded;
  source.connect(renderCtx.destination);
  // discard 1.5 second noise from beginning/end
  source.start(0, 1.5, decoded.duration - 3);

  const rendered = await renderCtx.startRendering();
  return audioBufferToWav(rendered);
}

// Resolves the Wit.ai API key for a language model: the bundled demo key
// for the demo service, otherwise the user's key from synced storage.
async function getWitSpeechApiKey(speechService, language) {
  if (speechService === 'witSpeechApiDemo') {
    return witApiKeys[language];
  }

  const {witSpeechApiKeys} = await storage.get('witSpeechApiKeys', 'sync');
  return witSpeechApiKeys[language];
}

// Posts the WAV sample to the Wit.ai speech endpoint and returns the
// trimmed transcript. Throws on any non-200 response.
async function getWitSpeechApiResult(apiKey, audioContent) {
  const rsp = await fetch('https://api.wit.ai/speech', {
    referrer: '',
    mode: 'cors',
    method: 'POST',
    headers: {Authorization: 'Bearer ' + apiKey},
    body: new Blob([audioContent], {type: 'audio/wav'})
  });

  if (rsp.status !== 200) {
    throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
  }

  const data = await rsp.json();
  return data._text.trim();
}

// Transcribes the WAV sample with the IBM Watson Speech to Text API.
// Returns the trimmed transcript of the best alternative, or undefined
// when the service recognized nothing. Throws on any non-200 response.
async function getIbmSpeechApiResult(apiUrl, apiKey, audioContent, language) {
  const rsp = await fetch(
    `${apiUrl}?model=${language}&profanity_filter=false`,
    {
      referrer: '',
      mode: 'cors',
      method: 'POST',
      headers: {
        // IBM Cloud basic auth requires the literal lowercase username
        // "apikey"; "apiKey" is rejected by the service.
        Authorization: 'Basic ' + window.btoa('apikey:' + apiKey),
        'X-Watson-Learning-Opt-Out': 'true'
      },
      body: new Blob([audioContent], {type: 'audio/wav'})
    }
  );

  if (rsp.status !== 200) {
    throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
  }

  const results = (await rsp.json()).results;
  if (results && results.length) {
    return results[0].alternatives[0].transcript.trim();
  }
}

// Transcribes the WAV sample with the Microsoft Speech to Text API.
// Returns the trimmed lexical form of the top recognition candidate, or
// undefined when the service recognized nothing. Throws on non-200.
async function getMicrosoftSpeechApiResult(
  apiUrl,
  apiKey,
  audioContent,
  language
) {
  const rsp = await fetch(
    `${apiUrl}?language=${language}&format=detailed&profanity=raw`,
    {
      referrer: '',
      mode: 'cors',
      method: 'POST',
      headers: {
        'Ocp-Apim-Subscription-Key': apiKey,
        'Content-type': 'audio/wav; codec=audio/pcm; samplerate=16000'
      },
      body: new Blob([audioContent], {type: 'audio/wav'})
    }
  );

  if (rsp.status !== 200) {
    throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
  }

  const results = (await rsp.json()).NBest;
  // guard length too: an empty NBest array would make results[0] throw
  if (results && results.length) {
    return results[0].Lexical.trim();
  }
}

// Fetches the captcha audio challenge, converts it to 16 kHz mono WAV and
// transcribes it with the user's configured speech service. Returns the
// transcript string, or undefined when the service yields no result or an
// API key is missing (a notification is shown in the latter case).
// Services with language-specific models retry once with the English model
// when enabled and the first attempt returns nothing.
async function transcribeAudio(audioUrl, lang) {
  let solution;

  const audioRsp = await fetch(audioUrl, {referrer: ''});
  const audioContent = await prepareAudio(await audioRsp.arrayBuffer());

  const {speechService, tryEnglishSpeechModel} = await storage.get(
    ['speechService', 'tryEnglishSpeechModel'],
    'sync'
  );

  if (['witSpeechApiDemo', 'witSpeechApi'].includes(speechService)) {
    // Wit.ai uses a separate API key per language model.
    const language = captchaWitSpeechApiLangCodes[lang] || 'english';

    const apiKey = await getWitSpeechApiKey(speechService, language);
    if (!apiKey) {
      showNotification({messageId: 'error_missingApiKey'});
      return;
    }

    solution = await getWitSpeechApiResult(apiKey, audioContent);
    // fall back to the English model, which needs its own key
    if (!solution && language !== 'english' && tryEnglishSpeechModel) {
      const apiKey = await getWitSpeechApiKey(speechService, 'english');
      if (!apiKey) {
        showNotification({messageId: 'error_missingApiKey'});
        return;
      }
      solution = await getWitSpeechApiResult(apiKey, audioContent);
    }
  } else if (speechService === 'googleSpeechApi') {
    const {googleSpeechApiKey: apiKey} = await storage.get(
      'googleSpeechApiKey',
      'sync'
    );
    if (!apiKey) {
      showNotification({messageId: 'error_missingApiKey'});
      return;
    }
    const apiUrl = `https://speech.googleapis.com/v1p1beta1/speech:recognize?key=${apiKey}`;

    const language = captchaGoogleSpeechApiLangCodes[lang] || 'en-US';

    // request body per the Google Cloud Speech recognize schema; audio is
    // sent inline as base64
    const data = {
      audio: {
        content: arrayBufferToBase64(audioContent)
      },
      config: {
        encoding: 'LINEAR16',
        languageCode: language,
        model: 'video',
        sampleRateHertz: 16000
      }
    };
    // for non-English languages add en-US as an alternative in the same
    // request; the 'video' model is replaced since alternative languages
    // require the default model
    if (!['en-US', 'en-GB'].includes(language) && tryEnglishSpeechModel) {
      data.config.model = 'default';
      data.config.alternativeLanguageCodes = ['en-US'];
    }

    const rsp = await fetch(apiUrl, {
      referrer: '',
      mode: 'cors',
      method: 'POST',
      body: JSON.stringify(data)
    });

    if (rsp.status !== 200) {
      throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
    }

    const results = (await rsp.json()).results;
    if (results) {
      solution = results[0].alternatives[0].transcript.trim();
    }
  } else if (speechService === 'ibmSpeechApi') {
    const {
      ibmSpeechApiLoc: apiLoc,
      ibmSpeechApiKey: apiKey
    } = await storage.get(['ibmSpeechApiLoc', 'ibmSpeechApiKey'], 'sync');
    if (!apiKey) {
      showNotification({messageId: 'error_missingApiKey'});
      return;
    }
    // service endpoint is selected by the user's configured region
    const apiUrl = ibmSpeechApiUrls[apiLoc];
    const language =
      captchaIbmSpeechApiLangCodes[lang] || 'en-US_BroadbandModel';

    solution = await getIbmSpeechApiResult(
      apiUrl,
      apiKey,
      audioContent,
      language
    );
    if (
      !solution &&
      !['en-US_BroadbandModel', 'en-GB_BroadbandModel'].includes(language) &&
      tryEnglishSpeechModel
    ) {
      solution = await getIbmSpeechApiResult(
        apiUrl,
        apiKey,
        audioContent,
        'en-US_BroadbandModel'
      );
    }
  } else if (speechService === 'microsoftSpeechApi') {
    const {
      microsoftSpeechApiLoc: apiLoc,
      microsoftSpeechApiKey: apiKey
    } = await storage.get(
      ['microsoftSpeechApiLoc', 'microsoftSpeechApiKey'],
      'sync'
    );
    if (!apiKey) {
      showNotification({messageId: 'error_missingApiKey'});
      return;
    }
    // service endpoint is selected by the user's configured region
    const apiUrl = microsoftSpeechApiUrls[apiLoc];
    const language = captchaMicrosoftSpeechApiLangCodes[lang] || 'en-US';

    solution = await getMicrosoftSpeechApiResult(
      apiUrl,
      apiKey,
      audioContent,
      language
    );
    if (
      !solution &&
      !['en-US', 'en-GB'].includes(language) &&
      tryEnglishSpeechModel
    ) {
      solution = await getMicrosoftSpeechApiResult(
        apiUrl,
        apiKey,
        audioContent,
        'en-US'
      );
    }
  }

  return solution;
}

async function onMessage(request, sender) {
if (request.id === 'notification') {
showNotification({
Expand All @@ -147,6 +456,13 @@ async function onMessage(request, sender) {
if ([30, 100].includes(useCount)) {
await showContributePage('use');
}
} else if (request.id === 'transcribeAudio') {
addBackgroundRequestLitener();
try {
return transcribeAudio(request.audioUrl, request.lang);
} finally {
removeBackgroundRequestLitener();
}
} else if (request.id === 'resetCaptcha') {
await resetCaptcha(sender.tab.id, sender.frameId, request.challengeUrl);
} else if (request.id === 'getFramePos') {
Expand Down
Loading

0 comments on commit 2c89926

Please sign in to comment.