diff --git a/src/background/main.js b/src/background/main.js index 2b6a737..9aba3b6 100644 --- a/src/background/main.js +++ b/src/background/main.js @@ -1,4 +1,5 @@ import browser from 'webextension-polyfill'; +import audioBufferToWav from 'audiobuffer-to-wav'; import {initStorage} from 'storage/init'; import storage from 'storage/storage'; @@ -13,9 +14,18 @@ import { scriptsAllowed, functionInContext, getBrowser, - getPlatform + getPlatform, + arrayBufferToBase64 } from 'utils/common'; -import {clientAppVersion} from 'utils/config'; +import { + captchaGoogleSpeechApiLangCodes, + captchaIbmSpeechApiLangCodes, + captchaMicrosoftSpeechApiLangCodes, + captchaWitSpeechApiLangCodes, + ibmSpeechApiUrls, + microsoftSpeechApiUrls +} from 'utils/data'; +import {clientAppVersion, witApiKeys} from 'utils/config'; let nativePort; @@ -132,6 +142,305 @@ async function setChallengeLocale() { } } +function removeRequestOrigin(details) { + const origin = window.location.origin; + const headers = details.requestHeaders; + for (const header of headers) { + if (header.name.toLowerCase() === 'origin' && header.value === origin) { + headers.splice(headers.indexOf(header), 1); + break; + } + } + + return {requestHeaders: headers}; +} + +function addBackgroundRequestLitener() { + if ( + !browser.webRequest.onBeforeSendHeaders.hasListener(removeRequestOrigin) + ) { + const urls = [ + 'https://www.google.com/*', + 'https://api.wit.ai/*', + 'https://speech.googleapis.com/*', + 'https://stream-fra.watsonplatform.net/*', + 'https://stream.watsonplatform.net/*', + 'https://gateway-wdc.watsonplatform.net/*', + 'https://gateway-syd.watsonplatform.net/*', + 'https://gateway-tok.watsonplatform.net/*', + 'https://eastus.stt.speech.microsoft.com/*', + 'https://westus.stt.speech.microsoft.com/*', + 'https://westus2.stt.speech.microsoft.com/*', + 'https://eastasia.stt.speech.microsoft.com/*', + 'https://southeastasia.stt.speech.microsoft.com/*', + 'https://westeurope.stt.speech.microsoft.com/*', + 
'https://northeurope.stt.speech.microsoft.com/*' + ]; + + browser.webRequest.onBeforeSendHeaders.addListener( + removeRequestOrigin, + { + urls, + types: ['xmlhttprequest'] + }, + ['blocking', 'requestHeaders'] + ); + } +} + +function removeBackgroundRequestLitener() { + if (browser.webRequest.onBeforeSendHeaders.hasListener(removeRequestOrigin)) { + browser.webRequest.onBeforeSendHeaders.removeListener(removeRequestOrigin); + } +} + +async function prepareAudio(audio) { + const ctx = new AudioContext(); + const data = await ctx.decodeAudioData(audio); + await ctx.close(); + + const offlineCtx = new OfflineAudioContext( + // force mono output + 1, + 16000 * data.duration, + 16000 + ); + const source = offlineCtx.createBufferSource(); + source.buffer = data; + source.connect(offlineCtx.destination); + // discard 1.5 second noise from beginning/end + source.start(0, 1.5, data.duration - 3); + + return audioBufferToWav(await offlineCtx.startRendering()); +} + +async function getWitSpeechApiKey(speechService, language) { + if (speechService === 'witSpeechApiDemo') { + return witApiKeys[language]; + } else { + const {witSpeechApiKeys: apiKeys} = await storage.get( + 'witSpeechApiKeys', + 'sync' + ); + return apiKeys[language]; + } +} + +async function getWitSpeechApiResult(apiKey, audioContent) { + const rsp = await fetch('https://api.wit.ai/speech', { + referrer: '', + mode: 'cors', + method: 'POST', + headers: { + Authorization: 'Bearer ' + apiKey + }, + body: new Blob([audioContent], {type: 'audio/wav'}) + }); + + if (rsp.status !== 200) { + throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); + } + + return (await rsp.json())._text.trim(); +} + +async function getIbmSpeechApiResult(apiUrl, apiKey, audioContent, language) { + const rsp = await fetch( + `${apiUrl}?model=${language}&profanity_filter=false`, + { + referrer: '', + mode: 'cors', + method: 'POST', + headers: { + Authorization: 'Basic ' + window.btoa('apiKey:' + apiKey), + 
'X-Watson-Learning-Opt-Out': 'true' + }, + body: new Blob([audioContent], {type: 'audio/wav'}) + } + ); + + if (rsp.status !== 200) { + throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); + } + + const results = (await rsp.json()).results; + if (results && results.length) { + return results[0].alternatives[0].transcript.trim(); + } +} + +async function getMicrosoftSpeechApiResult( + apiUrl, + apiKey, + audioContent, + language +) { + const rsp = await fetch( + `${apiUrl}?language=${language}&format=detailed&profanity=raw`, + { + referrer: '', + mode: 'cors', + method: 'POST', + headers: { + 'Ocp-Apim-Subscription-Key': apiKey, + 'Content-type': 'audio/wav; codec=audio/pcm; samplerate=16000' + }, + body: new Blob([audioContent], {type: 'audio/wav'}) + } + ); + + if (rsp.status !== 200) { + throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); + } + + const results = (await rsp.json()).NBest; + if (results) { + return results[0].Lexical.trim(); + } +} + +async function transcribeAudio(audioUrl, lang) { + let solution; + + const audioRsp = await fetch(audioUrl, {referrer: ''}); + const audioContent = await prepareAudio(await audioRsp.arrayBuffer()); + + const {speechService, tryEnglishSpeechModel} = await storage.get( + ['speechService', 'tryEnglishSpeechModel'], + 'sync' + ); + + if (['witSpeechApiDemo', 'witSpeechApi'].includes(speechService)) { + const language = captchaWitSpeechApiLangCodes[lang] || 'english'; + + const apiKey = await getWitSpeechApiKey(speechService, language); + if (!apiKey) { + showNotification({messageId: 'error_missingApiKey'}); + return; + } + + solution = await getWitSpeechApiResult(apiKey, audioContent); + if (!solution && language !== 'english' && tryEnglishSpeechModel) { + const apiKey = await getWitSpeechApiKey(speechService, 'english'); + if (!apiKey) { + showNotification({messageId: 'error_missingApiKey'}); + return; + } + solution = await getWitSpeechApiResult(apiKey, audioContent); + } + } else if 
(speechService === 'googleSpeechApi') { + const {googleSpeechApiKey: apiKey} = await storage.get( + 'googleSpeechApiKey', + 'sync' + ); + if (!apiKey) { + showNotification({messageId: 'error_missingApiKey'}); + return; + } + const apiUrl = `https://speech.googleapis.com/v1p1beta1/speech:recognize?key=${apiKey}`; + + const language = captchaGoogleSpeechApiLangCodes[lang] || 'en-US'; + + const data = { + audio: { + content: arrayBufferToBase64(audioContent) + }, + config: { + encoding: 'LINEAR16', + languageCode: language, + model: 'video', + sampleRateHertz: 16000 + } + }; + if (!['en-US', 'en-GB'].includes(language) && tryEnglishSpeechModel) { + data.config.model = 'default'; + data.config.alternativeLanguageCodes = ['en-US']; + } + + const rsp = await fetch(apiUrl, { + referrer: '', + mode: 'cors', + method: 'POST', + body: JSON.stringify(data) + }); + + if (rsp.status !== 200) { + throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); + } + + const results = (await rsp.json()).results; + if (results) { + solution = results[0].alternatives[0].transcript.trim(); + } + } else if (speechService === 'ibmSpeechApi') { + const { + ibmSpeechApiLoc: apiLoc, + ibmSpeechApiKey: apiKey + } = await storage.get(['ibmSpeechApiLoc', 'ibmSpeechApiKey'], 'sync'); + if (!apiKey) { + showNotification({messageId: 'error_missingApiKey'}); + return; + } + const apiUrl = ibmSpeechApiUrls[apiLoc]; + const language = + captchaIbmSpeechApiLangCodes[lang] || 'en-US_BroadbandModel'; + + solution = await getIbmSpeechApiResult( + apiUrl, + apiKey, + audioContent, + language + ); + if ( + !solution && + !['en-US_BroadbandModel', 'en-GB_BroadbandModel'].includes(language) && + tryEnglishSpeechModel + ) { + solution = await getIbmSpeechApiResult( + apiUrl, + apiKey, + audioContent, + 'en-US_BroadbandModel' + ); + } + } else if (speechService === 'microsoftSpeechApi') { + const { + microsoftSpeechApiLoc: apiLoc, + microsoftSpeechApiKey: apiKey + } = await storage.get( + 
['microsoftSpeechApiLoc', 'microsoftSpeechApiKey'], + 'sync' + ); + if (!apiKey) { + showNotification({messageId: 'error_missingApiKey'}); + return; + } + const apiUrl = microsoftSpeechApiUrls[apiLoc]; + const language = captchaMicrosoftSpeechApiLangCodes[lang] || 'en-US'; + + solution = await getMicrosoftSpeechApiResult( + apiUrl, + apiKey, + audioContent, + language + ); + if ( + !solution && + !['en-US', 'en-GB'].includes(language) && + tryEnglishSpeechModel + ) { + solution = await getMicrosoftSpeechApiResult( + apiUrl, + apiKey, + audioContent, + 'en-US' + ); + } + } + + return solution; +} + async function onMessage(request, sender) { if (request.id === 'notification') { showNotification({ @@ -147,6 +456,13 @@ async function onMessage(request, sender) { if ([30, 100].includes(useCount)) { await showContributePage('use'); } + } else if (request.id === 'transcribeAudio') { + addBackgroundRequestLitener(); + try { + return await transcribeAudio(request.audioUrl, request.lang); + } finally { + removeBackgroundRequestLitener(); + } } else if (request.id === 'resetCaptcha') { await resetCaptcha(sender.tab.id, sender.frameId, request.challengeUrl); } else if (request.id === 'getFramePos') { diff --git a/src/solve/main.js b/src/solve/main.js index c3250ec..f4cd0f8 100644 --- a/src/solve/main.js +++ b/src/solve/main.js @@ -1,24 +1,9 @@ import browser from 'webextension-polyfill'; -import audioBufferToWav from 'audiobuffer-to-wav'; import storage from 'storage/storage'; import {meanSleep, pingClientApp} from 'utils/app'; -import { - getText, - waitForElement, - arrayBufferToBase64, - getRandomFloat, - sleep -} from 'utils/common'; -import { - captchaGoogleSpeechApiLangCodes, - captchaIbmSpeechApiLangCodes, - captchaMicrosoftSpeechApiLangCodes, - captchaWitSpeechApiLangCodes, - ibmSpeechApiUrls, - microsoftSpeechApiUrls -} from 'utils/data'; -import {clientAppVersion, witApiKeys} from 'utils/config'; +import {getText, waitForElement, getRandomFloat, sleep} from 
'utils/common'; +import {clientAppVersion} from 'utils/config'; let solverWorking = false; @@ -97,26 +82,6 @@ function isBlocked({timeout = 0} = {}) { return Boolean(document.querySelector(selector)); } -async function prepareAudio(audio) { - const ctx = new AudioContext(); - const data = await ctx.decodeAudioData(audio); - await ctx.close(); - - const offlineCtx = new OfflineAudioContext( - // force mono output - 1, - 16000 * data.duration, - 16000 - ); - const source = offlineCtx.createBufferSource(); - source.buffer = data; - source.connect(offlineCtx.destination); - // discard 1.5 second noise from beginning/end - source.start(0, 1.5, data.duration - 3); - - return audioBufferToWav(await offlineCtx.startRendering()); -} - function dispatchEnter(node) { const keyEvent = { code: 'Enter', @@ -246,7 +211,7 @@ async function getElementScreenRect(node, browserBorder) { height /= osScale; } -return {x, y, width, height}; + return {x, y, width, height}; } async function getClickPos(node, browserBorder) { @@ -258,94 +223,7 @@ async function getClickPos(node, browserBorder) { }; } -async function getWitSpeechApiKey(speechService, language) { - if (speechService === 'witSpeechApiDemo') { - return witApiKeys[language]; - } else { - const {witSpeechApiKeys: apiKeys} = await storage.get( - 'witSpeechApiKeys', - 'sync' - ); - return apiKeys[language]; - } -} - -async function getWitSpeechApiResult(apiKey, audioContent) { - const rsp = await fetch('https://api.wit.ai/speech', { - referrer: '', - mode: 'cors', - method: 'POST', - headers: { - Authorization: 'Bearer ' + apiKey - }, - body: new Blob([audioContent], {type: 'audio/wav'}) - }); - - if (rsp.status !== 200) { - throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); - } - - return (await rsp.json())._text.trim(); -} - -async function getIbmSpeechApiResult(apiUrl, apiKey, audioContent, language) { - const rsp = await fetch( - `${apiUrl}?model=${language}&profanity_filter=false`, - { - referrer: '', - mode: 
'cors', - method: 'POST', - headers: { - Authorization: 'Basic ' + window.btoa('apiKey:' + apiKey), - 'X-Watson-Learning-Opt-Out': 'true' - }, - body: new Blob([audioContent], {type: 'audio/wav'}) - } - ); - - if (rsp.status !== 200) { - throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); - } - - const results = (await rsp.json()).results; - if (results && results.length) { - return results[0].alternatives[0].transcript.trim(); - } -} - -async function getMicrosoftSpeechApiResult( - apiUrl, - apiKey, - audioContent, - language -) { - const rsp = await fetch( - `${apiUrl}?language=${language}&format=detailed&profanity=raw`, - { - referrer: '', - mode: 'cors', - method: 'POST', - headers: { - 'Ocp-Apim-Subscription-Key': apiKey, - 'Content-type': 'audio/wav; codec=audio/pcm; samplerate=16000' - }, - body: new Blob([audioContent], {type: 'audio/wav'}) - } - ); - - if (rsp.status !== 200) { - throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); - } - - const results = (await rsp.json()).NBest; - if (results) { - return results[0].Lexical.trim(); - } -} - async function solve(simulateUserInput, clickEvent) { - let solution; - if (isBlocked()) { return; } @@ -435,158 +313,13 @@ async function solve(simulateUserInput, clickEvent) { } const audioUrl = audioEl.src; - const lang = document.documentElement.lang; - const audioRsp = await fetch(audioUrl, {referrer: ''}); - const audioContent = await prepareAudio(await audioRsp.arrayBuffer()); - - const {speechService, tryEnglishSpeechModel} = await storage.get( - ['speechService', 'tryEnglishSpeechModel'], - 'sync' - ); - if (['witSpeechApiDemo', 'witSpeechApi'].includes(speechService)) { - const language = captchaWitSpeechApiLangCodes[lang] || 'english'; - - const apiKey = await getWitSpeechApiKey(speechService, language); - if (!apiKey) { - browser.runtime.sendMessage({ - id: 'notification', - messageId: 'error_missingApiKey' - }); - return; - } - - solution = await 
getWitSpeechApiResult(apiKey, audioContent); - if (!solution && language !== 'english' && tryEnglishSpeechModel) { - const apiKey = await getWitSpeechApiKey(speechService, 'english'); - if (!apiKey) { - browser.runtime.sendMessage({ - id: 'notification', - messageId: 'error_missingApiKey' - }); - return; - } - solution = await getWitSpeechApiResult(apiKey, audioContent); - } - } else if (speechService === 'googleSpeechApi') { - const {googleSpeechApiKey: apiKey} = await storage.get( - 'googleSpeechApiKey', - 'sync' - ); - if (!apiKey) { - browser.runtime.sendMessage({ - id: 'notification', - messageId: 'error_missingApiKey' - }); - return; - } - const apiUrl = `https://speech.googleapis.com/v1p1beta1/speech:recognize?key=${apiKey}`; - - const language = captchaGoogleSpeechApiLangCodes[lang] || 'en-US'; - - const data = { - audio: { - content: arrayBufferToBase64(audioContent) - }, - config: { - encoding: 'LINEAR16', - languageCode: language, - model: 'video', - sampleRateHertz: 16000 - } - }; - if (!['en-US', 'en-GB'].includes(language) && tryEnglishSpeechModel) { - data.config.model = 'default'; - data.config.alternativeLanguageCodes = ['en-US']; - } - - const rsp = await fetch(apiUrl, { - referrer: '', - mode: 'cors', - method: 'POST', - body: JSON.stringify(data) - }); - - if (rsp.status !== 200) { - throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`); - } - - const results = (await rsp.json()).results; - if (results) { - solution = results[0].alternatives[0].transcript.trim(); - } - } else if (speechService === 'ibmSpeechApi') { - const { - ibmSpeechApiLoc: apiLoc, - ibmSpeechApiKey: apiKey - } = await storage.get(['ibmSpeechApiLoc', 'ibmSpeechApiKey'], 'sync'); - if (!apiKey) { - browser.runtime.sendMessage({ - id: 'notification', - messageId: 'error_missingApiKey' - }); - return; - } - const apiUrl = ibmSpeechApiUrls[apiLoc]; - const language = - captchaIbmSpeechApiLangCodes[lang] || 'en-US_BroadbandModel'; - - solution = await 
getIbmSpeechApiResult( - apiUrl, - apiKey, - audioContent, - language - ); - if ( - !solution && - !['en-US_BroadbandModel', 'en-GB_BroadbandModel'].includes(language) && - tryEnglishSpeechModel - ) { - solution = await getIbmSpeechApiResult( - apiUrl, - apiKey, - audioContent, - 'en-US_BroadbandModel' - ); - } - } else if (speechService === 'microsoftSpeechApi') { - const { - microsoftSpeechApiLoc: apiLoc, - microsoftSpeechApiKey: apiKey - } = await storage.get( - ['microsoftSpeechApiLoc', 'microsoftSpeechApiKey'], - 'sync' - ); - if (!apiKey) { - browser.runtime.sendMessage({ - id: 'notification', - messageId: 'error_missingApiKey' - }); - return; - } - const apiUrl = microsoftSpeechApiUrls[apiLoc]; - const language = captchaMicrosoftSpeechApiLangCodes[lang] || 'en-US'; - - solution = await getMicrosoftSpeechApiResult( - apiUrl, - apiKey, - audioContent, - language - ); - if ( - !solution && - !['en-US', 'en-GB'].includes(language) && - tryEnglishSpeechModel - ) { - solution = await getMicrosoftSpeechApiResult( - apiUrl, - apiKey, - audioContent, - 'en-US' - ); - } - } + const solution = await browser.runtime.sendMessage({ + id: 'transcribeAudio', + audioUrl, + lang + }); if (!solution) { browser.runtime.sendMessage({ diff --git a/src/utils/data.js b/src/utils/data.js index 3559c8d..1dcf609 100755 --- a/src/utils/data.js +++ b/src/utils/data.js @@ -323,7 +323,7 @@ const ibmSpeechApiUrls = { 'https://gateway-tok.watsonplatform.net/speech-to-text/api/v1/recognize' }; -// https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-apis#regions-and-endpoints +// https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-speech-to-text const microsoftSpeechApiUrls = { eastUs: 'https://eastus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1',