-
Notifications
You must be signed in to change notification settings - Fork 246
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(rosetta): improve translation throughput (#3083)
Previously, Rosetta would divide all the examples to translate into `N` equally sized arrays, and spawn `N` workers to translate them all. Experimentation shows that the time required to translate samples is very unevenly distributed, and many workers used to be idle for half of the time after having finished their `1/Nth` of the samples, hurting throughput. Switch to a model where we have `N` workers, and we constantly feed them a small amount of work until all the work is done. This keeps all workers busy until the work is complete, improving the throughput a lot. On my machine, this improves a run of Rosetta on the CDK repository with 8 workers from ~30m to ~15m. --- By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license]. [Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0
- Loading branch information
Showing
5 changed files
with
82 additions
and
94 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,31 @@ | ||
/** | ||
* Pool worker for extract.ts | ||
*/ | ||
import * as worker from 'worker_threads'; | ||
import * as workerpool from 'workerpool'; | ||
|
||
import * as logging from '../logging'; | ||
import { TypeScriptSnippet } from '../snippet'; | ||
import { TranslatedSnippetSchema } from '../tablets/schema'; | ||
import { RosettaDiagnostic } from '../translate'; | ||
import { singleThreadedTranslateAll } from './extract'; | ||
|
||
export interface TranslateRequest { | ||
includeCompilerDiagnostics: boolean; | ||
snippets: TypeScriptSnippet[]; | ||
export interface TranslateBatchRequest { | ||
readonly snippets: TypeScriptSnippet[]; | ||
readonly includeCompilerDiagnostics: boolean; | ||
} | ||
|
||
export interface TranslateResponse { | ||
diagnostics: RosettaDiagnostic[]; | ||
export interface TranslateBatchResponse { | ||
// Cannot be 'TranslatedSnippet' because needs to be serializable | ||
translatedSnippetSchemas: TranslatedSnippetSchema[]; | ||
readonly translatedSchemas: TranslatedSnippetSchema[]; | ||
readonly diagnostics: RosettaDiagnostic[]; | ||
} | ||
|
||
function translateSnippet(request: TranslateRequest): TranslateResponse { | ||
function translateBatch(request: TranslateBatchRequest): TranslateBatchResponse { | ||
const result = singleThreadedTranslateAll(request.snippets[Symbol.iterator](), request.includeCompilerDiagnostics); | ||
|
||
return { | ||
translatedSchemas: result.translatedSnippets.map((s) => s.toSchema()), | ||
diagnostics: result.diagnostics, | ||
translatedSnippetSchemas: result.translatedSnippets.map((s) => s.toSchema()), | ||
}; | ||
} | ||
|
||
if (worker.isMainThread) { | ||
// Throw an error to prevent accidental require() of this module. In principle not a big | ||
// deal, but we want to be compatible with run modes where 'worker_threads' is not available | ||
// and by doing this people on platforms where 'worker_threads' is available don't accidentally | ||
// add a require(). | ||
throw new Error('This script should be run as a worker, not included directly.'); | ||
} | ||
|
||
const request: TranslateRequest = worker.workerData; | ||
const startTime = Date.now(); | ||
const response = translateSnippet(request); | ||
const delta = (Date.now() - startTime) / 1000; | ||
// eslint-disable-next-line prettier/prettier | ||
logging.info(`Finished translation of ${request.snippets.length} in ${delta.toFixed(0)}s (${response.translatedSnippetSchemas.length} responses)`); | ||
worker.parentPort!.postMessage(response); | ||
workerpool.worker({ translateBatch }); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.