Skip to content

Commit

Permalink
feat(rosetta): expose the 'extract' API (#3161)
Browse files Browse the repository at this point in the history
Expose parts of the `extract` API (cache reading and mass translating)
to consumers, so that downstream tools can build example manipulators
as well.

* The original exposed `Rosetta` class is renamed to `RosettaTabletReader`, and is intended to be used by `pacmak` (or tools that primarily read tablets).
* A new class is introduced, `RosettaTranslator`, which is intended to be used by tools that need to do mass translation. This new API exposes the worker-based caching and `translateAll` functionality.
* The `extract` command is rewritten to use `RosettaTranslator`.
* The `translateAll` features are moved from `extract.ts` and `extract_worker.ts` to `translate_all.ts` and `translate_all_worker.ts`.
* Add a function to build a snippet from source with `/// !show|!hide` directives, and make it clear when each is supposed to be used.
* Fix the `console.error()` suppression from the TypeScript compiler that was using an incorrect substring.

---

By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license].

[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0
  • Loading branch information
rix0rrr authored Nov 12, 2021
1 parent 7c428e6 commit c3b30c0
Show file tree
Hide file tree
Showing 18 changed files with 554 additions and 264 deletions.
230 changes: 24 additions & 206 deletions packages/jsii-rosetta/lib/commands/extract.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
import * as os from 'os';
import * as path from 'path';
import * as workerpool from 'workerpool';

import { loadAssemblies, allTypeScriptSnippets } from '../jsii/assemblies';
import { TypeFingerprinter } from '../jsii/fingerprinting';
import { TARGET_LANGUAGES } from '../languages';
import * as logging from '../logging';
import { TypeScriptSnippet, completeSource } from '../snippet';
import { RosettaTranslator, RosettaTranslatorOptions } from '../rosetta-translator';
import { TypeScriptSnippet } from '../snippet';
import { snippetKey } from '../tablets/key';
import { LanguageTablet, TranslatedSnippet } from '../tablets/tablets';
import { RosettaDiagnostic, Translator, makeRosettaDiagnostic } from '../translate';
import type { TranslateBatchRequest, TranslateBatchResponse } from './extract_worker';
import { LanguageTablet } from '../tablets/tablets';
import { RosettaDiagnostic } from '../translate';

export interface ExtractResult {
diagnostics: RosettaDiagnostic[];
Expand All @@ -29,15 +23,11 @@ export interface ExtractOptions {
readonly cacheTabletFile?: string;

/**
* Call the given translation function on the snippets.
*
* Optional, only for testing. Uses `translateAll` by default.
* Make a translator (just for testing)
*/
readonly translationFunction?: TranslationFunc;
readonly translatorFactory?: (opts: RosettaTranslatorOptions) => RosettaTranslator;
}

type TranslationFunc = typeof translateAll;

/**
* Extract all samples from the given assemblies into a tablet
*/
Expand All @@ -50,50 +40,49 @@ export async function extractSnippets(

logging.info(`Loading ${assemblyLocations.length} assemblies`);
const assemblies = await loadAssemblies(assemblyLocations, options.validateAssemblies);
const fingerprinter = new TypeFingerprinter(assemblies.map((a) => a.assembly));

let snippets = Array.from(allTypeScriptSnippets(assemblies, loose));
if (only.length > 0) {
snippets = filterSnippets(snippets, only);
}

const tablet = new LanguageTablet();
const translatorOptions: RosettaTranslatorOptions = {
assemblies: assemblies.map((a) => a.assembly),
};

const translator = options.translatorFactory
? options.translatorFactory(translatorOptions)
: new RosettaTranslator(translatorOptions);

if (options.cacheTabletFile) {
await reuseTranslationsFromCache(snippets, tablet, options.cacheTabletFile, fingerprinter);
await translator.loadCache(options.cacheTabletFile);
const { translations, remaining } = translator.readFromCache(snippets);
logging.info(`Reused ${translations.length} translations from cache ${options.cacheTabletFile}`);
snippets = remaining;
}

const translateCount = snippets.length;
const diagnostics = [];
if (translateCount > 0) {
if (snippets.length > 0) {
logging.info('Translating');
const startTime = Date.now();

const result = await (options.translationFunction ?? translateAll)(snippets, options.includeCompilerDiagnostics);

for (const snippet of result.translatedSnippets) {
const fingerprinted = snippet.withFingerprint(fingerprinter.fingerprintAll(snippet.fqnsReferenced()));
tablet.addSnippet(fingerprinted);
}
const result = await translator.translateAll(snippets);

const delta = (Date.now() - startTime) / 1000;
logging.info(
`Translated ${translateCount} snippets in ${delta} seconds (${(delta / translateCount).toPrecision(3)}s/snippet)`,
`Translated ${snippets.length} snippets in ${delta} seconds (${(delta / snippets.length).toPrecision(
3,
)}s/snippet)`,
);
diagnostics.push(...result.diagnostics);
} else {
logging.info('Nothing left to translate');
}

logging.info(`Saving language tablet to ${options.outputFile}`);
await tablet.save(options.outputFile);
await translator.tablet.save(options.outputFile);

return { diagnostics, tablet };
}

interface TranslateAllResult {
translatedSnippets: TranslatedSnippet[];
diagnostics: RosettaDiagnostic[];
return { diagnostics, tablet: translator.tablet };
}

/**
Expand All @@ -102,174 +91,3 @@ interface TranslateAllResult {
function filterSnippets(ts: TypeScriptSnippet[], includeIds: string[]) {
  // Keep only the snippets whose key (as computed by `snippetKey`) appears in
  // `includeIds`. The ids are collected into a Set once so that each snippet
  // costs a single lookup instead of a linear scan over `includeIds`.
  // `Set.has` and `Array.includes` both compare with SameValueZero, so the
  // selected snippets (and their order) are unchanged.
  const wanted = new Set(includeIds);
  return ts.filter((t) => wanted.has(snippetKey(t)));
}

/**
 * Translate every snippet in the given list.
 *
 * There is only one strategy: the work is always handed to the worker-based
 * implementation (we are targeting Node 12+, so workers are always used).
 *
 * @param snippets the snippets to translate
 * @param includeCompilerDiagnostics forwarded unchanged to the worker-based implementation
 * @returns whatever the worker-based implementation produces
 */
async function translateAll(
  snippets: TypeScriptSnippet[],
  includeCompilerDiagnostics: boolean,
): Promise<TranslateAllResult> {
  const outcome = await workerBasedTranslateAll(snippets, includeCompilerDiagnostics);
  return outcome;
}

/**
 * Translate a list of snippets one after the other with a single `Translator`.
 *
 * Called directly from this module and, through extract_worker, to translate
 * one batch of snippets as part of a parallel run.
 *
 * A snippet whose translation throws does not abort the run: the exception is
 * turned into an error diagnostic and the loop moves on to the next snippet.
 *
 * @param snippets the snippets to translate, in order
 * @param includeCompilerDiagnostics passed to the `Translator` constructor
 * @returns the successfully translated snippets, plus the translator's own
 *          diagnostics followed by one diagnostic per failed snippet
 */
export function singleThreadedTranslateAll(
  snippets: TypeScriptSnippet[],
  includeCompilerDiagnostics: boolean,
): TranslateAllResult {
  const compiler = new Translator(includeCompilerDiagnostics);
  const translated: TranslatedSnippet[] = [];
  const errors: RosettaDiagnostic[] = [];

  for (const snippet of snippets) {
    try {
      const result = compiler.translate(snippet);
      translated.push(result);
    } catch (e) {
      const message = `rosetta: error translating snippet: ${e}\n${e.stack}\n${snippet.completeSource}`;
      errors.push(makeRosettaDiagnostic(true, message));
    }
  }

  // Translator diagnostics come first, then the per-snippet failures.
  const diagnostics = [...compiler.diagnostics, ...errors];
  return { translatedSnippets: translated, diagnostics };
}

/**
 * Divide the work evenly over all processors by running 'extract_worker' in Worker Threads, then combine results
 *
 * The workers are fed small queues of work each. We used to divide the entire queue into N
 * but since the work is divided unevenly that led to some workers stopping early, idling while
 * waiting for more work.
 *
 * Never include 'extract_worker' directly, only do TypeScript type references (so that in
 * the script we may assume that 'worker_threads' successfully imports).
 *
 * The upper bound on the number of workers can be overridden with the
 * `JSII_ROSETTA_MAX_WORKER_COUNT` environment variable. A value that does not
 * parse to an integer >= 1 is ignored (with a warning) and the default of 16
 * is used instead, so the pool is never configured with `NaN`, zero or a
 * negative worker count.
 *
 * @param snippets the snippets to translate
 * @param includeCompilerDiagnostics copied into every batch request sent to the workers
 * @returns the diagnostics and translated snippets of all batches, concatenated in batch order
 */
async function workerBasedTranslateAll(
  snippets: TypeScriptSnippet[],
  includeCompilerDiagnostics: boolean,
): Promise<TranslateAllResult> {
  // Use about half the advertised cores because hyperthreading doesn't seem to
  // help that much, or we become I/O-bound at some point. On my machine, using
  // more than half the cores actually makes it slower.
  // Cap to a reasonable top-level limit to prevent thrash on machines with many, many cores.
  const defaultMaxWorkers = 16;
  const configured = process.env.JSII_ROSETTA_MAX_WORKER_COUNT;
  let maxWorkers = defaultMaxWorkers;
  if (configured !== undefined) {
    const parsed = parseInt(configured, 10);
    if (Number.isInteger(parsed) && parsed >= 1) {
      maxWorkers = parsed;
    } else {
      logging.warn(
        `Ignoring invalid JSII_ROSETTA_MAX_WORKER_COUNT value '${configured}', using ${defaultMaxWorkers} instead`,
      );
    }
  }

  const N = Math.min(maxWorkers, Math.max(1, Math.ceil(os.cpus().length / 2)));
  const snippetArr = Array.from(snippets);
  logging.info(`Translating ${snippetArr.length} snippets using ${N} workers`);

  const pool = workerpool.pool(path.join(__dirname, 'extract_worker.js'), {
    maxWorkers: N,
  });

  try {
    const requests = batchSnippets(snippetArr, includeCompilerDiagnostics);

    const responses: TranslateBatchResponse[] = await Promise.all(
      requests.map((request) => pool.exec('translateBatch', [request])),
    );

    const diagnostics = new Array<RosettaDiagnostic>();
    const translatedSnippets = new Array<TranslatedSnippet>();

    // Combine results
    for (const response of responses) {
      diagnostics.push(...response.diagnostics);
      translatedSnippets.push(...response.translatedSchemas.map(TranslatedSnippet.fromSchema));
    }
    return { diagnostics, translatedSnippets };
  } finally {
    // Not waiting on purpose
    void pool.terminate();
  }
}

/**
 * Split a list of snippets into consecutive batch requests.
 *
 * Every request holds at most `batchSize` snippets, in their original order,
 * together with the `includeCompilerDiagnostics` flag. An empty input yields
 * an empty list of requests.
 *
 * @param snippets the snippets to distribute over batches
 * @param includeCompilerDiagnostics flag copied into every request
 * @param batchSize maximum number of snippets per request (default 10)
 * @returns the batch requests, in input order
 * @throws RangeError if `batchSize` is not an integer >= 1 (a size of zero or
 *         less would otherwise make the loop below never terminate)
 */
function batchSnippets(
  snippets: TypeScriptSnippet[],
  includeCompilerDiagnostics: boolean,
  batchSize = 10,
): TranslateBatchRequest[] {
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`batchSize must be a positive integer, got ${batchSize}`);
  }

  const ret: TranslateBatchRequest[] = [];

  for (let i = 0; i < snippets.length; i += batchSize) {
    ret.push({
      snippets: snippets.slice(i, i + batchSize),
      includeCompilerDiagnostics,
    });
  }

  return ret;
}

/**
 * Move every snippet that has a usable cached translation into the target tablet.
 *
 * The cache tablet is loaded from `cacheFile`. Each snippet for which
 * `tryReadFromCache` finds a translation is added to `tablet` and removed from
 * `snippets` IN PLACE, so that afterwards `snippets` only holds the ones that
 * still need translating.
 *
 * Any error raised while loading or consulting the cache is logged as a
 * warning and otherwise ignored.
 *
 * @param snippets candidate snippets; mutated by this function
 * @param tablet tablet that receives the reused translations
 * @param cacheFile location of the cache tablet to load
 * @param fingerprinter used to check that cached entries are still valid
 */
async function reuseTranslationsFromCache(
  snippets: TypeScriptSnippet[],
  tablet: LanguageTablet,
  cacheFile: string,
  fingerprinter: TypeFingerprinter,
) {
  try {
    const cache = await LanguageTablet.fromFile(cacheFile);
    let reusedCount = 0;

    // The index only advances on a cache miss: after a hit, the splice shifts
    // the next candidate into the current position.
    for (let idx = 0; idx < snippets.length; ) {
      const hit = tryReadFromCache(snippets[idx], cache, fingerprinter);
      if (!hit) {
        idx += 1;
        continue;
      }

      tablet.addSnippet(hit);
      snippets.splice(idx, 1);
      reusedCount += 1;
    }

    logging.info(`Reused ${reusedCount} translations from cache ${cacheFile}`);
  } catch (e) {
    logging.warn(`Error reading cache ${cacheFile}: ${e.message}`);
  }
}

/**
 * Look up the translation of a snippet in a cache tablet.
 *
 * A cached entry is only returned if all of the following hold:
 * - an entry exists under the snippet's key (== visible source didn't change)
 * - the complete source is the same (== fixture didn't change)
 * - for every target language, the cached translation's version equals the
 *   version of the current translator (== translator itself didn't change)
 * - the fingerprint of all referenced types is the same (== API surface didn't change)
 *
 * About the version check: we could reuse the up-to-date translations and
 * only redo the outdated ones. However, the expensive part is parsing the
 * TypeScript and the rendering itself is peanuts (assumption), so for
 * simplicity all translations are regenerated as soon as one is outdated.
 *
 * @returns the cached entry if it can be reused, `undefined` otherwise
 */
function tryReadFromCache(sourceSnippet: TypeScriptSnippet, cache: LanguageTablet, fingerprinter: TypeFingerprinter) {
  const cached = cache.tryGetSnippet(snippetKey(sourceSnippet));
  if (!cached) {
    return undefined;
  }

  if (completeSource(sourceSnippet) !== cached.snippet.fullSource) {
    return undefined;
  }

  for (const [lang, translator] of Object.entries(TARGET_LANGUAGES)) {
    if (cached.snippet.translations?.[lang]?.version !== translator.version) {
      return undefined;
    }
  }

  if (fingerprinter.fingerprintAll(cached.fqnsReferenced()) !== cached.snippet.fqnsFingerprint) {
    return undefined;
  }

  return cached;
}
31 changes: 0 additions & 31 deletions packages/jsii-rosetta/lib/commands/extract_worker.ts

This file was deleted.

8 changes: 4 additions & 4 deletions packages/jsii-rosetta/lib/commands/transliterate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { resolve } from 'path';
import { fixturize } from '../fixtures';
import { TargetLanguage } from '../languages';
import { debug } from '../logging';
import { Rosetta, UnknownSnippetMode } from '../rosetta';
import { RosettaTabletReader, UnknownSnippetMode } from '../rosetta-reader';
import { SnippetParameters, typeScriptSnippetFromSource, ApiLocation } from '../snippet';
import { Translation } from '../tablets/tablets';

Expand Down Expand Up @@ -51,7 +51,7 @@ export async function transliterateAssembly(
targetLanguages: readonly TargetLanguage[],
options: TransliterateAssemblyOptions = {},
): Promise<void> {
const rosetta = new Rosetta({
const rosetta = new RosettaTabletReader({
includeCompilerDiagnostics: true,
unknownSnippets: UnknownSnippetMode.TRANSLATE,
loose: options.loose,
Expand Down Expand Up @@ -110,7 +110,7 @@ export async function transliterateAssembly(
*/
async function loadAssemblies(
directories: readonly string[],
rosetta: Rosetta,
rosetta: RosettaTabletReader,
): Promise<ReadonlyMap<string, AssemblyLoader>> {
const result = new Map<string, AssemblyLoader>();

Expand Down Expand Up @@ -157,7 +157,7 @@ function prefixDisclaimer(translation: Translation): string {

function transliterateType(
type: Type,
rosetta: Rosetta,
rosetta: RosettaTabletReader,
language: TargetLanguage,
workingDirectory: string,
loose = false,
Expand Down
3 changes: 2 additions & 1 deletion packages/jsii-rosetta/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ export { CSharpVisitor } from './languages/csharp';
export { JavaVisitor } from './languages/java';
export { PythonVisitor } from './languages/python';
export * from './tablets/tablets';
export * from './rosetta';
export * from './rosetta-reader';
export * from './rosetta-translator';
export * from './snippet';
export * from './markdown';
export * from './strict';
Loading

0 comments on commit c3b30c0

Please sign in to comment.