From c3b30c093515e8bce4922eed1a88536e8c2080e8 Mon Sep 17 00:00:00 2001 From: Rico Huijbers Date: Fri, 12 Nov 2021 15:32:25 +0100 Subject: [PATCH] feat(rosetta): expose the 'extract' API (#3161) Expose parts of the `extract` API (cache reading and mass translating) to consumers, so that downstream tools can build example manipulators as well. * The original exposed `Rosetta` class is renamed to `RosettaTabletReader`, and is intended to be used by `pacmak` (or tools that primarily read tablets). * A new class is introduced, `RosettaTranslator`, which is intended to be used by tools that need to do mass translation. This new API exposes the worker-based caching and `translateAll` functionality. * The `extract` command is rewritten to use `RosettaTranslator` * The `translateAll` features are moved from `extract.ts` and `extract_worker.ts` to `translate_all.ts` and `translate_all_worker.ts`. * Add a function to build a snippet from source with `/// !show|!hide` directives, make it clear when each is supposed to be used. * Fix the `console.error()` suppression from the TypeScript compiler that was using an incorrect substring. --- By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license]. 
[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0 --- packages/jsii-rosetta/lib/commands/extract.ts | 230 ++---------------- .../lib/commands/extract_worker.ts | 31 --- .../lib/commands/transliterate.ts | 8 +- packages/jsii-rosetta/lib/index.ts | 3 +- .../lib/{rosetta.ts => rosetta-reader.ts} | 15 +- .../jsii-rosetta/lib/rosetta-translator.ts | 141 +++++++++++ packages/jsii-rosetta/lib/snippet.ts | 63 ++++- packages/jsii-rosetta/lib/tablets/tablets.ts | 51 +++- packages/jsii-rosetta/lib/translate.ts | 4 +- packages/jsii-rosetta/lib/translate_all.ts | 80 ++++++ .../jsii-rosetta/lib/translate_all_worker.ts | 60 +++++ .../lib/typescript/visible-spans.ts | 17 +- .../test/commands/extract.test.ts | 13 +- .../test/commands/transliterate.test.ts | 7 +- .../test/jsii/visible-spans.test.ts | 31 ++- .../test/rosetta-translator.test.ts | 39 +++ packages/jsii-rosetta/test/snippet.test.ts | 20 ++ packages/jsii-rosetta/test/testutil.ts | 5 + 18 files changed, 554 insertions(+), 264 deletions(-) delete mode 100644 packages/jsii-rosetta/lib/commands/extract_worker.ts rename packages/jsii-rosetta/lib/{rosetta.ts => rosetta-reader.ts} (95%) create mode 100644 packages/jsii-rosetta/lib/rosetta-translator.ts create mode 100644 packages/jsii-rosetta/lib/translate_all.ts create mode 100644 packages/jsii-rosetta/lib/translate_all_worker.ts create mode 100644 packages/jsii-rosetta/test/rosetta-translator.test.ts create mode 100644 packages/jsii-rosetta/test/snippet.test.ts diff --git a/packages/jsii-rosetta/lib/commands/extract.ts b/packages/jsii-rosetta/lib/commands/extract.ts index 74c4cb9450..57234b9361 100644 --- a/packages/jsii-rosetta/lib/commands/extract.ts +++ b/packages/jsii-rosetta/lib/commands/extract.ts @@ -1,16 +1,10 @@ -import * as os from 'os'; -import * as path from 'path'; -import * as workerpool from 'workerpool'; - import { loadAssemblies, allTypeScriptSnippets } from '../jsii/assemblies'; -import { TypeFingerprinter } from '../jsii/fingerprinting'; 
-import { TARGET_LANGUAGES } from '../languages'; import * as logging from '../logging'; -import { TypeScriptSnippet, completeSource } from '../snippet'; +import { RosettaTranslator, RosettaTranslatorOptions } from '../rosetta-translator'; +import { TypeScriptSnippet } from '../snippet'; import { snippetKey } from '../tablets/key'; -import { LanguageTablet, TranslatedSnippet } from '../tablets/tablets'; -import { RosettaDiagnostic, Translator, makeRosettaDiagnostic } from '../translate'; -import type { TranslateBatchRequest, TranslateBatchResponse } from './extract_worker'; +import { LanguageTablet } from '../tablets/tablets'; +import { RosettaDiagnostic } from '../translate'; export interface ExtractResult { diagnostics: RosettaDiagnostic[]; @@ -29,15 +23,11 @@ export interface ExtractOptions { readonly cacheTabletFile?: string; /** - * Call the given translation function on the snippets. - * - * Optional, only for testing. Uses `translateAll` by default. + * Make a translator (just for testing) */ - readonly translationFunction?: TranslationFunc; + readonly translatorFactory?: (opts: RosettaTranslatorOptions) => RosettaTranslator; } -type TranslationFunc = typeof translateAll; - /** * Extract all samples from the given assemblies into a tablet */ @@ -50,35 +40,39 @@ export async function extractSnippets( logging.info(`Loading ${assemblyLocations.length} assemblies`); const assemblies = await loadAssemblies(assemblyLocations, options.validateAssemblies); - const fingerprinter = new TypeFingerprinter(assemblies.map((a) => a.assembly)); let snippets = Array.from(allTypeScriptSnippets(assemblies, loose)); if (only.length > 0) { snippets = filterSnippets(snippets, only); } - const tablet = new LanguageTablet(); + const translatorOptions: RosettaTranslatorOptions = { + assemblies: assemblies.map((a) => a.assembly), + }; + + const translator = options.translatorFactory + ? 
options.translatorFactory(translatorOptions) + : new RosettaTranslator(translatorOptions); if (options.cacheTabletFile) { - await reuseTranslationsFromCache(snippets, tablet, options.cacheTabletFile, fingerprinter); + await translator.loadCache(options.cacheTabletFile); + const { translations, remaining } = translator.readFromCache(snippets); + logging.info(`Reused ${translations.length} translations from cache ${options.cacheTabletFile}`); + snippets = remaining; } - const translateCount = snippets.length; const diagnostics = []; - if (translateCount > 0) { + if (snippets.length > 0) { logging.info('Translating'); const startTime = Date.now(); - const result = await (options.translationFunction ?? translateAll)(snippets, options.includeCompilerDiagnostics); - - for (const snippet of result.translatedSnippets) { - const fingerprinted = snippet.withFingerprint(fingerprinter.fingerprintAll(snippet.fqnsReferenced())); - tablet.addSnippet(fingerprinted); - } + const result = await translator.translateAll(snippets); const delta = (Date.now() - startTime) / 1000; logging.info( - `Translated ${translateCount} snippets in ${delta} seconds (${(delta / translateCount).toPrecision(3)}s/snippet)`, + `Translated ${snippets.length} snippets in ${delta} seconds (${(delta / snippets.length).toPrecision( + 3, + )}s/snippet)`, ); diagnostics.push(...result.diagnostics); } else { @@ -86,14 +80,9 @@ export async function extractSnippets( } logging.info(`Saving language tablet to ${options.outputFile}`); - await tablet.save(options.outputFile); + await translator.tablet.save(options.outputFile); - return { diagnostics, tablet }; -} - -interface TranslateAllResult { - translatedSnippets: TranslatedSnippet[]; - diagnostics: RosettaDiagnostic[]; + return { diagnostics, tablet: translator.tablet }; } /** @@ -102,174 +91,3 @@ interface TranslateAllResult { function filterSnippets(ts: TypeScriptSnippet[], includeIds: string[]) { return ts.filter((t) => includeIds.includes(snippetKey(t))); } 
- -/** - * Translate all snippets - * - * We are now always using workers, as we are targeting Node 12+. - */ -async function translateAll( - snippets: TypeScriptSnippet[], - includeCompilerDiagnostics: boolean, -): Promise { - return workerBasedTranslateAll(snippets, includeCompilerDiagnostics); -} - -/** - * Translate the given snippets using a single compiler - * - * Used both here (directly) and via extract_worker to translate a batch of - * snippets in parallel. - */ -export function singleThreadedTranslateAll( - snippets: TypeScriptSnippet[], - includeCompilerDiagnostics: boolean, -): TranslateAllResult { - const translatedSnippets = new Array(); - - const failures = new Array(); - - const translator = new Translator(includeCompilerDiagnostics); - for (const block of snippets) { - try { - translatedSnippets.push(translator.translate(block)); - } catch (e) { - failures.push( - makeRosettaDiagnostic(true, `rosetta: error translating snippet: ${e}\n${e.stack}\n${block.completeSource}`), - ); - } - } - - return { - translatedSnippets, - diagnostics: [...translator.diagnostics, ...failures], - }; -} - -/** - * Divide the work evenly over all processors by running 'extract_worker' in Worker Threads, then combine results - * - * The workers are fed small queues of work each. We used to divide the entire queue into N - * but since the work is divided unevenly that led to some workers stopping early, idling while - * waiting for more work. - * - * Never include 'extract_worker' directly, only do TypeScript type references (so that in - * the script we may assume that 'worker_threads' successfully imports). - */ -async function workerBasedTranslateAll( - snippets: TypeScriptSnippet[], - includeCompilerDiagnostics: boolean, -): Promise { - // Use about half the advertised cores because hyperthreading doesn't seem to - // help that much, or we become I/O-bound at some point. On my machine, using - // more than half the cores actually makes it slower. 
- // Cap to a reasonable top-level limit to prevent thrash on machines with many, many cores. - const maxWorkers = parseInt(process.env.JSII_ROSETTA_MAX_WORKER_COUNT ?? '16'); - const N = Math.min(maxWorkers, Math.max(1, Math.ceil(os.cpus().length / 2))); - const snippetArr = Array.from(snippets); - logging.info(`Translating ${snippetArr.length} snippets using ${N} workers`); - - const pool = workerpool.pool(path.join(__dirname, 'extract_worker.js'), { - maxWorkers: N, - }); - - try { - const requests = batchSnippets(snippetArr, includeCompilerDiagnostics); - - const responses: TranslateBatchResponse[] = await Promise.all( - requests.map((request) => pool.exec('translateBatch', [request])), - ); - - const diagnostics = new Array(); - const translatedSnippets = new Array(); - - // Combine results - for (const response of responses) { - diagnostics.push(...response.diagnostics); - translatedSnippets.push(...response.translatedSchemas.map(TranslatedSnippet.fromSchema)); - } - return { diagnostics, translatedSnippets }; - } finally { - // Not waiting on purpose - void pool.terminate(); - } -} - -function batchSnippets( - snippets: TypeScriptSnippet[], - includeCompilerDiagnostics: boolean, - batchSize = 10, -): TranslateBatchRequest[] { - const ret = []; - - for (let i = 0; i < snippets.length; i += batchSize) { - ret.push({ - snippets: snippets.slice(i, i + batchSize), - includeCompilerDiagnostics, - }); - } - - return ret; -} - -/** - * Try and read as many snippet translations from the cache as possible, adding them to the target tablet - * - * Removes the already translated snippets from the input array. 
- */ -async function reuseTranslationsFromCache( - snippets: TypeScriptSnippet[], - tablet: LanguageTablet, - cacheFile: string, - fingerprinter: TypeFingerprinter, -) { - try { - const cache = await LanguageTablet.fromFile(cacheFile); - - let snippetsFromCacheCtr = 0; - let i = 0; - while (i < snippets.length) { - const fromCache = tryReadFromCache(snippets[i], cache, fingerprinter); - if (fromCache) { - tablet.addSnippet(fromCache); - snippets.splice(i, 1); - snippetsFromCacheCtr += 1; - } else { - i += 1; - } - } - - logging.info(`Reused ${snippetsFromCacheCtr} translations from cache ${cacheFile}`); - } catch (e) { - logging.warn(`Error reading cache ${cacheFile}: ${e.message}`); - } -} - -/** - * Try to find the translation for the given snippet in the given cache - * - * Rules for cacheability are: - * - id is the same (== visible source didn't change) - * - complete source is the same (== fixture didn't change) - * - all types involved have the same fingerprint (== API surface didn't change) - * - the versions of all translations match the versions on the available translators (== translator itself didn't change) - * - * For the versions check: we could have selectively picked some translations - * from the cache while performing others. However, since the big work is in - * parsing the TypeScript, and the rendering itself is peanutes (assumption), it - * doesn't really make a lot of difference. So, for simplification's sake, - * we'll regen all translations if there's at least one that's outdated. 
- */ -function tryReadFromCache(sourceSnippet: TypeScriptSnippet, cache: LanguageTablet, fingerprinter: TypeFingerprinter) { - const fromCache = cache.tryGetSnippet(snippetKey(sourceSnippet)); - - const cacheable = - fromCache && - completeSource(sourceSnippet) === fromCache.snippet.fullSource && - Object.entries(TARGET_LANGUAGES).every( - ([lang, translator]) => fromCache.snippet.translations?.[lang]?.version === translator.version, - ) && - fingerprinter.fingerprintAll(fromCache.fqnsReferenced()) === fromCache.snippet.fqnsFingerprint; - - return cacheable ? fromCache : undefined; -} diff --git a/packages/jsii-rosetta/lib/commands/extract_worker.ts b/packages/jsii-rosetta/lib/commands/extract_worker.ts deleted file mode 100644 index 9a46ac924e..0000000000 --- a/packages/jsii-rosetta/lib/commands/extract_worker.ts +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Pool worker for extract.ts - */ -import * as workerpool from 'workerpool'; - -import { TypeScriptSnippet } from '../snippet'; -import { TranslatedSnippetSchema } from '../tablets/schema'; -import { RosettaDiagnostic } from '../translate'; -import { singleThreadedTranslateAll } from './extract'; - -export interface TranslateBatchRequest { - readonly snippets: TypeScriptSnippet[]; - readonly includeCompilerDiagnostics: boolean; -} - -export interface TranslateBatchResponse { - // Cannot be 'TranslatedSnippet' because needs to be serializable - readonly translatedSchemas: TranslatedSnippetSchema[]; - readonly diagnostics: RosettaDiagnostic[]; -} - -function translateBatch(request: TranslateBatchRequest): TranslateBatchResponse { - const result = singleThreadedTranslateAll(request.snippets, request.includeCompilerDiagnostics); - - return { - translatedSchemas: result.translatedSnippets.map((s) => s.snippet), - diagnostics: result.diagnostics, - }; -} - -workerpool.worker({ translateBatch }); diff --git a/packages/jsii-rosetta/lib/commands/transliterate.ts b/packages/jsii-rosetta/lib/commands/transliterate.ts index 
082c5a85f5..e92eb5f2e7 100644 --- a/packages/jsii-rosetta/lib/commands/transliterate.ts +++ b/packages/jsii-rosetta/lib/commands/transliterate.ts @@ -5,7 +5,7 @@ import { resolve } from 'path'; import { fixturize } from '../fixtures'; import { TargetLanguage } from '../languages'; import { debug } from '../logging'; -import { Rosetta, UnknownSnippetMode } from '../rosetta'; +import { RosettaTabletReader, UnknownSnippetMode } from '../rosetta-reader'; import { SnippetParameters, typeScriptSnippetFromSource, ApiLocation } from '../snippet'; import { Translation } from '../tablets/tablets'; @@ -51,7 +51,7 @@ export async function transliterateAssembly( targetLanguages: readonly TargetLanguage[], options: TransliterateAssemblyOptions = {}, ): Promise { - const rosetta = new Rosetta({ + const rosetta = new RosettaTabletReader({ includeCompilerDiagnostics: true, unknownSnippets: UnknownSnippetMode.TRANSLATE, loose: options.loose, @@ -110,7 +110,7 @@ export async function transliterateAssembly( */ async function loadAssemblies( directories: readonly string[], - rosetta: Rosetta, + rosetta: RosettaTabletReader, ): Promise> { const result = new Map(); @@ -157,7 +157,7 @@ function prefixDisclaimer(translation: Translation): string { function transliterateType( type: Type, - rosetta: Rosetta, + rosetta: RosettaTabletReader, language: TargetLanguage, workingDirectory: string, loose = false, diff --git a/packages/jsii-rosetta/lib/index.ts b/packages/jsii-rosetta/lib/index.ts index f26bb05f93..7d9710639b 100644 --- a/packages/jsii-rosetta/lib/index.ts +++ b/packages/jsii-rosetta/lib/index.ts @@ -5,7 +5,8 @@ export { CSharpVisitor } from './languages/csharp'; export { JavaVisitor } from './languages/java'; export { PythonVisitor } from './languages/python'; export * from './tablets/tablets'; -export * from './rosetta'; +export * from './rosetta-reader'; +export * from './rosetta-translator'; export * from './snippet'; export * from './markdown'; export * from './strict'; diff 
--git a/packages/jsii-rosetta/lib/rosetta.ts b/packages/jsii-rosetta/lib/rosetta-reader.ts similarity index 95% rename from packages/jsii-rosetta/lib/rosetta.ts rename to packages/jsii-rosetta/lib/rosetta-reader.ts index 77e60f7c02..5e823965e2 100644 --- a/packages/jsii-rosetta/lib/rosetta.ts +++ b/packages/jsii-rosetta/lib/rosetta-reader.ts @@ -69,7 +69,7 @@ export interface RosettaOptions { } /** - * Entry point class for consumers for Rosetta functionality + * Entry point class for consumers of Rosetta tablets (primarily: pacmak) * * Rosetta can work in one of two modes: * @@ -80,7 +80,7 @@ export interface RosettaOptions { * sample compilation and is recommended, but the first method will do * when the second one is not necessary. */ -export class Rosetta { +export class RosettaTabletReader { /** * Newly translated samples * @@ -110,6 +110,10 @@ export class Rosetta { /** * Load a tablet as a source for translateable snippets + * + * Note: the snippets loaded from this tablet will NOT be validated for + * their fingerprints or translator versions! If a matching snippet is found + * in the tablet, it will always be returned, whether or not it is stale. 
*/ public async loadTabletFromFile(tabletFile: string) { const tablet = new LanguageTablet(); @@ -298,3 +302,10 @@ export class Rosetta { function id(x: Translation) { return x; } + +/** + * Backwards compatibility + * + * @deprecated use RosettaTabletReader instead + */ +export class Rosetta extends RosettaTabletReader {} diff --git a/packages/jsii-rosetta/lib/rosetta-translator.ts b/packages/jsii-rosetta/lib/rosetta-translator.ts new file mode 100644 index 0000000000..4257a92f02 --- /dev/null +++ b/packages/jsii-rosetta/lib/rosetta-translator.ts @@ -0,0 +1,141 @@ +import * as spec from '@jsii/spec'; + +import { TypeFingerprinter } from './jsii/fingerprinting'; +import { TARGET_LANGUAGES } from './languages'; +import * as logging from './logging'; +import { TypeScriptSnippet, completeSource } from './snippet'; +import { snippetKey } from './tablets/key'; +import { LanguageTablet, TranslatedSnippet } from './tablets/tablets'; +import { translateAll, TranslateAllResult } from './translate_all'; + +export interface RosettaTranslatorOptions { + /** + * Assemblies to use for fingerprinting + * + * The set of assemblies here is used to invalidate the cache. Any types that are + * used in snippets are looked up in this set of assemblies. If found, their type + * information is fingerprinted and compared to the type information at the time + * of compilation of the cached sample. If different, this is considered to be a cache + * miss. + * + * You must use the same set of assemblies when generating and reading the cache + * file, otherwise the fingerprint is guaranteed to be different and the cache will + * be useless (e.g. if you generate the cache WITH assembly information but + * read it without, or vice versa). + * + * @default No assemblies. + */ + readonly assemblies?: spec.Assembly[]; + + /** + * Whether to include compiler diagnostics in the compilation results. 
+ * + * @default false + */ + readonly includeCompilerDiagnostics?: boolean; +} + +/** + * Entry point for consumers that want to translate code on-the-fly + * + * If you want to generate and translate code on-the-fly, in ways that cannot + * be achieved by the rosetta CLI, use this class. + */ +export class RosettaTranslator { + public readonly tablet = new LanguageTablet(); + private readonly fingerprinter: TypeFingerprinter; + private readonly cache = new LanguageTablet(); + private readonly includeCompilerDiagnostics: boolean; + + public constructor(options: RosettaTranslatorOptions = {}) { + this.fingerprinter = new TypeFingerprinter(options?.assemblies ?? []); + this.includeCompilerDiagnostics = options.includeCompilerDiagnostics ?? false; + } + + public async loadCache(fileName: string) { + try { + await this.cache.load(fileName); + } catch (e) { + logging.warn(`Error reading cache ${fileName}: ${e.message}`); + } + } + + /** + * For all the given snippets, try to read translations from the cache + * + * Does not modify the input array: snippets that could not be read from the cache are returned in `remaining`. 
+ */ + public readFromCache(snippets: TypeScriptSnippet[], addToTablet = true): ReadFromCacheResults { + const remaining = [...snippets]; + const translations = new Array(); + + let i = 0; + while (i < remaining.length) { + const fromCache = tryReadFromCache(remaining[i], this.cache, this.fingerprinter); + if (fromCache) { + if (addToTablet) { + this.tablet.addSnippet(fromCache); + } + remaining.splice(i, 1); + translations.push(fromCache); + } else { + i += 1; + } + } + + return { translations, remaining }; + } + + public async translateAll(snippets: TypeScriptSnippet[], addToTablet = true): Promise { + const result = await translateAll(snippets, this.includeCompilerDiagnostics); + + const fingerprinted = result.translatedSnippets.map((snippet) => + snippet.withFingerprint(this.fingerprinter.fingerprintAll(snippet.fqnsReferenced())), + ); + + if (addToTablet) { + for (const translation of fingerprinted) { + this.tablet.addSnippet(translation); + } + } + + return { + translatedSnippets: fingerprinted, + diagnostics: result.diagnostics, + }; + } +} + +/** + * Try to find the translation for the given snippet in the given cache + * + * Rules for cacheability are: + * - id is the same (== visible source didn't change) + * - complete source is the same (== fixture didn't change) + * - all types involved have the same fingerprint (== API surface didn't change) + * - the versions of all translations match the versions on the available translators (== translator itself didn't change) + * + * For the versions check: we could have selectively picked some translations + * from the cache while performing others. However, since the big work is in + * parsing the TypeScript, and the rendering itself is peanuts (assumption), it + * doesn't really make a lot of difference. So, for simplification's sake, + * we'll regen all translations if there's at least one that's outdated. 
+ */ +function tryReadFromCache(sourceSnippet: TypeScriptSnippet, cache: LanguageTablet, fingerprinter: TypeFingerprinter) { + const fromCache = cache.tryGetSnippet(snippetKey(sourceSnippet)); + + const cacheable = + fromCache && + completeSource(sourceSnippet) === fromCache.snippet.fullSource && + Object.entries(TARGET_LANGUAGES).every( + ([lang, translator]) => fromCache.snippet.translations?.[lang]?.version === translator.version, + ) && + fingerprinter.fingerprintAll(fromCache.fqnsReferenced()) === fromCache.snippet.fqnsFingerprint; + + return cacheable ? fromCache : undefined; +} + +export interface ReadFromCacheResults { + readonly translations: TranslatedSnippet[]; + readonly remaining: TypeScriptSnippet[]; +} diff --git a/packages/jsii-rosetta/lib/snippet.ts b/packages/jsii-rosetta/lib/snippet.ts index c8d1a6d07c..88c1d74c7a 100644 --- a/packages/jsii-rosetta/lib/snippet.ts +++ b/packages/jsii-rosetta/lib/snippet.ts @@ -1,3 +1,5 @@ +import { trimCompleteSourceToVisible } from './typescript/visible-spans'; + /** * A piece of TypeScript code found in an assembly, ready to be translated */ @@ -100,20 +102,77 @@ export function renderApiLocation(apiLoc: ApiLocation): string { } } +/** + * Construct a TypeScript snippet from visible source + * + * Will parse parameters from a directive in the given source, but will not + * interpret `/// !show` and `/// !hide` directives. + * + * `/// !show` and `/// !hide` directives WILL affect what gets displayed by + * the translator, but they will NOT affect the snippet's cache key (i.e. the + * cache key will be based on the full source given here). + * + * Use this if you are looking up a snippet in a tablet, which has been translated + * previously using a fixture. 
+ */ +export function typeScriptSnippetFromVisibleSource( + typeScriptSource: string, + location: SnippetLocation, + strict: boolean, + parameters: Record = {}, +): TypeScriptSnippet { + const [source, sourceParameters] = parametersFromSourceDirectives(typeScriptSource); + const visibleSource = source.trimRight(); + + return { + visibleSource, + location, + parameters: Object.assign({}, parameters, sourceParameters), + strict, + }; +} + /** * Construct a TypeScript snippet from literal source * - * Will parse parameters from a directive in the given source. + * @deprecated Use `typeScriptSnippetFromVisibleSource` */ export function typeScriptSnippetFromSource( typeScriptSource: string, location: SnippetLocation, strict: boolean, parameters: Record = {}, +): TypeScriptSnippet { + return typeScriptSnippetFromVisibleSource(typeScriptSource, location, strict, parameters); +} + +/** + * Construct a TypeScript snippet from complete source + * + * Will parse parameters from a directive in the given source, and will + * interpret `/// !show` and `/// !hide` directives. + * + * The snippet's cache key will be based on the source that remains after + * these directives are processed. + * + * Use this if you are building a snippet to be translated, and take care + * to store the return object's `visibleSource` in the assembly (not the original + * source you passed in). + */ +export function typeScriptSnippetFromCompleteSource( + typeScriptSource: string, + location: SnippetLocation, + strict: boolean, + parameters: Record = {}, ): TypeScriptSnippet { const [source, sourceParameters] = parametersFromSourceDirectives(typeScriptSource); + const completeSource = source.trimRight(); + + const visibleSource = trimCompleteSourceToVisible(completeSource); + return { - visibleSource: source.trimRight(), + visibleSource, + completeSource: visibleSource !== completeSource ? 
completeSource : undefined, location, parameters: Object.assign({}, parameters, sourceParameters), strict, diff --git a/packages/jsii-rosetta/lib/tablets/tablets.ts b/packages/jsii-rosetta/lib/tablets/tablets.ts index 38e192906e..ff13c60673 100644 --- a/packages/jsii-rosetta/lib/tablets/tablets.ts +++ b/packages/jsii-rosetta/lib/tablets/tablets.ts @@ -2,7 +2,7 @@ import * as fs from 'fs-extra'; import * as path from 'path'; import { TargetLanguage } from '../languages'; -import { TypeScriptSnippet, SnippetLocation } from '../snippet'; +import { TypeScriptSnippet, SnippetLocation, completeSource } from '../snippet'; import { mapValues } from '../util'; import { snippetKey } from './key'; import { TabletSchema, TranslatedSnippetSchema, ORIGINAL_SNIPPET_KEY } from './schema'; @@ -18,12 +18,28 @@ export const CURRENT_SCHEMA_VERSION = '2'; * A tablet containing various snippets in multiple languages */ export class LanguageTablet { + /** + * Load a tablet from a file + */ public static async fromFile(filename: string) { const ret = new LanguageTablet(); await ret.load(filename); return ret; } + /** + * Load a tablet from a file that may not exist + * + * Will return an empty tablet if the file does not exist + */ + public static async fromOptionalFile(filename: string) { + const ret = new LanguageTablet(); + if (fs.existsSync(filename)) { + await ret.load(filename); + } + return ret; + } + private readonly snippets: Record = {}; public addSnippet(snippet: TranslatedSnippet) { @@ -35,15 +51,46 @@ export class LanguageTablet { return Object.keys(this.snippets); } + /** + * Add all snippets from the given tablet into this one + */ + public addTablet(tablet: LanguageTablet) { + for (const snippet of Object.values(tablet.snippets)) { + this.addSnippet(snippet); + } + } + public tryGetSnippet(key: string): TranslatedSnippet | undefined { return this.snippets[key]; } + /** + * Look up a single translation of a source snippet + * + * @deprecated Use `lookupTranslationBySource` 
instead. + */ public lookup(typeScriptSource: TypeScriptSnippet, language: TargetLanguage): Translation | undefined { + return this.lookupTranslationBySource(typeScriptSource, language); + } + + /** + * Look up a single translation of a source snippet + */ + public lookupTranslationBySource( + typeScriptSource: TypeScriptSnippet, + language: TargetLanguage, + ): Translation | undefined { const snippet = this.snippets[snippetKey(typeScriptSource)]; return snippet?.get(language); } + /** + * Look up the translated version of a TypeScript snippet + */ + public lookupBySource(typeScriptSource: TypeScriptSnippet): TranslatedSnippet | undefined { + return this.snippets[snippetKey(typeScriptSource)]; + } + public async load(filename: string) { const obj = (await fs.readJson(filename, { encoding: 'utf-8' })) as TabletSchema; @@ -101,7 +148,7 @@ export class TranslatedSnippet { }, didCompile: didCompile, location: original.location, - fullSource: original.completeSource, + fullSource: completeSource(original), }); } diff --git a/packages/jsii-rosetta/lib/translate.ts b/packages/jsii-rosetta/lib/translate.ts index 3a27f4617a..5232512979 100644 --- a/packages/jsii-rosetta/lib/translate.ts +++ b/packages/jsii-rosetta/lib/translate.ts @@ -67,7 +67,7 @@ export class Translator { location: snip.location, didCompile: translator.didSuccessfullyCompile, fqnsReferenced: translator.fqnsReferenced(), - fullSource: snip.completeSource, + fullSource: completeSource(snip), syntaxKindCounter: translator.syntaxKindCounter(), }); } @@ -199,7 +199,7 @@ export class SnippetTranslator { try { return call(...args); } catch (err) { - const isExpectedTypescriptError = err.message.includes('Error: Debug Failure'); + const isExpectedTypescriptError = err.message.includes('Debug Failure'); if (!isExpectedTypescriptError) { console.error(`Failed to execute ${call.name}: ${err}`); diff --git a/packages/jsii-rosetta/lib/translate_all.ts b/packages/jsii-rosetta/lib/translate_all.ts new file mode 100644 
index 0000000000..84081689e6 --- /dev/null +++ b/packages/jsii-rosetta/lib/translate_all.ts @@ -0,0 +1,80 @@ +import * as os from 'os'; +import * as path from 'path'; +import * as workerpool from 'workerpool'; + +import * as logging from './logging'; +import { TypeScriptSnippet } from './snippet'; +import { TranslatedSnippet } from './tablets/tablets'; +import { RosettaDiagnostic } from './translate'; +import type { TranslateBatchRequest, TranslateBatchResponse } from './translate_all_worker'; + +/** + * Divide the work evenly over all processors by running 'translate_all_worker' in Worker Threads, then combine results + * + * The workers are fed small queues of work each. We used to divide the entire queue into N + * but since the work is divided unevenly that led to some workers stopping early, idling while + * waiting for more work. + * + * Never include 'translate_all_worker' directly, only do TypeScript type references (so that in + * the script we may assume that 'worker_threads' successfully imports). + */ +export async function translateAll( + snippets: TypeScriptSnippet[], + includeCompilerDiagnostics: boolean, +): Promise { + // Use about half the advertised cores because hyperthreading doesn't seem to + // help that much, or we become I/O-bound at some point. On my machine, using + // more than half the cores actually makes it slower. + // Cap to a reasonable top-level limit to prevent thrash on machines with many, many cores. + const maxWorkers = parseInt(process.env.JSII_ROSETTA_MAX_WORKER_COUNT ?? 
'16'); + const N = Math.min(maxWorkers, Math.max(1, Math.ceil(os.cpus().length / 2))); + const snippetArr = Array.from(snippets); + logging.info(`Translating ${snippetArr.length} snippets using ${N} workers`); + + const pool = workerpool.pool(path.join(__dirname, 'translate_all_worker.js'), { + maxWorkers: N, + }); + + try { + const requests = batchSnippets(snippetArr, includeCompilerDiagnostics); + + const responses: TranslateBatchResponse[] = await Promise.all( + requests.map((request) => pool.exec('translateBatch', [request])), + ); + + const diagnostics = new Array(); + const translatedSnippets = new Array(); + + // Combine results + for (const response of responses) { + diagnostics.push(...response.diagnostics); + translatedSnippets.push(...response.translatedSchemas.map(TranslatedSnippet.fromSchema)); + } + return { diagnostics, translatedSnippets }; + } finally { + // Not waiting on purpose + void pool.terminate(); + } +} + +function batchSnippets( + snippets: TypeScriptSnippet[], + includeCompilerDiagnostics: boolean, + batchSize = 10, +): TranslateBatchRequest[] { + const ret = []; + + for (let i = 0; i < snippets.length; i += batchSize) { + ret.push({ + snippets: snippets.slice(i, i + batchSize), + includeCompilerDiagnostics, + }); + } + + return ret; +} + +export interface TranslateAllResult { + translatedSnippets: TranslatedSnippet[]; + diagnostics: RosettaDiagnostic[]; +} diff --git a/packages/jsii-rosetta/lib/translate_all_worker.ts b/packages/jsii-rosetta/lib/translate_all_worker.ts new file mode 100644 index 0000000000..b7f1d1bc58 --- /dev/null +++ b/packages/jsii-rosetta/lib/translate_all_worker.ts @@ -0,0 +1,60 @@ +/** + * Pool worker for translate_all.ts + */ +import * as workerpool from 'workerpool'; + +import { TypeScriptSnippet } from './snippet'; +import { TranslatedSnippetSchema } from './tablets/schema'; +import { TranslatedSnippet } from './tablets/tablets'; +import { RosettaDiagnostic, Translator, makeRosettaDiagnostic } from './translate'; 
/**
 * A batch of snippets sent to a pool worker for translation.
 */
export interface TranslateBatchRequest {
  readonly snippets: TypeScriptSnippet[];
  readonly includeCompilerDiagnostics: boolean;
}

/**
 * What a pool worker sends back for one TranslateBatchRequest.
 */
export interface TranslateBatchResponse {
  // Cannot be 'TranslatedSnippet' because needs to be serializable
  readonly translatedSchemas: TranslatedSnippetSchema[];
  readonly diagnostics: RosettaDiagnostic[];
}

/**
 * Worker entry point (registered with workerpool as 'translateBatch'):
 * translate one batch and reduce the result to its serializable form.
 */
function translateBatch(request: TranslateBatchRequest): TranslateBatchResponse {
  const result = singleThreadedTranslateAll(request.snippets, request.includeCompilerDiagnostics);

  return {
    translatedSchemas: result.translatedSnippets.map((s) => s.snippet),
    diagnostics: result.diagnostics,
  };
}

/**
 * Translate the given snippets using a single compiler
 *
 * A snippet whose translation throws does not abort the run: the failure is
 * recorded as a diagnostic and the remaining snippets are still translated.
 *
 * @param snippets the snippets to translate, in order
 * @param includeCompilerDiagnostics passed to the Translator constructor
 * @returns the successful translations, plus the translator's diagnostics
 *          followed by one diagnostic per snippet that failed with an exception
 */
export function singleThreadedTranslateAll(
  snippets: TypeScriptSnippet[],
  includeCompilerDiagnostics: boolean,
): TranslateAllResult {
  const translatedSnippets = new Array<TranslatedSnippet>();

  const failures = new Array<RosettaDiagnostic>();

  const translator = new Translator(includeCompilerDiagnostics);
  for (const block of snippets) {
    try {
      translatedSnippets.push(translator.translate(block));
    } catch (e) {
      failures.push(makeRosettaDiagnostic(true, describeTranslationFailure(e, block)));
    }
  }

  return {
    translatedSnippets,
    diagnostics: [...translator.diagnostics, ...failures],
  };
}

/**
 * Build the diagnostic text for a snippet whose translation threw.
 *
 * Anything can be thrown, so the stack is only appended when the value is an
 * Error that has one. Snippets built from visible source have no
 * `completeSource`; fall back to `visibleSource` instead of printing 'undefined'.
 */
function describeTranslationFailure(error: unknown, snippet: TypeScriptSnippet): string {
  const stack = error instanceof Error && error.stack ? `\n${error.stack}` : '';
  const source = snippet.completeSource ?? snippet.visibleSource;
  return `rosetta: error translating snippet: ${String(error)}${stack}\n${source}`;
}

workerpool.worker({ translateBatch });
+ .join('') + .trimRight(); +} + export interface MarkedSpan { start: number; end: number; @@ -92,7 +101,7 @@ export interface MarkedSpan { } function calculateMarkedSpans(source: string): MarkedSpan[] { - const regEx = /[/]{3}[ \t]*(!(?:show|hide))[ \t]*$/gm; + const regEx = /^[ \t]*[/]{3}[ \t]*(!(?:show|hide))[ \t]*$/gm; const ret = new Array(); let match; @@ -111,7 +120,9 @@ function calculateMarkedSpans(source: string): MarkedSpan[] { ret.push({ start: spanStart, end: directiveStart, visible }); } visible = isShow; - spanStart = match.index + match[0].length; + + // A directive eats its trailing newline. + spanStart = match.index + match[0].length + 1; } } @@ -119,7 +130,7 @@ function calculateMarkedSpans(source: string): MarkedSpan[] { ret.push({ start: spanStart ?? 0, end: source.length, visible }); // Filter empty spans and return - return ret.filter((s) => s.start !== s.end); + return ret.filter((s) => s.start < s.end); } /** diff --git a/packages/jsii-rosetta/test/commands/extract.test.ts b/packages/jsii-rosetta/test/commands/extract.test.ts index 089de4f40b..0969adb715 100644 --- a/packages/jsii-rosetta/test/commands/extract.test.ts +++ b/packages/jsii-rosetta/test/commands/extract.test.ts @@ -1,6 +1,6 @@ import * as path from 'path'; -import { LanguageTablet } from '../../lib'; +import { LanguageTablet, RosettaTranslator, RosettaTranslatorOptions } from '../../lib'; import * as extract from '../../lib/commands/extract'; import { TARGET_LANGUAGES } from '../../lib/languages'; import { TestJsiiModule, DUMMY_ASSEMBLY_TARGETS } from '../testutil'; @@ -71,7 +71,7 @@ describe('with cache file', () => { await extract.extractSnippets([assembly.moduleDirectory], { outputFile: path.join(assembly.moduleDirectory, 'dummy.tabl.json'), cacheTabletFile, - translationFunction, + translatorFactory: (o) => new MockTranslator(o, translationFunction), ...defaultExtractOptions, }); @@ -87,7 +87,7 @@ describe('with cache file', () => { await 
/**
 * RosettaTranslator for tests: behaves like the real class, except that
 * `translateAll` on the instance is the jest mock handed to the constructor.
 */
class MockTranslator extends RosettaTranslator {
  public constructor(options: RosettaTranslatorOptions, translateAllMock: jest.Mock) {
    super(options);

    // Replace translateAll on this instance so tests can observe and control it.
    this.translateAll = translateAllMock;
  }
}
// Location shared by the snippets constructed in the tests below.
const location: SnippetLocation = { api: { api: 'file', fileName: 'test.ts' } };

test('translator can translate', async () => {
  const translator = new RosettaTranslator({
    includeCompilerDiagnostics: true,
  });

  const snippet = typeScriptSnippetFromVisibleSource('console.log("hello world");', location, true);

  const { translatedSnippets } = await translator.translateAll([snippet]);

  expect(translatedSnippets).toHaveLength(1);
  expect(translatedSnippets[0].get(TargetLanguage.PYTHON)?.source).toEqual('print("hello world")');

  expect(translator.tablet.snippetKeys).toHaveLength(1);
});

test('translator can read from cache', async () => {
  await withTemporaryDirectory(async (dir) => {
    // Keep the cache tablet inside the temporary directory, so that it is
    // cleaned up with it instead of being left behind in the working directory.
    const cacheFile = `${dir}/temp.tabl.json`;

    // GIVEN: prepare cache
    const cacheBuilder = new RosettaTranslator({ includeCompilerDiagnostics: true });
    const snippet = typeScriptSnippetFromVisibleSource('console.log("hello world");', location, true);
    await cacheBuilder.translateAll([snippet]);
    await cacheBuilder.tablet.save(cacheFile);

    // WHEN: new translator
    const translator = new RosettaTranslator({ includeCompilerDiagnostics: true });
    await translator.loadCache(cacheFile);

    const cached = translator.readFromCache([snippet]);

    // THEN: the snippet is served from the cache, nothing is left to translate
    expect(cached.translations).toHaveLength(1);
    expect(cached.remaining).toHaveLength(0);
    expect(translator.tablet.snippetKeys).toHaveLength(1);
  });
});
/**
 * Run `callback` with the path of a freshly created temporary directory, and
 * remove that directory again once the callback has finished.
 *
 * The directory is removed whether the callback resolves, rejects, or throws
 * synchronously before returning its promise.
 *
 * @param callback receives the path of the temporary directory
 * @returns whatever the callback's promise resolves to
 */
export async function withTemporaryDirectory<T>(callback: (dir: string) => Promise<T>): Promise<T> {
  const tmpdir = fs.mkdtempSync(path.join(os.tmpdir(), path.basename(__filename)));
  try {
    return await callback(tmpdir);
  } finally {
    fs.removeSync(tmpdir);
  }
}