From 1e7b604c15a0083f27ceafd8ca32ff9b6cf61759 Mon Sep 17 00:00:00 2001 From: Rico Huijbers Date: Fri, 3 Dec 2021 13:32:50 +0100 Subject: [PATCH] feat(rosetta): generate rosetta tablets next to each assembly (#3223) Rather than writing our Rosetta translations to a single tablet file that we throw away at the end of the build, `rosetta extract` will now write translations to individual `.jsii.tabl.json` files, located next to the assemblies themselves. Construct library authors can publish these to npm, so that downstream tools that process jsii modules for documentation have access to the translations and don't need to redo the work (especially relevant for large libraries with lots of examples, where otherwise a lot of CPU time would be wasted). The "single output tablet" can still be used, but is now intended to be used as a cache to speed up repeated runs of `rosetta extract` (to skip translating unchanged snippets). This change also adds options to trim the cache files down so they won't grow without bound. This PR also contains a bugfix: `infuse` did not re-inject a copied snippet's metadata into the assembly's `exampleMetadata` field. Strictly speaking, this is not a problem as long as the tablet always stays with the assembly, but it is better to fix it anyway. --- By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license]. 
[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0 --- packages/jsii-rosetta/bin/jsii-rosetta.ts | 126 +++++++---- packages/jsii-rosetta/lib/commands/extract.ts | 87 ++++++-- packages/jsii-rosetta/lib/commands/infuse.ts | 210 ++++++++++++------ .../lib/commands/transliterate.ts | 4 +- .../jsii-rosetta/lib/commands/trim-cache.ts | 32 +++ packages/jsii-rosetta/lib/jsii/assemblies.ts | 65 +++--- .../jsii-rosetta/lib/rosetta-translator.ts | 6 + packages/jsii-rosetta/lib/snippet.ts | 9 + packages/jsii-rosetta/lib/tablets/tablets.ts | 46 +++- packages/jsii-rosetta/lib/typescript/types.ts | 5 +- packages/jsii-rosetta/lib/util.ts | 8 + .../test/commands/extract.test.ts | 153 +++++++++++-- .../jsii-rosetta/test/commands/infuse.test.ts | 39 +++- .../test/commands/trim-cache.test.ts | 85 +++++++ 14 files changed, 675 insertions(+), 200 deletions(-) create mode 100644 packages/jsii-rosetta/lib/commands/trim-cache.ts create mode 100644 packages/jsii-rosetta/test/commands/trim-cache.test.ts diff --git a/packages/jsii-rosetta/bin/jsii-rosetta.ts b/packages/jsii-rosetta/bin/jsii-rosetta.ts index a0af061d98..8fbae7a2a2 100644 --- a/packages/jsii-rosetta/bin/jsii-rosetta.ts +++ b/packages/jsii-rosetta/bin/jsii-rosetta.ts @@ -4,12 +4,13 @@ import * as fs from 'fs-extra'; import * as path from 'path'; import * as yargs from 'yargs'; -import { TranslateResult, DEFAULT_TABLET_NAME, translateTypeScript, RosettaDiagnostic } from '../lib'; +import { TranslateResult, translateTypeScript, RosettaDiagnostic } from '../lib'; import { translateMarkdown } from '../lib/commands/convert'; -import { extractAndInfuse, ExtractResult, extractSnippets } from '../lib/commands/extract'; +import { extractAndInfuse, extractSnippets, ExtractOptions } from '../lib/commands/extract'; import { infuse, DEFAULT_INFUSION_RESULTS_NAME } from '../lib/commands/infuse'; import { readTablet } from '../lib/commands/read'; import { transliterateAssembly } from '../lib/commands/transliterate'; +import { 
trimCache } from '../lib/commands/trim-cache'; import { TargetLanguage } from '../lib/languages'; import { PythonVisitor } from '../lib/languages/python'; import { VisualizeAstVisitor } from '../lib/languages/visualize'; @@ -69,37 +70,31 @@ function main() { '(EXPERIMENTAL) mutates one or more assemblies by adding documentation examples to top-level types', (command) => command - .positional('TABLET', { - type: 'string', - required: true, - describe: 'Language tablet to read', - }) .positional('ASSEMBLY', { type: 'string', string: true, default: new Array(), describe: 'Assembly or directory to mutate', }) - .option('log', { + .option('log-file', { alias: 'l', - type: 'boolean', - describe: 'Test all algorithms and log results to an html file', - default: false, - }) - .option('output', { - alias: 'o', type: 'string', describe: 'Output file to store logging results. Ignored if -log is not true', default: DEFAULT_INFUSION_RESULTS_NAME, }) - .demandOption('TABLET'), + .option('cache-to', { + alias: 'o', + type: 'string', + describe: 'Append all translated snippets to the given tablet file', + requiresArg: true, + default: undefined, + }), wrapHandler(async (args) => { const absAssemblies = (args.ASSEMBLY.length > 0 ? 
args.ASSEMBLY : ['.']).map((x) => path.resolve(x)); - const absOutput = path.resolve(args.output); - const result = await infuse(absAssemblies, args.TABLET, { - outputFile: absOutput, - log: args.log, - tabletOutputFile: args.TABLET, + const cacheToFile = fmap(args['cache-to'], path.resolve); + const result = await infuse(absAssemblies, { + logFile: args['log-file'], + cacheToFile: cacheToFile, }); let totalTypes = 0; @@ -131,10 +126,10 @@ function main() { describe: 'Assembly or directory to extract from', }) .option('output', { - alias: 'o', type: 'string', - describe: 'Output file where to store the sample tablets', - default: DEFAULT_TABLET_NAME, + describe: 'Additional output file where to store translated samples (deprecated, alias for --cache-to)', + requiresArg: true, + default: undefined, }) .option('compile', { alias: 'c', @@ -173,10 +168,33 @@ function main() { alias: 'C', type: 'string', // eslint-disable-next-line prettier/prettier - describe: 'Reuse translations from the given tablet file if the snippet and type definitions did not change', + describe: + 'Reuse translations from the given tablet file if the snippet and type definitions did not change', + requiresArg: true, + default: undefined, + }) + .option('cache-to', { + alias: 'o', + type: 'string', + describe: 'Append all translated snippets to the given tablet file', + requiresArg: true, + default: undefined, + }) + .conflicts('cache-to', 'output') + .option('cache', { + alias: 'k', + type: 'string', + describe: 'Alias for --cache-from and --cache-to together', requiresArg: true, default: undefined, }) + .conflicts('cache', 'cache-from') + .conflicts('cache', 'cache-to') + .option('trim-cache', { + alias: 'T', + type: 'boolean', + describe: 'Remove translations that are not referenced by any of the assemblies anymore from the cache', + }) .option('strict', { alias: 'S', type: 'boolean', @@ -197,30 +215,26 @@ function main() { // compilerhost. 
Have to make all file references absolute before we chdir // though. const absAssemblies = (args.ASSEMBLY.length > 0 ? args.ASSEMBLY : ['.']).map((x) => path.resolve(x)); - const absOutput = path.resolve(args.output); - const absCache = fmap(args['cache-from'], path.resolve); + + const absCacheFrom = fmap(args.cache ?? args['cache-from'], path.resolve); + const absCacheTo = fmap(args.cache ?? args['cache-to'] ?? args.output, path.resolve); + if (args.directory) { process.chdir(args.directory); } - let result: ExtractResult; - if (args.infuse) { - result = await extractAndInfuse(absAssemblies, { - outputFile: absOutput, - includeCompilerDiagnostics: !!args.compile, - validateAssemblies: args['validate-assemblies'], - only: args.include, - cacheTabletFile: absCache, - }); - } else { - result = await extractSnippets(absAssemblies, { - outputFile: absOutput, - includeCompilerDiagnostics: !!args.compile, - validateAssemblies: args['validate-assemblies'], - only: args.include, - cacheTabletFile: absCache, - }); - } + const extractOptions: ExtractOptions = { + includeCompilerDiagnostics: !!args.compile, + validateAssemblies: args['validate-assemblies'], + only: args.include, + cacheFromFile: absCacheFrom, + cacheToFile: absCacheTo, + trimCache: args['trim-cache'], + }; + + const result = args.infuse + ? 
await extractAndInfuse(absAssemblies, extractOptions) + : await extractSnippets(absAssemblies, extractOptions); handleDiagnostics(result.diagnostics, args.fail, result.tablet.count); }), @@ -282,6 +296,30 @@ function main() { return transliterateAssembly(assemblies, languages, args); }), ) + .command( + 'trim-cache [ASSEMBLY..]', + 'Retain only those snippets in the cache which occur in one of the given assemblies', + (command) => + command + .positional('TABLET', { + type: 'string', + required: true, + describe: 'Language tablet to trim', + }) + .positional('ASSEMBLY', { + type: 'string', + string: true, + default: new Array(), + describe: 'Assembly or directory to search', + }) + .demandOption('TABLET'), + wrapHandler(async (args) => { + await trimCache({ + cacheFile: args.TABLET, + assemblyLocations: args.ASSEMBLY, + }); + }), + ) .command( 'read [KEY] [LANGUAGE]', 'Display snippets in a language tablet file', diff --git a/packages/jsii-rosetta/lib/commands/extract.ts b/packages/jsii-rosetta/lib/commands/extract.ts index 5f8c9bf323..b43ac224b8 100644 --- a/packages/jsii-rosetta/lib/commands/extract.ts +++ b/packages/jsii-rosetta/lib/commands/extract.ts @@ -1,10 +1,13 @@ -import { loadAssemblies, allTypeScriptSnippets } from '../jsii/assemblies'; +import * as path from 'path'; + +import { loadAssemblies, allTypeScriptSnippets, loadAllDefaultTablets } from '../jsii/assemblies'; import * as logging from '../logging'; import { RosettaTranslator, RosettaTranslatorOptions } from '../rosetta-translator'; -import { TypeScriptSnippet } from '../snippet'; +import { TypeScriptSnippet, SnippetParameters } from '../snippet'; import { snippetKey } from '../tablets/key'; -import { LanguageTablet } from '../tablets/tablets'; +import { LanguageTablet, DEFAULT_TABLET_NAME } from '../tablets/tablets'; import { RosettaDiagnostic } from '../translate'; +import { groupBy, isDefined } from '../util'; import { infuse } from './infuse'; export interface ExtractResult { @@ -13,15 +16,26 
@@ export interface ExtractResult { } export interface ExtractOptions { - readonly outputFile: string; - readonly includeCompilerDiagnostics: boolean; - readonly validateAssemblies: boolean; + readonly includeCompilerDiagnostics?: boolean; + readonly validateAssemblies?: boolean; readonly only?: string[]; /** * A tablet file to be loaded and used as a source for caching */ - readonly cacheTabletFile?: string; + readonly cacheFromFile?: string; + + /** + * A tablet file to append translated snippets to + */ + readonly cacheToFile?: string; + + /** + * Trim cache to only contain translations found in the current assemblies + * + * @default false + */ + readonly trimCache?: boolean; /** * Make a translator (just for testing) @@ -35,8 +49,9 @@ export async function extractAndInfuse( loose = false, ): Promise { const result = await extractSnippets(assemblyLocations, options, loose); - await infuse(assemblyLocations, options.outputFile, { - tabletOutputFile: options.outputFile, + await infuse(assemblyLocations, { + cacheFromFile: options.cacheFromFile, + cacheToFile: options.cacheToFile, }); return result; } @@ -46,21 +61,28 @@ export async function extractAndInfuse( */ export async function extractSnippets( assemblyLocations: string[], - options: ExtractOptions, + options: ExtractOptions = {}, loose = false, ): Promise { const only = options.only ?? []; logging.info(`Loading ${assemblyLocations.length} assemblies`); - const assemblies = await loadAssemblies(assemblyLocations, options.validateAssemblies); + const assemblies = await loadAssemblies(assemblyLocations, options.validateAssemblies ?? false); let snippets = Array.from(allTypeScriptSnippets(assemblies, loose)); if (only.length > 0) { snippets = filterSnippets(snippets, only); } + // Map every assembly to a list of snippets, so that we know what implicit + // tablet to write the translations to later on. 
+ const snippetsPerAssembly = groupBy( + snippets.map((s) => ({ key: snippetKey(s), location: projectDirectory(s) })), + (x) => x.location, + ); + const translatorOptions: RosettaTranslatorOptions = { - includeCompilerDiagnostics: options.includeCompilerDiagnostics, + includeCompilerDiagnostics: options.includeCompilerDiagnostics ?? false, assemblies: assemblies.map((a) => a.assembly), }; @@ -68,10 +90,14 @@ export async function extractSnippets( ? options.translatorFactory(translatorOptions) : new RosettaTranslator(translatorOptions); - if (options.cacheTabletFile) { - await translator.addToCache(options.cacheTabletFile); + // Prime the snippet cache with: + // - Cache source file + // - Default tablets found next to each assembly + if (options.cacheFromFile) { + await translator.addToCache(options.cacheFromFile); } - await translator.addToCache(options.outputFile); + translator.addTabletsToCache(...Object.values(await loadAllDefaultTablets(assemblies))); + if (translator.hasCache()) { const { translations, remaining } = translator.readFromCache(snippets); logging.info(`Reused ${translations.length} translations from cache`); @@ -96,8 +122,27 @@ export async function extractSnippets( logging.info('Nothing left to translate'); } - logging.info(`Saving language tablet to ${options.outputFile}`); - await translator.tablet.save(options.outputFile); + // Save to individual tablet files, and optionally append to the output file + await Promise.all( + Object.entries(snippetsPerAssembly).map(async ([location, snips]) => { + const asmTabletFile = path.join(location, DEFAULT_TABLET_NAME); + logging.debug(`Writing ${snips.length} translations to ${asmTabletFile}`); + const translations = snips.map(({ key }) => translator.tablet.tryGetSnippet(key)).filter(isDefined); + + const asmTablet = new LanguageTablet(); + asmTablet.addSnippets(...translations); + await asmTablet.save(asmTabletFile); + }), + ); + + if (options.cacheToFile) { + logging.info(`Adding translations to 
${options.cacheToFile}`); + const output = options.trimCache + ? new LanguageTablet() + : await LanguageTablet.fromOptionalFile(options.cacheToFile); + output.addTablet(translator.tablet); + await output.save(options.cacheToFile); + } return { diagnostics, tablet: translator.tablet }; } @@ -108,3 +153,11 @@ export async function extractSnippets( function filterSnippets(ts: TypeScriptSnippet[], includeIds: string[]) { return ts.filter((t) => includeIds.includes(snippetKey(t))); } + +function projectDirectory(ts: TypeScriptSnippet) { + const dir = ts.parameters?.[SnippetParameters.$PROJECT_DIRECTORY]; + if (!dir) { + throw new Error(`Snippet does not have associated project directory: ${JSON.stringify(ts.location)}`); + } + return dir; +} diff --git a/packages/jsii-rosetta/lib/commands/infuse.ts b/packages/jsii-rosetta/lib/commands/infuse.ts index 7962c79fc1..6d20cf5e95 100644 --- a/packages/jsii-rosetta/lib/commands/infuse.ts +++ b/packages/jsii-rosetta/lib/commands/infuse.ts @@ -1,9 +1,19 @@ import * as spec from '@jsii/spec'; import * as fs from 'fs-extra'; +import * as path from 'path'; -import { loadAssemblies, replaceAssembly } from '../jsii/assemblies'; +import { + loadAssemblies, + replaceAssembly, + loadAllDefaultTablets, + LoadedAssembly, + allTypeScriptSnippets, +} from '../jsii/assemblies'; +import { renderMetadataline, TypeScriptSnippet } from '../snippet'; import { SnippetSelector, mean, meanLength, shortest, longest } from '../snippet-selectors'; -import { LanguageTablet, TranslatedSnippet } from '../tablets/tablets'; +import { snippetKey } from '../tablets/key'; +import { LanguageTablet, TranslatedSnippet, DEFAULT_TABLET_NAME } from '../tablets/tablets'; +import { isDefined, mkDict, fmap, indexBy } from '../util'; export interface InfuseResult { readonly coverageResults: Record; @@ -15,14 +25,17 @@ export interface InfuseTypes { } export interface InfuseOptions { - readonly outputFile?: string; + readonly logFile?: string; - readonly log?: boolean; + 
/** + * Where to read additional translations + */ + readonly cacheFromFile?: string; /** - * Where to write the updated tablet back + * In addition to the implicit tablets, also write all added examples to this additional output tablet */ - readonly tabletOutputFile?: string; + readonly cacheToFile?: string; } export const DEFAULT_INFUSION_RESULTS_NAME = 'infusion-results.html'; @@ -40,68 +53,84 @@ class DefaultRecord { } } -export async function infuse( - assemblyLocations: string[], - tabletFile: string, - options?: InfuseOptions, -): Promise { +/** + * Infuse will analyze the snippets in a set of tablets, and update the assembly to add + * examples to types that don't have any yet, based on snippets that use the given type. + */ +export async function infuse(assemblyLocations: string[], options?: InfuseOptions): Promise { let stream: fs.WriteStream | undefined = undefined; - if (options?.log) { - if (!options.outputFile) { - throw new Error("If 'log' is set, 'outputFile' must be set as well."); - } - + if (options?.logFile) { // Create stream for html file and insert some styling - stream = fs.createWriteStream(options.outputFile, { - encoding: 'utf-8', - }); + stream = fs.createWriteStream(options.logFile, { encoding: 'utf-8' }); startFile(stream); } // Load tablet file and assemblies - const tab = new LanguageTablet(); - await tab.load(tabletFile); - const assemblies = await loadAssemblies(assemblyLocations, true); - - const snippetsFromFqn = mapFqns(tab); - const coverageResults: Record = {}; - for (const { assembly, directory } of assemblies) { - stream?.write(`

@aws-cdk/${directory.split('/').pop()}

\n`); - - let typesWithInsertedExamples = 0; - const filteredTypes = filterForTypesWithoutExamples(assembly.types ?? {}); - for (const [typeFqn, type] of Object.entries(filteredTypes)) { - if (snippetsFromFqn[typeFqn] !== undefined) { - const meanResult = mean(snippetsFromFqn[typeFqn]); - if (options?.log) { - const selected = Object.entries(ADDITIONAL_SELECTORS).map( - ([name, fn]) => [name, fn(snippetsFromFqn[typeFqn])] as const, - ); - const selectedFromSelector = { - ...makeDict(selected), - mean: meanResult, - }; - logOutput(stream, typeFqn, createHtmlEntry(selectedFromSelector)); - } - insertExample(meanResult, type, tab); - typesWithInsertedExamples++; - } - } + const assemblies = await loadAssemblies(assemblyLocations, false); + const defaultTablets = await loadAllDefaultTablets(assemblies); - // eslint-disable-next-line no-await-in-loop - await replaceAssembly(assembly, directory); - coverageResults[directory] = { - types: Object.keys(filteredTypes).length, - typesWithInsertedExamples, - }; + const availableTranslations = new LanguageTablet(); + if (options?.cacheFromFile) { + availableTranslations.addTablet(await LanguageTablet.fromOptionalFile(options.cacheFromFile)); } + availableTranslations.addTablets(...Object.values(defaultTablets)); + + const { translationsByFqn, originalsByKey } = availableSnippetsPerFqn(assemblies, availableTranslations); + + const additionalOutputTablet = options?.cacheToFile + ? await LanguageTablet.fromOptionalFile(options?.cacheToFile) + : new LanguageTablet(); + + const coverageResults = mkDict( + await Promise.all( + assemblies.map(async ({ assembly, directory }) => { + stream?.write(`

${assembly.name}

\n`); + + const implicitTablet = defaultTablets[directory]; + if (!implicitTablet) { + throw new Error(`No tablet found for ${directory}`); + } + + let insertedExamples = 0; + const filteredTypes = filterForTypesWithoutExamples(assembly.types ?? {}); + for (const [typeFqn, type] of Object.entries(filteredTypes)) { + const available = translationsByFqn[typeFqn]; + if (!available) { + continue; + } + + const example = pickBestExample(typeFqn, available, stream); + const original = originalsByKey[example.key]; + insertExample(example, original, type, [implicitTablet, additionalOutputTablet]); + insertedExamples++; + } + + if (insertedExamples > 0) { + // Save the updated assembly and implicit tablets + // eslint-disable-next-line no-await-in-loop + await Promise.all([ + replaceAssembly(assembly, directory), + implicitTablet.save(path.join(directory, DEFAULT_TABLET_NAME)), + ]); + } + + return [ + directory, + { + types: Object.keys(filteredTypes).length, + typesWithInsertedExamples: insertedExamples, + } as InfuseTypes, + ] as const; + }), + ), + ); stream?.close(); // If we copied examples onto different types, we'll also have inserted new snippets // with different keys into the tablet. We must now write the updated tablet somewhere. - if (options?.tabletOutputFile) { - await tab.save(options.tabletOutputFile); + if (options?.cacheToFile) { + await additionalOutputTablet.save(options.cacheToFile); } return { @@ -109,6 +138,19 @@ export async function infuse( }; } +function pickBestExample(typeFqn: string, choices: TranslatedSnippet[], logStream?: fs.WriteStream) { + const meanResult = mean(choices); + if (logStream) { + const selected = Object.entries(ADDITIONAL_SELECTORS).map(([name, fn]) => [name, fn(choices)] as const); + const selectedFromSelector = { + ...makeDict(selected), + mean: meanResult, + }; + logOutput(logStream, typeFqn, createHtmlEntry(selectedFromSelector)); + } + return meanResult; +} + function startFile(stream: fs.WriteStream) { stream.write('