Skip to content

Commit

Permalink
feat(rosetta): generate rosetta tablets next to each assembly (#3223)
Browse files Browse the repository at this point in the history
Rather than writing our Rosetta translations to a single tablet file that
we throw away at the end of the build, `rosetta extract` will now write
translations to individual `.jsii.tabl.json` files, located next to
the assemblies themselves.

Construct library authors can publish these to NPM, so that downstream
tools that process JSII modules for documentation have access to the
translations and don't need to redo the work (especially relevant
for large libraries with lots of examples, where otherwise a lot
of CPU time would be wasted).

The "single output tablet" can still be used, but is now intended to be
used as a cache to speed up repeated runs of `rosetta extract` (to skip
translating unchanged snippets). This PR also adds options to trim the
cache files down so they won't grow without bound.

This PR also contains a bugfix: `infuse` did not re-inject a copied
snippet's metadata into the assembly's `exampleMetadata` field. Strictly
speaking, this is not a problem as long as the tablet always stays
with the assembly, but it is better to fix it anyway.



---

By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license].

[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0
  • Loading branch information
rix0rrr authored Dec 3, 2021
1 parent 937f8c3 commit 1e7b604
Show file tree
Hide file tree
Showing 14 changed files with 675 additions and 200 deletions.
126 changes: 82 additions & 44 deletions packages/jsii-rosetta/bin/jsii-rosetta.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ import * as fs from 'fs-extra';
import * as path from 'path';
import * as yargs from 'yargs';

import { TranslateResult, DEFAULT_TABLET_NAME, translateTypeScript, RosettaDiagnostic } from '../lib';
import { TranslateResult, translateTypeScript, RosettaDiagnostic } from '../lib';
import { translateMarkdown } from '../lib/commands/convert';
import { extractAndInfuse, ExtractResult, extractSnippets } from '../lib/commands/extract';
import { extractAndInfuse, extractSnippets, ExtractOptions } from '../lib/commands/extract';
import { infuse, DEFAULT_INFUSION_RESULTS_NAME } from '../lib/commands/infuse';
import { readTablet } from '../lib/commands/read';
import { transliterateAssembly } from '../lib/commands/transliterate';
import { trimCache } from '../lib/commands/trim-cache';
import { TargetLanguage } from '../lib/languages';
import { PythonVisitor } from '../lib/languages/python';
import { VisualizeAstVisitor } from '../lib/languages/visualize';
Expand Down Expand Up @@ -69,37 +70,31 @@ function main() {
'(EXPERIMENTAL) mutates one or more assemblies by adding documentation examples to top-level types',
(command) =>
command
.positional('TABLET', {
type: 'string',
required: true,
describe: 'Language tablet to read',
})
.positional('ASSEMBLY', {
type: 'string',
string: true,
default: new Array<string>(),
describe: 'Assembly or directory to mutate',
})
.option('log', {
.option('log-file', {
alias: 'l',
type: 'boolean',
describe: 'Test all algorithms and log results to an html file',
default: false,
})
.option('output', {
alias: 'o',
type: 'string',
describe: 'Output file to store logging results. Ignored if -log is not true',
default: DEFAULT_INFUSION_RESULTS_NAME,
})
.demandOption('TABLET'),
.option('cache-to', {
alias: 'o',
type: 'string',
describe: 'Append all translated snippets to the given tablet file',
requiresArg: true,
default: undefined,
}),
wrapHandler(async (args) => {
const absAssemblies = (args.ASSEMBLY.length > 0 ? args.ASSEMBLY : ['.']).map((x) => path.resolve(x));
const absOutput = path.resolve(args.output);
const result = await infuse(absAssemblies, args.TABLET, {
outputFile: absOutput,
log: args.log,
tabletOutputFile: args.TABLET,
const cacheToFile = fmap(args['cache-to'], path.resolve);
const result = await infuse(absAssemblies, {
logFile: args['log-file'],
cacheToFile: cacheToFile,
});

let totalTypes = 0;
Expand Down Expand Up @@ -131,10 +126,10 @@ function main() {
describe: 'Assembly or directory to extract from',
})
.option('output', {
alias: 'o',
type: 'string',
describe: 'Output file where to store the sample tablets',
default: DEFAULT_TABLET_NAME,
describe: 'Additional output file where to store translated samples (deprecated, alias for --cache-to)',
requiresArg: true,
default: undefined,
})
.option('compile', {
alias: 'c',
Expand Down Expand Up @@ -173,10 +168,33 @@ function main() {
alias: 'C',
type: 'string',
// eslint-disable-next-line prettier/prettier
describe: 'Reuse translations from the given tablet file if the snippet and type definitions did not change',
describe:
'Reuse translations from the given tablet file if the snippet and type definitions did not change',
requiresArg: true,
default: undefined,
})
.option('cache-to', {
alias: 'o',
type: 'string',
describe: 'Append all translated snippets to the given tablet file',
requiresArg: true,
default: undefined,
})
.conflicts('cache-to', 'output')
.option('cache', {
alias: 'k',
type: 'string',
describe: 'Alias for --cache-from and --cache-to together',
requiresArg: true,
default: undefined,
})
.conflicts('cache', 'cache-from')
.conflicts('cache', 'cache-to')
.option('trim-cache', {
alias: 'T',
type: 'boolean',
describe: 'Remove translations that are not referenced by any of the assemblies anymore from the cache',
})
.option('strict', {
alias: 'S',
type: 'boolean',
Expand All @@ -197,30 +215,26 @@ function main() {
// compilerhost. Have to make all file references absolute before we chdir
// though.
const absAssemblies = (args.ASSEMBLY.length > 0 ? args.ASSEMBLY : ['.']).map((x) => path.resolve(x));
const absOutput = path.resolve(args.output);
const absCache = fmap(args['cache-from'], path.resolve);

const absCacheFrom = fmap(args.cache ?? args['cache-from'], path.resolve);
const absCacheTo = fmap(args.cache ?? args['cache-to'] ?? args.output, path.resolve);

if (args.directory) {
process.chdir(args.directory);
}

let result: ExtractResult;
if (args.infuse) {
result = await extractAndInfuse(absAssemblies, {
outputFile: absOutput,
includeCompilerDiagnostics: !!args.compile,
validateAssemblies: args['validate-assemblies'],
only: args.include,
cacheTabletFile: absCache,
});
} else {
result = await extractSnippets(absAssemblies, {
outputFile: absOutput,
includeCompilerDiagnostics: !!args.compile,
validateAssemblies: args['validate-assemblies'],
only: args.include,
cacheTabletFile: absCache,
});
}
const extractOptions: ExtractOptions = {
includeCompilerDiagnostics: !!args.compile,
validateAssemblies: args['validate-assemblies'],
only: args.include,
cacheFromFile: absCacheFrom,
cacheToFile: absCacheTo,
trimCache: args['trim-cache'],
};

const result = args.infuse
? await extractAndInfuse(absAssemblies, extractOptions)
: await extractSnippets(absAssemblies, extractOptions);

handleDiagnostics(result.diagnostics, args.fail, result.tablet.count);
}),
Expand Down Expand Up @@ -282,6 +296,30 @@ function main() {
return transliterateAssembly(assemblies, languages, args);
}),
)
.command(
'trim-cache <TABLET> [ASSEMBLY..]',
'Retain only those snippets in the cache which occur in one of the given assemblies',
(command) =>
command
.positional('TABLET', {
type: 'string',
required: true,
describe: 'Language tablet to trim',
})
.positional('ASSEMBLY', {
type: 'string',
string: true,
default: new Array<string>(),
describe: 'Assembly or directory to search',
})
.demandOption('TABLET'),
wrapHandler(async (args) => {
await trimCache({
cacheFile: args.TABLET,
assemblyLocations: args.ASSEMBLY,
});
}),
)
.command(
'read <TABLET> [KEY] [LANGUAGE]',
'Display snippets in a language tablet file',
Expand Down
87 changes: 70 additions & 17 deletions packages/jsii-rosetta/lib/commands/extract.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import { loadAssemblies, allTypeScriptSnippets } from '../jsii/assemblies';
import * as path from 'path';

import { loadAssemblies, allTypeScriptSnippets, loadAllDefaultTablets } from '../jsii/assemblies';
import * as logging from '../logging';
import { RosettaTranslator, RosettaTranslatorOptions } from '../rosetta-translator';
import { TypeScriptSnippet } from '../snippet';
import { TypeScriptSnippet, SnippetParameters } from '../snippet';
import { snippetKey } from '../tablets/key';
import { LanguageTablet } from '../tablets/tablets';
import { LanguageTablet, DEFAULT_TABLET_NAME } from '../tablets/tablets';
import { RosettaDiagnostic } from '../translate';
import { groupBy, isDefined } from '../util';
import { infuse } from './infuse';

export interface ExtractResult {
Expand All @@ -13,15 +16,26 @@ export interface ExtractResult {
}

export interface ExtractOptions {
readonly outputFile: string;
readonly includeCompilerDiagnostics: boolean;
readonly validateAssemblies: boolean;
readonly includeCompilerDiagnostics?: boolean;
readonly validateAssemblies?: boolean;
readonly only?: string[];

/**
* A tablet file to be loaded and used as a source for caching
*/
readonly cacheTabletFile?: string;
readonly cacheFromFile?: string;

/**
* A tablet file to append translated snippets to
*/
readonly cacheToFile?: string;

/**
* Trim cache to only contain translations found in the current assemblies
*
* @default false
*/
readonly trimCache?: boolean;

/**
* Make a translator (just for testing)
Expand All @@ -35,8 +49,9 @@ export async function extractAndInfuse(
loose = false,
): Promise<ExtractResult> {
const result = await extractSnippets(assemblyLocations, options, loose);
await infuse(assemblyLocations, options.outputFile, {
tabletOutputFile: options.outputFile,
await infuse(assemblyLocations, {
cacheFromFile: options.cacheFromFile,
cacheToFile: options.cacheToFile,
});
return result;
}
Expand All @@ -46,32 +61,43 @@ export async function extractAndInfuse(
*/
export async function extractSnippets(
assemblyLocations: string[],
options: ExtractOptions,
options: ExtractOptions = {},
loose = false,
): Promise<ExtractResult> {
const only = options.only ?? [];

logging.info(`Loading ${assemblyLocations.length} assemblies`);
const assemblies = await loadAssemblies(assemblyLocations, options.validateAssemblies);
const assemblies = await loadAssemblies(assemblyLocations, options.validateAssemblies ?? false);

let snippets = Array.from(allTypeScriptSnippets(assemblies, loose));
if (only.length > 0) {
snippets = filterSnippets(snippets, only);
}

// Map every assembly to a list of snippets, so that we know what implicit
// tablet to write the translations to later on.
const snippetsPerAssembly = groupBy(
snippets.map((s) => ({ key: snippetKey(s), location: projectDirectory(s) })),
(x) => x.location,
);

const translatorOptions: RosettaTranslatorOptions = {
includeCompilerDiagnostics: options.includeCompilerDiagnostics,
includeCompilerDiagnostics: options.includeCompilerDiagnostics ?? false,
assemblies: assemblies.map((a) => a.assembly),
};

const translator = options.translatorFactory
? options.translatorFactory(translatorOptions)
: new RosettaTranslator(translatorOptions);

if (options.cacheTabletFile) {
await translator.addToCache(options.cacheTabletFile);
// Prime the snippet cache with:
// - Cache source file
// - Default tablets found next to each assembly
if (options.cacheFromFile) {
await translator.addToCache(options.cacheFromFile);
}
await translator.addToCache(options.outputFile);
translator.addTabletsToCache(...Object.values(await loadAllDefaultTablets(assemblies)));

if (translator.hasCache()) {
const { translations, remaining } = translator.readFromCache(snippets);
logging.info(`Reused ${translations.length} translations from cache`);
Expand All @@ -96,8 +122,27 @@ export async function extractSnippets(
logging.info('Nothing left to translate');
}

logging.info(`Saving language tablet to ${options.outputFile}`);
await translator.tablet.save(options.outputFile);
// Save to individual tablet files, and optionally append to the output file
await Promise.all(
Object.entries(snippetsPerAssembly).map(async ([location, snips]) => {
const asmTabletFile = path.join(location, DEFAULT_TABLET_NAME);
logging.debug(`Writing ${snips.length} translations to ${asmTabletFile}`);
const translations = snips.map(({ key }) => translator.tablet.tryGetSnippet(key)).filter(isDefined);

const asmTablet = new LanguageTablet();
asmTablet.addSnippets(...translations);
await asmTablet.save(asmTabletFile);
}),
);

if (options.cacheToFile) {
logging.info(`Adding translations to ${options.cacheToFile}`);
const output = options.trimCache
? new LanguageTablet()
: await LanguageTablet.fromOptionalFile(options.cacheToFile);
output.addTablet(translator.tablet);
await output.save(options.cacheToFile);
}

return { diagnostics, tablet: translator.tablet };
}
Expand All @@ -108,3 +153,11 @@ export async function extractSnippets(
/**
 * Keep only the snippets whose key appears in `includeIds`.
 *
 * @param ts the candidate snippets
 * @param includeIds snippet keys (as computed by `snippetKey`) to retain
 * @returns the snippets from `ts`, in their original order, whose key is listed
 */
function filterSnippets(ts: TypeScriptSnippet[], includeIds: string[]) {
  // Build the lookup once so that each snippet costs a single set probe
  // instead of a linear scan over `includeIds`.
  const wantedKeys = new Set(includeIds);
  return ts.filter((t) => wantedKeys.has(snippetKey(t)));
}

/**
 * Look up the project directory recorded in a snippet's parameters.
 *
 * @param ts the snippet to inspect
 * @returns the value stored under `SnippetParameters.$PROJECT_DIRECTORY`
 * @throws Error if the snippet has no parameters, or the entry is missing or empty
 */
function projectDirectory(ts: TypeScriptSnippet) {
  const projectDir = ts.parameters?.[SnippetParameters.$PROJECT_DIRECTORY];
  if (projectDir) {
    return projectDir;
  }
  // Include the snippet's location in the message so the offending snippet can be found.
  throw new Error(`Snippet does not have associated project directory: ${JSON.stringify(ts.location)}`);
}
Loading

0 comments on commit 1e7b604

Please sign in to comment.