Skip to content

Commit

Permalink
Merge pull request #1410 from microsoft/octogonz/ae-name-escaping
Browse files Browse the repository at this point in the history
[api-extractor] ApiItem.name is now quoted when it contains invalid identifier characters
  • Loading branch information
octogonz authored Jul 23, 2019
2 parents c81518d + 793f139 commit ae327a5
Show file tree
Hide file tree
Showing 24 changed files with 491 additions and 45 deletions.
6 changes: 3 additions & 3 deletions apps/api-documenter/src/documenters/MarkdownDocumenter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,7 @@ export class MarkdownDocumenter {
let baseName: string = '';
for (const hierarchyItem of apiItem.getHierarchy()) {
// For overloaded methods, add a suffix such as "MyClass.myMethod_2".
let qualifiedName: string = hierarchyItem.displayName;
let qualifiedName: string = Utilities.getSafeFilenameForName(hierarchyItem.displayName);
if (ApiParameterListMixin.isBaseClassOf(hierarchyItem)) {
if (hierarchyItem.overloadIndex > 1) {
// Subtract one for compatibility with earlier releases of API Documenter.
Expand All @@ -805,13 +805,13 @@ export class MarkdownDocumenter {
case ApiItemKind.EntryPoint:
break;
case ApiItemKind.Package:
baseName = PackageName.getUnscopedName(hierarchyItem.displayName);
baseName = Utilities.getSafeFilenameForName(PackageName.getUnscopedName(hierarchyItem.displayName));
break;
default:
baseName += '.' + qualifiedName;
}
}
return baseName.toLowerCase() + '.md';
return baseName + '.md';
}

private _getLinkFilenameForApiItem(apiItem: ApiItem): string {
Expand Down
6 changes: 3 additions & 3 deletions apps/api-documenter/src/documenters/YamlDocumenter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -693,19 +693,19 @@ export class YamlDocumenter {
case ApiItemKind.EntryPoint:
break;
case ApiItemKind.Package:
result += PackageName.getUnscopedName(current.displayName);
result += Utilities.getSafeFilenameForName(PackageName.getUnscopedName(current.displayName));
break;
default:
if (current.parent && current.parent.kind === ApiItemKind.EntryPoint) {
result += '/';
} else {
result += '.';
}
result += current.displayName;
result += Utilities.getSafeFilenameForName(current.displayName);
break;
}
}
return path.join(this._outputFolder, result.toLowerCase() + '.yml');
return path.join(this._outputFolder, result + '.yml');
}

private _deleteOldOutputFiles(): void {
Expand Down
10 changes: 10 additions & 0 deletions apps/api-documenter/src/utils/Utilities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
} from '@microsoft/api-extractor-model';

export class Utilities {
private static readonly _badFilenameCharsRegExp: RegExp = /[^a-z0-9_\-\.]/ig;
/**
* Generates a concise signature for a function. Example: "getArea(width, height)"
*/
Expand All @@ -16,4 +17,13 @@ export class Utilities {
}
return apiItem.displayName;
}

/**
* Converts bad filename characters to underscores.
*/
public static getSafeFilenameForName(name: string): string {
// TODO: This can introduce naming collisions.
// We will fix that as part of https://github.com/microsoft/web-build-tools/issues/1308
return name.replace(Utilities._badFilenameCharsRegExp, '_').toLowerCase();
}
}
113 changes: 84 additions & 29 deletions apps/api-extractor/src/analyzer/AstSymbolTable.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { ExportAnalyzer } from './ExportAnalyzer';
import { AstImport } from './AstImport';
import { MessageRouter } from '../collector/MessageRouter';
import { TypeScriptInternals } from './TypeScriptInternals';
import { StringChecks } from './StringChecks';

export type AstEntity = AstSymbol | AstImport;

Expand Down Expand Up @@ -210,6 +211,88 @@ export class AstSymbolTable {
return this._entitiesByIdentifierNode.get(identifier);
}

/**
* Builds an AstSymbol.localName for a given ts.Symbol. In the current implementation, the localName is
* a TypeScript-like expression that may be a string literal or ECMAScript symbol expression.
*
* ```ts
* class X {
* // localName="identifier"
* public identifier: number = 1;
* // localName="\"identifier\""
* public "quoted string!": number = 2;
* // localName="[MyNamespace.MySymbol]"
* public [MyNamespace.MySymbol]: number = 3;
* }
* ```
*/
public static getLocalNameForSymbol(symbol: ts.Symbol): string {
const symbolName: string = symbol.name;

// TypeScript binds well-known ECMAScript symbols like "[Symbol.iterator]" as "__@iterator".
// Decode it back into "[Symbol.iterator]".
const wellKnownSymbolName: string | undefined = TypeScriptHelpers.tryDecodeWellKnownSymbolName(symbolName);
if (wellKnownSymbolName) {
return wellKnownSymbolName;
}

const isUniqueSymbol: boolean = TypeScriptHelpers.isUniqueSymbolName(symbolName);

// We will try to obtain the name from a declaration; otherwise we'll fall back to the symbol name.
let unquotedName: string = symbolName;

for (const declaration of symbol.declarations || []) {
// Handle cases such as "export default class X { }" where the symbol name is "default"
// but the local name is "X".
const localSymbol: ts.Symbol | undefined = TypeScriptInternals.tryGetLocalSymbol(declaration);
if (localSymbol) {
unquotedName = localSymbol.name;
}

// If it is a non-well-known symbol, then return the late-bound name. For example, "X.Y.z" in this example:
//
// namespace X {
// export namespace Y {
// export const z: unique symbol = Symbol("z");
// }
// }
//
// class C {
// public [X.Y.z](): void { }
// }
//
if (isUniqueSymbol) {
const declarationName: ts.DeclarationName | undefined = ts.getNameOfDeclaration(declaration);
if (declarationName && ts.isComputedPropertyName(declarationName)) {
const lateBoundName: string | undefined = TypeScriptHelpers.tryGetLateBoundName(declarationName);
if (lateBoundName) {
// Here the string may contain an expression such as "[X.Y.z]". Names starting with "[" are always
// expressions. If a string literal contains those characters, the code below will JSON.stringify() it
// to avoid a collision.
return lateBoundName;
}
}
}
}

// Otherwise that name may come from a quoted string or pseudonym like `__constructor`.
// If the string is not a safe identifier, then we must add quotes.
// Note that if it was quoted but did not need to be quoted, here we will remove the quotes.
if (!StringChecks.isSafeUnquotedMemberIdentifier(unquotedName)) {
// For API Extractor's purposes, a canonical form is more appropriate than trying to reflect whatever
// appeared in the source code. The code is not even guaranteed to be consistent, for example:
//
// class X {
// public "f1"(x: string): void;
// public f1(x: boolean): void;
// public 'f1'(x: string | boolean): void { }
// }
return JSON.stringify(unquotedName);
}

return unquotedName;
}

/**
* Used by analyze to recursively analyze the entire child tree.
*/
Expand Down Expand Up @@ -425,35 +508,7 @@ export class AstSymbolTable {
}
}

let localName: string | undefined = options.localName;

if (localName === undefined) {
// We will try to obtain the name from a declaration; otherwise we'll fall back to the symbol name
// This handles cases such as "export default class X { }" where the symbol name is "default"
// but the declaration name is "X".
localName = followedSymbol.name;
if (TypeScriptHelpers.isWellKnownSymbolName(localName)) {
// TypeScript binds well-known ECMAScript symbols like "Symbol.iterator" as "__@iterator".
// This converts a string like "__@iterator" into the property name "[Symbol.iterator]".
localName = `[Symbol.${localName.slice(3)}]`;
} else {
const isUniqueSymbol: boolean = TypeScriptHelpers.isUniqueSymbolName(localName);
for (const declaration of followedSymbol.declarations || []) {
const declarationName: ts.DeclarationName | undefined = ts.getNameOfDeclaration(declaration);
if (declarationName && ts.isIdentifier(declarationName)) {
localName = declarationName.getText().trim();
break;
}
if (isUniqueSymbol && declarationName && ts.isComputedPropertyName(declarationName)) {
const lateBoundName: string | undefined = TypeScriptHelpers.tryGetLateBoundName(declarationName);
if (lateBoundName) {
localName = lateBoundName;
break;
}
}
}
}
}
const localName: string | undefined = options.localName || AstSymbolTable.getLocalNameForSymbol(followedSymbol);

astSymbol = new AstSymbol({
followedSymbol: followedSymbol,
Expand Down
51 changes: 51 additions & 0 deletions apps/api-extractor/src/analyzer/StringChecks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.
// See LICENSE in the project root for license information.

/**
* Helpers for validating various text string formats.
*/
export class StringChecks {
// Note: In addition to letters, numbers, underscores, and dollar signs, modern ECMAScript
// also allows Unicode categories such as letters, combining marks, digits, and connector punctuation.
// These are mostly supported in all environments except IE11, so if someone wants it, we would accept
// a PR to allow them (although the test surface might be somewhat large).
private static readonly _identifierBadCharRegExp: RegExp = /[^a-z0-9_$]/i;

// Identifiers most not start with a number.
private static readonly _identifierNumberStartRegExp: RegExp = /^[0-9]/;

/**
* Tests whether the input string is safe to use as an ECMAScript identifier without quotes.
*
* @remarks
* For example:
*
* ```ts
* class X {
* public okay: number = 1;
* public "not okay!": number = 2;
* }
* ```
*
* A precise check is extremely complicated and highly dependent on the ECMAScript standard version
* and how faithfully the interpreter implements it. To keep things simple, `isValidUnquotedIdentifier()`
* conservatively checks for basic alphanumeric identifiers and returns false otherwise.
*
* Based on `StringChecks.explainIfInvalidUnquotedIdentifier()` from TSDoc.
*/
public static isSafeUnquotedMemberIdentifier(identifier: string): boolean {
if (identifier.length === 0) {
return false; // cannot be empty
}

if (StringChecks._identifierBadCharRegExp.test(identifier)) {
return false; // cannot contain bad characters
}

if (StringChecks._identifierNumberStartRegExp.test(identifier)) {
return false; // cannot start with a number
}

return true;
}
}
18 changes: 14 additions & 4 deletions apps/api-extractor/src/analyzer/TypeScriptHelpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,23 @@ export class TypeScriptHelpers {

// Matches TypeScript's encoded names for well-known ECMAScript symbols like
// "__@iterator" or "__@toStringTag".
private static readonly _wellKnownSymbolNameRegExp: RegExp = /^__@\w+$/;
private static readonly _wellKnownSymbolNameRegExp: RegExp = /^__@(\w+)$/;

/**
* Returns whether the provided name was generated for a built-in ECMAScript symbol.
* Decodes the names that the compiler generates for a built-in ECMAScript symbol.
*
* @remarks
* TypeScript binds well-known ECMAScript symbols like `[Symbol.iterator]` as `__@iterator`.
* If `name` is of this form, then `tryGetWellKnownSymbolName()` converts it back into e.g. `[Symbol.iterator]`.
* If the string does not start with `__@` then `undefined` is returned.
*/
public static isWellKnownSymbolName(name: string): boolean {
return TypeScriptHelpers._wellKnownSymbolNameRegExp.test(name);
public static tryDecodeWellKnownSymbolName(name: string): string | undefined {
const match: RegExpExecArray | null = TypeScriptHelpers._wellKnownSymbolNameRegExp.exec(name);
if (match) {
const identifier: string = match[1];
return `[Symbol.${identifier}]`;
}
return undefined;
}

// Matches TypeScript's encoded names for late-bound symbols derived from `unique symbol` declarations
Expand Down
9 changes: 9 additions & 0 deletions apps/api-extractor/src/analyzer/TypeScriptInternals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,13 @@ export class TypeScriptInternals {
public static getSymbolParent(symbol: ts.Symbol): ts.Symbol | undefined {
return (symbol as any).parent;
}

/**
* In an statement like `export default class X { }`, the `Symbol.name` will be `default`
* whereas the `localSymbol` is `X`.
*/
public static tryGetLocalSymbol(declaration: ts.Declaration): ts.Symbol | undefined {
return (declaration as any).localSymbol;
}

}
Loading

0 comments on commit ae327a5

Please sign in to comment.