Adds SafeHtml module.

This represents a "safe by construction" object, similar to `TrustedHTML`. When given an instance of type `SafeHtml`, its content can be trusted and rendered to the end user. Currently there are two ways to construct a `SafeHtml` object: 1. `safe` - A tagged template function which asserts that no arguments are given and therefore the template is a hard-coded constant. 2. `unsafeTreatStringAsSafeHtml()` - A function which takes unsafe string content and constructs a `SafeHtml` object from it. The latter option is named to be clearly unsafe and performs no sanitization on its inputs. This is intended to be used only on the output of other trusted template engines such as `lit-html`. I deliberately kept the `SafeHtml` class private so it is not easy to get a reference to it (though still possible) and froze all the relevant symbols to try to limit potential vulnerabilities. I also exported the unsafe function from a separate file with a separate Bazel visibility list, which should hopefully help mitigate its usage. Unfortunately it still needs to be exported from `rules_prerender`, but hopefully the name is enough to make people uncomfortable with using it.
dgp1130 · Mar 11, 2023 · 0bf6e70 · 0bf6e70
1 parent 04daf66
commit 0bf6e70
Show file tree

Hide file tree

Showing 11 changed files with 288 additions and 13 deletions.
diff --git a/common/models/BUILD.bazel b/common/models/BUILD.bazel
@@ -63,7 +63,10 @@ ts_project(
     name = "prerender_resource",
     srcs = ["prerender_resource.mts"],
     visibility = ["//:__subpackages__"],
-    deps = [":url_path"],
+    deps = [
+        ":url_path",
+        "//common/safe_html",
+    ],
 )
 
 ts_project(
@@ -72,6 +75,7 @@ ts_project(
     testonly = True,
     deps = [
         ":prerender_resource",
+        "//common/safe_html",
         "//:node_modules/@types/jasmine",
     ],
 )

diff --git a/common/models/prerender_resource.mts b/common/models/prerender_resource.mts
@@ -1,3 +1,4 @@
+import { SafeHtml, isSafeHtml } from '../safe_html/safe_html.mjs';
 import { UrlPath } from './url_path.mjs';
 
 /** Represents a resource to be rendered / generated at a particular path. */
@@ -26,13 +27,13 @@ export class PrerenderResource {
      * 
      * @param path The path the file will be generated at relative to the final
      *     generated site. Must begin with a `/` character.
-     * @param contents A `string`, {@link ArrayBuffer}, or {@link TypedArray}
-     *     object with the file contents of the resource. If a string is given,
-     *     it is encoded in UTF-8. If an {@link ArrayBuffer} or
-     *     {@link TypedArray} is given, it used as is.
+     * @param contents A {@link SafeHtml}, {@link ArrayBuffer}, or
+     *     {@link TypedArray} object with the file contents of the resource. If
+     *     {@link SafeHtml} is given, it is encoded as a UTF-8 string. If an
+     *     {@link ArrayBuffer} or {@link TypedArray} is given, it is used as is.
      * @returns A `PrerenderResource` object representing the resource.
      */
-    public static of(path: string, contents: string | ArrayBuffer | TypedArray):
+    public static of(path: string, contents: string | SafeHtml | ArrayBuffer | TypedArray):
             PrerenderResource {
         return new PrerenderResource({
             urlPath: UrlPath.of(path),
@@ -44,8 +45,8 @@ export class PrerenderResource {
 /**
  * Accepts various input types and normalizes them to a simple
  * {@link ArrayBuffer} representing the input content. If the input is a
- * `string`, it will be encoded as UTF-8. If the input is an {@link ArrayBuffer}
- * or a {@link TypedArray}, its content is used as is.
+ * {@link SafeHtml}, it will be encoded as a UTF-8 string. If the input is an
+ * {@link ArrayBuffer} or a {@link TypedArray}, its content is used as is.
  * 
  * NOTE: {@link TypedArray} does **not** extend {@link ArrayBuffer}, however
  * they are unfortunately compatible from a structural typing perspective, so
@@ -54,18 +55,23 @@ export class PrerenderResource {
  * This is an easy foot-gun for users to encounter, so we should support such
  * inputs as a result.
  */
-function normalizeContents(contents: string | ArrayBuffer | TypedArray):
+function normalizeContents(contents: string | SafeHtml | ArrayBuffer | TypedArray):
         ArrayBuffer {
     if (contents instanceof ArrayBuffer) return contents;
     if (typeof contents === 'string') {
         return new TextEncoder().encode(contents).buffer;
     }
     if (isTypedArray(contents)) return contents.buffer;
+    if (isSafeHtml(contents)) {
+        return new TextEncoder().encode(contents.getHtmlAsString()).buffer;
+    }
 
     // Should never happen if TypeScript types are respected, but JavaScript
     // users or unsound input types may hit this case.
     throw new Error(
-        `Input is not a string, ArrayBuffer, or TypedArray:\n${contents}`);
+        `Input is not a \`SafeHtml\`, \`ArrayBuffer\`, or \`TypedArray\`:\n${
+            contents}`,
+    );
 }
 
 /**

diff --git a/common/models/prerender_resource_test.mts b/common/models/prerender_resource_test.mts
@@ -1,3 +1,4 @@
+import { safe } from '../safe_html/safe_html.mjs';
 import { PrerenderResource } from './prerender_resource.mjs';
 
 describe('PrerenderResource', () => {
@@ -9,6 +10,14 @@ describe('PrerenderResource', () => {
             expect(new TextDecoder().decode(res.contents)).toBe('Hello World!');
         });
 
+        it('returns a `PrerenderResource` from `SafeHtml` data', () => {
+            const res = PrerenderResource.of(
+                '/foo/bar.html', safe`<div></div>`);
+
+            expect(res.path).toBe('/foo/bar.html');
+            expect(new TextDecoder().decode(res.contents)).toBe('<div></div>');
+        });
+
         it('returns a `PrerenderResource` from binary data', () => {
             const res = PrerenderResource.of(
                 '/foo/bar.html', new Uint8Array([ 0, 1, 2, 3 ]));

diff --git a/common/safe_html/BUILD.bazel b/common/safe_html/BUILD.bazel
@@ -0,0 +1,38 @@
+load("//tools/jasmine:defs.bzl", "jasmine_node_test")
+load("//tools/typescript:defs.bzl", "ts_project")
+
+ts_project(
+    name = "safe_html",
+    srcs = ["safe_html.mts"],
+    visibility = ["//visibility:public"],
+    deps = [":safe_html_impl"],
+)
+
+ts_project(
+    name = "unsafe_html",
+    srcs = ["unsafe_html.mts"],
+    visibility = [
+        "//packages/rules_prerender:__pkg__",
+    ],
+    deps = [":safe_html_impl"],
+)
+
+ts_project(
+    name = "safe_html_impl",
+    srcs = ["safe_html_impl.mts"],
+)
+
+ts_project(
+    name = "safe_html_impl_test_lib",
+    srcs = ["safe_html_impl_test.mts"],
+    testonly = True,
+    deps = [
+        ":safe_html_impl",
+        "//:node_modules/@types/jasmine",
+    ],
+)
+
+jasmine_node_test(
+    name = "safe_html_impl_test",
+    deps = [":safe_html_impl_test_lib"],
+)
diff --git a/common/safe_html/safe_html.mts b/common/safe_html/safe_html.mts
@@ -0,0 +1 @@
+export { SafeHtml, isSafeHtml, safe } from './safe_html_impl.mjs';
diff --git a/common/safe_html/safe_html_impl.mts b/common/safe_html/safe_html_impl.mts
@@ -0,0 +1,107 @@
+/**
+ * @fileoverview Contains the {@link SafeHtml} definition for representing
+ * sanitized HTML content as well as some helper utilities for manipulating the
+ * object.
+ */
+
+/**
+ * Tagged template function which returns the given string literal as a
+ * {@link SafeHtml} object. Interpolations are not supported and throw if used.
+ * 
+ * Usage:
+ * 
+ * ```typescript
+ * const safeHtml: SafeHtml = safe`<div>Hello, World!</div>`;
+ * ```
+ * 
+ * This is safe because tagged template literals are invoked with
+ * spec-guaranteed separation between string literals and template
+ * interpolations. String literals are hard-coded by the developer and cannot
+ * have content injected by malicious actors. This means string literals can be
+ * trusted as safe HTML as long as there are no interpolations (or they are
+ * properly sanitized).
+ */
+export function safe(literals: TemplateStringsArray, ...args: unknown[]):
+        SafeHtml {
+    if (args.length !== 0) {
+        throw new Error(`\`safe\` template literal only supports a raw string, no interpolations.`);
+    }
+
+    return unsafeTreatStringAsSafeHtml(literals.join(''));
+}
+
+/**
+ * Converts the given raw, untrusted string into trusted {@link SafeHtml}. This
+ * is an **UNSAFE** operation because it converts untrusted content (a raw
+ * string) into trusted HTML. If an attacker compromises the input, they will be
+ * able to inject malicious content into the HTML. This should *only* be used
+ * when the input is coming from another trusted format (such as a Lit
+ * `TemplateResult`) and *never* on end user input.
+ */
+export function unsafeTreatStringAsSafeHtml(unsanitizedString: string):
+        SafeHtml {
+    return SafeHtmlImpl.unsafeTrustRawStringContent(unsanitizedString);
+}
+
+/**
+ * Represents an HTML string which is considered "safe" by construction. It
+ * contains HTML content which is safe to render to a browser without further
+ * sanitization. This is typically constructed via the {@link safe} function for
+ * string literals, or a separate templating engine responsible for sanitization
+ * such as `@rules_prerender/lit_engine`.
+ */
+export type SafeHtml = SafeHtmlImpl;
+
+/** Represents an HTML string which is considered "safe" by construction. */
+class SafeHtmlImpl {
+    readonly #html: string;
+
+    private constructor({ html }: { html: string }) {
+        // Prevent subclassing to avoid malicious code overriding methods.
+        if (this.constructor !== SafeHtmlImpl) {
+            throw new Error('`SafeHtml` cannot be subclassed.');
+        }
+
+        this.#html = html;
+    }
+
+    /**
+     * Accepts the raw HTML string input as a trusted {@link SafeHtmlImpl}
+     * object. This is **UNSAFE** because it effectively "promotes" the raw
+     * string to be considered trusted HTML content with no validation or
+     * sanitization. This function should only be used on trusted inputs or the
+     * output of a sanitized format (such as Lit `TemplateResult`).
+     */
+    public static unsafeTrustRawStringContent(html: string): SafeHtmlImpl {
+        // Clone and freeze the input HTML so the internal reference is not
+        // leaked in a way which could be mutated after the fact.
+        return Object.freeze(
+            new SafeHtmlImpl({ html: `${html}` })) as SafeHtmlImpl;
+    }
+
+    /**
+     * Returns the HTML content as a plain JavaScript string. This effectively
+     * _downgrades_ the privilege of the content, but can be useful when passing
+     * into an HTML sink API.
+     */
+    public getHtmlAsString(): string {
+        // Clones the string into a new value so the internal reference is not
+        // leaked in a way which could be mutated.
+        return `${this.#html}`;
+    }
+}
+
+// Freeze everything so even if malicious code gets a reference to `SafeHtml`,
+// it shouldn't be able to compromise the contract.
+Object.freeze(SafeHtmlImpl); // The class itself.
+Object.freeze(SafeHtmlImpl.prototype); // The `SafeHtml` instance prototype.
+Object.freeze(Object.getPrototypeOf(SafeHtmlImpl)); // The class prototype.
+
+/**
+ * Returns whether or not the given object is a `SafeHtml` string. Done via an
+ * `instanceof` check, meaning this is nominally type checked, not inferred
+ * based on the structure of the input.
+ */
+export function isSafeHtml(obj: unknown): obj is SafeHtml {
+    return obj instanceof SafeHtmlImpl;
+}
diff --git a/common/safe_html/safe_html_impl_test.mts b/common/safe_html/safe_html_impl_test.mts
@@ -0,0 +1,104 @@
+import { SafeHtml, isSafeHtml, safe, unsafeTreatStringAsSafeHtml } from './safe_html_impl.mjs';
+
+describe('safe_html', () => {
+    describe('SafeHtml', () => {
+        it('does not export the `SafeHtml` implementation', () => {
+            // @ts-expect-error Not a value reference.
+            expect(() => SafeHtml).toThrow(); // Reference error.
+        });
+
+        it('throws an error when subclassed', () => {
+            const safeHtml = safe`<div></div>`;
+            const SafeHtml = (safeHtml.constructor as any);
+            class FakeSafeHtml extends SafeHtml {
+                constructor() {
+                    super({ html: '<span></span>' });
+                }
+            }
+
+            expect(() => new FakeSafeHtml()).toThrow();
+        });
+
+        it('freezes `SafeHtml` instances', () => {
+            const safeHtml = safe`<div></div>`;
+
+            // Adding a property.
+            expect(() => { (safeHtml as any).foo = 'test'; }).toThrow();
+
+            // Mutating an existing property.
+            expect(() => { safeHtml.getHtmlAsString = () => 'test' }).toThrow();
+
+            // Mutating the prototype.
+            const SafeHtml = Object.getPrototypeOf(safeHtml);
+            expect(() => { SafeHtml.foo = 'test'; }).toThrow();
+            const SafeHtmlAgain = (safeHtml as any).__proto__;
+            expect(() => { SafeHtmlAgain.foo = 'test'; }).toThrow();
+        });
+
+        it('freezes the `SafeHtml` class', () => {
+            const safeHtml = safe`<div></div>`;
+            const SafeHtml = safeHtml.constructor as any;
+
+            // Adding a property.
+            expect(() => { SafeHtml.foo = 'test'; }).toThrow();
+
+            // Mutating an existing property.
+            expect(() => {
+                SafeHtml.unsafeTrustRawStringContent = () => 'test';
+            }).toThrow();
+
+            // Mutating prototype.
+            expect(() => { Object.getPrototypeOf(SafeHtml).foo = 'test' })
+                .toThrow();
+            expect(() => { SafeHtml.__proto__.foo = 'test' }).toThrow();
+        });
+    });
+
+    describe('safe()', () => {
+        it('treats a single string literal as safe', () => {
+            const safeHtml = safe`<div></div>`;
+
+            expect(isSafeHtml(safeHtml)).toBeTrue();
+            expect(safeHtml.getHtmlAsString()).toBe('<div></div>');
+        });
+
+        it('throws when given an interpolation', () => {
+            expect(() => safe`<${'div'}></div>`).toThrow();
+        });
+    });
+
+    describe('unsafeTreatStringAsSafeHtml()', () => {
+        it('unsafeTreatStringAsSafeHtml() trusts unsafe content', () => {
+            const html = unsafeTreatStringAsSafeHtml('<div>Hello, World</div>');
+            expect(html.getHtmlAsString()).toBe('<div>Hello, World</div>');
+        });
+    });
+
+    describe('isSafeHtml()', () => {
+        it('returns `true` for `SafeHtml` inputs', () => {
+            const html = safe`<div></div>`;
+
+            expect(isSafeHtml(html)).toBeTrue();
+        });
+
+        it('returns `false` for non-`SafeHtml` inputs', () => {
+            expect(isSafeHtml(true)).toBeFalse();
+            expect(isSafeHtml(1)).toBeFalse();
+            expect(isSafeHtml('<div></div>')).toBeFalse();
+            expect(isSafeHtml({})).toBeFalse();
+            expect(isSafeHtml([])).toBeFalse();
+            expect(isSafeHtml(Symbol())).toBeFalse();
+
+            // Assignable to `SafeHtml` because it matches the same structure,
+            // but is not the same nominal type.
+            const fakeSafeHtml = new class {
+                public static unsafeTrustRawStringContent(_html: string): void {}
+
+                public getHtmlAsString(): string {
+                    return '<div></div>';
+                }
+            }();
+            expect(isSafeHtml(fakeSafeHtml)).toBeFalse();
+        });
+    });
+});
diff --git a/common/safe_html/unsafe_html.mts b/common/safe_html/unsafe_html.mts
@@ -0,0 +1 @@
+export { unsafeTreatStringAsSafeHtml } from './safe_html_impl.mjs';
diff --git a/examples/minimal/site.mts b/examples/minimal/site.mts
@@ -1,9 +1,9 @@
 import { bar, foo } from './dep.mjs';
-import { PrerenderResource } from 'rules_prerender';
+import { PrerenderResource, unsafeTreatStringAsSafeHtml } from 'rules_prerender';
 
 /** Renders the page. */
 export default function*(): Generator<PrerenderResource, void, void> {
-    yield PrerenderResource.of('/index.html', `
+    yield PrerenderResource.of('/index.html', unsafeTreatStringAsSafeHtml(`
         <!DOCTYPE html>
         <html>
             <head>
@@ -15,5 +15,5 @@ export default function*(): Generator<PrerenderResource, void, void> {
                 <span id="bar">${bar}</span>
             </body>
         </html>
-    `.trim());
+    `.trim()));
 }
diff --git a/packages/rules_prerender/BUILD.bazel b/packages/rules_prerender/BUILD.bazel
@@ -39,6 +39,8 @@ ts_project(
         ":scripts",
         ":styles",
         "//common/models:prerender_resource",
+        "//common/safe_html",
+        "//common/safe_html:unsafe_html",
     ],
 )
 

diff --git a/packages/rules_prerender/index.mts b/packages/rules_prerender/index.mts
@@ -5,3 +5,6 @@ export { includeScript } from './scripts.mjs';
 export { inlineStyle, InlineStyleNotFoundError as InternalInlineStyleNotFoundError } from './styles.mjs';
 
 export { setMap as internalSetInlineStyleMap, resetMapForTesting as internalResetInlineStyleMapForTesting } from './inline_style_map.mjs';
+
+export { SafeHtml, isSafeHtml } from '../../common/safe_html/safe_html.mjs';
+export { unsafeTreatStringAsSafeHtml } from '../../common/safe_html/unsafe_html.mjs';
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		export { SafeHtml, isSafeHtml, safe } from './safe_html_impl.mjs';
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		export { unsafeTreatStringAsSafeHtml } from './safe_html_impl.mjs';