Skip to content

Commit

Permalink
fix: force protocol to be lowercase for better protocol filtering
Browse files Browse the repository at this point in the history
* fix: force protocol to be lowercase for better protocol filtering

See https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
"Although schemes are case-insensitive, the canonical form is lowercase"

* Update snapshots

* Rewrite to use new URL if available, and reject weird/invalid protocols

* Fix prefix pattern, ? handling, add comments and tests

* Remove new URL to simplify code

* Lint fixes
  • Loading branch information
edemaine committed Mar 24, 2024
1 parent 085e21b commit fc5af64
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 7 deletions.
2 changes: 2 additions & 0 deletions docs/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ You can provide an object of options as the last argument to [`katex.render` and
- `trust`: `boolean` or `function` (default: `false`). If `false` (do not trust input), prevent any commands like `\includegraphics` that could enable adverse behavior, rendering them instead in `errorColor`. If `true` (trust input), allow all such commands. Provide a custom function `handler(context)` to customize behavior depending on the context (command, arguments e.g. a URL, etc.). A list of possible contexts:

- `{command: "\\url", url, protocol}`
where `protocol` is a lowercased string like `"http"` or `"https"`
that appears before a colon, or `"_relative"` for relative URLs.
- `{command: "\\href", url, protocol}`
- `{command: "\\includegraphics", url, protocol}`
- `{command: "\\htmlClass", class}`
Expand Down
6 changes: 5 additions & 1 deletion src/Settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,11 @@ export default class Settings {
*/
isTrusted(context: AnyTrustContext): boolean {
if (context.url && !context.protocol) {
context.protocol = utils.protocolFromUrl(context.url);
const protocol = utils.protocolFromUrl(context.url);
if (protocol == null) {
return false;
}
context.protocol = protocol;
}
const trust = typeof this.trust === "function"
? this.trust(context)
Expand Down
27 changes: 23 additions & 4 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,30 @@ export const assert = function<T>(value: ?T): T {

/**
* Return the protocol of a URL, or "_relative" if the URL does not specify a
* protocol (and thus is relative).
* protocol (and thus is relative), or `null` if URL has invalid protocol
* (so should be outright rejected).
*/
export const protocolFromUrl = function(url: string): string {
const protocol = /^\s*([^\\/#]*?)(?::|&#0*58|&#x0*3a)/i.exec(url);
return (protocol != null ? protocol[1] : "_relative");
export const protocolFromUrl = function(url: string): string | null {
// Check for possible leading protocol.
// https://url.spec.whatwg.org/#url-parsing strips leading whitespace
// (U+20) or C0 control (U+00-U+1F) characters.
// eslint-disable-next-line no-control-regex
const protocol = /^[\x00-\x20]*([^\\/#?]*?)(:|&#0*58|&#x0*3a|&colon)/i
.exec(url);
if (!protocol) {
return "_relative";
}
// Reject weird colons
if (protocol[2] !== ":") {
return null;
}
// Reject invalid characters in scheme according to
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
if (!/^[a-zA-Z][a-zA-Z0-9+\-.]*$/.test(protocol[1])) {
return null;
}
// Lowercase the protocol
return protocol[1].toLowerCase();
};

export default {
Expand Down
45 changes: 44 additions & 1 deletion test/__snapshots__/katex-spec.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -1468,7 +1468,50 @@ exports[`href and url commands should not affect spacing around 1`] = `
]
`;

exports[`href and url commands should not allow explicitly disallow protocols 1`] = `
exports[`href and url commands should not allow explicitly disallowed protocols 1`] = `
[
{
"type": "color",
"body": [
{
"type": "text",
"body": [
{
"type": "textord",
"mode": "text",
"text": "\\\\"
},
{
"type": "textord",
"mode": "text",
"text": "h"
},
{
"type": "textord",
"mode": "text",
"text": "r"
},
{
"type": "textord",
"mode": "text",
"text": "e"
},
{
"type": "textord",
"mode": "text",
"text": "f"
}
],
"mode": "math"
}
],
"color": "#cc0000",
"mode": "math"
}
]
`;

exports[`href and url commands should not allow explicitly uppercased disallowed protocols 1`] = `
[
{
"type": "color",
Expand Down
35 changes: 34 additions & 1 deletion test/katex-spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -3005,13 +3005,46 @@ describe("href and url commands", function() {
expect(parsed).toMatchSnapshot();
});

it("should not allow explicitly disallow protocols", () => {
it("should not allow explicitly disallowed protocols", () => {
const parsed = getParsed(
"\\href{javascript:alert('x')}{foo}",
new Settings({trust: context => context.protocol !== "javascript"}),
);
expect(parsed).toMatchSnapshot();
});

it("should not allow explicitly uppercased disallowed protocols", () => {
const parsed = getParsed(
"\\href{JavaScript:alert('x')}{foo}",
new Settings({trust: context => context.protocol !== "javascript"}),
);
expect(parsed).toMatchSnapshot();
});

function getProtocolViaTrust(url) {
let protocol;
getParsed(`\\url{${url}}`, new Settings({
trust: context => protocol = context.protocol,
}));
return protocol;
}

it("should get protocols correctly", () => {
expect(getProtocolViaTrust("foo")).toBe("_relative");
expect(getProtocolViaTrust("Foo:")).toBe("foo");
expect(getProtocolViaTrust("Foo:bar")).toBe("foo");
expect(getProtocolViaTrust("JavaScript:")).toBe("javascript");
expect(getProtocolViaTrust("JavaScript:code")).toBe("javascript");
expect(getProtocolViaTrust("!:")).toBeUndefined();
expect(getProtocolViaTrust("foo&colon;")).toBeUndefined();
expect(getProtocolViaTrust("?query=string&colon=")).toBe("_relative");
expect(getProtocolViaTrust("#query=string&colon=")).toBe("_relative");
expect(getProtocolViaTrust("dir/file&colon")).toBe("_relative");
expect(getProtocolViaTrust("//foo")).toBe("_relative");
expect(getProtocolViaTrust("://foo")).toBeUndefined();
expect(getProtocolViaTrust(" \t http://")).toBe("http");
expect(getProtocolViaTrust(" \t http://foo")).toBe("http");
});
});

describe("A raw text parser", function() {
Expand Down

0 comments on commit fc5af64

Please sign in to comment.