From c42906f8811e085a23d853597a0bcb5f32ca709f Mon Sep 17 00:00:00 2001 From: Kasper Isager Date: Fri, 2 Oct 2020 11:52:47 +0200 Subject: [PATCH] Introduce `@siteimprove/alfa-url` package (#421) --- docs/examples/custom-testing/crawling/test.ts | 8 +- docs/examples/custom-testing/scraping/test.ts | 8 +- .../alfa-cli/bin/alfa/command/scrape/run.ts | 3 +- packages/alfa-cli/package.json | 1 + packages/alfa-cli/tsconfig.json | 3 + packages/alfa-crawler/package.json | 1 + packages/alfa-crawler/src/crawler.ts | 15 +- packages/alfa-crawler/test/crawler.spec.ts | 2 +- packages/alfa-crawler/tsconfig.json | 3 + packages/alfa-frontier/package.json | 3 +- packages/alfa-frontier/src/frontier.ts | 36 +- packages/alfa-frontier/test/frontier.spec.ts | 2 +- packages/alfa-frontier/tsconfig.json | 3 + packages/alfa-http/package.json | 3 +- packages/alfa-http/src/request.ts | 20 +- packages/alfa-http/src/response.ts | 18 +- packages/alfa-http/tsconfig.json | 3 + packages/alfa-json-ld/package.json | 3 +- packages/alfa-json-ld/src/expand.ts | 13 +- packages/alfa-json-ld/tsconfig.json | 3 + packages/alfa-scraper/package.json | 1 + packages/alfa-scraper/src/scraper.ts | 31 +- packages/alfa-scraper/test/scraper.spec.ts | 10 +- packages/alfa-scraper/tsconfig.json | 3 + packages/alfa-url/package.json | 36 ++ packages/alfa-url/src/index.ts | 1 + packages/alfa-url/src/url.ts | 364 ++++++++++++++++++ packages/alfa-url/src/url/builtin.ts | 4 + packages/alfa-url/test/url.spec.ts | 52 +++ packages/alfa-url/tsconfig.json | 36 ++ packages/alfa-web/src/page.ts | 4 +- packages/tsconfig.json | 1 + 32 files changed, 616 insertions(+), 78 deletions(-) create mode 100644 packages/alfa-url/package.json create mode 100644 packages/alfa-url/src/index.ts create mode 100644 packages/alfa-url/src/url.ts create mode 100644 packages/alfa-url/src/url/builtin.ts create mode 100644 packages/alfa-url/test/url.spec.ts create mode 100644 packages/alfa-url/tsconfig.json diff --git 
a/docs/examples/custom-testing/crawling/test.ts b/docs/examples/custom-testing/crawling/test.ts index d1f3863631..739103bf97 100644 --- a/docs/examples/custom-testing/crawling/test.ts +++ b/docs/examples/custom-testing/crawling/test.ts @@ -25,9 +25,9 @@ Crawler.with(async (crawler) => { const earl = outcomes.map((outcome) => outcome.toEARL()); - const url = new URL(input.response.url); + const { url } = input.response; - console.group(url.href); + console.group(url.toString()); logStats(outcomes); console.groupEnd(); @@ -35,8 +35,8 @@ Crawler.with(async (crawler) => { path.join( __dirname, "outcomes", - url.host, - url.pathname.replace(/\/$/, "") + url.host.get(), + ...url.path.filter((segment) => segment !== "") ) + ".json"; fs.mkdirSync(path.dirname(file), { recursive: true }); diff --git a/docs/examples/custom-testing/scraping/test.ts b/docs/examples/custom-testing/scraping/test.ts index cc282978bd..d1d6658757 100644 --- a/docs/examples/custom-testing/scraping/test.ts +++ b/docs/examples/custom-testing/scraping/test.ts @@ -21,9 +21,9 @@ Scraper.with(async (scraper) => { const earl = outcomes.map((outcome) => outcome.toEARL()); - const url = new URL(input.response.url); + const { url } = input.response; - console.group(url.href); + console.group(url.toString()); logStats(outcomes); console.groupEnd(); @@ -31,8 +31,8 @@ Scraper.with(async (scraper) => { path.join( __dirname, "outcomes", - url.host, - url.pathname.replace(/\/$/, "") + url.host.get(), + ...url.path.filter((segment) => segment !== "") ) + ".json"; fs.mkdirSync(path.dirname(file), { recursive: true }); diff --git a/packages/alfa-cli/bin/alfa/command/scrape/run.ts b/packages/alfa-cli/bin/alfa/command/scrape/run.ts index 61010256bc..f6587d575c 100644 --- a/packages/alfa-cli/bin/alfa/command/scrape/run.ts +++ b/packages/alfa-cli/bin/alfa/command/scrape/run.ts @@ -15,6 +15,7 @@ import { Screenshot, } from "@siteimprove/alfa-scraper"; import { Timeout } from "@siteimprove/alfa-time"; +import { URL } from 
"@siteimprove/alfa-url"; import type { Arguments } from "./arguments"; import type { Flags } from "./flags"; @@ -139,7 +140,7 @@ export const run: Command.Runner = async ({ const timeout = Timeout.of(flags.timeout); const result = await scraper.scrape( - new URL(target, url.pathToFileURL(process.cwd() + path.sep)), + URL.parse(target, url.pathToFileURL(process.cwd() + path.sep).href).get(), { timeout, awaiter, diff --git a/packages/alfa-cli/package.json b/packages/alfa-cli/package.json index a7b038a180..3e5ea37034 100644 --- a/packages/alfa-cli/package.json +++ b/packages/alfa-cli/package.json @@ -35,6 +35,7 @@ "@siteimprove/alfa-rules": "^0.5.0", "@siteimprove/alfa-scraper": "^0.5.0", "@siteimprove/alfa-time": "^0.5.0", + "@siteimprove/alfa-url": "^0.5.0", "@siteimprove/alfa-web": "^0.5.0", "@siteimprove/alfa-xpath": "^0.5.0", "@types/node": "^14.0.12", diff --git a/packages/alfa-cli/tsconfig.json b/packages/alfa-cli/tsconfig.json index 3c58e64109..7261b536af 100644 --- a/packages/alfa-cli/tsconfig.json +++ b/packages/alfa-cli/tsconfig.json @@ -90,6 +90,9 @@ { "path": "../alfa-time" }, + { + "path": "../alfa-url" + }, { "path": "../alfa-web" }, diff --git a/packages/alfa-crawler/package.json b/packages/alfa-crawler/package.json index 8fc208508a..88258759c1 100644 --- a/packages/alfa-crawler/package.json +++ b/packages/alfa-crawler/package.json @@ -23,6 +23,7 @@ "@siteimprove/alfa-mapper": "^0.5.0", "@siteimprove/alfa-result": "^0.5.0", "@siteimprove/alfa-scraper": "^0.5.0", + "@siteimprove/alfa-url": "^0.5.0", "@siteimprove/alfa-web": "^0.5.0" }, "devDependencies": { diff --git a/packages/alfa-crawler/src/crawler.ts b/packages/alfa-crawler/src/crawler.ts index 5466ee4fd3..8480165e87 100644 --- a/packages/alfa-crawler/src/crawler.ts +++ b/packages/alfa-crawler/src/crawler.ts @@ -3,6 +3,7 @@ import { Frontier } from "@siteimprove/alfa-frontier"; import { Mapper } from "@siteimprove/alfa-mapper"; import { Result } from "@siteimprove/alfa-result"; import { Scraper, 
Screenshot } from "@siteimprove/alfa-scraper"; +import { URL } from "@siteimprove/alfa-url"; import { Page } from "@siteimprove/alfa-web"; const { isElement } = Element; @@ -19,10 +20,12 @@ export class Crawler { scraper?: Promise ): Promise { const crawler = await this.of(scraper); - const result = await mapper(crawler); - await crawler.close(); - return result; + try { + return await mapper(crawler); + } finally { + await crawler.close(); + } } private readonly _scraper: Scraper; @@ -53,8 +56,8 @@ export class Crawler { frontier.complete(url); for (const page of result) { - if (page.response.url !== url.href) { - frontier.redirect(url.href, page.response.url); + if (!page.response.url.equals(url)) { + frontier.redirect(url, page.response.url); } for (const url of urls(page)) { @@ -94,7 +97,7 @@ function* urls(page: Page): Iterable { if (isElement(node) && node.name === "a") { yield* node .attribute("href") - .map((href) => new URL(href.value, page.response.url)); + .map((href) => URL.parse(href.value, page.response.url).get()); } } } diff --git a/packages/alfa-crawler/test/crawler.spec.ts b/packages/alfa-crawler/test/crawler.spec.ts index ab35a7df40..04cbb004bf 100644 --- a/packages/alfa-crawler/test/crawler.spec.ts +++ b/packages/alfa-crawler/test/crawler.spec.ts @@ -15,7 +15,7 @@ test("#crawl() crawls a frontier", async (t) => for await (const result of crawler.crawl(frontier)) { t.equal(result.isOk(), true); - pages.push(result.get().response.url); + pages.push(result.get().response.url.toString()); } t.deepEqual(pages, [ diff --git a/packages/alfa-crawler/tsconfig.json b/packages/alfa-crawler/tsconfig.json index b59557f829..dcea92efe0 100644 --- a/packages/alfa-crawler/tsconfig.json +++ b/packages/alfa-crawler/tsconfig.json @@ -21,6 +21,9 @@ { "path": "../alfa-test" }, + { + "path": "../alfa-url" + }, { "path": "../alfa-web" } diff --git a/packages/alfa-frontier/package.json b/packages/alfa-frontier/package.json index 68b15bc0bb..f79cb66ff0 100644 --- 
a/packages/alfa-frontier/package.json +++ b/packages/alfa-frontier/package.json @@ -21,7 +21,8 @@ "@siteimprove/alfa-equatable": "^0.5.0", "@siteimprove/alfa-json": "^0.5.0", "@siteimprove/alfa-option": "^0.5.0", - "@siteimprove/alfa-predicate": "^0.5.0" + "@siteimprove/alfa-predicate": "^0.5.0", + "@siteimprove/alfa-url": "^0.5.0" }, "devDependencies": { "@siteimprove/alfa-test": "^0.5.0" diff --git a/packages/alfa-frontier/src/frontier.ts b/packages/alfa-frontier/src/frontier.ts index 4fbdc2c0f2..c0d41c8efc 100644 --- a/packages/alfa-frontier/src/frontier.ts +++ b/packages/alfa-frontier/src/frontier.ts @@ -2,6 +2,7 @@ import { Equatable } from "@siteimprove/alfa-equatable"; import { Serializable } from "@siteimprove/alfa-json"; import { Option, None } from "@siteimprove/alfa-option"; import { Predicate } from "@siteimprove/alfa-predicate"; +import { URL } from "@siteimprove/alfa-url"; import * as json from "@siteimprove/alfa-json"; @@ -35,7 +36,7 @@ export class Frontier implements Equatable, Serializable { } public isInScope(url: string | URL): boolean { - return toURL(url).href.startsWith(this._scope.href); + return toURL(url).toString().startsWith(this._scope.toString()); } public hasWaiting(): boolean { @@ -186,7 +187,7 @@ export class Frontier implements Equatable, Serializable { public equals(value: unknown): value is this { return ( value instanceof Frontier && - value._scope.href === this._scope.href && + value._scope.equals(this._scope) && value._items.length === this._items.length && value._items.every((item, i) => item.equals(this._items[i])) ); @@ -194,7 +195,7 @@ export class Frontier implements Equatable, Serializable { public toJSON(): Frontier.JSON { return { - scope: this._scope.href, + scope: this._scope.toString(), items: this._items.map((item) => item.toJSON()), }; } @@ -269,10 +270,7 @@ class Item implements Equatable, Serializable { public matches(url: string | URL): boolean { url = toURL(url); - return ( - this._url.href === url.href || - 
this._aliases.some(property("href", equals(url.href))) - ); + return this._url.equals(url) || this._aliases.some(equals(url)); } public transition(state: State): boolean { @@ -304,10 +302,7 @@ class Item implements Equatable, Serializable { public alias(url: string | URL): boolean { url = toURL(url); - if ( - this._url.href === url.href || - this._aliases.some(property("href", equals(url.href))) - ) { + if (this._url.equals(url) || this._aliases.some(equals(url))) { return false; } @@ -319,7 +314,7 @@ class Item implements Equatable, Serializable { public redirect(target: string | URL): boolean { target = toURL(target); - if (this._url.href === target.href) { + if (this._url.equals(target)) { return false; } @@ -332,19 +327,17 @@ class Item implements Equatable, Serializable { public equals(value: unknown): value is this { return ( value instanceof Item && - value._url.href === this._url.href && + value._url.equals(this._url) && value._aliases.length === this._aliases.length && - value._aliases.every( - (alias, i) => alias.href === this._aliases[i].href - ) && + value._aliases.every((alias, i) => alias.equals(this._aliases[i])) && value._state === this._state ); } public toJSON(): Item.JSON { return { - url: this._url.href, - aliases: this._aliases.map((url) => url.href), + url: this._url.toString(), + aliases: this._aliases.map((url) => url.toString()), state: this._state, }; } @@ -360,11 +353,10 @@ namespace Item { } function toURL(url: string | URL): URL { - url = typeof url === "string" ? new URL(url) : url; - url.hash = ""; - return url; + url = typeof url === "string" ? 
URL.parse(url).get() : url; + return url.withoutFragment(); } function isInScope(scope: string | URL, url: string | URL): boolean { - return toURL(url).href.startsWith(toURL(scope).href); + return toURL(url).toString().startsWith(toURL(scope).toString()); } diff --git a/packages/alfa-frontier/test/frontier.spec.ts b/packages/alfa-frontier/test/frontier.spec.ts index 4d23cd1af9..66b98a91f2 100644 --- a/packages/alfa-frontier/test/frontier.spec.ts +++ b/packages/alfa-frontier/test/frontier.spec.ts @@ -106,7 +106,7 @@ test("#enqueue() doesn't change the state of an already seen URL", (t) => { test("#dequeue() gets the next waiting URL in queue and moves it to in progress", (t) => { const frontier = Frontier.of("https://example.com/"); - t.deepEqual(frontier.dequeue().get(), new URL("https://example.com")); + t.deepEqual(frontier.dequeue().get().toString(), "https://example.com/"); t.deepEqual(frontier.toJSON(), { scope: "https://example.com/", diff --git a/packages/alfa-frontier/tsconfig.json b/packages/alfa-frontier/tsconfig.json index 374857947f..7b813ddeee 100644 --- a/packages/alfa-frontier/tsconfig.json +++ b/packages/alfa-frontier/tsconfig.json @@ -17,6 +17,9 @@ }, { "path": "../alfa-test" + }, + { + "path": "../alfa-url" } ] } diff --git a/packages/alfa-http/package.json b/packages/alfa-http/package.json index c8943f20bb..c923bb25c2 100644 --- a/packages/alfa-http/package.json +++ b/packages/alfa-http/package.json @@ -25,7 +25,8 @@ "@siteimprove/alfa-json": "^0.5.0", "@siteimprove/alfa-map": "^0.5.0", "@siteimprove/alfa-option": "^0.5.0", - "@siteimprove/alfa-refinement": "^0.5.0" + "@siteimprove/alfa-refinement": "^0.5.0", + "@siteimprove/alfa-url": "^0.5.0" }, "devDependencies": { "@siteimprove/alfa-test": "^0.5.0" diff --git a/packages/alfa-http/src/request.ts b/packages/alfa-http/src/request.ts index 7a0549e5ce..a4cc4d2e67 100644 --- a/packages/alfa-http/src/request.ts +++ b/packages/alfa-http/src/request.ts @@ -1,4 +1,6 @@ import { Decoder, Encoder } from 
"@siteimprove/alfa-encoding"; +import { URL } from "@siteimprove/alfa-url"; + import * as earl from "@siteimprove/alfa-earl"; import * as json from "@siteimprove/alfa-json"; @@ -11,25 +13,27 @@ import { Headers } from "./headers"; export class Request implements Body, json.Serializable, earl.Serializable { public static of( method: string, - url: string, + url: URL, headers: Headers = Headers.empty(), body: ArrayBuffer = new ArrayBuffer(0) ): Request { return new Request(method, url, headers, body); } + private static _empty = Request.of("GET", URL.parse("about:blank").get()); + public static empty(): Request { - return Request.of("GET", "about:blank"); + return this._empty; } private readonly _method: string; - private readonly _url: string; + private readonly _url: URL; private readonly _headers: Headers; private readonly _body: ArrayBuffer; private constructor( method: string, - url: string, + url: URL, headers: Headers, body: ArrayBuffer ) { @@ -49,7 +53,7 @@ export class Request implements Body, json.Serializable, earl.Serializable { /** * @see https://fetch.spec.whatwg.org/#dom-request-url */ - public get url(): string { + public get url(): URL { return this._url; } @@ -70,7 +74,7 @@ export class Request implements Body, json.Serializable, earl.Serializable { public toJSON(): Request.JSON { return { method: this._method, - url: this._url, + url: this._url.toString(), headers: this._headers.toJSON(), body: Decoder.decode(new Uint8Array(this._body)), }; @@ -83,7 +87,7 @@ export class Request implements Body, json.Serializable, earl.Serializable { }, "@type": ["http:Message", "http:Request"], "http:methodName": this._method, - "http:requestURI": this._url, + "http:requestURI": this._url.toString(), "http:headers": this._headers.toEARL(), "http:body": { "@context": { @@ -130,7 +134,7 @@ export namespace Request { export function from(json: JSON): Request { return Request.of( json.method, - json.url, + URL.parse(json.url).get(), Headers.from(json.headers), 
Encoder.encode(json.body) ); diff --git a/packages/alfa-http/src/response.ts b/packages/alfa-http/src/response.ts index 773a61f28b..ad40dd84c8 100644 --- a/packages/alfa-http/src/response.ts +++ b/packages/alfa-http/src/response.ts @@ -1,4 +1,6 @@ import { Decoder, Encoder } from "@siteimprove/alfa-encoding"; +import { URL } from "@siteimprove/alfa-url"; + import * as earl from "@siteimprove/alfa-earl"; import * as json from "@siteimprove/alfa-json"; @@ -10,7 +12,7 @@ import { Headers } from "./headers"; */ export class Response implements Body, json.Serializable, earl.Serializable { public static of( - url: string, + url: URL, status: number, headers: Headers = Headers.empty(), body: ArrayBuffer = new ArrayBuffer(0) @@ -18,17 +20,19 @@ export class Response implements Body, json.Serializable, earl.Serializable { return new Response(url, status, headers, body); } + private static _empty = Response.of(URL.parse("about:blank").get(), 200); + public static empty(): Response { - return Response.of("about:blank", 200); + return this._empty; } - private readonly _url: string; + private readonly _url: URL; private readonly _status: number; private readonly _headers: Headers; private readonly _body: ArrayBuffer; private constructor( - url: string, + url: URL, status: number, headers: Headers, body: ArrayBuffer @@ -42,7 +46,7 @@ export class Response implements Body, json.Serializable, earl.Serializable { /** * @see https://fetch.spec.whatwg.org/#dom-response-url */ - public get url(): string { + public get url(): URL { return this._url; } @@ -69,7 +73,7 @@ export class Response implements Body, json.Serializable, earl.Serializable { public toJSON(): Response.JSON { return { - url: this._url, + url: this._url.toString(), status: this._status, headers: this._headers.toJSON(), body: Decoder.decode(new Uint8Array(this._body)), @@ -127,7 +131,7 @@ export namespace Response { export function from(json: JSON): Response { return Response.of( - json.url, + 
URL.parse(json.url).get(), json.status, Headers.from(json.headers), Encoder.encode(json.body) diff --git a/packages/alfa-http/tsconfig.json b/packages/alfa-http/tsconfig.json index f7f35aa270..ca60786dec 100644 --- a/packages/alfa-http/tsconfig.json +++ b/packages/alfa-http/tsconfig.json @@ -38,6 +38,9 @@ }, { "path": "../alfa-test" + }, + { + "path": "../alfa-url" } ] } diff --git a/packages/alfa-json-ld/package.json b/packages/alfa-json-ld/package.json index 2123b60ac1..12049c9064 100644 --- a/packages/alfa-json-ld/package.json +++ b/packages/alfa-json-ld/package.json @@ -19,7 +19,8 @@ ], "dependencies": { "@siteimprove/alfa-option": "^0.5.0", - "@siteimprove/alfa-result": "^0.5.0" + "@siteimprove/alfa-result": "^0.5.0", + "@siteimprove/alfa-url": "^0.5.0" }, "devDependencies": { "@siteimprove/alfa-test": "^0.5.0" diff --git a/packages/alfa-json-ld/src/expand.ts b/packages/alfa-json-ld/src/expand.ts index 842076cd52..82ce03472d 100644 --- a/packages/alfa-json-ld/src/expand.ts +++ b/packages/alfa-json-ld/src/expand.ts @@ -2,6 +2,7 @@ import { None, Option, Some } from "@siteimprove/alfa-option"; import { Err, Ok, Result } from "@siteimprove/alfa-result"; +import { URL } from "@siteimprove/alfa-url"; import { isDictionary, @@ -871,10 +872,6 @@ function isRelativeIri(url: string): boolean { return relativeIri.test(url); } -function resolveUrl(target: string, base: string): string { - return new URL(target, base).href; -} - function getMapping( context: Context, property: string | null, @@ -957,7 +954,13 @@ function processContext( const base = result["@base"]; if (typeof base === "string") { - result["@base"] = resolveUrl(value, base); + const url = URL.parse(value, base); + + if (url.isErr()) { + return url; + } + + result["@base"] = url.get().toString(); } } diff --git a/packages/alfa-json-ld/tsconfig.json b/packages/alfa-json-ld/tsconfig.json index 082638a41d..6d149aa483 100644 --- a/packages/alfa-json-ld/tsconfig.json +++ b/packages/alfa-json-ld/tsconfig.json @@ 
-18,6 +18,9 @@ }, { "path": "../alfa-test" + }, + { + "path": "../alfa-url" } ] } diff --git a/packages/alfa-scraper/package.json b/packages/alfa-scraper/package.json index e7adf9e186..33a0893ed2 100644 --- a/packages/alfa-scraper/package.json +++ b/packages/alfa-scraper/package.json @@ -26,6 +26,7 @@ "@siteimprove/alfa-puppeteer": "^0.5.0", "@siteimprove/alfa-result": "^0.5.0", "@siteimprove/alfa-time": "^0.5.0", + "@siteimprove/alfa-url": "^0.5.0", "@siteimprove/alfa-web": "^0.5.0", "@types/puppeteer": "^3.0.1", "puppeteer": "^5.2.1" diff --git a/packages/alfa-scraper/src/scraper.ts b/packages/alfa-scraper/src/scraper.ts index 39d2e5983d..7a811fd5c0 100644 --- a/packages/alfa-scraper/src/scraper.ts +++ b/packages/alfa-scraper/src/scraper.ts @@ -11,6 +11,7 @@ import { Mapper } from "@siteimprove/alfa-mapper"; import { Puppeteer } from "@siteimprove/alfa-puppeteer"; import { Result, Ok, Err } from "@siteimprove/alfa-result"; import { Timeout } from "@siteimprove/alfa-time"; +import { URL } from "@siteimprove/alfa-url"; import { Page } from "@siteimprove/alfa-web"; import * as puppeteer from "puppeteer"; @@ -41,10 +42,12 @@ export class Scraper { browser?: Promise ): Promise { const scraper = await this.of(browser); - const result = await mapper(scraper); - await scraper.close(); - return result; + try { + return await mapper(scraper); + } finally { + await scraper.close(); + } } private readonly _browser: puppeteer.Browser; @@ -60,7 +63,17 @@ export class Scraper { url: string | URL, options: Scraper.scrape.Options = {} ): Promise> { - const { href, protocol } = typeof url === "string" ? 
new URL(url) : url; + if (typeof url === "string") { + const result = URL.parse(url); + + if (result.isErr()) { + return result; + } + + url = result.get(); + } + + const scheme = url.scheme; const { timeout = Timeout.of(10000), @@ -109,19 +122,19 @@ export class Scraper { }, {}) ); - if (protocol === "http:" || protocol === "https:") { + if (scheme === "http" || scheme === "https") { await page.setCookie( ...[...cookies].map((cookie) => { return { name: cookie.name, value: cookie.value, - url: href, + url: url.toString(), }; }) ); } - let origin = href; + let origin = url.toString(); while (true) { try { @@ -189,7 +202,7 @@ export namespace Scraper { function parseRequest(request: puppeteer.Request): Request { return Request.of( request.method(), - request.url(), + URL.parse(request.url()).get(), Headers.of( entries(request.headers()).map(([name, value]) => Header.of(name, value)) ) @@ -198,7 +211,7 @@ function parseRequest(request: puppeteer.Request): Request { async function parseResponse(response: puppeteer.Response): Promise { return Response.of( - response.url(), + URL.parse(response.url()).get(), response.status(), Headers.of( entries(response.headers()).map(([name, value]) => Header.of(name, value)) diff --git a/packages/alfa-scraper/test/scraper.spec.ts b/packages/alfa-scraper/test/scraper.spec.ts index 7c1177834a..4225295b22 100644 --- a/packages/alfa-scraper/test/scraper.spec.ts +++ b/packages/alfa-scraper/test/scraper.spec.ts @@ -11,7 +11,7 @@ test("#scrape() scrapes a page with a hash fragment", async (t) => const { response } = result.get(); - t.equal(response.url, url); + t.equal(response.url.toString(), url); })); test("#scrape() scrapes a page with an immediate meta refresh", async (t) => @@ -23,7 +23,7 @@ test("#scrape() scrapes a page with an immediate meta refresh", async (t) => const { response } = result.get(); - t.equal(response.url, "https://example.com/"); + t.equal(response.url.toString(), "https://example.com/"); })); test("#scrape() 
scrapes a page with a delayed meta refresh", async (t) => @@ -35,7 +35,7 @@ test("#scrape() scrapes a page with a delayed meta refresh", async (t) => const { response } = result.get(); - t.equal(response.url, url); + t.equal(response.url.toString(), url); })); test("#scrape() scrapes a page with an immediate location change", async (t) => @@ -47,7 +47,7 @@ test("#scrape() scrapes a page with an immediate location change", async (t) => const { response } = result.get(); - t.equal(response.url, "https://example.com/"); + t.equal(response.url.toString(), "https://example.com/"); })); test("#scrape() scrapes a page with a delayed location change", async (t) => @@ -59,5 +59,5 @@ test("#scrape() scrapes a page with a delayed location change", async (t) => const { response } = result.get(); - t.equal(response.url, url); + t.equal(response.url.toString(), url); })); diff --git a/packages/alfa-scraper/tsconfig.json b/packages/alfa-scraper/tsconfig.json index 1f72b2bb18..a8c1d85ee1 100644 --- a/packages/alfa-scraper/tsconfig.json +++ b/packages/alfa-scraper/tsconfig.json @@ -37,6 +37,9 @@ { "path": "../alfa-time" }, + { + "path": "../alfa-url" + }, { "path": "../alfa-web" } diff --git a/packages/alfa-url/package.json b/packages/alfa-url/package.json new file mode 100644 index 0000000000..aeed6e9963 --- /dev/null +++ b/packages/alfa-url/package.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json.schemastore.org/package", + "name": "@siteimprove/alfa-url", + "homepage": "https://siteimprove.com", + "version": "0.5.0", + "license": "MIT", + "description": "Functionality for working with immutable URLs", + "repository": { + "type": "git", + "url": "https://github.com/siteimprove/alfa.git", + "directory": "packages/alfa-url" + }, + "bugs": "https://github.com/siteimprove/alfa/issues", + "main": "src/index.js", + "types": "src/index.d.ts", + "files": [ + "src/**/*.js", + "src/**/*.d.ts" + ], + "dependencies": { + "@siteimprove/alfa-equatable": "^0.5.0", + "@siteimprove/alfa-hash": 
"^0.5.0", + "@siteimprove/alfa-iterable": "^0.5.0", + "@siteimprove/alfa-json": "^0.5.0", + "@siteimprove/alfa-option": "^0.5.0", + "@siteimprove/alfa-result": "^0.5.0", + "@siteimprove/alfa-sequence": "^0.5.0" + }, + "devDependencies": { + "@siteimprove/alfa-test": "^0.5.0" + }, + "publishConfig": { + "access": "public", + "registry": "https://npm.pkg.github.com/" + } +} diff --git a/packages/alfa-url/src/index.ts b/packages/alfa-url/src/index.ts new file mode 100644 index 0000000000..eaf19f820a --- /dev/null +++ b/packages/alfa-url/src/index.ts @@ -0,0 +1 @@ +export * from "./url"; diff --git a/packages/alfa-url/src/url.ts b/packages/alfa-url/src/url.ts new file mode 100644 index 0000000000..f97f37e621 --- /dev/null +++ b/packages/alfa-url/src/url.ts @@ -0,0 +1,364 @@ +import { Equatable } from "@siteimprove/alfa-equatable"; +import { Hash, Hashable } from "@siteimprove/alfa-hash"; +import { Iterable } from "@siteimprove/alfa-iterable"; +import { Serializable } from "@siteimprove/alfa-json"; +import { Option, None } from "@siteimprove/alfa-option"; +import { Result, Err } from "@siteimprove/alfa-result"; +import { Sequence } from "@siteimprove/alfa-sequence"; + +import * as json from "@siteimprove/alfa-json"; + +import { Builtin } from "./url/builtin"; + +const { isEmpty } = Iterable; + +/** + * @see https://url.spec.whatwg.org/ + */ +export class URL implements Equatable, Hashable, Serializable { + public static of( + scheme: string, + username: Option = None, + password: Option = None, + host: Option = None, + port: Option = None, + path: Iterable = [], + query: Option = None, + fragment: Option = None + ): URL { + return new URL( + scheme, + username, + password, + host, + port, + Sequence.from(path), + query, + fragment + ); + } + + private readonly _scheme: string; + private readonly _username: Option; + private readonly _password: Option; + private readonly _host: Option; + private readonly _port: Option; + private readonly _path: Sequence; + private 
readonly _query: Option; + private readonly _fragment: Option; + + private constructor( + scheme: string, + username: Option, + password: Option, + host: Option, + port: Option, + path: Sequence, + query: Option, + fragment: Option + ) { + this._scheme = scheme; + this._username = username; + this._password = password; + this._host = host; + this._port = port; + this._path = path; + this._query = query; + this._fragment = fragment; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-scheme + */ + public get scheme(): string { + return this._scheme; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-username + */ + public get username(): Option { + return this._username; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-password + */ + public get password(): Option { + return this._password; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-host + */ + public get host(): Option { + return this._host; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-port + */ + public get port(): Option { + return this._port; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-path + */ + public get path(): Sequence { + return this._path; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-query + */ + public get query(): Option { + return this._query; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-fragment + */ + public get fragment(): Option { + return this._fragment; + } + + /** + * @see https://url.spec.whatwg.org/#include-credentials + */ + public hasCredentials(): boolean { + return this._username.isSome() || this._password.isSome(); + } + + /** + * Remove the fragment portion of this URL. + * + * @remarks + * This method is useful for contexts in which the fragment portion of the URL, + * which isn't passed from client to server, is of no interest. 
+ */ + public withoutFragment(): URL { + if (this._fragment.isNone()) { + return this; + } + + return new URL( + this._scheme, + this._username, + this._password, + this._host, + this._port, + this._path, + this._query, + None + ); + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-equals + */ + public equals(value: URL): boolean; + + /** + * @see https://url.spec.whatwg.org/#concept-url-equals + */ + public equals(value: unknown): value is this; + + public equals(value: unknown): boolean { + return ( + value instanceof URL && + value._scheme === this._scheme && + value._username.equals(this._username) && + value._password.equals(this._password) && + value._host.equals(this._host) && + value._port.equals(this._port) && + value._path.equals(this._path) && + value._query.equals(this._query) && + value._fragment.equals(this._fragment) + ); + } + + public hash(hash: Hash): void { + Hash.writeString(hash, this._scheme); + this._username.hash(hash); + this._password.hash(hash); + this._host.hash(hash); + this._port.hash(hash); + this._path.hash(hash); + this._query.hash(hash); + this._fragment.hash(hash); + } + + public toJSON(): URL.JSON { + return { + scheme: this._scheme, + username: this._username.getOr(null), + password: this._password.getOr(null), + host: this._host.getOr(null), + port: this._port.getOr(null), + path: this._path.toArray(), + query: this._query.getOr(null), + fragment: this._fragment.getOr(null), + }; + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-serializer + */ + public toString(): string { + let output = this._scheme + ":"; + + for (const host of this._host) { + output += "//"; + + if (this.hasCredentials()) { + for (const username of this._username) { + output += username; + } + + for (const password of this._password) { + output += ":" + password; + } + + output += "@"; + } + + output += host; + + for (const port of this._port) { + output += ":" + port.toString(10); + } + } + + if (this._host.isNone() && this._scheme 
=== "file") { + output += "//"; + } + + if ( + this._host.isNone() && + this._path.size > 1 && + this._path.first().includes("") + ) { + output += "/."; + } + + for (const segment of this._path) { + output += "/" + segment; + } + + for (const query of this._query) { + output += "?" + query; + } + + for (const fragment of this._fragment) { + output += "#" + fragment; + } + + return output; + } +} + +export namespace URL { + export interface JSON { + [key: string]: json.JSON; + scheme: string; + username: string | null; + password: string | null; + host: string | null; + port: number | null; + path: Array; + query: string | null; + fragment: string | null; + } + + export function from(json: JSON): URL { + return URL.of( + json.scheme, + Option.from(json.username), + Option.from(json.password), + Option.from(json.host), + Option.from(json.port), + json.path, + Option.from(json.query), + Option.from(json.fragment) + ); + } + + /** + * @see https://url.spec.whatwg.org/#concept-url-parser + * + * @remarks + * Parsing URLs is tricky business and so this function relies on the presence + * of a globally available WHATWG URL class. This API is available in both + * browsers, Node.js, and Deno. 
+ */ + export function parse(url: string, base?: string | URL): Result { + if (typeof base === "string") { + const result = parse(base); + + if (result.isErr()) { + return result; + } + + base = result.get(); + } + + try { + const { + // https://url.spec.whatwg.org/#dom-url-protocol + protocol, + // https://url.spec.whatwg.org/#dom-url-username + username, + // https://url.spec.whatwg.org/#dom-url-password + password, + // https://url.spec.whatwg.org/#dom-url-hostname + hostname, + // https://url.spec.whatwg.org/#dom-url-port + port, + // https://url.spec.whatwg.org/#dom-url-pathname + pathname, + // https://url.spec.whatwg.org/#dom-url-search + search, + // https://url.spec.whatwg.org/#dom-url-hash + hash, + } = new Builtin(url, base?.toString()); + + return Result.of( + URL.of( + // `URL#protocol` appends a ":" to the scheme which we need to remove. + protocol.replace(/:$/, ""), + + // `URL#username`, `URL#password`, and `URL#hostname` expose the + // username, password, and host as-is and so the only thing we need to + // do is reject them when empty. + Option.of(username).reject(isEmpty), + Option.of(password).reject(isEmpty), + Option.of(hostname).reject(isEmpty), + + // `URL#port` exposes the port number as a string so we convert it to + // a number. + Option.of(port).reject(isEmpty).map(Number), + + // `URL#pathname` exposes the path segments with a leading "/" and + // joins the segments with "/". We therefore remove the leading "/" + // and split the segments by "/" into an array. + pathname.replace(/^\//, "").split("/"), + + // `URL#search` exposes the query portion of the URL with a leading + // "?" which we need to remove. + Option.of(search) + .reject(isEmpty) + .map((search) => search.replace(/^\?/, "")), + + // `URL#hash` exposes the fragment portion of the URL with a leading + // "#" which we need to remove. 
+ Option.of(hash) + .reject(isEmpty) + .map((hash) => hash.replace(/^#/, "")) + ) + ); + } catch (err) { + return Err.of(err.message); + } + } +} diff --git a/packages/alfa-url/src/url/builtin.ts b/packages/alfa-url/src/url/builtin.ts new file mode 100644 index 0000000000..dcffd5f179 --- /dev/null +++ b/packages/alfa-url/src/url/builtin.ts @@ -0,0 +1,4 @@ +/** + * @internal + */ +export const Builtin = URL; diff --git a/packages/alfa-url/test/url.spec.ts b/packages/alfa-url/test/url.spec.ts new file mode 100644 index 0000000000..f726f7dc33 --- /dev/null +++ b/packages/alfa-url/test/url.spec.ts @@ -0,0 +1,52 @@ +import { test } from "@siteimprove/alfa-test"; + +import { URL } from "../src/url"; + +test(".parse() parses an absolute URL", (t) => { + t.deepEqual(URL.parse("https://example.com/page.html").get().toJSON(), { + scheme: "https", + username: null, + password: null, + host: "example.com", + port: null, + path: ["page.html"], + query: null, + fragment: null, + }); +}); + +test(".parse() parses a relative URL against a base URL", (t) => { + t.deepEqual(URL.parse("/page.html", "https://example.com/").get().toJSON(), { + scheme: "https", + username: null, + password: null, + host: "example.com", + port: null, + path: ["page.html"], + query: null, + fragment: null, + }); +}); + +test(".parse() parses the special about:blank URL", (t) => { + t.deepEqual(URL.parse("about:blank").get().toJSON(), { + scheme: "about", + username: null, + password: null, + host: null, + port: null, + path: ["blank"], + query: null, + fragment: null, + }); +}); + +test("#equals() checks if two URLs are equal", (t) => { + const a = URL.parse("foo", "file:").get(); + const b = URL.parse("foo", "file:").get(); + const c = URL.parse("bar", "file:").get(); + + t.equal(a.equals(a), true); + t.equal(a.equals(b), true); + t.equal(a.equals(c), false); +}); diff --git a/packages/alfa-url/tsconfig.json b/packages/alfa-url/tsconfig.json new file mode 100644 index 0000000000..475adb1076 --- /dev/null 
+++ b/packages/alfa-url/tsconfig.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json.schemastore.org/tsconfig", + "extends": "../tsconfig.json", + "files": [ + "src/index.ts", + "src/url.ts", + "src/url/builtin.ts", + "test/url.spec.ts" + ], + "references": [ + { + "path": "../alfa-equatable" + }, + { + "path": "../alfa-hash" + }, + { + "path": "../alfa-iterable" + }, + { + "path": "../alfa-json" + }, + { + "path": "../alfa-option" + }, + { + "path": "../alfa-result" + }, + { + "path": "../alfa-sequence" + }, + { + "path": "../alfa-test" + } + ] +} diff --git a/packages/alfa-web/src/page.ts b/packages/alfa-web/src/page.ts index 4c4450977a..258af9cfff 100644 --- a/packages/alfa-web/src/page.ts +++ b/packages/alfa-web/src/page.ts @@ -69,8 +69,8 @@ export class Page implements Resource, json.Serializable, earl.Serializable { dct: "http://purl.org/dc/terms/", }, "@type": ["earl:TestSubject"], - "@id": this.response.url, - "dct:source": this.response.url, + "@id": this.response.url.toString(), + "dct:source": this.response.url.toString(), "dct:hasPart": [this._request.toEARL(), this._response.toEARL()], }; } diff --git a/packages/tsconfig.json b/packages/tsconfig.json index 90ef45128a..e6a80e3f75 100644 --- a/packages/tsconfig.json +++ b/packages/tsconfig.json @@ -84,6 +84,7 @@ { "path": "alfa-trampoline" }, { "path": "alfa-trilean" }, { "path": "alfa-unexpected" }, + { "path": "alfa-url" }, { "path": "alfa-vue" }, { "path": "alfa-web" }, { "path": "alfa-webdriver" },