Skip to content

Commit

Permalink
Introduce @siteimprove/alfa-url package (#421)
Browse files (browse the repository at this point in the history)
  • Loading branch information
kasperisager authored Oct 2, 2020
1 parent: 2db1ec7; commit: c42906f
Show file tree
Hide file tree
Showing 32 changed files with 616 additions and 78 deletions.
8 changes: 4 additions & 4 deletions docs/examples/custom-testing/crawling/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,18 @@ Crawler.with(async (crawler) => {

const earl = outcomes.map((outcome) => outcome.toEARL());

const url = new URL(input.response.url);
const { url } = input.response;

console.group(url.href);
console.group(url.toString());
logStats(outcomes);
console.groupEnd();

const file =
path.join(
__dirname,
"outcomes",
url.host,
url.pathname.replace(/\/$/, "")
url.host.get(),
...url.path.filter((segment) => segment !== "")
) + ".json";

fs.mkdirSync(path.dirname(file), { recursive: true });
Expand Down
8 changes: 4 additions & 4 deletions docs/examples/custom-testing/scraping/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,18 @@ Scraper.with(async (scraper) => {

const earl = outcomes.map((outcome) => outcome.toEARL());

const url = new URL(input.response.url);
const { url } = input.response;

console.group(url.href);
console.group(url.toString());
logStats(outcomes);
console.groupEnd();

const file =
path.join(
__dirname,
"outcomes",
url.host,
url.pathname.replace(/\/$/, "")
url.host.get(),
...url.path.filter((segment) => segment !== "")
) + ".json";

fs.mkdirSync(path.dirname(file), { recursive: true });
Expand Down
3 changes: 2 additions & 1 deletion packages/alfa-cli/bin/alfa/command/scrape/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
Screenshot,
} from "@siteimprove/alfa-scraper";
import { Timeout } from "@siteimprove/alfa-time";
import { URL } from "@siteimprove/alfa-url";

import type { Arguments } from "./arguments";
import type { Flags } from "./flags";
Expand Down Expand Up @@ -139,7 +140,7 @@ export const run: Command.Runner<typeof Flags, typeof Arguments> = async ({
const timeout = Timeout.of(flags.timeout);

const result = await scraper.scrape(
new URL(target, url.pathToFileURL(process.cwd() + path.sep)),
URL.parse(target, url.pathToFileURL(process.cwd() + path.sep).href).get(),
{
timeout,
awaiter,
Expand Down
1 change: 1 addition & 0 deletions packages/alfa-cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"@siteimprove/alfa-rules": "^0.5.0",
"@siteimprove/alfa-scraper": "^0.5.0",
"@siteimprove/alfa-time": "^0.5.0",
"@siteimprove/alfa-url": "^0.5.0",
"@siteimprove/alfa-web": "^0.5.0",
"@siteimprove/alfa-xpath": "^0.5.0",
"@types/node": "^14.0.12",
Expand Down
3 changes: 3 additions & 0 deletions packages/alfa-cli/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@
{
"path": "../alfa-time"
},
{
"path": "../alfa-url"
},
{
"path": "../alfa-web"
},
Expand Down
1 change: 1 addition & 0 deletions packages/alfa-crawler/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"@siteimprove/alfa-mapper": "^0.5.0",
"@siteimprove/alfa-result": "^0.5.0",
"@siteimprove/alfa-scraper": "^0.5.0",
"@siteimprove/alfa-url": "^0.5.0",
"@siteimprove/alfa-web": "^0.5.0"
},
"devDependencies": {
Expand Down
15 changes: 9 additions & 6 deletions packages/alfa-crawler/src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Frontier } from "@siteimprove/alfa-frontier";
import { Mapper } from "@siteimprove/alfa-mapper";
import { Result } from "@siteimprove/alfa-result";
import { Scraper, Screenshot } from "@siteimprove/alfa-scraper";
import { URL } from "@siteimprove/alfa-url";
import { Page } from "@siteimprove/alfa-web";

const { isElement } = Element;
Expand All @@ -19,10 +20,12 @@ export class Crawler {
scraper?: Promise<Scraper>
): Promise<T> {
const crawler = await this.of(scraper);
const result = await mapper(crawler);

await crawler.close();
return result;
try {
return await mapper(crawler);
} finally {
await crawler.close();
}
}

private readonly _scraper: Scraper;
Expand Down Expand Up @@ -53,8 +56,8 @@ export class Crawler {
frontier.complete(url);

for (const page of result) {
if (page.response.url !== url.href) {
frontier.redirect(url.href, page.response.url);
if (!page.response.url.equals(url)) {
frontier.redirect(url, page.response.url);
}

for (const url of urls(page)) {
Expand Down Expand Up @@ -94,7 +97,7 @@ function* urls(page: Page): Iterable<URL> {
if (isElement(node) && node.name === "a") {
yield* node
.attribute("href")
.map((href) => new URL(href.value, page.response.url));
.map((href) => URL.parse(href.value, page.response.url).get());
}
}
}
2 changes: 1 addition & 1 deletion packages/alfa-crawler/test/crawler.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ test("#crawl() crawls a frontier", async (t) =>
for await (const result of crawler.crawl(frontier)) {
t.equal(result.isOk(), true);

pages.push(result.get().response.url);
pages.push(result.get().response.url.toString());
}

t.deepEqual(pages, [
Expand Down
3 changes: 3 additions & 0 deletions packages/alfa-crawler/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
{
"path": "../alfa-test"
},
{
"path": "../alfa-url"
},
{
"path": "../alfa-web"
}
Expand Down
3 changes: 2 additions & 1 deletion packages/alfa-frontier/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
"@siteimprove/alfa-equatable": "^0.5.0",
"@siteimprove/alfa-json": "^0.5.0",
"@siteimprove/alfa-option": "^0.5.0",
"@siteimprove/alfa-predicate": "^0.5.0"
"@siteimprove/alfa-predicate": "^0.5.0",
"@siteimprove/alfa-url": "^0.5.0"
},
"devDependencies": {
"@siteimprove/alfa-test": "^0.5.0"
Expand Down
36 changes: 14 additions & 22 deletions packages/alfa-frontier/src/frontier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Equatable } from "@siteimprove/alfa-equatable";
import { Serializable } from "@siteimprove/alfa-json";
import { Option, None } from "@siteimprove/alfa-option";
import { Predicate } from "@siteimprove/alfa-predicate";
import { URL } from "@siteimprove/alfa-url";

import * as json from "@siteimprove/alfa-json";

Expand Down Expand Up @@ -35,7 +36,7 @@ export class Frontier implements Equatable, Serializable {
}

public isInScope(url: string | URL): boolean {
return toURL(url).href.startsWith(this._scope.href);
return toURL(url).toString().startsWith(this._scope.toString());
}

public hasWaiting(): boolean {
Expand Down Expand Up @@ -186,15 +187,15 @@ export class Frontier implements Equatable, Serializable {
public equals(value: unknown): value is this {
return (
value instanceof Frontier &&
value._scope.href === this._scope.href &&
value._scope.equals(this._scope) &&
value._items.length === this._items.length &&
value._items.every((item, i) => item.equals(this._items[i]))
);
}

public toJSON(): Frontier.JSON {
return {
scope: this._scope.href,
scope: this._scope.toString(),
items: this._items.map((item) => item.toJSON()),
};
}
Expand Down Expand Up @@ -269,10 +270,7 @@ class Item implements Equatable, Serializable {
public matches(url: string | URL): boolean {
url = toURL(url);

return (
this._url.href === url.href ||
this._aliases.some(property("href", equals(url.href)))
);
return this._url.equals(url) || this._aliases.some(equals(url));
}

public transition(state: State): boolean {
Expand Down Expand Up @@ -304,10 +302,7 @@ class Item implements Equatable, Serializable {
public alias(url: string | URL): boolean {
url = toURL(url);

if (
this._url.href === url.href ||
this._aliases.some(property("href", equals(url.href)))
) {
if (this._url.equals(url) || this._aliases.some(equals(url))) {
return false;
}

Expand All @@ -319,7 +314,7 @@ class Item implements Equatable, Serializable {
public redirect(target: string | URL): boolean {
target = toURL(target);

if (this._url.href === target.href) {
if (this._url.equals(target)) {
return false;
}

Expand All @@ -332,19 +327,17 @@ class Item implements Equatable, Serializable {
public equals(value: unknown): value is this {
return (
value instanceof Item &&
value._url.href === this._url.href &&
value._url.equals(this._url) &&
value._aliases.length === this._aliases.length &&
value._aliases.every(
(alias, i) => alias.href === this._aliases[i].href
) &&
value._aliases.every((alias, i) => alias.equals(this._aliases[i])) &&
value._state === this._state
);
}

public toJSON(): Item.JSON {
return {
url: this._url.href,
aliases: this._aliases.map((url) => url.href),
url: this._url.toString(),
aliases: this._aliases.map((url) => url.toString()),
state: this._state,
};
}
Expand All @@ -360,11 +353,10 @@ namespace Item {
}

function toURL(url: string | URL): URL {
url = typeof url === "string" ? new URL(url) : url;
url.hash = "";
return url;
url = typeof url === "string" ? URL.parse(url).get() : url;
return url.withoutFragment();
}

function isInScope(scope: string | URL, url: string | URL): boolean {
return toURL(url).href.startsWith(toURL(scope).href);
return toURL(url).toString().startsWith(toURL(scope).toString());
}
2 changes: 1 addition & 1 deletion packages/alfa-frontier/test/frontier.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ test("#enqueue() doesn't change the state of an already seen URL", (t) => {
test("#dequeue() gets the next waiting URL in queue and moves it to in progress", (t) => {
const frontier = Frontier.of("https://example.com/");

t.deepEqual(frontier.dequeue().get(), new URL("https://example.com"));
t.deepEqual(frontier.dequeue().get().toString(), "https://example.com/");

t.deepEqual(frontier.toJSON(), {
scope: "https://example.com/",
Expand Down
3 changes: 3 additions & 0 deletions packages/alfa-frontier/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
},
{
"path": "../alfa-test"
},
{
"path": "../alfa-url"
}
]
}
3 changes: 2 additions & 1 deletion packages/alfa-http/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
"@siteimprove/alfa-json": "^0.5.0",
"@siteimprove/alfa-map": "^0.5.0",
"@siteimprove/alfa-option": "^0.5.0",
"@siteimprove/alfa-refinement": "^0.5.0"
"@siteimprove/alfa-refinement": "^0.5.0",
"@siteimprove/alfa-url": "^0.5.0"
},
"devDependencies": {
"@siteimprove/alfa-test": "^0.5.0"
Expand Down
20 changes: 12 additions & 8 deletions packages/alfa-http/src/request.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { Decoder, Encoder } from "@siteimprove/alfa-encoding";
import { URL } from "@siteimprove/alfa-url";

import * as earl from "@siteimprove/alfa-earl";
import * as json from "@siteimprove/alfa-json";

Expand All @@ -11,25 +13,27 @@ import { Headers } from "./headers";
export class Request implements Body, json.Serializable, earl.Serializable {
public static of(
method: string,
url: string,
url: URL,
headers: Headers = Headers.empty(),
body: ArrayBuffer = new ArrayBuffer(0)
): Request {
return new Request(method, url, headers, body);
}

private static _empty = Request.of("GET", URL.parse("about:blank").get());

public static empty(): Request {
return Request.of("GET", "about:blank");
return this._empty;
}

private readonly _method: string;
private readonly _url: string;
private readonly _url: URL;
private readonly _headers: Headers;
private readonly _body: ArrayBuffer;

private constructor(
method: string,
url: string,
url: URL,
headers: Headers,
body: ArrayBuffer
) {
Expand All @@ -49,7 +53,7 @@ export class Request implements Body, json.Serializable, earl.Serializable {
/**
* @see https://fetch.spec.whatwg.org/#dom-request-url
*/
public get url(): string {
public get url(): URL {
return this._url;
}

Expand All @@ -70,7 +74,7 @@ export class Request implements Body, json.Serializable, earl.Serializable {
public toJSON(): Request.JSON {
return {
method: this._method,
url: this._url,
url: this._url.toString(),
headers: this._headers.toJSON(),
body: Decoder.decode(new Uint8Array(this._body)),
};
Expand All @@ -83,7 +87,7 @@ export class Request implements Body, json.Serializable, earl.Serializable {
},
"@type": ["http:Message", "http:Request"],
"http:methodName": this._method,
"http:requestURI": this._url,
"http:requestURI": this._url.toString(),
"http:headers": this._headers.toEARL(),
"http:body": {
"@context": {
Expand Down Expand Up @@ -130,7 +134,7 @@ export namespace Request {
export function from(json: JSON): Request {
return Request.of(
json.method,
json.url,
URL.parse(json.url).get(),
Headers.from(json.headers),
Encoder.encode(json.body)
);
Expand Down
Loading

0 comments on commit c42906f

Please sign in to comment.