Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix and improve processing of IETF specs #1138

Merged
merged 1 commit into from
Nov 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 5 additions & 30 deletions specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -600,36 +600,11 @@
"https://www.rfc-editor.org/rfc/rfc8297",
"https://www.rfc-editor.org/rfc/rfc8470",
"https://www.rfc-editor.org/rfc/rfc8942",
{
"url": "https://www.rfc-editor.org/rfc/rfc9110",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
{
"url": "https://www.rfc-editor.org/rfc/rfc9111",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
{
"url": "https://www.rfc-editor.org/rfc/rfc9112",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
{
"url": "https://www.rfc-editor.org/rfc/rfc9113",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
{
"url": "https://www.rfc-editor.org/rfc/rfc9114",
"nightly": {
"repository": "https://github.com/httpwg/httpwg.github.io"
}
},
"https://www.rfc-editor.org/rfc/rfc9110",
"https://www.rfc-editor.org/rfc/rfc9111",
"https://www.rfc-editor.org/rfc/rfc9112",
"https://www.rfc-editor.org/rfc/rfc9113",
"https://www.rfc-editor.org/rfc/rfc9114",
{
"url": "https://www.rfc-editor.org/rfc/rfc9163",
"nightly": {
Expand Down
163 changes: 131 additions & 32 deletions src/fetch-info.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const puppeteer = require("puppeteer");
const throttle = require("./throttle");
const throttledFetch = throttle(fetch, 2);
const computeShortname = require("./compute-shortname");
const Octokit = require("./octokit");

// Map spec statuses returned by Specref to those used in specs
// Note we typically won't get /TR statuses from Specref, since all /TR URLs
Expand Down Expand Up @@ -241,9 +242,85 @@ async function fetchInfoFromSpecref(specs, options) {


async function fetchInfoFromIETF(specs, options) {
/**
 * Retrieve the JSON description that the IETF datatracker exposes for a
 * given document.
 *
 * @param {string} draftName Name of the document, interpolated as-is into
 *   the datatracker URL (`/doc/<draftName>/doc.json`).
 * @returns {Promise<Object>} The parsed JSON payload.
 * @throws {Error} When the HTTP status is not 200, or when the response
 *   body cannot be parsed as JSON.
 */
async function fetchJSONDoc(draftName) {
  const docUrl = `https://datatracker.ietf.org/doc/${draftName}/doc.json`;
  const response = await throttledFetch(docUrl, options);

  // Anything other than a plain 200 is reported as a failure
  const isOk = response.status === 200;
  if (!isOk) {
    throw new Error(`IETF datatracker returned an error for ${docUrl}, status code is ${response.status}`);
  }

  let parsed;
  try {
    parsed = await response.json();
  }
  catch {
    // The underlying parse error is replaced by a message naming the URL
    throw new Error(`IETF datatracker returned invalid JSON for ${docUrl}`);
  }
  return parsed;
}

/**
 * Fetch a datatracker JSON resource and derive an RFC name from its `rfc`
 * property.
 *
 * @param {string} docUrl URL of the JSON resource to fetch.
 * @returns {Promise<string>} The RFC name, i.e. "rfc" followed by the value
 *   of the `rfc` property found in the response (e.g. "rfc9110").
 * @throws {Error} When the HTTP status is not 200, when the body is not
 *   valid JSON, or when the JSON has no usable `rfc` property.
 */
async function fetchRFCName(docUrl) {
  // NOTE(review): this uses plain `fetch` whereas the sibling helpers go
  // through `throttledFetch` - confirm whether that is intentional.
  const res = await fetch(docUrl, options);
  if (res.status !== 200) {
    throw new Error(`IETF datatracker returned an error for ${docUrl}, status code is ${res.status}`);
  }

  let body;
  try {
    body = await res.json();
  }
  catch (err) {
    throw new Error(`IETF datatracker returned invalid JSON for ${docUrl}`);
  }

  // Checked outside of the try/catch above so that a missing RFC number is
  // not mistakenly reported as an invalid JSON error.
  if (!body.rfc) {
    throw new Error(`Could not find an RFC name in ${docUrl}`);
  }
  return `rfc${body.rfc}`;
}

/**
 * List the names of the RFCs that obsolete the given document, according to
 * the "obs" relationships returned by the IETF datatracker API.
 *
 * @param {string} draftName Name of the document. Names that do not start
 *   with "rfc" are not looked up at all.
 * @returns {Promise<string[]>} Names resolved through `fetchRFCName` for
 *   each related document, or an empty array for non-RFC names.
 * @throws {Error} When the datatracker answers with a non-200 status or
 *   with a body that is not valid JSON (errors raised while resolving the
 *   individual names propagate as well).
 */
async function fetchObsoletedBy(draftName) {
  const isRFC = draftName.startsWith('rfc');
  if (!isRFC) {
    return [];
  }

  const queryUrl = `https://datatracker.ietf.org/api/v1/doc/relateddocument/?format=json&relationship__slug__in=obs&target__name__in=${draftName}`;
  const response = await throttledFetch(queryUrl, options);
  if (response.status !== 200) {
    throw new Error(`IETF datatracker returned an error for ${queryUrl}, status code is ${response.status}`);
  }

  let payload;
  try {
    payload = await response.json();
  }
  catch {
    throw new Error(`IETF datatracker returned invalid JSON for ${queryUrl}`);
  }

  // Each related object points to its source document through a path
  // relative to the datatracker origin. Build all URLs first, then resolve
  // the RFC names in parallel.
  const sourceUrls = payload.objects.map(
    relation => `https://datatracker.ietf.org${relation.source}`);
  const pendingNames = sourceUrls.map(sourceUrl => fetchRFCName(sourceUrl));
  return Promise.all(pendingNames);
}

// Most RFCs published by the HTTP WG have a friendly version under:
// https://httpwg.org/specs
// ... but not all (e.g., not rfc9292) and some related specs from other
// groups are also published under httpwg.org. To get a current list of specs
// published under https://httpwg.org/specs, let's look at the contents of
// the underlying GitHub repository:
// https://github.com/httpwg/httpwg.github.io/
//
// Returns a promise that resolves with the list of RFC names (e.g.
// "rfc9110") for which the repository tree contains a
// "specs/rfcNNNN.html" file.
async function getHttpwgRFCs() {
  const octokit = new Octokit({ auth: options.githubToken });
  const { data } = await octokit.git.getTree({
    owner: 'httpwg',
    repo: 'httpwg.github.io',
    tree_sha: "HEAD",
    recursive: true
  });

  // Match each path only once: the capture group yields the RFC name and
  // paths that do not match are dropped.
  return data.tree.flatMap(entry => {
    const match = entry.path.match(/^specs\/(rfc\d+)\.html$/);
    return match ? [match[1]] : [];
  });
}
const httpwgRFCs = await getHttpwgRFCs();

const info = await Promise.all(specs.map(async spec => {
// IETF can only provide information about IETF specs
if (!spec.url.match(/\.ietf\.org/)) {
if (!spec.url.match(/\.rfc-editor\.org/) &&
!spec.url.match(/datatracker\.ietf\.org/)) {
return;
}

Expand All @@ -254,44 +331,66 @@ async function fetchInfoFromIETF(specs, options) {
if (!draftName) {
throw new Error(`IETF document follows an unexpected URL pattern: ${spec.url}`);
}
const url = `https://datatracker.ietf.org/doc/${draftName[1]}/doc.json`;
const res = await throttledFetch(url, options);
if (res.status !== 200) {
throw new Error(`IETF datatracker returned an error, status code is ${res.status}`);
const jsonDoc = await fetchJSONDoc(draftName[1]);
const lastRevision = jsonDoc.rev_history.pop();
if (lastRevision.name !== draftName[1]) {
throw new Error(`IETF spec ${spec.url} published under a new name "${lastRevision.name}". Canonical URL must be updated accordingly.`);
}
let body;
try {
body = await res.json();

// Compute the nightly URL from the spec name, publication status, and
// group that develops it.
// Note we prefer the httpwg.org version for HTTP WG RFCs and drafts.
let nightly;
if (lastRevision.name.startsWith('rfc')) {
if (httpwgRFCs.includes(lastRevision.name)) {
nightly = `https://httpwg.org/specs/${lastRevision.name}.html`
}
else {
nightly = `https://www.rfc-editor.org/rfc/${lastRevision.name}`;
}
}
catch (err) {
throw new Error(`IETF datatracker returned invalid JSON for ${url}`);
else if (jsonDoc.group?.acronym === 'httpbis' || jsonDoc.group?.acronym === 'httpstate') {
nightly = `https://httpwg.org/http-extensions/${lastRevision.name}.html`
}
else {
nightly = `https://www.ietf.org/archive/id/${lastRevision.name}-${lastRevision.rev}.html`;
}

const lastRevision = body.rev_history.pop();
if (lastRevision.name !== body.name) {
throw new Error(`IETF spec ${spec.url} published under a new name "${lastRevision.name}". Canonical URL must be updated accordingly.`);
// For the status, use the std_level property, which contains one of the
// statuses in https://datatracker.ietf.org/api/v1/name/stdlevelname/
// The property is null for an unpublished Editor's Draft.
const status = jsonDoc.std_level ?? "Editor's Draft";

const specInfo = { title: jsonDoc.title, nightly, status };

// RFCs may have been obsoleted by another IETF spec. When that happens, we
// should flag the spec as discontinued and obsoleted by the other spec(s).
const obsoletedBy = await fetchObsoletedBy(draftName[1]);
const missingRFC = obsoletedBy.find(shortname => !specs.find(spec => spec.shortname === shortname));
if (missingRFC) {
throw new Error(`IETF spec at ${spec.url} is obsoleted by ${missingRFC} which is not in the list.`);
}

// Prefer the httpwg.org version for HTTP WG drafts
const nightly = (body.group?.acronym === 'httpbis') ?
`https://httpwg.org/http-extensions/${lastRevision.name}.html` :
`https://www.ietf.org/archive/id/${lastRevision.name}-${lastRevision.rev}.html`;
if (obsoletedBy.length > 0) {
specInfo.standing = "discontinued";
specInfo.obsoletedBy = obsoletedBy;
}

return {
title: body.title,
nightly: nightly,
state: body.state
};
return specInfo;
}));

// TODO: use "state" to return a better status than "Editor's Draft".
const results = {};
specs.forEach((spec, idx) => {
if (info[idx]) {
const specInfo = info[idx];
if (specInfo) {
results[spec.shortname] = {
nightly: { url: info[idx].nightly, status: "Editor's Draft" },
title: info[idx].title
nightly: { url: specInfo.nightly, status: specInfo.status },
title: specInfo.title
};
if (specInfo.standing === "discontinued") {
results[spec.shortname].standing = specInfo.standing;
results[spec.shortname].obsoletedBy = specInfo.obsoletedBy;
}
}
});
return results;
Expand Down Expand Up @@ -489,14 +588,14 @@ async function fetchInfo(specs, options) {
let remainingSpecs = specs;
const w3cInfo = await fetchInfoFromW3CApi(remainingSpecs, options);

// Compute information from Specref for remaining specs
remainingSpecs = remainingSpecs.filter(spec => !w3cInfo[spec.shortname]);
const specrefInfo = await fetchInfoFromSpecref(remainingSpecs, options);

// Extract information from IETF datatracker for remaining specs
remainingSpecs = remainingSpecs.filter(spec => !specrefInfo[spec.shortname]);
remainingSpecs = remainingSpecs.filter(spec => !w3cInfo[spec.shortname]);
const ietfInfo = await fetchInfoFromIETF(remainingSpecs, options);

// Compute information from Specref for remaining specs
remainingSpecs = remainingSpecs.filter(spec => !ietfInfo[spec.shortname]);
const specrefInfo = await fetchInfoFromSpecref(remainingSpecs, options);

// Extract information directly from the spec for remaining specs
remainingSpecs = remainingSpecs.filter(spec => !ietfInfo[spec.shortname]);
const specInfo = await fetchInfoFromSpecs(remainingSpecs, options);
Expand All @@ -505,8 +604,8 @@ async function fetchInfo(specs, options) {
const results = {};
specs.map(spec => spec.shortname).forEach(name => results[name] =
(w3cInfo[name] ? Object.assign(w3cInfo[name], { source: "w3c" }) : null) ||
(specrefInfo[name] ? Object.assign(specrefInfo[name], { source: "specref" }) : null) ||
(ietfInfo[name] ? Object.assign(ietfInfo[name], { source: "ietf" }) : null) ||
(specrefInfo[name] ? Object.assign(specrefInfo[name], { source: "specref" }) : null) ||
(specInfo[name] ? Object.assign(specInfo[name], { source: "spec" }) : null));

// Add series info from W3C API
Expand Down
69 changes: 67 additions & 2 deletions test/fetch-info.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,37 @@ describe("fetch-info module", function () {
});

describe("fetch from IETF datatracker", () => {
it("fetches info about RFCs from datatracker", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc7578",
shortname: "rfc7578"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].title, "Returning Values from Forms: multipart/form-data");
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, "https://www.rfc-editor.org/rfc/rfc7578");
});

it("fetches info about HTTP WG RFCs from datatracker", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc9110",
shortname: "rfc9110"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].title, "HTTP Semantics");
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, "https://httpwg.org/specs/rfc9110.html");
});

it("extracts a suitable nightly URL from an IETF draft", async () => {
const spec = {
url: "https://datatracker.ietf.org/doc/html/draft-davidben-http-client-hint-reliability",
shortname: "client-hint-reliability"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].title, "Client Hint Reliability");
assert.equal(info[spec.shortname].source, "ietf");
assert.match(info[spec.shortname].nightly.url, /^https:\/\/www\.ietf\.org\/archive\/id\/draft-davidben-http-client-hint-reliability-\d+\.html/);
});
Expand All @@ -79,11 +102,53 @@ describe("fetch-info module", function () {
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].title, "Digest Fields");
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, "https://httpwg.org/http-extensions/draft-ietf-httpbis-digest-headers.html");
});

it("extracts a suitable nightly URL from an IETF HTTP State Management Mechanism WG RFC", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc6265",
shortname: "rfc6265"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, "https://httpwg.org/specs/rfc6265.html");
});

it("uses the rfc-editor URL as nightly for an IETF HTTP WG RFC not published under httpwg.org", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc9163",
shortname: "rfc9163"
};
const info = await fetchInfo([spec]);
assert.ok(info[spec.shortname]);
assert.equal(info[spec.shortname].source, "ietf");
assert.equal(info[spec.shortname].nightly.url, spec.url);
});

it("identifies discontinued IETF specs", async () => {
const info = await fetchInfo([
{ url: "https://www.rfc-editor.org/rfc/rfc7230", shortname: "rfc7230" },
{ url: "https://www.rfc-editor.org/rfc/rfc9110", shortname: "rfc9110" },
{ url: "https://www.rfc-editor.org/rfc/rfc9112", shortname: "rfc9112" }
]);
assert.ok(info["rfc7230"]);
assert.equal(info["rfc7230"].standing, "discontinued");
assert.deepStrictEqual(info["rfc7230"].obsoletedBy, ["rfc9110", "rfc9112"]);
});

it("throws when a discontinued IETF spec is obsoleted by an unknown spec", async () => {
const spec = {
url: "https://www.rfc-editor.org/rfc/rfc7230",
shortname: "rfc7230"
};
await assert.rejects(
fetchInfo([spec]),
/^Error: IETF spec at (.*)rfc7230 is obsoleted by rfc9110 which is not in the list.$/);
});

it("throws when an IETF URL needs to be updated", async () => {
const spec = {
url: "https://datatracker.ietf.org/doc/html/draft-ietf-websec-strict-transport-sec",
Expand Down