From fae7857f81b94d1febffeedf5a6a210ddf8ce8f6 Mon Sep 17 00:00:00 2001 From: Florian Scholz Date: Mon, 3 May 2021 18:02:40 +0200 Subject: [PATCH] Add a Specifications macro and extract a special specification section to the Document (#3518) * Prototype spec section from BCD spec_urls * Address some feedback, mostly rename things * Branch out building special sections in their own functions per Ryuno-Ki * Add a KumaScript test * Add extraction test * Add browser-specs data; update tests; update renderer * feedback on pr 3518 * Remove log calls, fix tests * Rename SpecificationTable to SpecificationSection/Specifications * Update browser-specs to latest * Add specfications to makeTOC * Move spec-section to ingredients * Remove comment Co-authored-by: Peter Bengtsson --- build/document-extractor.js | 285 ++++++++++++------ build/index.js | 3 +- client/src/document/index.tsx | 5 + .../src/document/ingredients/spec-section.tsx | 64 ++++ kumascript/macros/Specifications.ejs | 25 ++ .../tests/macros/Specifications.test.js | 24 ++ package.json | 1 + .../web/spec_section_extraction/index.html | 20 ++ testing/tests/index.test.js | 28 ++ yarn.lock | 5 + 10 files changed, 365 insertions(+), 95 deletions(-) create mode 100644 client/src/document/ingredients/spec-section.tsx create mode 100644 kumascript/macros/Specifications.ejs create mode 100644 kumascript/tests/macros/Specifications.test.js create mode 100644 testing/content/files/en-us/web/spec_section_extraction/index.html diff --git a/build/document-extractor.js b/build/document-extractor.js index 91f8a1e477ca..06084aa704b6 100644 --- a/build/document-extractor.js +++ b/build/document-extractor.js @@ -1,5 +1,6 @@ const cheerio = require("cheerio"); const { packageBCD } = require("./resolve-bcd"); +const specs = require("browser-specs"); /** Extract and mutate the $ if it as a "Quick_Links" section. * But only if it exists. @@ -110,16 +111,28 @@ function extractSections($) { * data: {....} * }] * - * At the time of writing (Jan 2020), there is only one single special type of - * section and that's BCD. The idea is we look for a bunch of special sections - * and if all else fails, just leave it as HTML as is. + * Another example is for the specification section. If the input is this: + * + *

Specifications

+ *
...
+ * + * Then, extract the data-bcd-query and return this: + * + * [{ + * type: "specifications", + * value: { + * query: "foo.bar.thing", + * id: "specifications", + * title: "Specifications", + * specifications: {....} + * }] */ function addSections($) { const flaws = []; - const countPotentialBCDDataDivs = $.find("div.bc-data").length; - if (countPotentialBCDDataDivs) { - /** If there's exactly 1 BCD table the only section to add is something + const countPotentialSpecialDivs = $.find("div.bc-data, div.bc-specs").length; + if (countPotentialSpecialDivs) { + /** If there's exactly 1 special table the only section to add is something * like this: * { * "type": "browser_compatibility", @@ -132,8 +145,8 @@ function addSections($) { * * Where the 'title' and 'id' values comes from the

tag (if available). * - * However, if there are **multiple BCD tables**, which is rare, the it - * needs to return something like this: + * However, if there are **multiple special tables**, + * it needs to return something like this: * * [{ * "type": "prose", @@ -154,7 +167,7 @@ function addSections($) { * "content": "Any other stuff before table maybe" * }, */ - if (countPotentialBCDDataDivs > 1) { + if (countPotentialSpecialDivs > 1) { const subSections = []; const section = cheerio .load("
", { @@ -163,19 +176,20 @@ function addSections($) { .eq(0); // Loop over each and every "root element" in the node and keep piling - // them up in a buffer, until you encounter a `div.bc-table` then + // them up in a buffer, until you encounter a `div.bc-data` or `div.bc-specs` then // add that to the stack, clear and repeat. const iterable = [...$[0].childNodes]; let c = 0; - let countBCDDataDivsFound = 0; + let countSpecialDivsFound = 0; iterable.forEach((child) => { if ( child.tagName === "div" && child.attribs && child.attribs.class && - /bc-data/.test(child.attribs.class) + (child.attribs.class.includes("bc-data") || + child.attribs.class.includes("bc-specs")) ) { - countBCDDataDivsFound++; + countSpecialDivsFound++; if (c) { const [proseSections, proseFlaws] = _addSectionProse( section.clone() @@ -186,10 +200,10 @@ function addSections($) { c = 0; // reset the counter } section.append(child); - // XXX That `_addSingleSectionBCD(section.clone())` might return a + // XXX That `_addSingleSpecialSection(section.clone())` might return a // and empty array and that means it failed and we should // bail. - subSections.push(..._addSingleSectionBCD(section.clone())); + subSections.push(..._addSingleSpecialSection(section.clone())); section.empty(); } else { section.append(child); @@ -201,28 +215,29 @@ function addSections($) { subSections.push(...proseSections); flaws.push(...proseFlaws); } - if (countBCDDataDivsFound !== countPotentialBCDDataDivs) { - const leftoverCount = countPotentialBCDDataDivs - countBCDDataDivsFound; - const explanation = `${leftoverCount} 'div.bc-data' element${ + if (countSpecialDivsFound !== countPotentialSpecialDivs) { + const leftoverCount = countPotentialSpecialDivs - countSpecialDivsFound; + const explanation = `${leftoverCount} 'div.bc-data' or 'div.bc-specs' element${ leftoverCount > 1 ? "s" : "" } found but deeply nested.`; flaws.push(explanation); } return [subSections, flaws]; } - const bcdSections = _addSingleSectionBCD($); + const specialSections = _addSingleSpecialSection($); - // The _addSingleSectionBCD() function will have sucked up the

or

- // and the `div.bc-data` to turn it into a BCD section. + // The _addSingleSpecialSection() function will have sucked up the

or

+ // and the `div.bc-data` or `div.bc-specs` to turn it into a special section. // First remove that, then put whatever HTML is left as a prose // section underneath. $.find("div.bc-data, h2, h3").remove(); + $.find("div.bc-specs, h2, h3").remove(); const [proseSections, proseFlaws] = _addSectionProse($); - bcdSections.push(...proseSections); + specialSections.push(...proseSections); flaws.push(...proseFlaws); - if (bcdSections.length) { - return [bcdSections, flaws]; + if (specialSections.length) { + return [specialSections, flaws]; } } @@ -233,7 +248,7 @@ function addSections($) { return [proseSections, flaws]; } -function _addSingleSectionBCD($) { +function _addSingleSpecialSection($) { let id = null; let title = null; let isH3 = false; @@ -251,7 +266,16 @@ function _addSingleSectionBCD($) { } } - const dataQuery = $.find("div.bc-data").attr("id"); + let dataQuery = null; + let specialSectionType = null; + if ($.find("div.bc-data").length) { + specialSectionType = "browser_compatibility"; + dataQuery = $.find("div.bc-data").attr("id"); + } else if ($.find("div.bc-specs").length) { + specialSectionType = "specifications"; + dataQuery = $.find("div.bc-specs").attr("data-bcd-query"); + } + // Some old legacy documents haven't been re-rendered yet, since it // was added, so the `div.bc-data` tag doesn't have a `id="bcd:..."` // attribute. If that's the case, bail and fail back on a regular @@ -263,7 +287,103 @@ function _addSingleSectionBCD($) { } const query = dataQuery.replace(/^bcd:/, ""); const { browsers, data } = packageBCD(query); - if (data === undefined) { + + if (specialSectionType === "browser_compatibility") { + if (data === undefined) { + return [ + { + type: specialSectionType, + value: { + title, + id, + isH3, + data: null, + query, + browsers: null, + }, + }, + ]; + } + return _buildSpecialBCDSection(); + } else if (specialSectionType === "specifications") { + if (data === undefined) { + return [ + { + type: specialSectionType, + value: { + title, + id, + isH3, + query, + specifications: [], + }, + }, + ]; + } + return _buildSpecialSpecSection(); + } + + throw new Error(`Unrecognized special section type '${specialSectionType}'`); + + function _buildSpecialBCDSection() { + // First extract a map of all release data, keyed by (normalized) browser + // name and the versions. + // You'll have a map that looks like this: + // + // 'chrome_android': { + // '28': { + // release_data: '2012-06-01', + // release_notes: '...', + // ... + // + // The reason we extract this to a locally scoped map, is so we can + // use it to augment the `__compat` blocks for the latest version + // when (if known) it was added. + const browserReleaseData = new Map(); + for (const [name, browser] of Object.entries(browsers)) { + const releaseData = new Map(); + for (const [version, data] of Object.entries(browser.releases || [])) { + if (data) { + releaseData.set(version, data); + } + } + browserReleaseData.set(name, releaseData); + } + + for (const [key, compat] of Object.entries(data)) { + let block; + if (key === "__compat") { + block = compat; + } else if (compat.__compat) { + block = compat.__compat; + } + if (block) { + for (let [browser, info] of Object.entries(block.support)) { + // `info` here will be one of the following: + // - a single simple_support_statement: + // { version_added: 42 } + // - an array of simple_support_statements: + // [ { version_added: 42 }, { prefix: '-moz', version_added: 35 } ] + // + // Standardize the first version to an array of one, so we don't have + // to deal with two different forms below + if (!Array.isArray(info)) { + info = [info]; + } + for (const infoEntry of info) { + const added = infoEntry.version_added; + if (browserReleaseData.has(browser)) { + if (browserReleaseData.get(browser).has(added)) { + infoEntry.release_date = browserReleaseData + .get(browser) + .get(added).release_date; + } + } + } + } + } + } + return [ { type: "browser_compatibility", @@ -271,85 +391,62 @@ function _addSingleSectionBCD($) { title, id, isH3, - data: null, + data, query, - browsers: null, + browsers, }, }, ]; } - // First extract a map of all release data, keyed by (normalized) browser - // name and the versions. - // You'll have a map that looks like this: - // - // 'chrome_android': { - // '28': { - // release_data: '2012-06-01', - // release_notes: '...', - // ... - // - // The reason we extract this to a locally scoped map, is so we can - // use it to augment the `__compat` blocks for the latest version - // when (if known) it was added. - const browserReleaseData = new Map(); - for (const [name, browser] of Object.entries(browsers)) { - const releaseData = new Map(); - for (const [version, data] of Object.entries(browser.releases || [])) { - if (data) { - releaseData.set(version, data); - } - } - browserReleaseData.set(name, releaseData); - } + function _buildSpecialSpecSection() { + // Collect spec_urls from a BCD feature. + // Can either be a string or an array of strings. + let specURLs = []; - for (const [key, compat] of Object.entries(data)) { - let block; - if (key === "__compat") { - block = compat; - } else if (compat.__compat) { - block = compat.__compat; - } - if (block) { - for (let [browser, info] of Object.entries(block.support)) { - // `info` here will be one of the following: - // - a single simple_support_statement: - // { version_added: 42 } - // - an array of simple_support_statements: - // [ { version_added: 42 }, { prefix: '-moz', version_added: 35 } ] - // - // Standardize the first version to an array of one, so we don't have - // to deal with two different forms below - if (!Array.isArray(info)) { - info = [info]; - } - for (const infoEntry of info) { - const added = infoEntry.version_added; - if (browserReleaseData.has(browser)) { - if (browserReleaseData.get(browser).has(added)) { - infoEntry.release_date = browserReleaseData - .get(browser) - .get(added).release_date; - } - } + for (const [key, compat] of Object.entries(data)) { + if (key === "__compat" && compat.spec_url) { + if (Array.isArray(compat.spec_url)) { + specURLs = compat.spec_url; + } else { + specURLs.push(compat.spec_url); } } } - } - return [ - { - type: "browser_compatibility", - value: { - title, - id, - isH3, - data, - query, - browsers, + // Use BCD specURLs to look up more specification data + // from the browser-specs package + const specifications = specURLs + .map((specURL) => { + const spec = specs.find( + (spec) => + specURL.startsWith(spec.url) || specURL.startsWith(spec.nightly.url) + ); + if (spec) { + // We only want to return exactly the keys that we will use in the + // client code that renders this in React. + return { + bcdSpecificationURL: specURL, + title: spec.title, + shortTitle: spec.shortTitle, + }; + } + }) + .filter(Boolean); + + return [ + { + type: "specifications", + value: { + title, + id, + isH3, + specifications, + query, + }, }, - }, - ]; + ]; + } } function _addSectionProse($) { diff --git a/build/index.js b/build/index.js index 41e328c6e375..fe6a2e6d36ac 100644 --- a/build/index.js +++ b/build/index.js @@ -212,7 +212,8 @@ function makeTOC(doc) { .map((section) => { if ( (section.type === "prose" || - section.type === "browser_compatibility") && + section.type === "browser_compatibility" || + section.type === "specifications") && section.value.id && section.value.title && !section.value.isH3 diff --git a/client/src/document/index.tsx b/client/src/document/index.tsx index cc955807cc7c..27ebb59a79a6 100644 --- a/client/src/document/index.tsx +++ b/client/src/document/index.tsx @@ -9,6 +9,7 @@ import { Doc } from "./types"; // Ingredients import { Prose, ProseWithHeading } from "./ingredients/prose"; import { LazyBrowserCompatibilityTable } from "./lazy-bcd-table"; +import { SpecificationSection } from "./ingredients/spec-section"; // Misc // Sub-components @@ -232,6 +233,10 @@ function RenderDocumentBody({ doc }) { {...section.value} /> ); + } else if (section.type === "specifications") { + return ( + + ); } else { console.warn(section); throw new Error(`No idea how to handle a '${section.type}' section`); diff --git a/client/src/document/ingredients/spec-section.tsx b/client/src/document/ingredients/spec-section.tsx new file mode 100644 index 000000000000..c1352568281d --- /dev/null +++ b/client/src/document/ingredients/spec-section.tsx @@ -0,0 +1,64 @@ +import { DisplayH2, DisplayH3 } from "./utils"; + +export function SpecificationSection({ + id, + title, + isH3, + specifications, + query, +}: { + id: string; + title: string; + isH3: boolean; + specifications: Array<{ + title: string; + bcdSpecificationURL: string; + shortTitle: string; + }>; + query: string; +}) { + return ( + <> + {title && !isH3 && } + {title && isH3 && } + + {specifications.length > 0 ? ( + + + + + + + + {specifications.map((spec) => ( + + + + ))} + +
Specification
+ + {spec.title} ({spec.shortTitle}) +
{" "} + #{spec.bcdSpecificationURL.split("#")[1]} +
+
+ ) : ( +
+

No specification found

+

+ No specification data found for {query}.
+ Check for problems with this page or + contribute a missing spec_url to{" "} + + mdn/browser-compat-data + + . Also make sure the specification is included in{" "} + w3c/browser-specs + . +

+
+ )} + + ); +} diff --git a/kumascript/macros/Specifications.ejs b/kumascript/macros/Specifications.ejs new file mode 100644 index 000000000000..149ae710502f --- /dev/null +++ b/kumascript/macros/Specifications.ejs @@ -0,0 +1,25 @@ +<% +/* +Placeholder to render a specification section with spec_urls from BCD + +Parameters + +$0 – A query string indicating for which feature to retrieve specification URLs for. + +Example calls + +{{Specifications}} +{{Specifications("html.element.abbr")}} + +*/ + +var query = $0 || env['browser-compat']; +if (!query) { + throw new Error("No first query argument or 'browser-compat' front-matter value passed"); +} +var output = `
+ If you're able to see this, something went wrong on this page. +
`; +%> + +<%-output%> diff --git a/kumascript/tests/macros/Specifications.test.js b/kumascript/tests/macros/Specifications.test.js new file mode 100644 index 000000000000..ffff3ed11f6b --- /dev/null +++ b/kumascript/tests/macros/Specifications.test.js @@ -0,0 +1,24 @@ +const { assert, itMacro, describeMacro, lintHTML } = require("./utils"); + +const jsdom = require("jsdom"); +const { JSDOM } = jsdom; + +describeMacro("Specifications", function () { + itMacro("Outputs a simple div tag", async (macro) => { + const result = await macro.call("api.feature"); + const dom = JSDOM.fragment(result); + assert.equal( + dom.querySelector("div.bc-specs").dataset.bcdQuery, + "api.feature" + ); + assert.equal( + dom.querySelector("div.bc-specs").textContent.trim(), + "If you're able to see this, something went wrong on this page." + ); + }); + + itMacro("Outputs valid HTML", async (macro) => { + const result = await macro.call("api.feature"); + expect(lintHTML(result)).toBeFalsy(); + }); +}); diff --git a/package.json b/package.json index 9b34cd50b398..9db9fe0155c3 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "@fast-csv/parse": "4.3.6", "@mdn/browser-compat-data": "3.3.2", "accept-language-parser": "1.5.0", + "browser-specs": "^1.34.2", "chalk": "4.1.1", "cheerio": "1.0.0-rc.6", "cli-progress": "^3.9.0", diff --git a/testing/content/files/en-us/web/spec_section_extraction/index.html b/testing/content/files/en-us/web/spec_section_extraction/index.html new file mode 100644 index 000000000000..927474afb5ad --- /dev/null +++ b/testing/content/files/en-us/web/spec_section_extraction/index.html @@ -0,0 +1,20 @@ +--- +title: Spec section extraction +browser-compat: javascript.builtins.Array.toLocaleString +slug: Web/Spec_Section_Extraction +--- + +

Intro

+

Text in Intro

+ +

Specifications

+ +

{{Specifications}}

+ +

Browser compatibility

+ +

{{Compat}}

+ +

See also

+ +

More stuff

diff --git a/testing/tests/index.test.js b/testing/tests/index.test.js index 15e653aa4576..3aac8309cb93 100644 --- a/testing/tests/index.test.js +++ b/testing/tests/index.test.js @@ -1056,6 +1056,34 @@ test("bcd table extraction followed by h3", () => { expect(doc.body[4].value.isH3).toBeTruthy(); }); +test("specifications and bcd extraction", () => { + const builtFolder = path.join( + buildRoot, + "en-us", + "docs", + "web", + "spec_section_extraction" + ); + expect(fs.existsSync(builtFolder)).toBeTruthy(); + const jsonFile = path.join(builtFolder, "index.json"); + const { doc } = JSON.parse(fs.readFileSync(jsonFile)); + expect(doc.body[0].type).toBe("prose"); + expect(doc.body[1].type).toBe("specifications"); + expect(doc.body[1].value.specifications[0].shortTitle).toBe("ECMAScript"); + expect(doc.body[1].value.specifications[0].bcdSpecificationURL).toBe( + "https://tc39.es/ecma262/#sec-array.prototype.tolocalestring" + ); + expect(doc.body[1].value.specifications[1].shortTitle).toBe( + "ECMAScript Internationalization API" + ); + expect(doc.body[1].value.specifications[1].bcdSpecificationURL).toBe( + "https://tc39.es/ecma402/#sup-array.prototype.tolocalestring" + ); + expect(doc.body[2].type).toBe("prose"); + expect(doc.body[3].type).toBe("browser_compatibility"); + expect(doc.body[4].type).toBe("prose"); +}); + test("headers within non-root elements is a 'sectioning' flaw", () => { const builtFolder = path.join( buildRoot, diff --git a/yarn.lock b/yarn.lock index 7936ed684b6f..e8eb257a2564 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5864,6 +5864,11 @@ browser-process-hrtime@^1.0.0: resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz#3c9b4b7d782c8121e56f10106d84c0d0ffc94626" integrity sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow== +browser-specs@^1.34.2: + version "1.35.1" + resolved "https://registry.yarnpkg.com/browser-specs/-/browser-specs-1.35.1.tgz#01c77221940b5d733995248438e869ca5342cc9c" + integrity sha512-y9rMHjHa2kXUOBqovbRHCQAQhCJARiPQYluiO3PBoBl4Wa7f0ukE72+zDBN7+0oYzdRyWngZaUtl2rqCVdZ1Aw== + browserify-aes@^1.0.0, browserify-aes@^1.0.4: version "1.2.0" resolved "https://registry.yarnpkg.com/browserify-aes/-/browserify-aes-1.2.0.tgz#326734642f403dabc3003209853bb70ad428ef48"