From 812570799d3db3c94bb0484ebac349d83baab2d0 Mon Sep 17 00:00:00 2001 From: Howard Edwards Date: Mon, 25 Sep 2023 09:28:23 -0500 Subject: [PATCH 1/3] Check if any of the known page ids include the hash being checked for --- scripts/link-checker.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/link-checker.js b/scripts/link-checker.js index 2cf249d217..001149f9ad 100644 --- a/scripts/link-checker.js +++ b/scripts/link-checker.js @@ -227,7 +227,7 @@ async function checkLinks() { let matchesHash = true; if (hash) { - matchesHash = !!matchingPage?.ids.includes(hash); + matchesHash = !!matchingPage?.ids.some((id) => id.includes(hash)); } const isLinkBroken = !( @@ -274,7 +274,11 @@ async function checkLinks() { hrefOrSrc.match(pattern) ); - if (!isHashCheckingDisabled && hash && !pageData.ids.includes(hash)) { + if ( + !isHashCheckingDisabled && + hash && + !pageData.ids.some((id) => id.includes(hash)) + ) { consoleError( `Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}, ` + 'hash not found on page' From 56d93dbc56487d3dfbe11c948a5b63f993dda70a Mon Sep 17 00:00:00 2001 From: Howard Edwards Date: Mon, 25 Sep 2023 13:10:52 -0500 Subject: [PATCH 2/3] Include clarifying comment --- scripts/link-checker.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/link-checker.js b/scripts/link-checker.js index 001149f9ad..aeab461e74 100644 --- a/scripts/link-checker.js +++ b/scripts/link-checker.js @@ -227,6 +227,15 @@ async function checkLinks() { let matchesHash = true; if (hash) { + // On some websites, the ids may not exactly match the hash included + // in the link. + // For e.g. GitHub will prepend client facing ids with their own + // calculated value. 
A heading in a README for example could be + // Foo bar, navigated to with https://github.com/foo/bar#foo-bar, + // but GitHub calculates the actual markup id included in the document + // as being user-content-foo-bar for its own page processing purposes. + // + // See https://github.com/w3c/aria-practices/issues/2809 matchesHash = !!matchingPage?.ids.some((id) => id.includes(hash)); } From be464a0c140ba5dd1bfae80462109c8640f65031 Mon Sep 17 00:00:00 2001 From: Howard Edwards Date: Mon, 2 Oct 2023 09:38:29 -0400 Subject: [PATCH 3/3] Set up cases where unique hashes need to be handled --- .link-checker.js | 9 ++++++++- scripts/link-checker.js | 34 +++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/.link-checker.js b/.link-checker.js index d3a1ff3fda..2377e0ef6a 100644 --- a/.link-checker.js +++ b/.link-checker.js @@ -11,9 +11,16 @@ module.exports = { '.carousel-image a', ], }, + hashCheckHandlers: [ + { + name: 'github', + pattern: /^https:\/\/github\.com\/.*/, + matchHash: (ids, hash) => + ids.includes(hash) || ids.includes(`user-content-${hash}`), + }, + ], ignoreHashesOnExternalPagesMatchingRegex: [ // Some hash links are resolved with JS and are therefore difficult to check algorithmically - /^https:\/\/github\.com\/.*\/wiki\//, /^https:\/\/html\.spec\.whatwg\.org\/multipage\//, ], }; diff --git a/scripts/link-checker.js b/scripts/link-checker.js index aeab461e74..f8c8649cc0 100644 --- a/scripts/link-checker.js +++ b/scripts/link-checker.js @@ -33,6 +33,23 @@ async function checkLinks() { return getLineNumber; }; + const checkPathForHash = (hrefOrSrc, ids = [], hash) => { + // On some websites, the ids may not exactly match the hash included + // in the link. + // For example, GitHub will prepend client-facing ids with their own + // calculated value. 
A heading in a README for example could be + // 'Foo bar', navigated to with https://github.com/foo/bar#foo-bar, + // but GitHub calculates the actual markup id included in the document + // as being 'user-content-foo-bar' for its own page processing purposes. + // + // See https://github.com/w3c/aria-practices/issues/2809 + const handler = options.hashCheckHandlers.find(({ pattern }) => + pattern.test(hrefOrSrc) + ); + if (handler) return handler.matchHash(ids, hash); + else return ids.includes(hash); + }; + const countConsoleErrors = () => { let errorCount = 0; @@ -227,16 +244,11 @@ async function checkLinks() { let matchesHash = true; if (hash) { - // On some websites, the ids may not exactly match the hash included - // in the link. - // For e.g. GitHub will prepend client facing ids with their own - // calculated value. A heading in a README for example could be - // Foo bar, navigated to with https://github.com/foo/bar#foo-bar, - // but GitHub calculates the actual markup id included in the document - // as being user-content-foo-bar for its own page processing purposes. - // - // See https://github.com/w3c/aria-practices/issues/2809 - matchesHash = !!matchingPage?.ids.some((id) => id.includes(hash)); + matchesHash = !!checkPathForHash( + pathMinusHash, + matchingPage?.ids, + hash + ); } const isLinkBroken = !( @@ -286,7 +298,7 @@ async function checkLinks() { if ( !isHashCheckingDisabled && hash && - !pageData.ids.some((id) => id.includes(hash)) + !checkPathForHash(hrefOrSrc, pageData.ids, hash) ) { consoleError( `Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}, ` +