Skip to content

Commit

Permalink
feat(husky): check deletions and broken fragments in URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
OnkarRuikar committed Dec 24, 2023
1 parent 4e0e080 commit cdd4bf8
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 4 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/pr-check_url-issues.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Pull-request check that runs scripts/log-url-issues.js in "--workflow" mode
# to report links affected by deleted pages or changed Markdown headers.
name: Check URL issues

# Runs only for pull requests against main that touch Markdown under files/.
on:
pull_request:
branches:
- main
paths:
- "files/**/*.md"

jobs:
check_url_issues:
# NOTE(review): the repository guard below is commented out, so the job is
# not restricted to 'mdn/content' — confirm whether that is intended.
#if: github.repository == 'mdn/content'
runs-on: ubuntu-latest

steps:
# In --workflow mode the script diffs "origin/main...HEAD", so history is
# requested here (fetch-depth: 0 — presumably full history; see the
# actions/checkout documentation).
- uses: actions/checkout@v4
with:
fetch-depth: 0

# Node.js version comes from the repository's .nvmrc file.
- name: Setup Node.js environment
uses: actions/setup-node@v4
with:
node-version-file: ".nvmrc"
cache: yarn

# Registers the problem matcher before running the script, then fetches
# origin/main, which the script's "origin/main...HEAD" diff refers to.
# NOTE(review): the explicit fetch may be redundant with fetch-depth: 0 — confirm.
- name: Check URL deletions and broken fragments
run: |
echo "::add-matcher::.github/workflows/url-issues-problem-matcher.json"
git fetch origin main
node scripts/log-url-issues.js --workflow
18 changes: 18 additions & 0 deletions .github/workflows/url-issues-problem-matcher.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"problemMatcher": [
{
"owner": "log-url-issues",
"severity": "error",
"pattern": [
{
"regexp": "^(ERROR|WARN|INFO):(.+):(\\d+):(\\d+):(.+)$",
"severity": 1,
"file": 2,
"line": 3,
"column": 4,
"message": 5
}
]
}
]
}
3 changes: 2 additions & 1 deletion .lintstagedrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"prettier --write"
],
"tests/**/*.*": "yarn test:front-matter-linter",
"*.{svg,png,jpeg,jpg,gif}": "yarn filecheck"
"*.{svg,png,jpeg,jpg,gif}": "yarn filecheck",
"*": "node scripts/log-url-issues.js"
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ new Date(year, monthIndex, day, hours, minutes, seconds, milliseconds)

##### Formal syntax

Formal syntax notation (using [BNF](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form)) should not be used in the Syntax section — instead use the expanded multiple-line format [described above](multiple_linesoptional_parameters).
Formal syntax notation (using [BNF](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form)) should not be used in the Syntax section — instead use the expanded multiple-line format [described above](#multiple_linesoptional_parameters).

While the formal notation provides a concise mechanism for describing complex syntax, it is not familiar to many developers, and can _conflict_ with valid syntax for particular programming languages. For example, "`[ ]`" indicates both an "optional parameter" and a JavaScript {{jsxref("Array")}}. You can see this in the formal syntax for {{jsxref("Array.prototype.slice()")}} below:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ browser-compat: webextensions.api.runtime.setUninstallURL

{{AddonSidebar()}}

Sets the URL to be visited when the extension is uninstalled. This can be used to clean up server-side data, do analytics, or implement surveys. The URL can be up to 1023 characters. This limit used to be 255, see [Browser compatibility](browser_compatibility) for more details.
Sets the URL to be visited when the extension is uninstalled. This can be used to clean up server-side data, do analytics, or implement surveys. The URL can be up to 1023 characters. This limit used to be 255, see [Browser compatibility](#browser_compatibility) for more details.

This is an asynchronous function that returns a [`Promise`](/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise).

Expand Down
133 changes: 133 additions & 0 deletions scripts/log-url-issues.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
 * This script logs the locations of URLs that are affected by either of
 * the following changes:
 * - deletion of a file
 * - updates to Markdown headers
 */

import fs from "node:fs/promises";
import path from "node:path";
import {
execGit,
getRootDir,
walkSync,
isImagePath,
getLocations,
IMG_RX,
stringToFragment,
} from "./utils.js";

const rootDir = getRootDir();
const argLength = process.argv.length;
const deletedSlugs = [];
const fragmentDetails = [];

/**
 * Collects the slugs of deleted en-US pages and images from `git status`
 * and appends them to the module-level `deletedSlugs` array.
 *
 * A status line qualifies when it is marked as deleted ("D"), lives under
 * "files/en-us", and is either an image (per IMG_RX) or an "index.md" file.
 * The "files/en-us/" prefix and the "/index.md" suffix are stripped, so a
 * deleted "files/en-us/web/api/foo/index.md" is recorded as "web/api/foo".
 */
function getDeletedSlugs() {
  // git status --short --porcelain
  const result = execGit(["status", "--short", "--porcelain"], { cwd: "." });

  if (result.trim()) {
    // IMG_RX is a shared regex with the "g" flag, so calling `.test()` on it
    // keeps `lastIndex` between calls and silently skips matches on
    // consecutive lines. Use a local, non-global copy instead.
    const imageRx = new RegExp(IMG_RX.source, IMG_RX.flags.replace("g", ""));
    const deletedRx = /^\s*D\s+/i;

    deletedSlugs.push(
      ...result
        .split("\n")
        .filter(
          (line) =>
            deletedRx.test(line) &&
            line.includes("files/en-us") &&
            (imageRx.test(line) || line.includes("index.md")),
        )
        // "D files/en-us/web/foo/index.md" -> "D web/foo"
        .map((line) =>
          line.replaceAll(/^\s*|files\/en-us\/|\/index\.md/gm, ""),
        )
        // keep only the path column that follows the status letter
        .map((line) => line.split(/\s+/)[1]),
    );
  }
  console.log("deletedSlugs", deletedSlugs);
}

/**
 * Collects URL fragments whose Markdown headers were removed and appends
 * them, as "<page path>#<fragment>", to the module-level `fragmentDetails`
 * array.
 *
 * @param {boolean} [fromStaging=true] - When true, inspects staged and
 *   unstaged changes (`git diff HEAD`); when false, inspects the diff
 *   between the branch base and HEAD (`git diff origin/main...HEAD`).
 */
function getFragmentDetails(fromStaging = true) {
  const diffArgs = fromStaging
    ? // get staged and unstaged changes
      ["diff", "HEAD"]
    : // get diff between branch base and HEAD
      ["diff", "origin/main...HEAD"];
  const result = execGit(diffArgs, { cwd: "." });

  // Returns the fragments of all header lines matched by `headerRx`;
  // capture group 1 must hold the header including its leading "#"s.
  const collectFragments = (segment, headerRx) =>
    [...segment.matchAll(headerRx)]
      .map((match) => match[1].toLowerCase())
      .map((header) => header.replace(/^#+ /, ""))
      .map((header) => stringToFragment(header));

  if (result.trim()) {
    // one segment per file in the diff
    const segments = result
      .split("diff --git a/")
      .filter((segment) => segment !== "");

    for (const segment of segments) {
      // The segment starts with the file path, terminated by a space.
      // Named `pagePath` so it does not shadow the imported `path` module.
      const pagePath = segment
        .substring(0, segment.indexOf(" "))
        .replaceAll(/files\/en-us\/|\/index\.md/gm, "");

      const removedFragments = collectFragments(segment, /^-(#+ .*)$/gm);
      // A header that shows up again on a "+" line (moved section, changed
      // header level, whitespace-only edit) still produces the same
      // fragment, so links to it are not broken.
      const addedFragments = new Set(
        collectFragments(segment, /^\+(#+ .*)$/gm),
      );

      for (const fragment of removedFragments) {
        if (!addedFragments.has(fragment)) {
          fragmentDetails.push(`${pagePath}#${fragment}`);
        }
      }
    }
  }
  console.log("fragmentDetails", fragmentDetails);
}

/**
 * Escapes regular-expression metacharacters so that `text` can be embedded
 * in a RegExp and matched literally.
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

// Local runs check deleted files and header changes in the working tree;
// with "--workflow" only header changes relative to origin/main are checked.
if (process.argv[2] !== "--workflow") {
  getDeletedSlugs();
  getFragmentDetails();
} else {
  getFragmentDetails(false);
}

if (deletedSlugs.length < 1 && fragmentDetails.length < 1) {
  console.log("Nothing to check. 🎉");
  process.exit(0);
}

// Build every search pattern once instead of once per scanned file. Slugs
// and fragments are escaped because they may contain characters such as "."
// or "*" that would otherwise be read as regex syntax.
const slugChecks = deletedSlugs.map((slug) => ({
  slug,
  // the slug must be followed by a character that ends a link target
  rx: new RegExp(`/${escapeRegExp(slug)}[)># "']`, "mig"),
}));

const fragmentChecks = fragmentDetails.map((fragment) => {
  const [pagePath, anchor] = fragment.split("#");
  return {
    fragment,
    // path of the page that owned the removed header
    ownFile: `files/en-us/${pagePath}/index.md`,
    // links from other pages: "<page path>#<fragment>"
    linkRx: new RegExp(escapeRegExp(fragment), "mig"),
    // links within the owning page: "(#fragment)", "'#fragment'", ...
    // Searching for the bare fragment text would flag ordinary prose.
    sameFileRx: new RegExp(`[("']#${escapeRegExp(anchor)}[)"'\\s]`, "mig"),
  };
});

for await (const filePath of walkSync(rootDir)) {
  if (!filePath.endsWith("index.md")) {
    continue;
  }

  try {
    const content = await fs.readFile(filePath, "utf-8");
    const relativePath = filePath.substring(filePath.indexOf("files/en-us"));

    // check deleted links
    for (const { slug, rx } of slugChecks) {
      for (const location of getLocations(content, rx)) {
        console.error(
          `ERROR:${relativePath}:${location.line}:${location.column}:Slug '${slug}' has been deleted`,
        );
      }
    }

    // check broken URL fragment
    for (const { fragment, ownFile, linkRx, sameFileRx } of fragmentChecks) {
      const locations = getLocations(content, linkRx);
      // check fragments in the same file
      if (filePath.endsWith(ownFile)) {
        locations.push(...getLocations(content, sameFileRx));
      }
      for (const location of locations) {
        console.error(
          `ERROR:${relativePath}:${location.line}:${location.column}:URL fragment '${fragment}' is broken`,
        );
      }
    }
  } catch (e) {
    console.error(`Error processing ${filePath}: ${e.message}`);
    throw e;
  }
}
40 changes: 39 additions & 1 deletion scripts/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import fs from "node:fs/promises";
import path from "node:path";
import childProcess from "node:child_process";

const IMG_RX = /(\.png|\.jpg|\.svg|\.gif)$/gim;
export const IMG_RX = /(\.png|\.jpg|\.svg|\.gif)$/gim;

export async function* walkSync(dir) {
const files = await fs.readdir(dir, { withFileTypes: true });
Expand Down Expand Up @@ -49,3 +49,41 @@ export function getRootDir() {
/**
 * Tells whether `path` ends with one of the image extensions in IMG_RX.
 *
 * IMG_RX carries the "g" flag, so `.test()` resumes from the regex's
 * `lastIndex`; without a reset, a successful call makes the next call on a
 * different path fail. `lastIndex` is cleared before and after testing so
 * that every call is independent.
 *
 * @param {string} path - File path to check.
 * @returns {boolean} true when the path matches IMG_RX.
 */
export function isImagePath(path) {
  IMG_RX.lastIndex = 0;
  const isImage = IMG_RX.test(path);
  IMG_RX.lastIndex = 0;
  return isImage;
}

/**
 * Returns the locations (1-based line and column numbers) of every
 * occurrence of 'searchValue' in the given 'content'.
 *
 * @param {string} content - Text to search.
 * @param {string|RegExp} searchValue - A string is matched literally and
 *   case-insensitively; a RegExp is used as given (the "g" flag is added
 *   when missing, because `matchAll` requires it).
 * @returns {{line: number, column: number}[]} Locations in document order.
 *   Empty when there is no match or when 'searchValue' is an empty string.
 */
export function getLocations(content, searchValue) {
  // An empty pattern would "match" at every position.
  if (searchValue === "") {
    return [];
  }

  let searchRx;
  if (searchValue instanceof RegExp) {
    searchRx = searchValue.global
      ? searchValue
      : new RegExp(searchValue.source, `${searchValue.flags}g`);
  } else {
    // Escape regex metacharacters: plain strings such as "a.b" or "*args"
    // must match literally rather than over-match or throw a SyntaxError.
    const literal = String(searchValue).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    searchRx = new RegExp(literal, "mig");
  }

  const positions = [];
  let lineNumber = 1;
  let lineStart = 0; // index of the first character of the current line
  let nextBreak = content.indexOf("\n");

  // Matches arrive in ascending index order, so the line cursor only ever
  // moves forward: one pass over the content regardless of match count.
  for (const match of content.matchAll(searchRx)) {
    while (nextBreak !== -1 && nextBreak < match.index) {
      lineNumber += 1;
      lineStart = nextBreak + 1;
      nextBreak = content.indexOf("\n", lineStart);
    }
    // A match at the very start of a line is column 1.
    positions.push({
      line: lineNumber,
      column: match.index - lineStart + 1,
    });
  }
  return positions;
}

/*
 * Builds the URL fragment for a Markdown header text: surrounding
 * whitespace is trimmed, punctuation characters are dropped, and each
 * remaining run of whitespace becomes a single underscore.
 * Letter case is left untouched.
 */
export function stringToFragment(text) {
  const punctuationRx = /["#$%&+,/:;=?@[\]^`{|}~')(\\]/g;
  const withoutPunctuation = text.trim().replaceAll(punctuationRx, "");
  const words = withoutPunctuation.split(/\s+/);
  return words.join("_");
}

0 comments on commit cdd4bf8

Please sign in to comment.