Skip to content

Commit

Permalink
Update check-links to ignore 429 errors (#1229)
Browse files Browse the repository at this point in the history
Add custom `markdown-link-check` to ignore 429 responses.
Diff with https://github.com/tcort/markdown-link-check/blob/v3.8.1/markdown-link-check:
```
< const markdownLinkCheck = require('markdown-link-check');
---
> const markdownLinkCheck = require('./');
134,138d133
<             // workaround to ignore 429 responses (too many requests)
<             if (result.statusCode === 429) {
<                 result.status = 'ignored'
<             }
<
```

**Link to tracking Issue:**

**Testing:**
Tested locally with `circleci` command line tool.

**Documentation:**
Updated .circleci/check-links/README.md.
  • Loading branch information
jchengsfx authored Jun 30, 2020
1 parent 09e4a2f commit 2200590
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 4 deletions.
4 changes: 3 additions & 1 deletion .circleci/check-links/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Markdown Link Checker

Check links in markdown docs with
Check links in markdown docs based on
[`markdown-link-check`](https://github.com/tcort/markdown-link-check).
The app in this repo has been updated to ignore 429 responses (too many
requests).

Update [config.json](./config.json) to exclude specific links from being
checked (e.g. examples or links requiring authentication). See
Expand Down
10 changes: 8 additions & 2 deletions .circleci/check-links/check-links.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,18 @@ REPO_DIR="$( cd "${SCRIPT_DIR}/../../" && pwd )"
CIRCLE_BRANCH=${CIRCLE_BRANCH:-}
CIRCLE_TAG=${CIRCLE_TAG:-}

# Paths that differ between HEAD and origin/master, one per line, relative to the repo root.
# `-C <dir>` makes git run inside $REPO_DIR; lowercase `-c` expects a name=value config
# override and does not change the working directory.
diff_files="$( git -C "$REPO_DIR" diff HEAD origin/master --name-only )"
# Shell-style flag: 0 means "check every doc", 1 means "check only new/modified docs".
check_all_files=1
# Check everything on master, on tags, or when the link checker itself was modified.
if [[ "$CIRCLE_BRANCH" = "master" ]] || [[ -n "$CIRCLE_TAG" ]] || [[ -n "$( echo "$diff_files" | grep ".circleci/check-links" )" ]]; then
    check_all_files=0
fi

nfailed=0

# check all docs in master/tags or new/modified docs in PR
for md in $(find "$REPO_DIR" -name "*.md" | sort); do
if [[ "$CIRCLE_BRANCH" = "master" ]] || [[ -n "$CIRCLE_TAG" ]] || [[ -n "$( git -c "$REPO_DIR" diff HEAD origin/master --name-only | grep "^${md/#$REPO_DIR\//}" )" ]]; then
markdown-link-check -c ${SCRIPT_DIR}/config.json -qv "$md" || (( nfailed += $? ))
# check_all_files uses shell-style truth (0 = check everything). A bare `[[ $var ]]` only
# tests for a non-empty string and would be true for both "0" and "1", so compare explicitly.
if [[ "$check_all_files" = "0" ]] || [[ -n "$( echo "$diff_files" | grep "^${md/#$REPO_DIR\//}" )" ]]; then
    node "$SCRIPT_DIR/markdown-link-check" -c "${SCRIPT_DIR}/config.json" -v "$md" || (( nfailed += $? ))
    # wait to scan files so that we don't overload github with requests which may result in 429 responses
    sleep 2
fi
Expand Down
165 changes: 165 additions & 0 deletions .circleci/check-links/markdown-link-check
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#!/usr/bin/env node

'use strict';

const chalk = require('chalk');
const fs = require('fs');
const markdownLinkCheck = require('markdown-link-check');
const program = require('commander');
const request = require('request');
const url = require('url');
const path = require('path');

// Coloured glyph printed in front of each link; looked up by the `status` string of a link-check result.
const statusLabels = {
alive: chalk.green('✓'),
dead: chalk.red('✖'),
ignored: chalk.gray('/'),
error: chalk.yellow('⚠'),
};

// Options object handed to markdownLinkCheck(); filled in from the CLI flags and the optional config file.
const opts = {};
// Filename or URL given on the command line; printed in the "FILE:" header once the input has been read.
let filenameForOutput = '';
let stream = process.stdin; // read from stdin unless a filename is given

// Command-line definition. The action handler runs when a filename or URL argument is given:
// it records the name for later output, derives `opts.baseUrl` (used to resolve relative
// links) and replaces the default stdin `stream` with the file or URL contents.
program
    .option('-p, --progress', 'show progress bar')
    .option('-c, --config [config]', 'apply a config file (JSON), holding e.g. url specific header configuration')
    .option('-q, --quiet', 'displays errors only')
    .option('-v, --verbose', 'displays detailed error information')
    .arguments('[filenameOrUrl]')
    .action(function (filenameOrUrl) {
        filenameForOutput = filenameOrUrl;

        // Anchored on purpose: only an argument that STARTS with a scheme is a URL. An
        // unanchored match would also treat a local path that merely contains "http:" as one.
        if (/^https?:/.test(filenameOrUrl)) {
            // Probe the URL first so connection failures and 404s produce a readable message.
            // NOTE(review): `stream` is reassigned inside this asynchronous callback, while the
            // listeners further down the file are attached to `stream` synchronously after
            // parse() returns — confirm the URL body is actually the stream being read.
            request(filenameOrUrl, function (error, response, body) {
                if (error) {
                    console.error(chalk.red('\nERROR: Unable to connect! Please provide a valid URL as an argument.'));
                    process.exit(1);
                } else if (response.statusCode === 404) {
                    console.error(chalk.red('\nERROR: 404 - File not found! Please provide a valid URL as an argument.'));
                    process.exit(1);
                } else {
                    stream = request.get(filenameOrUrl);
                }
            });

            try { // extract baseUrl from supplied URL
                const parsed = url.parse(filenameOrUrl);
                delete parsed.search;
                delete parsed.hash;
                // Keep everything up to and including the last "/" so the base is the containing directory.
                const lastSlash = parsed.pathname.lastIndexOf('/');
                if (lastSlash !== -1) {
                    parsed.pathname = parsed.pathname.slice(0, lastSlash + 1);
                }
                opts.baseUrl = url.format(parsed);
            } catch (err) { /* ignore error */
                // Deliberate best effort: without a baseUrl the check still runs.
            }
        } else {
            // Asynchronous sanity check: a directory cannot be read as a markdown file.
            fs.stat(filenameOrUrl, function (error, stats) {
                if (!error && stats.isDirectory()) {
                    console.error(chalk.red(`\nERROR: ${filenameOrUrl} is a directory! Please provide a valid filename as an argument.`));
                    process.exit(1);
                }
            });
            opts.baseUrl = 'file://' + path.dirname(path.resolve(filenameOrUrl));
            stream = fs.createReadStream(filenameOrUrl);
        }
    }).parse(process.argv);

// Copy the CLI switches onto the options object as strict booleans: a switch that
// was not passed is undefined on `program` and therefore becomes false here.
Object.assign(opts, {
    showProgressBar: program.progress === true,
    quiet: program.quiet === true,
    verbose: program.verbose === true,
});

let markdown = ''; // complete markdown text; assigned once the input stream has ended

// Raw chunks are buffered and decoded in a single step at the end. Decoding every chunk
// on its own would corrupt any multi-byte UTF-8 character split across a chunk boundary.
const markdownChunks = [];

stream
    .on('data', function (chunk) {
        markdownChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    })
    .on('error', function (error) {
        if (error.code === 'ENOENT') {
            console.error(chalk.red('\nERROR: File not found! Please provide a valid filename as an argument.'));
        } else {
            console.error(chalk.red(error));
        }
        return process.exit(1);
    })
    .on('end', function () {
        markdown = Buffer.concat(markdownChunks).toString();

        if (filenameForOutput) {
            console.log(chalk.cyan('\nFILE: ' + filenameForOutput));
        }

        // No config file requested: run with the options gathered from the command line.
        if (!program.config) {
            runMarkdownLinkCheck(markdown, opts);
            return;
        }

        // Read the config in one call so that every failure (missing file, no permission,
        // path is a directory, ...) arrives in the callback instead of as an unhandled
        // stream 'error' event.
        fs.readFile(program.config, 'utf8', function (readErr, configData) {
            if (readErr) {
                console.error(chalk.red('\nERROR: Config file not accessible.'));
                process.exit(1);
            }

            let config;
            try {
                config = JSON.parse(configData);
            } catch (parseErr) {
                // Report malformed JSON as a readable error rather than an uncaught exception.
                console.error(chalk.red('\nERROR: Config file is not valid JSON: ' + parseErr.message));
                process.exit(1);
            }

            opts.ignorePatterns = config.ignorePatterns;
            opts.replacementPatterns = config.replacementPatterns;
            opts.httpHeaders = config.httpHeaders;
            opts.ignoreDisable = config.ignoreDisable;

            runMarkdownLinkCheck(markdown, opts);
        });
    });

/**
 * Runs the link checker over a markdown document and reports the outcome on the console.
 *
 * Every result with HTTP status 429 (too many requests) is re-labelled as 'ignored'
 * before reporting. The process exits with code 1 when the checker reports an error
 * or when at least one link is still marked 'dead'.
 *
 * @param {string} markdown - markdown text to scan for links
 * @param {object} opts - options forwarded to markdownLinkCheck; `quiet` and `verbose` also steer the output
 */
function runMarkdownLinkCheck(markdown, opts) {
    markdownLinkCheck(markdown, opts, function (err, results) {
        if (err) {
            console.error(chalk.red('\nERROR: something went wrong!'));
            console.error(err.stack);
            process.exit(1);
        }

        const nothingFound = results.length === 0;
        if (nothingFound && !opts.quiet) {
            console.log(chalk.yellow('No hyperlinks found!'));
        }

        for (const entry of results) {
            // workaround to ignore 429 responses (too many requests)
            if (entry.statusCode === 429) {
                entry.status = 'ignored';
            }

            // Quiet mode only reports dead links.
            const isDead = entry.status === 'dead';
            if (opts.quiet && !isDead) {
                continue;
            }

            const label = statusLabels[entry.status];
            if (!opts.verbose) {
                console.log('[%s] %s', label, entry.link);
                continue;
            }

            // Verbose output adds the status code and, when present, the error.
            if (entry.err) {
                console.log('[%s] %s → Status: %s %s', label, entry.link, entry.statusCode, entry.err);
            } else {
                console.log('[%s] %s → Status: %s', label, entry.link, entry.statusCode);
            }
        }

        console.log('\n%s links checked.', results.length);

        const deadLinks = results.filter((entry) => entry.status === 'dead');
        if (deadLinks.length === 0) {
            return;
        }

        // Summarise the failures once more at the end and signal failure to the caller.
        console.error(chalk.red('\nERROR: %s dead links found!'), deadLinks.length);
        for (const entry of deadLinks) {
            console.log('[%s] %s → Status: %s', statusLabels[entry.status], entry.link, entry.statusCode);
        }
        process.exit(1);
    });
}
5 changes: 4 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -300,5 +300,8 @@ jobs:
- run:
name: Check links in markdown docs
command: |
sudo npm install -g markdown-link-check
pushd $HOME
npm install --save-dev markdown-link-check@3.8.1
popd
export NODE_PATH=$HOME/node_modules
bash .circleci/check-links/check-links.sh

0 comments on commit 2200590

Please sign in to comment.