From 9e8bb98e34f55a52359fbbfa1a85071d9548f4d1 Mon Sep 17 00:00:00 2001 From: Noah Koontz Date: Tue, 24 Nov 2020 13:25:53 -0800 Subject: [PATCH] feat: WIP adding a broken link checker rule --- lib/github_markup.js | 34 ++ lib/github_markup_check_and_render | 32 ++ rules/file-no-broken-links.js | 133 ++++++++ tests/lib/MarkdownForTest.md | 3 + tests/lib/github_markup_tests.js | 46 +++ tests/lib/rst_for_test.rst | 9 + tests/rules/file_no_broken_links_tests.js | 321 ++++++++++++++++++ .../rules/markup_test_files/absolute_link.md | 1 + .../invalid_relative_link.md | 1 + tests/rules/markup_test_files/link.md | 1 + tests/rules/markup_test_files/link.rst | 1 + .../rules/markup_test_files/multiple_links.md | 2 + tests/rules/markup_test_files/no_link.md | 1 + tests/rules/markup_test_files/no_link.rst | 1 + .../rules/markup_test_files/relative_link.md | 1 + .../relative_link_outside_dir.md | 1 + .../relative_link_with_section.md | 1 + tests/rules/markup_test_files/section_link.md | 2 + .../rules/markup_test_files/section_link.rst | 4 + 19 files changed, 595 insertions(+) create mode 100644 lib/github_markup.js create mode 100755 lib/github_markup_check_and_render create mode 100644 rules/file-no-broken-links.js create mode 100644 tests/lib/MarkdownForTest.md create mode 100644 tests/lib/github_markup_tests.js create mode 100644 tests/lib/rst_for_test.rst create mode 100644 tests/rules/file_no_broken_links_tests.js create mode 100644 tests/rules/markup_test_files/absolute_link.md create mode 100644 tests/rules/markup_test_files/invalid_relative_link.md create mode 100644 tests/rules/markup_test_files/link.md create mode 100644 tests/rules/markup_test_files/link.rst create mode 100644 tests/rules/markup_test_files/multiple_links.md create mode 100644 tests/rules/markup_test_files/no_link.md create mode 100644 tests/rules/markup_test_files/no_link.rst create mode 100644 tests/rules/markup_test_files/relative_link.md create mode 100644 
tests/rules/markup_test_files/relative_link_outside_dir.md
 create mode 100644 tests/rules/markup_test_files/relative_link_with_section.md
 create mode 100644 tests/rules/markup_test_files/section_link.md
 create mode 100644 tests/rules/markup_test_files/section_link.rst

diff --git a/lib/github_markup.js b/lib/github_markup.js
new file mode 100644
index 00000000..ec94b950
--- /dev/null
+++ b/lib/github_markup.js
@@ -0,0 +1,34 @@
+// Copyright 2018 TODO Group. All rights reserved.
+// Licensed under the Apache License, Version 2.0.
+
+const { commandExists } = require('./command_exists')
+const spawnSync = require('child_process').spawnSync
+
+class GitHubMarkup {
+  /**
+   * Renders a markup file (e.g. a README) with the bundled
+   * github_markup_check_and_render helper. Supports all formats used by github_markup.
+   *
+   * @param {string} targetFile The file to render
+   * @returns {Promise<string|null>} The rendered markup, or null if the helper
+   *   exits with a non-zero status (e.g. the file cannot be read or rendered)
+   * @throws {Error} 'GitHub markup not installed' if the 'github-markup' command is not available
+   */
+  async renderMarkup(targetFile) {
+    // TODO: windows?
+    const command = await commandExists(['github-markup'])
+    if (command === null) {
+      throw new Error('GitHub markup not installed')
+    }
+    const gitHubMarkupRes = spawnSync(
+      `${__dirname}/github_markup_check_and_render`,
+      [targetFile]
+    )
+    if (gitHubMarkupRes.status !== 0) {
+      return null
+    }
+    return gitHubMarkupRes.stdout.toString()
+  }
+}
+
+module.exports = new GitHubMarkup()
diff --git a/lib/github_markup_check_and_render b/lib/github_markup_check_and_render
new file mode 100755
index 00000000..4249bd91
--- /dev/null
+++ b/lib/github_markup_check_and_render
@@ -0,0 +1,32 @@
+#!/usr/bin/env ruby
+# Modified github_markup utility which checks first that the file can be rendered before rendering
+
+$LOAD_PATH.unshift File.dirname(File.realpath(__FILE__)) + "/../lib"
+require 'github/markup'
+
+if ARGV.size < 1
+  $stderr.print "usage: #{File.basename($0)} FILE\n"
+  exit 1
+end
+
+name = ARGV.first
+file_contents = nil
+
+begin
+  # File.read opens, reads and closes the file in one call, so the handle is
+  # never left open if reading fails part-way.
+  file_contents = File.read(name)
+rescue StandardError => e
+  $stderr.print "error: #{e.message}\n"
+  exit 1
+end
+
+# Exit status 0 means the rendered markup is on stdout; every failure exits
+# with 1 and reports on stderr, so stdout only ever carries rendered markup.
+if GitHub::Markup.can_render?( name, file_contents )
+  print GitHub::Markup.render( name, file_contents )
+  exit 0
+else
+  $stderr.print "File '#{name}' cannot be rendered.\n"
+  exit 1
+end
diff --git a/rules/file-no-broken-links.js b/rules/file-no-broken-links.js
new file mode 100644
index 00000000..f913b0eb
--- /dev/null
+++ b/rules/file-no-broken-links.js
@@ -0,0 +1,133 @@
+// Copyright 2017 TODO Group. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+const { HtmlChecker, BLC_INVALID } = require('broken-link-checker')
+const path = require('path')
+const GitHubMarkup = require('../lib/github_markup')
+const Result = require('../lib/result')
+// eslint-disable-next-line no-unused-vars
+const FileSystem = require('../lib/file_system')
+
+// TODO: how to autoprefix domains with http or https?
+/**
+ * Searches for a renderable markup document, renders it, and then
+ * checks for broken links by scanning the html.
+ *
+ * @param {FileSystem} fs A filesystem object configured with filter paths and target directories
+ * @param {object} options The rule configuration
+ * @returns {Promise<Result>} The lint rule result
+ */
+async function fileNoBrokenLinks(fs, options) {
+  const files = await fs.findAllFiles(options.globsAll, !!options.nocase)
+
+  if (files.length === 0) {
+    return new Result(
+      'Did not find file matching the specified patterns',
+      options.globsAll.map(f => {
+        return { passed: false, pattern: f }
+      }),
+      !!options['succeed-on-non-existent']
+    )
+  }
+
+  // for every file check every broken link
+  const results = await Promise.all(
+    files.map(async f => {
+      // render it, if possible
+      const absMdPath = path.resolve(fs.targetDir, f)
+      const rendered = await GitHubMarkup.renderMarkup(absMdPath)
+      if (rendered === null) {
+        return {
+          passed: true,
+          path: f,
+          message: 'Ignored due to unknown file format.'
+        }
+      }
+
+      // scan the rendered HTML for broken links
+      const linkRes = await new Promise((resolve, reject) => {
+        const results = []
+        const htmlChecker = new HtmlChecker(options, {
+          link: res => results.push(res),
+          complete: () => resolve(results),
+          acceptedSchemes: ['http', 'https', '']
+        })
+        if (!htmlChecker.scan(rendered)) {
+          reject(new Error(`Unable to scan file ${f}`))
+        }
+      })
+
+      // find all relative links, and double check the filesystem for their existence
+      // filter down to broken links
+      const brokenLinks = linkRes.filter(({ broken }) => broken)
+      // split into invalid and otherwise failing
+      const { failing, invalid } = brokenLinks.reduce(
+        (res, linkRes) => {
+          linkRes.brokenReason === BLC_INVALID
+            ? res.invalid.push(linkRes)
+            : res.failing.push(linkRes)
+          return res
+        },
+        { failing: [], invalid: [] }
+      )
+      // make the messages for the failing URLs
+      const failingMessages = failing.map(
+        ({ brokenReason, url: { original }, http }) => {
+          // the response may be missing (e.g. the request failed before any
+          // reply arrived), so it must not be destructured unconditionally
+          const { statusCode = null } = (http && http.response) || {}
+          return `${original} (${
+            brokenReason.includes('HTTP')
+              ? `status code ${statusCode}`
+              : `unknown error ${brokenReason}`
+          })`
+        }
+      )
+      // process the invalid links to check if they're actually filesystem paths
+      // returning the message for invalid URLs
+      const failingInvalidMessagesWithNulls = await Promise.all(
+        invalid.map(async b => {
+          const {
+            url: { original }
+          } = b
+          // verify the path is relative, else the path is invalid
+          if (path.posix.isAbsolute(original))
+            return `${original} (invalid path)`
+          // strip any #thing specifiers from the path, since it's too hard to check
+          const strippedPath = original.replace(/#(?:[.!/\\\w]*)$/, '')
+          if (!strippedPath) return null
+          // resolve the link relative to the directory of the file containing it,
+          // then verify the path doesn't traverse outside the project, else the path is excluded
+          const targetDir = path.posix.resolve(fs.targetDir)
+          const fileDir = path.posix.resolve(targetDir, path.posix.dirname(f))
+          const absPath = path.posix.resolve(fileDir, strippedPath)
+          const relPath = path.posix.relative(targetDir, absPath)
+          if (relPath.startsWith('..')) return null
+          // verify the file exists (or at least that we have access to it)
+          if (!(await fs.relativeFileExists(relPath)))
+            return `${original} (file does not exist)`
+          return null
+        })
+      )
+      // remove messages which didn't fail
+      const failingInvalidMessages = failingInvalidMessagesWithNulls.filter(
+        m => m !== null
+      )
+      // join all the messages together to form the result
+      const allMessages = failingInvalidMessages.concat(failingMessages)
+      return {
+        passed: allMessages.length === 0,
+        path: f,
+        message:
+          allMessages.length === 0
+            ? 'All links are valid'
+            : allMessages.join(', ')
+      }
+    })
+  )
+  // return the final result
+  const passed = results.every(({ passed }) => passed)
+  return new Result('', results, passed)
+}
+
+module.exports = fileNoBrokenLinks
diff --git a/tests/lib/MarkdownForTest.md b/tests/lib/MarkdownForTest.md
new file mode 100644
index 00000000..08feb832
--- /dev/null
+++ b/tests/lib/MarkdownForTest.md
@@ -0,0 +1,3 @@
+# A Section
+
+Some text.
diff --git a/tests/lib/github_markup_tests.js b/tests/lib/github_markup_tests.js
new file mode 100644
index 00000000..8e8319ed
--- /dev/null
+++ b/tests/lib/github_markup_tests.js
@@ -0,0 +1,46 @@
+// Copyright 2017 TODO Group. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+const chai = require('chai')
+const expect = chai.expect
+const GitHubMarkup = require('../../lib/github_markup')
+const { commandExists } = require('../../lib/command_exists')
+
+describe('lib', () => {
+  describe('github_markup', function () {
+    this.timeout(30000)
+
+    // awaited in a hook: a bare commandExists() call may be a Promise, which is always truthy
+    before(async function () {
+      if ((await commandExists(['github-markup'])) === null) this.skip()
+    })
+
+    it('should render a markdown file', async () => {
+      const res = await GitHubMarkup.renderMarkup(
+        `${__dirname}/MarkdownForTest.md`
+      )
+      expect(res).to.contain('Some text')
+    })
+
+    it('should render an rst file', async () => {
+      const res = await GitHubMarkup.renderMarkup(
+        `${__dirname}/rst_for_test.rst`
+      )
+      expect(res).to.contain(
+        'https://opensource.newrelic.com/oss-category/#community-plus'
+      )
+    })
+
+    it('should fail to render a non-markup file', async () => {
+      const res = await GitHubMarkup.renderMarkup(
+        `${__dirname}/image_for_test.png`
+      )
+      expect(res).to.equal(null)
+    })
+
+    it("should fail to render a file that doesn't exist", async () => {
+      const res = await GitHubMarkup.renderMarkup(`${__dirname}/not_a_file`)
+      expect(res).to.equal(null)
+    })
+  })
+})
diff --git a/tests/lib/rst_for_test.rst
b/tests/lib/rst_for_test.rst
new file mode 100644
index 00000000..903a6a6f
--- /dev/null
+++ b/tests/lib/rst_for_test.rst
@@ -0,0 +1,9 @@
+|header|
+
+.. |header| image:: https://github.com/newrelic/opensource-website/raw/master/src/images/categories/Community_Plus.png
+    :target: https://opensource.newrelic.com/oss-category/#community-plus
+
+New Relic Python Agent
+======================
+
+The ``newrelic`` package instruments your application for performance monitoring and advanced performance analytics with `New Relic`_.
diff --git a/tests/rules/file_no_broken_links_tests.js b/tests/rules/file_no_broken_links_tests.js
new file mode 100644
index 00000000..53fb32eb
--- /dev/null
+++ b/tests/rules/file_no_broken_links_tests.js
@@ -0,0 +1,321 @@
+// Copyright 2017 TODO Group. All rights reserved.
+// Licensed under the Apache License, Version 2.0.
+
+const chai = require('chai')
+const nock = require('nock')
+const expect = chai.expect
+const FileSystem = require('../../lib/file_system')
+const { commandExists } = require('../../lib/command_exists')
+
+describe('rule', () => {
+  describe('files_no_broken_links', function () {
+    const fileNoBrokenLinks = require('../../rules/file-no-broken-links')
+    const targetDir = `${__dirname}/markup_test_files`
+    const testFs = new FileSystem(targetDir)
+
+    this.timeout(30000)
+
+    // awaited in a hook: a bare commandExists() call may be a Promise, which is always truthy
+    before(async function () {
+      if ((await commandExists(['github-markup'])) === null) this.skip()
+    })
+
+    it('returns true if no links are present in markdown', async () => {
+      const ruleopts = {
+        globsAll: ['no_link.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'no_link.md'
+      })
+    })
+
+    it('returns true if a valid link is present in a markdown file', async () => {
+      const scope = nock('www.example.com')
+        .get('/something/somethingelse')
+        .reply(200)
+
+      const ruleopts = {
+        globsAll: ['link.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'link.md'
+      })
+
+      scope.done()
+    })
+
+    it('returns false if an invalid link is present in a markdown file', async () => {
+      const scope = nock('www.example.com')
+        .get('/something/somethingelse')
+        .replyWithError('nxdomain or something')
+
+      const ruleopts = {
+        globsAll: ['link.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(false)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: false,
+        path: 'link.md'
+      })
+
+      scope.done()
+    })
+
+    it('returns false if a private link is present in a markdown file', async () => {
+      const scope = nock('www.example.com')
+        .get('/something/somethingelse')
+        .reply(404)
+
+      const ruleopts = {
+        globsAll: ['link.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(false)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: false,
+        path: 'link.md'
+      })
+
+      scope.done()
+    })
+
+    it('returns true if a valid link is present in an rst file', async () => {
+      const scope = nock('www.example.com')
+        .get('/something/somethingelse')
+        .reply(200)
+
+      const ruleopts = {
+        globsAll: ['link.rst']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'link.rst'
+      })
+
+      scope.done()
+    })
+
+    it('returns false if an invalid link is present in an rst file', async () => {
+      const scope = nock('www.example.com')
+        .get('/something/somethingelse')
+        .replyWithError('nxdomain or something')
+
+      const ruleopts = {
+        globsAll: ['link.rst']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(false)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: false,
+        path: 'link.rst'
+      })
+
+      scope.done()
+    })
+
+    it('ignores section links in markdown', async () => {
+      const ruleopts = {
+        globsAll: ['section_link.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'section_link.md'
+      })
+    })
+
+    it('ignores section links in rst', async () => {
+      const ruleopts = {
+        globsAll: ['section_link.rst']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'section_link.rst'
+      })
+    })
+
+    it('returns true with a relative link to a file in markdown', async () => {
+      const ruleopts = {
+        globsAll: ['relative_link.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'relative_link.md'
+      })
+    })
+
+    it('returns true with a relative link to a file in markdown with a section link', async () => {
+      const ruleopts = {
+        globsAll: ['relative_link_with_section.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'relative_link_with_section.md'
+      })
+    })
+
+    it('returns true with a relative link to a file in markdown outside the working directory', async () => {
+      const ruleopts = {
+        globsAll: ['relative_link_outside_dir.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'relative_link_outside_dir.md'
+      })
+    })
+
+    it("returns false with a relative link to a file that doesn't exist in markdown", async () => {
+      const ruleopts = {
+        globsAll: ['invalid_relative_link.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(false)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: false,
+        path: 'invalid_relative_link.md'
+      })
+    })
+
+    it('returns false with a absolute path in markdown', async () => {
+      const ruleopts = {
+        globsAll: ['absolute_link.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(false)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: false,
+        path: 'absolute_link.md'
+      })
+    })
+
+    it('checks multiple links in markdown', async () => {
+      const scope = nock('www.example.com')
+        .get('/something/somethingelse')
+        .reply(200)
+      const scope2 = nock('www.example.com').get('/something').reply(200)
+
+      const ruleopts = {
+        globsAll: ['multiple_links.md']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(1)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'multiple_links.md'
+      })
+
+      scope.done()
+      scope2.done()
+    })
+
+    it('checks multiple files', async () => {
+      const scope = nock('www.example.com')
+        .get('/something/somethingelse')
+        .reply(200)
+      const scope2 = nock('www.example.com').get('/something').reply(200)
+
+      const ruleopts = {
+        globsAll: ['link.md', 'link.rst']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(2)
+      expect(actual.targets[0]).to.deep.include({
+        passed: true,
+        path: 'link.md'
+      })
+      expect(actual.targets[1]).to.deep.include({
+        passed: true,
+        path: 'link.rst'
+      })
+
+      scope.done()
+      scope2.done()
+    })
+
+    it('fails if no files are found', async () => {
+      const ruleopts = {
+        globsAll: ['notafile']
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(false)
+      expect(actual.targets).to.have.length(0)
+    })
+
+    it('succeeds if no files are found and succeed-on-non-existent is true', async () => {
+      const ruleopts = {
+        globsAll: ['notafile'],
+        'succeed-on-non-existent': true
+      }
+
+      const actual = await fileNoBrokenLinks(testFs, ruleopts)
+
+      expect(actual.passed).to.equal(true)
+      expect(actual.targets).to.have.length(0)
+    })
+  })
+})
diff --git a/tests/rules/markup_test_files/absolute_link.md b/tests/rules/markup_test_files/absolute_link.md
new file mode 100644
index 00000000..019d5cb0
--- /dev/null
+++ b/tests/rules/markup_test_files/absolute_link.md
@@ -0,0 +1 @@
+[my file](/notafile)
diff --git a/tests/rules/markup_test_files/invalid_relative_link.md b/tests/rules/markup_test_files/invalid_relative_link.md
new file mode 100644
index 00000000..2ec68241
--- /dev/null
+++ b/tests/rules/markup_test_files/invalid_relative_link.md
@@ -0,0 +1 @@
+[my file](./invalid/link.md)
diff --git a/tests/rules/markup_test_files/link.md b/tests/rules/markup_test_files/link.md
new file mode 100644
index 00000000..27f02902
--- /dev/null
+++ b/tests/rules/markup_test_files/link.md
@@ -0,0 +1 @@
+[myurl](www.example.com/something/somethingelse)
diff --git a/tests/rules/markup_test_files/link.rst b/tests/rules/markup_test_files/link.rst
new file mode 100644
index 00000000..d28de8a2
--- /dev/null
+++ b/tests/rules/markup_test_files/link.rst
@@ -0,0 +1 @@
+`My URL <www.example.com/something/somethingelse>`_
diff --git a/tests/rules/markup_test_files/multiple_links.md
b/tests/rules/markup_test_files/multiple_links.md new file mode 100644 index 00000000..7de40751 --- /dev/null +++ b/tests/rules/markup_test_files/multiple_links.md @@ -0,0 +1,2 @@ +[myurl](www.example.com/something/somethingelse) +[myurl](www.example.com/something) diff --git a/tests/rules/markup_test_files/no_link.md b/tests/rules/markup_test_files/no_link.md new file mode 100644 index 00000000..257cc564 --- /dev/null +++ b/tests/rules/markup_test_files/no_link.md @@ -0,0 +1 @@ +foo diff --git a/tests/rules/markup_test_files/no_link.rst b/tests/rules/markup_test_files/no_link.rst new file mode 100644 index 00000000..257cc564 --- /dev/null +++ b/tests/rules/markup_test_files/no_link.rst @@ -0,0 +1 @@ +foo diff --git a/tests/rules/markup_test_files/relative_link.md b/tests/rules/markup_test_files/relative_link.md new file mode 100644 index 00000000..25bbda47 --- /dev/null +++ b/tests/rules/markup_test_files/relative_link.md @@ -0,0 +1 @@ +[my file](./link.md) diff --git a/tests/rules/markup_test_files/relative_link_outside_dir.md b/tests/rules/markup_test_files/relative_link_outside_dir.md new file mode 100644 index 00000000..246b06e5 --- /dev/null +++ b/tests/rules/markup_test_files/relative_link_outside_dir.md @@ -0,0 +1 @@ +[my file](../thing) diff --git a/tests/rules/markup_test_files/relative_link_with_section.md b/tests/rules/markup_test_files/relative_link_with_section.md new file mode 100644 index 00000000..0b4560ca --- /dev/null +++ b/tests/rules/markup_test_files/relative_link_with_section.md @@ -0,0 +1 @@ +[my file](./apath/toafile#thing) diff --git a/tests/rules/markup_test_files/section_link.md b/tests/rules/markup_test_files/section_link.md new file mode 100644 index 00000000..9a1c4602 --- /dev/null +++ b/tests/rules/markup_test_files/section_link.md @@ -0,0 +1,2 @@ +# my section +[myurl](#my-section) diff --git a/tests/rules/markup_test_files/section_link.rst b/tests/rules/markup_test_files/section_link.rst new file mode 100644 index 
00000000..6822f35b --- /dev/null +++ b/tests/rules/markup_test_files/section_link.rst @@ -0,0 +1,4 @@ +My Section Link +-------- + +`My Section Link`_