Skip to content

Commit

Permalink
feat: WIP adding a broken link checker rule
Browse files Browse the repository at this point in the history
  • Loading branch information
prototypicalpro committed Nov 24, 2020
1 parent 8d21449 commit 9e8bb98
Show file tree
Hide file tree
Showing 19 changed files with 595 additions and 0 deletions.
34 changes: 34 additions & 0 deletions lib/github_markup.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2018 TODO Group. All rights reserved.
// Licensed under the Apache License, Version 2.0.

const { commandExists } = require('./command_exists')
const spawnSync = require('child_process').spawnSync

class GitHubMarkup {
  /**
   * Renders a markup document (for example a README) to HTML by running the
   * bundled `github_markup_check_and_render` script on it.
   *
   * Rejects with the error 'GitHub markup not installed' when the
   * `github-markup` command cannot be found.
   *
   * @param {string} targetFile Path of the file to render
   * @returns {Promise<string|null>} The script's standard output, or null when
   * the script did not exit with status 0
   */
  async renderMarkup(targetFile) {
    const installed = await commandExists(['github-markup'])
    if (installed === null) {
      throw new Error('GitHub markup not installed')
    }
    // TODO: windows?
    const renderScript = `${__dirname}/github_markup_check_and_render`
    const { status, stdout } = spawnSync(renderScript, [targetFile])
    // any non-zero (or missing) exit status is reported as "cannot be rendered"
    return status === 0 ? stdout.toString() : null
  }
}

module.exports = new GitHubMarkup()
32 changes: 32 additions & 0 deletions lib/github_markup_check_and_render
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env ruby
# Modified github_markup utility which checks first that the file can be
# rendered before rendering.
#
# Usage: github_markup_check_and_render FILE
#
# Prints the rendered markup to stdout and exits with status 0 on success.
# Exits with status 1 when no FILE argument is given, when FILE cannot be
# read, or when GitHub::Markup reports that FILE cannot be rendered.

$LOAD_PATH.unshift File.dirname(File.realpath(__FILE__)) + "/../lib"
require 'github/markup'

if ARGV.size < 1
  print "usage: #{File.basename($0)} FILE\n"
  exit 1
end

name = ARGV.first

begin
  # File.read opens, reads and closes the file in a single call, so the handle
  # is released even if reading fails part-way through.
  file_contents = File.read(name)
rescue StandardError => e
  # Rescue StandardError rather than Exception so that interrupts and other
  # non-standard exceptions are not swallowed and reported as read errors.
  $stderr.print "error: #{e.message}\n"
  exit 1
end

if GitHub::Markup.can_render?(name, file_contents)
  print GitHub::Markup.render(name, file_contents)
  exit 0
else
  print "File '#{name}' cannot be rendered.\n"
  exit 1
end
133 changes: 133 additions & 0 deletions rules/file-no-broken-links.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Copyright 2017 TODO Group. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

const { HtmlChecker, BLC_INVALID } = require('broken-link-checker')
const path = require('path')
const GitHubMarkup = require('../lib/github_markup')
const Result = require('../lib/result')
// eslint-disable-next-line no-unused-vars
const FileSystem = require('../lib/file_system')

// TODO: how to autoprefix domains with http or https?
/**
 * Formats the failure message for a broken link that is not an invalid URL.
 *
 * @private
 * @param {object} link A broken link entry collected from HtmlChecker
 * @returns {string} The original URL followed by the failure reason
 */
function describeFailingLink(link) {
  const {
    brokenReason,
    url: { original }
  } = link
  // Read the response defensively: destructuring straight through
  // `http.response` would throw when no response object is attached.
  const response = (link.http && link.http.response) || {}
  const statusCode =
    response.statusCode === undefined ? null : response.statusCode
  const reason = brokenReason.includes('HTTP')
    ? `status code ${statusCode}`
    : `unknown error ${brokenReason}`
  return `${original} (${reason})`
}

/**
 * Treats a link reported as an invalid URL as a path relative to the target
 * directory and checks whether it points at an existing file.
 *
 * @private
 * @param {FileSystem} fs The filesystem object used to look up the path
 * @param {string} original The link text as written in the document
 * @returns {Promise<string|null>} A failure message, or null if the link is
 * acceptable or is deliberately not checked
 */
async function describeInvalidLink(fs, original) {
  // verify the path is relative, else the path is invalid
  if (path.posix.isAbsolute(original)) return `${original} (invalid path)`
  // strip the whole #fragment (anchors commonly contain '-'), since checking
  // anchors themselves is out of scope
  const strippedPath = original.replace(/#.*$/, '')
  // a bare fragment such as "#section" points into the same document
  if (!strippedPath) return null
  // links that traverse outside the project are excluded from the check
  const targetDir = path.posix.resolve(fs.targetDir)
  const absPath = path.posix.resolve(targetDir, strippedPath)
  const relPath = path.posix.relative(targetDir, absPath)
  // compare whole path segments so in-project names like "..notes" still count
  if (relPath === '..' || relPath.startsWith('../')) return null
  // verify the file exists (or at least that we have access to it)
  if (!(await fs.relativeFileExists(relPath))) {
    return `${original} (file does not exist)`
  }
  return null
}

/**
 * Searches for renderable markup documents, renders each one, and then
 * checks for broken links by scanning the rendered HTML. Links reported as
 * invalid URLs are re-checked as paths relative to the target directory.
 *
 * @param {FileSystem} fs A filesystem object configured with filter paths and target directories
 * @param {object} options The rule configuration (reads `globsAll`, `nocase`
 * and `succeed-on-non-existent`; the whole object is also handed to HtmlChecker)
 * @returns {Promise<Result>} The lint rule result, with one target per matched
 * file, or one target per glob pattern when no file matched
 */
async function fileNoBrokenLinks(fs, options) {
  const files = await fs.findAllFiles(options.globsAll, !!options.nocase)

  if (files.length === 0) {
    return new Result(
      'Did not find file matching the specified patterns',
      options.globsAll.map(f => ({ passed: false, pattern: f })),
      !!options['succeed-on-non-existent']
    )
  }

  // for every file check every link
  const results = await Promise.all(
    files.map(async f => {
      // render it, if possible
      const absMdPath = path.resolve(fs.targetDir, f)
      const rendered = await GitHubMarkup.renderMarkup(absMdPath)
      if (rendered === null) {
        return {
          passed: true,
          path: f,
          message: 'Ignored due to unknown file format.'
        }
      }

      // scan the rendered HTML, collecting every link the checker reports
      const linkRes = await new Promise((resolve, reject) => {
        const found = []
        // NOTE(review): acceptedSchemes is passed alongside the handlers, not
        // in the options argument; confirm against the broken-link-checker
        // docs whether it takes effect here. Left unchanged because moving it
        // could alter which links reach the filesystem check below.
        const htmlChecker = new HtmlChecker(options, {
          link: res => found.push(res),
          complete: () => resolve(found),
          acceptedSchemes: ['http', 'https', '']
        })
        if (!htmlChecker.scan(rendered)) {
          reject(new Error(`Unable to scan file ${f}`))
        }
      })

      // filter down to broken links, then split into invalid URLs and
      // otherwise failing links
      const brokenLinks = linkRes.filter(({ broken }) => broken)
      const invalid = brokenLinks.filter(
        ({ brokenReason }) => brokenReason === BLC_INVALID
      )
      const failing = brokenLinks.filter(
        ({ brokenReason }) => brokenReason !== BLC_INVALID
      )

      const failingMessages = failing.map(link => describeFailingLink(link))
      // invalid URLs may actually be filesystem paths; keep only real failures
      const invalidChecks = await Promise.all(
        invalid.map(({ url: { original } }) =>
          describeInvalidLink(fs, original)
        )
      )
      const failingInvalidMessages = invalidChecks.filter(m => m !== null)

      // join all the messages together to form the result
      const allMessages = failingInvalidMessages.concat(failingMessages)
      return {
        passed: allMessages.length === 0,
        path: f,
        message:
          allMessages.length === 0
            ? 'All links are valid'
            : allMessages.join(', ')
      }
    })
  )
  // the rule passes only if every file passed
  const passed = results.every(({ passed }) => passed)
  return new Result('', results, passed)
}

module.exports = fileNoBrokenLinks
3 changes: 3 additions & 0 deletions tests/lib/MarkdownForTest.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# A Section

Some text.
46 changes: 46 additions & 0 deletions tests/lib/github_markup_tests.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright 2017 TODO Group. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

const chai = require('chai')
const expect = chai.expect
const GitHubMarkup = require('../../lib/github_markup')
const { commandExists } = require('../../lib/command_exists')

describe('lib', () => {
  describe('github_markup', function () {
    // rendering shells out to an external command, so allow plenty of time
    this.timeout(30000)

    // Skip the whole suite when github-markup is unavailable. The lookup is
    // awaited (the library awaits it too and compares the result with null),
    // because testing the un-awaited return value would presumably test a
    // Promise, which is always truthy, and never skip.
    before(async function () {
      const command = await commandExists(['github-markup'])
      if (command === null) {
        this.skip()
      }
    })

    it('should render a markdown file', async () => {
      const res = await GitHubMarkup.renderMarkup(
        `${__dirname}/MarkdownForTest.md`
      )
      expect(res).to.contain('Some text')
    })

    it('should render an rst file', async () => {
      const res = await GitHubMarkup.renderMarkup(
        `${__dirname}/rst_for_test.rst`
      )
      expect(res).to.contain(
        'https://opensource.newrelic.com/oss-category/#community-plus'
      )
    })

    it('should fail to render a non-markup file', async () => {
      const res = await GitHubMarkup.renderMarkup(
        `${__dirname}/image_for_test.png`
      )
      expect(res).to.equal(null)
    })

    it("should fail to render a file that doesn't exist", async () => {
      const res = await GitHubMarkup.renderMarkup(`${__dirname}/not_a_file`)
      expect(res).to.equal(null)
    })
  })
})
9 changes: 9 additions & 0 deletions tests/lib/rst_for_test.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
|header|

.. |header| image:: https://github.com/newrelic/opensource-website/raw/master/src/images/categories/Community_Plus.png
:target: https://opensource.newrelic.com/oss-category/#community-plus

New Relic Python Agent
======================

The ``newrelic`` package instruments your application for performance monitoring and advanced performance analytics with `New Relic`_.
Loading

0 comments on commit 9e8bb98

Please sign in to comment.