# Patch summary (review notes placed ahead of the first `diff --git` header,
# i.e. outside every hunk).
#
# Purpose: bound how many URL checks the lint rule has in flight at once.
#
# lib/index.js
#   - Adds `import pAll from 'p-all'` and `import pLimit from 'p-limit'`.
#   - Adds a module-level `const limit = pLimit(1)`. Because it is declared at
#     module scope, the same limiter is shared by every call to `rule`.
#   - Wraps the entire body of `async function rule(tree, file, options)` in
#     `await limit(async function () { ... })`. The in-patch comment gives the
#     reason: operate one file at a time, otherwise many requests would be
#     sent at once when several files are linted.
#   - The early `return` in the `!online` branch now sits inside that inner
#     callback, so it returns from the callback rather than directly from
#     `rule`.
#   - Replaces `await Promise.all(urls.map(async function (url) { ... }))`
#     with `await pAll(urls.map(function (url) { return async function () {
#     ... } }), {concurrency: 10})`. Each URL is now mapped to a function
#     that is called later instead of to an already-started promise; the
#     in-patch comment says this operates on 10 URLs at a time.
#   - The remaining statements (skip patterns, `from` resolution, the `visit`
#     walk that fills `nodesByUrl`, and the dead/redirect messages) look the
#     same on the `-` and `+` sides apart from nesting -- confirm against a
#     properly line-broken copy of the patch.
#
# package.json
#   - Adds "p-all": "^5.0.0" and "p-limit": "^6.0.0" between the existing
#     "is-online" and "unified-lint-rule" entries.
#
# NOTE(review): the hunks below have their newlines collapsed to spaces, so
# the patch presumably cannot be applied as-is; restore it from the upstream
# commit (index 82fb3e5..f081c93) before use.
# NOTE(review): JSDoc type arguments look stripped in this copy
# (`Readonly}`, `Map>}`, `Record | undefined`) -- likely an extraction
# artifact; verify against the original file.
diff --git a/lib/index.js b/lib/index.js index 82fb3e5..f081c93 100644 --- a/lib/index.js +++ b/lib/index.js @@ -12,9 +12,13 @@ import {deadOrAlive} from 'dead-or-alive' import {ok as assert} from 'devlop' import isOnline from 'is-online' +import pAll from 'p-all' +import pLimit from 'p-limit' import {lintRule} from 'unified-lint-rule' import {visit} from 'unist-util-visit' +const limit = pLimit(1) + /** @type {Readonly} */ const emptyOptions = {} const defaultSkipUrlPatterns = [/^(?!https?)/i] @@ -55,124 +59,133 @@ export default remarkLintNoDeadUrls * Nothing. */ async function rule(tree, file, options) { - /** @type {Map>} */ - const nodesByUrl = new Map() - const online = await isOnline() - const settings = options || emptyOptions - const skipUrlPatterns = settings.skipUrlPatterns - ? settings.skipUrlPatterns.map(function (d) { - return typeof d === 'string' ? new RegExp(d) : d - }) - : [...defaultSkipUrlPatterns] - - if (settings.skipLocalhost) { - skipUrlPatterns.push(/^(https?:\/\/)(localhost|127\.0\.0\.1)(:\d+)?/) - } - - /* c8 ignore next 9 -- difficult to test */ - if (!online) { - if (!settings.skipOffline) { - file.info( - 'Unexpected offline connection, expected either an online connection or `skipOffline: true`' - ) - } + // Operate one file at a time. + // Otherwise we’d send out tons of requests at a time for say 10 files. + await limit(async function () { + /** @type {Map>} */ + const nodesByUrl = new Map() + const online = await isOnline() + const settings = options || emptyOptions + const skipUrlPatterns = settings.skipUrlPatterns + ? settings.skipUrlPatterns.map(function (d) { + return typeof d === 'string' ? new RegExp(d) : d + }) + : [...defaultSkipUrlPatterns] - return - } - - const meta = /** @type {Record | undefined} */ ( - file.data.meta - ) - - const from = - settings.from || - (meta && - typeof meta.origin === 'string' && - typeof meta.pathname === 'string' - ? 
new URL(meta.pathname, meta.origin).href - : undefined) - - const deadOrAliveOptions = { - ...settings.deadOrAliveOptions, - findUrls: false - } - - visit(tree, function (node) { - if ('url' in node && typeof node.url === 'string') { - const value = node.url - const colon = value.indexOf(':') - const questionMark = value.indexOf('?') - const numberSign = value.indexOf('#') - const slash = value.indexOf('/') - let relativeToSomething = false - - if ( - // If there is no protocol, it’s relative. - colon < 0 || - // If the first colon is after a `?`, `#`, or `/`, it’s not a protocol. - (slash > -1 && colon > slash) || - (questionMark > -1 && colon > questionMark) || - (numberSign > -1 && colon > numberSign) - ) { - relativeToSomething = true - } + if (settings.skipLocalhost) { + skipUrlPatterns.push(/^(https?:\/\/)(localhost|127\.0\.0\.1)(:\d+)?/) + } - // We can only check URLs relative to something if `from` is passed. - if (relativeToSomething && !from) { - return + /* c8 ignore next 9 -- difficult to test */ + if (!online) { + if (!settings.skipOffline) { + file.info( + 'Unexpected offline connection, expected either an online connection or `skipOffline: true`' + ) } - const url = new URL(value, from).href + return + } - if ( - skipUrlPatterns.some(function (skipPattern) { - return skipPattern.test(url) - }) - ) { - return - } + const meta = /** @type {Record | undefined} */ ( + file.data.meta + ) + + const from = + settings.from || + (meta && + typeof meta.origin === 'string' && + typeof meta.pathname === 'string' + ? 
new URL(meta.pathname, meta.origin).href + : undefined) + + const deadOrAliveOptions = { + ...settings.deadOrAliveOptions, + findUrls: false + } - let list = nodesByUrl.get(url) + visit(tree, function (node) { + if ('url' in node && typeof node.url === 'string') { + const value = node.url + const colon = value.indexOf(':') + const questionMark = value.indexOf('?') + const numberSign = value.indexOf('#') + const slash = value.indexOf('/') + let relativeToSomething = false + + if ( + // If there is no protocol, it’s relative. + colon < 0 || + // If the first colon is after a `?`, `#`, or `/`, it’s not a protocol. + (slash > -1 && colon > slash) || + (questionMark > -1 && colon > questionMark) || + (numberSign > -1 && colon > numberSign) + ) { + relativeToSomething = true + } - if (!list) { - list = [] - nodesByUrl.set(url, list) - } + // We can only check URLs relative to something if `from` is passed. + if (relativeToSomething && !from) { + return + } - list.push(node) - } - }) + const url = new URL(value, from).href - const urls = [...nodesByUrl.keys()] - - await Promise.all( - urls.map(async function (url) { - const nodes = nodesByUrl.get(url) - assert(nodes) - const result = await deadOrAlive(url, deadOrAliveOptions) - - for (const node of nodes) { - for (const message of result.messages) { - const product = file.message( - 'Unexpected dead URL `' + url + '`, expected live URL', - {ancestors: [node], cause: message, place: node.position} - ) - product.fatal = message.fatal + if ( + skipUrlPatterns.some(function (skipPattern) { + return skipPattern.test(url) + }) + ) { + return } - if (result.status === 'alive' && new URL(url).href !== result.url) { - const message = file.message( - 'Unexpected redirecting URL `' + - url + - '`, expected final URL `' + - result.url + - '`', - {ancestors: [node], place: node.position} - ) - message.actual = url - message.expected = [result.url] + let list = nodesByUrl.get(url) + + if (!list) { + list = [] + nodesByUrl.set(url, 
list) } + + list.push(node) } }) - ) + + const urls = [...nodesByUrl.keys()] + + await pAll( + urls.map(function (url) { + return async function () { + const nodes = nodesByUrl.get(url) + assert(nodes) + + const result = await deadOrAlive(url, deadOrAliveOptions) + + for (const node of nodes) { + for (const message of result.messages) { + const product = file.message( + 'Unexpected dead URL `' + url + '`, expected live URL', + {ancestors: [node], cause: message, place: node.position} + ) + product.fatal = message.fatal + } + + if (result.status === 'alive' && new URL(url).href !== result.url) { + const message = file.message( + 'Unexpected redirecting URL `' + + url + + '`, expected final URL `' + + result.url + + '`', + {ancestors: [node], place: node.position} + ) + message.actual = url + message.expected = [result.url] + } + } + } + }), + // Operate on 10 URLs at a time. + {concurrency: 10} + ) + }) } diff --git a/package.json b/package.json index a6a03f0..38b5e6e 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,8 @@ "dead-or-alive": "^1.0.0", "devlop": "^1.0.0", "is-online": "^11.0.0", + "p-all": "^5.0.0", + "p-limit": "^6.0.0", "unified-lint-rule": "^3.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0",