Skip to content

Commit

Permalink
perf: replace cheerio with parse5
Browse files Browse the repository at this point in the history
  • Loading branch information
bennypowers committed Sep 16, 2024
1 parent 1675338 commit 0440570
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 89 deletions.
43 changes: 27 additions & 16 deletions docs/_plugins/rhds.cjs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/// <reference lib="ESNext.Array"/>

// @ts-check
const fs = require('node:fs');
const path = require('node:path');
Expand All @@ -8,7 +9,6 @@ const yaml = require('js-yaml');
const _slugify = require('slugify');
const slugify = typeof _slugify === 'function' ? _slugify : _slugify.default;
const capitalize = require('capitalize');
const cheerio = require('cheerio');
const RHDSAlphabetizeTagsPlugin = require('./alphabetize-tags.cjs');
const RHDSShortcodesPlugin = require('./shortcodes.cjs');
const { parse } = require('async-csv');
Expand All @@ -23,28 +23,39 @@ const { parse } = require('async-csv');
* Replace paths in demo files from the dev SPA's format to 11ty's format
* @param {string} content the HTML content to replace
*/
async function demoPaths(content) {
  // Reads `outputPath` and `inputPath` from `this`, so this must be invoked
  // with a bound context (and must not be turned into an arrow function).
  const { outputPath, inputPath } = this;
  if (!outputPath) {
    // No output file for this page: emit nothing.
    return '';
  }
  const isNested = outputPath.match(/demo\/.+\/index\.html$/);
  if (inputPath !== './docs/elements/demos.html') {
    // Only the demos template gets its URLs rewritten; everything else passes through.
    return content;
  }

  // Loaded lazily so the parser is only pulled in when the demos page is processed.
  const { parse, serialize } = await import('parse5');
  const {
    queryAll,
    isElementNode,
    getAttribute,
    setAttribute,
    hasAttribute,
  } = await import('@parse5/tools');

  const document = parse(content);
  const hasUrlAttribute = node =>
    isElementNode(node)
    && (hasAttribute(node, 'href') || hasAttribute(node, 'src'));

  for (const node of queryAll(document, hasUrlAttribute)) {
    if (!isElementNode(node)) {
      // Already guaranteed by the predicate; kept for type narrowing.
      continue;
    }
    const attr = hasAttribute(node, 'href') ? 'href' : 'src';
    const val = getAttribute(node, attr);
    if (!val) {
      // Skip just this element. A `return` here would abandon the whole
      // document and resolve to `undefined` instead of the rewritten HTML.
      continue;
    }
    if (!val.startsWith('http') && !val.startsWith('/') && !val.startsWith('#')) {
      // Relative URL: nested demo pages sit one directory deeper.
      setAttribute(node, attr, `${isNested ? '../' : ''}${val}`);
    } else if (val.startsWith('/elements/rh-')) {
      setAttribute(node, attr, val.replace('/elements/rh-', '/'));
    }
  }
  return serialize(document);
}
Expand Down
35 changes: 23 additions & 12 deletions docs/_plugins/shortcodes/uxdotPattern.cjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
const { readFile } = require('node:fs/promises');
const { pathToFileURL } = require('node:url');
const cheerio = require('cheerio');

// for editor highlighting
const html = String.raw;
Expand Down Expand Up @@ -72,16 +71,28 @@ module.exports = function(eleventyConfig) {
`;
});

// Replaces every COMMENT placeholder with a newline inside the sample
// <script> elements nested in <uxdot-pattern>, then re-serializes the page.
eleventyConfig.addTransform('uxdot-pattern-restore-newlines', async function(content) {
  const { parse, serialize } = await import('parse5');
  const {
    queryAll,
    isElementNode,
    getAttribute,
    getTextContent,
    setTextContent,
  } = await import('@parse5/tools');

  // Exact `type` values that mark a sample script. An exact match (rather than
  // an unanchored regex) avoids catching look-alike types, and `includes`
  // safely yields false when the attribute is absent.
  const sampleTypes = ['text/css', 'text/html', 'sample/javascript'];

  const document = parse(content);
  const isUxDotPattern = node =>
    isElementNode(node)
    && node.tagName === 'uxdot-pattern';
  const isSampleScript = node =>
    isElementNode(node)
    && node.tagName === 'script'
    && sampleTypes.includes(getAttribute(node, 'type'));

  for (const pattern of queryAll(document, isUxDotPattern)) {
    for (const node of queryAll(pattern, isSampleScript)) {
      setTextContent(node, getTextContent(node).replaceAll(COMMENT, '\n'));
    }
  }
  return serialize(document);
});
};
140 changes: 79 additions & 61 deletions docs/_plugins/table-of-contents.cjs
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
/** @license portions MIT Jordan Shermer */
const cheerio = require('cheerio');
// @ts-check

/** @license adapted from code (c) Jordan Shermer MIT license*/

/* eslint-disable jsdoc/check-tag-names */

/** Attribute which if found on a heading means the heading is excluded */
const ignoreAttribute = 'data-toc-exclude';

/** Option values used when the caller does not supply its own. */
const defaults = {
/** Tag names of the headings that are collected into the table of contents */
tags: ['h2', 'h3', 'h4'],
/**
 * Tag names that the heading filter rejects
 * @type{string[]}
 */
ignoredElements: [],
};

/**
 * Options handed to `Toc` and `Item`: the defaults above plus the dynamically
 * imported `parse5` and `@parse5/tools` module namespaces.
 * @typedef {{ Tools: import('@parse5/tools'), Parse5: import('parse5') } & typeof defaults} Options
 */

/**
* @param {Item} prev
* @param {Item} current
*/
function getParent(prev, current) {
if (current.level > prev.level) {
// child heading
Expand All @@ -23,94 +33,102 @@ function getParent(prev, current) {
}

/**
 * One node of the table-of-contents tree. The root node is created without an
 * element (level 0, no slug or text); every other node wraps a heading element.
 */
class Item {
  /** @type{Item[]} */
  children = [];

  /** @type{Item} */
  parent;

  level = 0;

  /**
   * @param {Options} options must carry the `@parse5/tools` namespace as `Tools`
   * @param {import('@parse5/tools').Element} [element] heading element; omit for the root
   */
  constructor(options, element) {
    this.options = options;
    if (element) {
      const { getAttribute, getTextContent } = this.options.Tools;
      this.slug = getAttribute(element, 'id');
      this.text = getTextContent(element).trim();
      // 'h2' -> 2; anything that does not yield a number falls back to level 0.
      this.level = Number.parseInt(element.tagName.replace('h', ''), 10) || 0;
    }
  }

  /**
   * Builds the element tree for this item and its descendants.
   * @returns an `li` holding a link when the item has both slug and text,
   *          otherwise a bare `span` wrapper (used for the root)
   */
  getItem() {
    const { createElement, setTextContent, appendChild } = this.options.Tools;
    const isLinkable = Boolean(this.slug && this.text);
    const container = createElement(isLinkable ? 'li' : 'span');
    if (isLinkable) {
      const a = createElement('a', { href: `#${this.slug}` });
      setTextContent(a, this.text);
      appendChild(container, a);
    }
    if (this.children.length > 0) {
      const details = createElement('ol', { slot: 'details' });
      const expanded = createElement('ol', { slot: 'expanded' });
      for (const child of this.children) {
        // Each list needs its own subtree, so the child is rendered twice.
        appendChild(details, child.getItem());
        appendChild(expanded, child.getItem());
      }
      appendChild(container, details);
      appendChild(container, expanded);
    }
    return container;
  }
}

/**
 * Parses an HTML string, collects its headings into a tree of `Item`s and can
 * serialize that tree back to HTML.
 */
class Toc {
  /**
   * @param {string} htmlstring HTML to scan for headings
   * @param {Options} options must carry `Parse5` and `Tools`; other keys override `defaults`
   */
  constructor(htmlstring = '', options) {
    const { queryAll, hasAttribute, isElementNode } = options.Tools;
    const { parse } = options.Parse5;
    this.options = { ...defaults, ...options };
    const { tags, ignoredElements } = this.options;

    // The root is its own parent, so walking up from any item always ends on an Item.
    this.root = new Item(this.options);
    this.root.parent = this.root;

    const document = parse(htmlstring);

    // A heading qualifies when its tag is listed in `tags`, is not listed in
    // `ignoredElements`, has an id and does not carry the exclusion attribute.
    // NOTE(review): `ignoredElements` filters headings by their own tag name
    // here; confirm that is the intended meaning of the option.
    const headings = queryAll(document, node => isElementNode(node)
      && tags.includes(node.tagName)
      && !ignoredElements.includes(node.tagName)
      && hasAttribute(node, 'id')
      && !hasAttribute(node, ignoreAttribute));

    let previous = this.root;
    for (const heading of headings) {
      if (!isElementNode(heading)) {
        // Already guaranteed by the predicate; kept for type narrowing.
        continue;
      }
      const current = new Item(this.options, heading);
      const parent = getParent(previous, current);
      current.parent = parent;
      parent.children.push(current);
      previous = current;
    }
  }

  /** @returns {Item} the root of the heading tree */
  get() {
    return this.root;
  }

  /** @returns {string} HTML produced by serializing the root item's element tree */
  serialize() {
    const { serialize } = this.options.Parse5;
    return serialize(this.root.getItem());
  }
}

module.exports = {
initArguments: {},
configFunction: function(eleventyConfig, options = {}) {
eleventyConfig.addFilter('toc', (content, opts) => {
const toc = new Toc(content, { ...options, ...opts });
return toc.html();
});
eleventyConfig.addFilter('toc', /**
* @param {string} content
* @param {typeof defaults} opts
*/
async function(content, opts) {
const Parse5 = await import('parse5');
const Tools = await import('@parse5/tools');
const toc = new Toc(content, { ...options, ...opts, Parse5, Tools });
const html = toc.serialize();
return html;
});
},
};

0 comments on commit 0440570

Please sign in to comment.