From 6d6c4241f4738c767cd150ab0b7169695fab2827 Mon Sep 17 00:00:00 2001 From: bluwy Date: Fri, 23 Aug 2024 22:46:58 +0800 Subject: [PATCH 1/3] Refactor createShikiHighlighter --- .changeset/breezy-colts-promise.md | 5 + .changeset/hungry-jokes-try.md | 5 + .changeset/large-zebras-sniff.md | 9 + packages/astro/components/Code.astro | 8 +- .../markdoc/src/extensions/shiki.ts | 2 +- packages/markdown/remark/src/highlight.ts | 23 +- packages/markdown/remark/src/index.ts | 7 +- packages/markdown/remark/src/rehype-shiki.ts | 15 +- packages/markdown/remark/src/shiki.ts | 259 ++++++++++-------- packages/markdown/remark/src/types.ts | 20 +- packages/markdown/remark/test/shiki.test.js | 24 +- 11 files changed, 232 insertions(+), 145 deletions(-) create mode 100644 .changeset/breezy-colts-promise.md create mode 100644 .changeset/hungry-jokes-try.md create mode 100644 .changeset/large-zebras-sniff.md diff --git a/.changeset/breezy-colts-promise.md b/.changeset/breezy-colts-promise.md new file mode 100644 index 000000000000..b552cb7f5e16 --- /dev/null +++ b/.changeset/breezy-colts-promise.md @@ -0,0 +1,5 @@ +--- +'@astrojs/markdoc': patch +--- + +Uses latest version of `@astrojs/markdown-remark` with updated Shiki APIs diff --git a/.changeset/hungry-jokes-try.md b/.changeset/hungry-jokes-try.md new file mode 100644 index 000000000000..4d3597cb849c --- /dev/null +++ b/.changeset/hungry-jokes-try.md @@ -0,0 +1,5 @@ +--- +'@astrojs/markdown-remark': major +--- + +Updates return object of `createShikiHighlighter` as `codeToHast` and `codeToHtml` to allow generating either the hast or html string directly diff --git a/.changeset/large-zebras-sniff.md b/.changeset/large-zebras-sniff.md new file mode 100644 index 000000000000..32847a6c564a --- /dev/null +++ b/.changeset/large-zebras-sniff.md @@ -0,0 +1,9 @@ +--- +'astro': major +--- + +Updates internal Shiki rehype plugin to highlight code blocks as hast (using Shiki's `codeToHast()` API). 
This allows a more direct markdown and MDX processing, and improves the performance when building the project. + +However, a caveat with `codeToHast()` is that Shiki transformers' `postprocess` hook will now not run on code blocks in `.md` and `.mdx` files (also [documented in Shiki](https://shiki.style/guide/transformers#transformer-hooks)). Make sure the Shiki transformers passed to `markdown.shikiConfig.transformers` do not use the `postprocess` hook to avoid issues with the HTML output. + +Code blocks in `.mdoc` files and `` component will still work the same and shouldn't need any changes as they do not use the internal Shiki rehype plugin. diff --git a/packages/astro/components/Code.astro b/packages/astro/components/Code.astro index 8818b2ae0d27..afa510f1cd32 100644 --- a/packages/astro/components/Code.astro +++ b/packages/astro/components/Code.astro @@ -111,13 +111,13 @@ const highlighter = await getCachedHighlighter({ ], theme, themes, - defaultColor, - wrap, - transformers, }); -const html = await highlighter.highlight(code, typeof lang === 'string' ? lang : lang.name, { +const html = await highlighter.codeToHtml(code, typeof lang === 'string' ? 
lang : lang.name, { + defaultColor, + wrap, inline, + transformers, meta, attributes: rest as any, }); diff --git a/packages/integrations/markdoc/src/extensions/shiki.ts b/packages/integrations/markdoc/src/extensions/shiki.ts index 04fc8e8673aa..85c46b295856 100644 --- a/packages/integrations/markdoc/src/extensions/shiki.ts +++ b/packages/integrations/markdoc/src/extensions/shiki.ts @@ -16,7 +16,7 @@ export default async function shiki(config?: ShikiConfig): Promise Promise; +type Highlighter = ( + code: string, + language: string, + options?: { meta?: string }, +) => Promise; const languagePattern = /\blanguage-(\S+)\b/; @@ -73,12 +77,17 @@ export async function highlightCodeBlocks(tree: Root, highlighter: Highlighter) for (const { node, language, grandParent, parent } of nodes) { const meta = (node.data as any)?.meta ?? node.properties.metastring ?? undefined; const code = toText(node, { whitespace: 'pre' }); - // TODO: In Astro 5, have `highlighter()` return hast directly to skip expensive HTML parsing and serialization. - const html = await highlighter(code, language, { meta }); - // The replacement returns a root node with 1 child, the `` element replacement. - const replacement = fromHtml(html, { fragment: true }).children[0] as Element; - // We just generated this node, so any positional information is invalid. - removePosition(replacement); + const result = await highlighter(code, language, { meta }); + + let replacement: Element; + if (typeof result === 'string') { + // The replacement returns a root node with 1 child, the `
` element replacement.
+			replacement = fromHtml(result, { fragment: true }).children[0] as Element;
+			// We just generated this node, so any positional information is invalid.
+			removePosition(replacement);
+		} else {
+			replacement = result.children[0] as Element;
+		}
 
 		// We replace the parent in its parent with the new `<pre>` element.
 		const index = grandParent.children.indexOf(parent);
diff --git a/packages/markdown/remark/src/index.ts b/packages/markdown/remark/src/index.ts
index 66cf00043ff4..0ed2aaca10b5 100644
--- a/packages/markdown/remark/src/index.ts
+++ b/packages/markdown/remark/src/index.ts
@@ -26,7 +26,12 @@ export { rehypeHeadingIds } from './rehype-collect-headings.js';
 export { remarkCollectImages } from './remark-collect-images.js';
 export { rehypePrism } from './rehype-prism.js';
 export { rehypeShiki } from './rehype-shiki.js';
-export { createShikiHighlighter, type ShikiHighlighter } from './shiki.js';
+export {
+	createShikiHighlighter,
+	type ShikiHighlighter,
+	type CreateShikiHighlighterOptions,
+	type ShikiHighlighterHighlightOptions,
+} from './shiki.js';
 export * from './types.js';
 
 export const markdownConfigDefaults: Required = {
diff --git a/packages/markdown/remark/src/rehype-shiki.ts b/packages/markdown/remark/src/rehype-shiki.ts
index fdab3ddf3517..9344ddbb9b1e 100644
--- a/packages/markdown/remark/src/rehype-shiki.ts
+++ b/packages/markdown/remark/src/rehype-shiki.ts
@@ -8,9 +8,20 @@ export const rehypeShiki: Plugin<[ShikiConfig?], Root> = (config) => {
 	let highlighterAsync: Promise | undefined;
 
 	return async (tree) => {
-		highlighterAsync ??= createShikiHighlighter(config);
+		highlighterAsync ??= createShikiHighlighter({
+			langs: config?.langs,
+			theme: config?.theme,
+			themes: config?.themes,
+		});
 		const highlighter = await highlighterAsync;
 
-		await highlightCodeBlocks(tree, highlighter.highlight);
+		await highlightCodeBlocks(tree, (code, language, options) => {
+			return highlighter.codeToHast(code, language, {
+				meta: options?.meta,
+				wrap: config?.wrap,
+				defaultColor: config?.defaultColor,
+				transformers: config?.transformers,
+			});
+		});
 	};
 };
diff --git a/packages/markdown/remark/src/shiki.ts b/packages/markdown/remark/src/shiki.ts
index 011431e5002e..fe46b60c21fd 100644
--- a/packages/markdown/remark/src/shiki.ts
+++ b/packages/markdown/remark/src/shiki.ts
@@ -1,27 +1,65 @@
-import type { Properties } from 'hast';
+import type { Properties, Root } from 'hast';
 import {
 	type BundledLanguage,
+	type LanguageRegistration,
+	type ShikiTransformer,
+	type ThemeRegistration,
+	type ThemeRegistrationRaw,
 	createCssVariablesTheme,
-	getHighlighter,
+	createHighlighter,
 	isSpecialLang,
 } from 'shiki';
-import type { ShikiConfig } from './types.js';
+import type { ThemePresets } from './types.js';
 
 export interface ShikiHighlighter {
-	highlight(
+	codeToHast(
 		code: string,
 		lang?: string,
-		options?: {
-			inline?: boolean;
-			attributes?: Record;
-			/**
-			 * Raw `meta` information to be used by Shiki transformers
-			 */
-			meta?: string;
-		},
+		options?: ShikiHighlighterHighlightOptions,
+	): Promise;
+	codeToHtml(
+		code: string,
+		lang?: string,
+		options?: ShikiHighlighterHighlightOptions,
 	): Promise;
 }
 
+export interface CreateShikiHighlighterOptions {
+	langs?: LanguageRegistration[];
+	theme?: ThemePresets | ThemeRegistration | ThemeRegistrationRaw;
+	themes?: Record;
+}
+
+export interface ShikiHighlighterHighlightOptions {
+	/**
+	 * Generate inline code element only, without the pre element wrapper.
+	 */
+	inline?: boolean;
+	/**
+	 * Enable word wrapping.
+	 * - true: enabled.
+	 * - false: disabled.
+	 * - null: All overflow styling removed. Code will overflow the element by default.
+	 */
+	wrap?: boolean | null;
+	/**
+	 * Chooses a theme from the "themes" option that you've defined as the default styling theme.
+	 */
+	defaultColor?: 'light' | 'dark' | string | false;
+	/**
+	 * Shiki transformers to customize the generated HTML by manipulating the hast tree.
+	 */
+	transformers?: ShikiTransformer[];
+	/**
+	 * Additional attributes to be added to the root code block element.
+	 */
+	attributes?: Record;
+	/**
+	 * Raw `meta` information to be used by Shiki transformers.
+	 */
+	meta?: string;
+}
+
 let _cssVariablesTheme: ReturnType;
 const cssVariablesTheme = () =>
 	_cssVariablesTheme ??
@@ -31,113 +69,120 @@ export async function createShikiHighlighter({
 	langs = [],
 	theme = 'github-dark',
 	themes = {},
-	defaultColor,
-	wrap = false,
-	transformers = [],
-}: ShikiConfig = {}): Promise {
+}: CreateShikiHighlighterOptions = {}): Promise {
 	theme = theme === 'css-variables' ? cssVariablesTheme() : theme;
 
-	const highlighter = await getHighlighter({
+	const highlighter = await createHighlighter({
 		langs: ['plaintext', ...langs],
 		themes: Object.values(themes).length ? Object.values(themes) : [theme],
 	});
 
-	return {
-		async highlight(code, lang = 'plaintext', options) {
-			const loadedLanguages = highlighter.getLoadedLanguages();
-
-			if (!isSpecialLang(lang) && !loadedLanguages.includes(lang)) {
-				try {
-					await highlighter.loadLanguage(lang as BundledLanguage);
-				} catch (_err) {
-					// eslint-disable-next-line no-console
-					console.warn(
-						`[Shiki] The language "${lang}" doesn't exist, falling back to "plaintext".`,
-					);
-					lang = 'plaintext';
-				}
+	async function highlight(
+		code: string,
+		lang = 'plaintext',
+		options: ShikiHighlighterHighlightOptions,
+		to: 'hast' | 'html',
+	) {
+		const loadedLanguages = highlighter.getLoadedLanguages();
+
+		if (!isSpecialLang(lang) && !loadedLanguages.includes(lang)) {
+			try {
+				await highlighter.loadLanguage(lang as BundledLanguage);
+			} catch (_err) {
+				// eslint-disable-next-line no-console
+				console.warn(`[Shiki] The language "${lang}" doesn't exist, falling back to "plaintext".`);
+				lang = 'plaintext';
 			}
+		}
 
-			const themeOptions = Object.values(themes).length ? { themes } : { theme };
-			const inline = options?.inline ?? false;
-
-			return highlighter.codeToHtml(code, {
-				...themeOptions,
-				defaultColor,
-				lang,
-				// NOTE: while we can spread `options.attributes` here so that Shiki can auto-serialize this as rendered
-				// attributes on the top-level tag, it's not clear whether it is fine to pass all attributes as meta, as
-				// they're technically not meta, nor parsed from Shiki's `parseMetaString` API.
-				meta: options?.meta ? { __raw: options?.meta } : undefined,
-				transformers: [
-					{
-						pre(node) {
-							// Swap to `code` tag if inline
-							if (inline) {
-								node.tagName = 'code';
-							}
+		const themeOptions = Object.values(themes).length ? { themes } : { theme };
+		const inline = options?.inline ?? false;
 
-							const {
-								class: attributesClass,
-								style: attributesStyle,
-								...rest
-							} = options?.attributes ?? {};
-							Object.assign(node.properties, rest);
-
-							const classValue =
-								(normalizePropAsString(node.properties.class) ?? '') +
-								(attributesClass ? ` ${attributesClass}` : '');
-							const styleValue =
-								(normalizePropAsString(node.properties.style) ?? '') +
-								(attributesStyle ? `; ${attributesStyle}` : '');
-
-							// Replace "shiki" class naming with "astro-code"
-							node.properties.class = classValue.replace(/shiki/g, 'astro-code');
-
-							// Add data-language attribute
-							node.properties.dataLanguage = lang;
-
-							// Handle code wrapping
-							// if wrap=null, do nothing.
-							if (wrap === false) {
-								node.properties.style = styleValue + '; overflow-x: auto;';
-							} else if (wrap === true) {
-								node.properties.style =
-									styleValue + '; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;';
-							}
-						},
-						line(node) {
-							// Add "user-select: none;" for "+"/"-" diff symbols.
-							// Transform `+ something
-							// into      `+ something`
-							if (lang === 'diff') {
-								const innerSpanNode = node.children[0];
-								const innerSpanTextNode =
-									innerSpanNode?.type === 'element' && innerSpanNode.children?.[0];
-
-								if (innerSpanTextNode && innerSpanTextNode.type === 'text') {
-									const start = innerSpanTextNode.value[0];
-									if (start === '+' || start === '-') {
-										innerSpanTextNode.value = innerSpanTextNode.value.slice(1);
-										innerSpanNode.children.unshift({
-											type: 'element',
-											tagName: 'span',
-											properties: { style: 'user-select: none;' },
-											children: [{ type: 'text', value: start }],
-										});
-									}
+		return highlighter[to === 'html' ? 'codeToHtml' : 'codeToHast'](code, {
+			...themeOptions,
+			defaultColor: options.defaultColor,
+			lang,
+			// NOTE: while we can spread `options.attributes` here so that Shiki can auto-serialize this as rendered
+			// attributes on the top-level tag, it's not clear whether it is fine to pass all attributes as meta, as
+			// they're technically not meta, nor parsed from Shiki's `parseMetaString` API.
+			meta: options?.meta ? { __raw: options?.meta } : undefined,
+			transformers: [
+				{
+					pre(node) {
+						// Swap to `code` tag if inline
+						if (inline) {
+							node.tagName = 'code';
+						}
+
+						const {
+							class: attributesClass,
+							style: attributesStyle,
+							...rest
+						} = options?.attributes ?? {};
+						Object.assign(node.properties, rest);
+
+						const classValue =
+							(normalizePropAsString(node.properties.class) ?? '') +
+							(attributesClass ? ` ${attributesClass}` : '');
+						const styleValue =
+							(normalizePropAsString(node.properties.style) ?? '') +
+							(attributesStyle ? `; ${attributesStyle}` : '');
+
+						// Replace "shiki" class naming with "astro-code"
+						node.properties.class = classValue.replace(/shiki/g, 'astro-code');
+
+						// Add data-language attribute
+						node.properties.dataLanguage = lang;
+
+						// Handle code wrapping
+						// if wrap=null, do nothing.
+						if (options.wrap === false || options.wrap === undefined) {
+							node.properties.style = styleValue + '; overflow-x: auto;';
+						} else if (options.wrap === true) {
+							node.properties.style =
+								styleValue + '; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;';
+						}
+					},
+					line(node) {
+						// Add "user-select: none;" for "+"/"-" diff symbols.
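+						// Transform `<span class="line"><span style="...">+ something</span></span>
+						// into      `<span class="line"><span style="..."><span style="user-select: none;">+</span> something</span></span>`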
+						if (lang === 'diff') {
+							const innerSpanNode = node.children[0];
+							const innerSpanTextNode =
+								innerSpanNode?.type === 'element' && innerSpanNode.children?.[0];
+
+							if (innerSpanTextNode && innerSpanTextNode.type === 'text') {
+								const start = innerSpanTextNode.value[0];
+								if (start === '+' || start === '-') {
+									innerSpanTextNode.value = innerSpanTextNode.value.slice(1);
+									innerSpanNode.children.unshift({
+										type: 'element',
+										tagName: 'span',
+										properties: { style: 'user-select: none;' },
+										children: [{ type: 'text', value: start }],
+									});
 								}
 							}
-						},
-						code(node) {
-							if (inline) {
-								return node.children[0] as typeof node;
-							}
-						},
+						}
+					},
+					code(node) {
+						if (inline) {
+							return node.children[0] as typeof node;
+						}
 					},
-					...transformers,
-				],
-			});
+				},
+				...(options.transformers ?? []),
+			],
+		});
+	}
+
+	return {
+		codeToHast(code, lang, options = {}) {
+			return highlight(code, lang, options, 'hast') as Promise;
+		},
+		codeToHtml(code, lang, options = {}) {
+			return highlight(code, lang, options, 'html') as Promise;
 		},
 	};
 }
diff --git a/packages/markdown/remark/src/types.ts b/packages/markdown/remark/src/types.ts
index aa7b62c9a780..4a1263e50f0b 100644
--- a/packages/markdown/remark/src/types.ts
+++ b/packages/markdown/remark/src/types.ts
@@ -1,15 +1,10 @@
 import type * as hast from 'hast';
 import type * as mdast from 'mdast';
 import type { Options as RemarkRehypeOptions } from 'remark-rehype';
-import type {
-	BuiltinTheme,
-	LanguageRegistration,
-	ShikiTransformer,
-	ThemeRegistration,
-	ThemeRegistrationRaw,
-} from 'shiki';
+import type { BuiltinTheme } from 'shiki';
 import type * as unified from 'unified';
 import type { DataMap, VFile } from 'vfile';
+import type { CreateShikiHighlighterOptions, ShikiHighlighterHighlightOptions } from './shiki.js';
 
 export type { Node } from 'unist';
 
@@ -35,14 +30,9 @@ export type RemarkRehype = RemarkRehypeOptions;
 
 export type ThemePresets = BuiltinTheme | 'css-variables';
 
-export interface ShikiConfig {
-	langs?: LanguageRegistration[];
-	theme?: ThemePresets | ThemeRegistration | ThemeRegistrationRaw;
-	themes?: Record;
-	defaultColor?: 'light' | 'dark' | string | false;
-	wrap?: boolean | null;
-	transformers?: ShikiTransformer[];
-}
+export interface ShikiConfig
+	extends Pick,
+		Pick {}
 
 export interface AstroMarkdownOptions {
 	syntaxHighlight?: 'shiki' | 'prism' | false;
diff --git a/packages/markdown/remark/test/shiki.test.js b/packages/markdown/remark/test/shiki.test.js
index c3cb813702db..d980282024b3 100644
--- a/packages/markdown/remark/test/shiki.test.js
+++ b/packages/markdown/remark/test/shiki.test.js
@@ -33,16 +33,25 @@ describe('shiki syntax highlighting', () => {
 	it('createShikiHighlighter works', async () => {
 		const highlighter = await createShikiHighlighter();
 
-		const html = await highlighter.highlight('const foo = "bar";', 'js');
+		const html = await highlighter.codeToHtml('const foo = "bar";', 'js');
 
 		assert.match(html, /astro-code github-dark/);
 		assert.match(html, /background-color:#24292e;color:#e1e4e8;/);
 	});
 
+	it('createShikiHighlighter works with codeToHast', async () => {
+		const highlighter = await createShikiHighlighter();
+
+		const hast = await highlighter.codeToHast('const foo = "bar";', 'js');
+
+		assert.match(hast.children[0].properties.class, /astro-code github-dark/);
+		assert.match(hast.children[0].properties.style, /background-color:#24292e;color:#e1e4e8;/);
+	});
+
 	it('diff +/- text has user-select: none', async () => {
 		const highlighter = await createShikiHighlighter();
 
-		const html = await highlighter.highlight(
+		const html = await highlighter.codeToHtml(
 			`\
 - const foo = "bar";
 + const foo = "world";`,
@@ -57,7 +66,7 @@ describe('shiki syntax highlighting', () => {
 	it('renders attributes', async () => {
 		const highlighter = await createShikiHighlighter();
 
-		const html = await highlighter.highlight(`foo`, 'js', {
+		const html = await highlighter.codeToHtml(`foo`, 'js', {
 			attributes: { 'data-foo': 'bar', autofocus: true },
 		});
 
@@ -66,7 +75,10 @@ describe('shiki syntax highlighting', () => {
 	});
 
 	it('supports transformers that reads meta', async () => {
-		const highlighter = await createShikiHighlighter({
+		const highlighter = await createShikiHighlighter();
+
+		const html = await highlighter.codeToHtml(`foo`, 'js', {
+			meta: '{1,3-4}',
 			transformers: [
 				{
 					pre(node) {
@@ -79,10 +91,6 @@ describe('shiki syntax highlighting', () => {
 			],
 		});
 
-		const html = await highlighter.highlight(`foo`, 'js', {
-			meta: '{1,3-4}',
-		});
-
 		assert.match(html, /data-test="\{1,3-4\}"/);
 	});
 

From 15111dc6feb23ae5074d4d5c69c4c6e005ef6b27 Mon Sep 17 00:00:00 2001
From: bluwy 
Date: Fri, 23 Aug 2024 23:41:38 +0800
Subject: [PATCH 2/3] Fix markdoc

---
 .../integrations/markdoc/src/extensions/shiki.ts     | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/packages/integrations/markdoc/src/extensions/shiki.ts b/packages/integrations/markdoc/src/extensions/shiki.ts
index 85c46b295856..1102242fdfe5 100644
--- a/packages/integrations/markdoc/src/extensions/shiki.ts
+++ b/packages/integrations/markdoc/src/extensions/shiki.ts
@@ -5,7 +5,11 @@ import { unescapeHTML } from 'astro/runtime/server/index.js';
 import type { AstroMarkdocConfig } from '../config.js';
 
 export default async function shiki(config?: ShikiConfig): Promise {
-	const highlighter = await createShikiHighlighter(config);
+	const highlighter = await createShikiHighlighter({
+		langs: config?.langs,
+		theme: config?.theme,
+		themes: config?.themes,
+	});
 
 	return {
 		nodes: {
@@ -16,7 +20,11 @@ export default async function shiki(config?: ShikiConfig): Promise
Date: Mon, 2 Sep 2024 20:32:50 +0800
Subject: [PATCH 3/3] Apply suggestions from code review

Co-authored-by: Sarah Rainsberger 
---
 .changeset/large-zebras-sniff.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.changeset/large-zebras-sniff.md b/.changeset/large-zebras-sniff.md
index 32847a6c564a..97143b52122b 100644
--- a/.changeset/large-zebras-sniff.md
+++ b/.changeset/large-zebras-sniff.md
@@ -2,8 +2,8 @@
 'astro': major
 ---
 
-Updates internal Shiki rehype plugin to highlight code blocks as hast (using Shiki's `codeToHast()` API). This allows a more direct markdown and MDX processing, and improves the performance when building the project.
+Updates the internal Shiki rehype plugin to highlight code blocks as hast (using Shiki's `codeToHast()` API). This allows more direct Markdown and MDX processing, and improves performance when building the project, but may cause issues with existing Shiki transformers.
 
-However, a caveat with `codeToHast()` is that Shiki transformers' `postprocess` hook will now not run on code blocks in `.md` and `.mdx` files (also [documented in Shiki](https://shiki.style/guide/transformers#transformer-hooks)). Make sure the Shiki transformers passed to `markdown.shikiConfig.transformers` do not use the `postprocess` hook to avoid issues with the HTML output.
+If you are using Shiki transformers passed to `markdown.shikiConfig.transformers`, you must make sure they do not use the `postprocess` hook as it no longer runs on code blocks in `.md` and `.mdx` files. (See [the Shiki documentation on transformer hooks](https://shiki.style/guide/transformers#transformer-hooks) for more information.)
 
-Code blocks in `.mdoc` files and `` component will still work the same and shouldn't need any changes as they do not use the internal Shiki rehype plugin.
+Code blocks in `.mdoc` files and the `<Code />` component do not use the internal Shiki rehype plugin and are unaffected.