From e28349e748f9d3ed7a8adc01879136b817c1d232 Mon Sep 17 00:00:00 2001
From: Jason Joseph Nathan
Date: Mon, 23 Sep 2024 20:34:57 +0800
Subject: [PATCH] Add Support for Custom Language Grammars in processMarkdown (Fixes #11) (#12)

* feat: add support for custom language grammars in processMarkdown

- Introduced langConfig option to allow custom language grammars and aliases.
- Updated test cases to validate custom language support.

* Removed .skip from test

* Removed the frontmatter flag, already accounted for

* refactor to a class

* add changeset

* Added an alternative idea

* Updated `createProcessMarkdown` to fallback to defaults instead of merging

* refactor class

---------

Co-authored-by: rossrobino
---
 .changeset/tender-emus-try.md |   7 +
 packages/md/src/index.test.ts |  44 ++++--
 packages/md/src/index.ts      | 270 ++++++++++++++++++++--------------
 packages/md/src/test.html     |   3 +
 4 files changed, 201 insertions(+), 123 deletions(-)
 create mode 100644 .changeset/tender-emus-try.md

diff --git a/.changeset/tender-emus-try.md b/.changeset/tender-emus-try.md
new file mode 100644
index 0000000..c60ae03
--- /dev/null
+++ b/.changeset/tender-emus-try.md
@@ -0,0 +1,7 @@
+---
+"@robino/md": major
+---
+
+v1.0.0
+
+- This package follows semantic versioning, so it will no longer have breaking changes on minor releases.
diff --git a/packages/md/src/index.test.ts b/packages/md/src/index.test.ts index cde85b4..1c94ccf 100644 --- a/packages/md/src/index.test.ts +++ b/packages/md/src/index.test.ts @@ -1,9 +1,22 @@ -import { processMarkdown } from "./index.js"; +import { MarkdownProcessor } from "./index.js"; import fs from "node:fs/promises"; import path from "node:path"; +import langHtml from "shiki/langs/html.mjs"; +import langLua from "shiki/langs/lua.mjs"; +import langMd from "shiki/langs/md.mjs"; +import langTsx from "shiki/langs/tsx.mjs"; import { expect, test } from "vitest"; import { z } from "zod"; +const processor = new MarkdownProcessor({ + highlighter: { + langs: [langHtml, langMd, langTsx, langLua], + langAlias: { + ts: "tsx", + }, + }, +}); + const frontmatterSchema = z .object({ title: z.string(), @@ -46,12 +59,17 @@ const add = (a: number, b: number): number => { \`\`\`html test \`\`\` + +\`\`\`lua +function hello_world() + print("Hello, World!") +end +\`\`\` `; -test("processMarkdown", () => { - const { article, headings, html, frontmatter } = processMarkdown({ - md, - }); +test("process", () => { + const { article, headings, html, frontmatter } = processor.process(md); + expect(article).toBeTypeOf("string"); expect(article.at(0)).toBe("-"); expect(headings).toBeInstanceOf(Array); @@ -63,13 +81,19 @@ test("processMarkdown", () => { }); test("with frontmatter", async () => { - const { frontmatter, html } = processMarkdown({ - md, - frontmatterSchema, - }); + const { frontmatter, html } = processor.process(md, frontmatterSchema); + await fs.writeFile(path.join(import.meta.dirname, "test.html"), html); - console.log(html); + expect(frontmatter.title).toBeTypeOf("string"); expect(frontmatter.description).toBeTypeOf("string"); expect(frontmatter.keywords).toBeInstanceOf(Array); }); + +test("check Lua language support", async () => { + const { html } = processor.process(md); + + // Verify that the Lua code block was properly highlighted in the output HTML + 
expect(html).toContain('
');
+});
diff --git a/packages/md/src/index.ts b/packages/md/src/index.ts
index 1d7767a..2a310c3 100644
--- a/packages/md/src/index.ts
+++ b/packages/md/src/index.ts
@@ -2,19 +2,15 @@ import { fromHighlighter } from "@shikijs/markdown-it/core";
 import { transformerMetaHighlight } from "@shikijs/transformers";
 import { load } from "js-yaml";
 import MarkdownIt from "markdown-it";
+import type { Options as MarkdownItOptions } from "markdown-it";
 import Anchor from "markdown-it-anchor";
 import {
 	createCssVariablesTheme,
 	createHighlighterCoreSync,
 	type HighlighterGeneric,
 	createJavaScriptRegexEngine,
+	type HighlighterCoreOptions,
 } from "shiki/core";
-import langBash from "shiki/langs/bash.mjs";
-import langCss from "shiki/langs/css.mjs";
-import langHtml from "shiki/langs/html.mjs";
-import langJson from "shiki/langs/json.mjs";
-import langMd from "shiki/langs/md.mjs";
-import langTsx from "shiki/langs/tsx.mjs";
 import type { z } from "zod";
 
 export interface MdHeading {
@@ -42,137 +38,185 @@ export interface MdData {
 	frontmatter: z.infer<T>;
 }
 
-const mdIt = MarkdownIt({ typographer: true, linkify: true, html: true });
-
-const variableTheme = createCssVariablesTheme();
-
-const highlighter = createHighlighterCoreSync({
-	themes: [variableTheme],
-	langs: [langHtml, langCss, langTsx, langMd, langBash, langJson],
-	engine: createJavaScriptRegexEngine(),
-	langAlias: {
-		svelte: "html",
-		js: "tsx",
-		jsx: "tsx",
-		ts: "tsx",
-	},
-}) as HighlighterGeneric<any, any>;
-
-mdIt.use(
-	fromHighlighter(highlighter, {
-		theme: "css-variables",
-		transformers: [transformerMetaHighlight()],
-	}),
-);
-
-mdIt.use(Anchor, { permalink: Anchor.permalink.headerLink() });
+export type MarkdownProcessorOptions = {
+	/**
+	 * MarkdownIt options
+	 *
+	 * @default
+	 *
+	 * ```ts
+	 * {
+	 * 	typographer: true,
+	 * 	linkify: true,
+	 * 	html: true,
+	 * }
+	 * ```
+	 */
+	markdownIt?: MarkdownItOptions;
+
+	/** Shiki highlighter options. */
+	highlighter: {
+		/** Custom shiki grammars to load */
+		langs: HighlighterCoreOptions["langs"];
+
+		/**
+		 * Define custom aliases used
+		 *
+		 * @default undefined
+		 */
+		langAlias?: HighlighterCoreOptions["langAlias"];
+	};
+};
 
 /**
- * - processes markdown strings, pass in a zod schema for frontmatter parsing
- * - uses `shiki` to syntax highlight
+ * - Processes markdown strings with optional frontmatter and syntax highlighting.
+ * - Pass custom language options for syntax highlighting, import from `"shiki/langs/..."`.
  *
  * ```ts
- * import { processMarkdown } from "robino/util/md";
+ * import { MarkdownProcessor } from "@robino/md";
+ * import langHtml from "shiki/langs/html.mjs";
+ *
+ * const frontmatterSchema = z.object({
+ *   title: z.string(),
+ *   description: z.string(),
+ *   keywords: z.string().transform((val) => val.split(",").map((s) => s.trim().toLowerCase())),
+ *   date: z.string(),
+ * }).strict();
  *
- * const frontmatterSchema = z
- *		.object({
- *			title: z.string(),
- *			description: z.string(),
- *			keywords: z
- *				.string()
- *				.transform((val) => val.split(",").map((s) => s.trim().toLowerCase())),
- *			date: z.string(),
- *		})
- *		.strict();
+ * const processor = new MarkdownProcessor({
+ * 	highlighter: {
+ * 		langs: [langHtml],
+ * 		langAlias: {
+ * 			svelte: "html",
+ * 		},
+ * 	},
+ * });
  *
- * const data = processMarkdown({ md, frontmatterSchema });
+ * const data = processor.process(md, frontmatterSchema);
  * ```
- * @param options
- * @returns headings, article, frontmatter, html
  */
-export const processMarkdown = <T extends z.ZodSchema>(options: {
-	/** String of markdown to process. */
-	md: string;
-
-	/** An optional zod schema */
-	frontmatterSchema?: T;
-}) => {
-	const { md, frontmatterSchema } = options;
-
-	const split = md.split("---");
-
-	const yaml = split.at(1);
-
-	const shouldProcessFrontmatter = yaml && frontmatterSchema;
+export class MarkdownProcessor {
+	/** MarkdownIt instance */
+	markdownIt: MarkdownIt;
+
+	constructor(options: MarkdownProcessorOptions) {
+		if (!options.markdownIt) {
+			// default MarkdownIt options
+			options.markdownIt = {
+				typographer: true,
+				linkify: true,
+				html: true,
+			};
+		}
 
-	const article = shouldProcessFrontmatter ? split.slice(2).join("---") : md;
+		this.markdownIt = MarkdownIt(options.markdownIt);
+
+		// Configure MarkdownIt with syntax highlighting
+		this.markdownIt
+			.use(
+				fromHighlighter(
+					// Create the highlighter core with provided languages
+					createHighlighterCoreSync({
+						themes: [createCssVariablesTheme()],
+						langs: options.highlighter.langs,
+						engine: createJavaScriptRegexEngine(),
+						langAlias: options.highlighter.langAlias,
+					}) as HighlighterGeneric<any, any>,
+					{
+						theme: "css-variables",
+						transformers: [transformerMetaHighlight()],
+					},
+				),
+			)
+			.use(Anchor, { permalink: Anchor.permalink.headerLink() });
+	}
 
-	const frontmatter = shouldProcessFrontmatter
-		? getFrontmatter(yaml, frontmatterSchema)
-		: {};
+	/**
+	 * @param md Markdown string to process.
+	 * @param frontmatterSchema Optional Zod frontmatter schema
+	 * @returns headings, article, frontmatter, html
+	 */
+	process<T extends z.ZodSchema>(
+		md: string,
+		frontmatterSchema?: T,
+	): MdData<T> {
+		const split = md.split("---");
 
-	const headings = getHeadings(article);
+		const yaml = split.at(1);
+		const shouldProcessFrontmatter = yaml && frontmatterSchema;
 
-	const html = mdIt.render(article);
+		const article = shouldProcessFrontmatter ? split.slice(2).join("---") : md;
 
-	const data: MdData<T> = { article, headings, html, frontmatter };
+		// Process frontmatter based on option
+		const frontmatter = shouldProcessFrontmatter
+			? this.getFrontmatter(yaml, frontmatterSchema)
+			: {};
 
-	return data;
-};
+		const headings = this.getHeadings(article);
 
-const getHeadings = (md: string) => {
-	const lines = md.split("\n");
-	const headingRegex = /^(#{1,6})\s*(.+)/;
-	const codeFenceRegex = /^```/;
+		// Render markdown to HTML
+		const html = this.markdownIt.render(article);
 
-	let inCodeFence = false;
-	const headings: MdHeading[] = [];
-	for (let line of lines) {
-		line = line.trim();
+		return { article, headings, html, frontmatter };
+	}
 
-		// Check for code fence
-		if (codeFenceRegex.test(line)) {
-			inCodeFence = !inCodeFence;
-			continue;
-		}
+	/** Extracts headings from markdown content, skipping code blocks. */
+	getHeadings(md: string) {
+		const lines = md.split("\n");
+		const headingRegex = /^(#{1,6})\s*(.+)/;
+		const codeFenceRegex = /^```/;
+
+		let inCodeFence = false;
+		const headings: MdHeading[] = [];
+		for (let line of lines) {
+			line = line.trim();
+
+			// Check for code fence
+			if (codeFenceRegex.test(line)) {
+				inCodeFence = !inCodeFence;
+				continue;
+			}
 
-		// Skip headings within code fences
-		if (inCodeFence) continue;
+			// Skip headings within code fences
+			if (inCodeFence) continue;
 
-		const match = headingRegex.exec(line);
-		if (match) {
-			const level = match.at(1)?.length;
-			const name = match.at(2);
+			const match = headingRegex.exec(line);
+			if (match) {
+				const level = match.at(1)?.length;
+				const name = match.at(2);
 
-			if (level && name) {
-				const id = name
-					.trim()
-					.toLowerCase()
-					.replace(/\s+/g, "-")
-					.replace(/[^\w-]+/g, "");
+				if (level && name) {
+					const id = name
+						.trim()
+						.toLowerCase()
+						.replace(/\s+/g, "-")
+						.replace(/[^\w-]+/g, "");
 
-				headings.push({ id, level, name });
+					headings.push({ id, level, name });
+				}
 			}
 		}
-	}
-
-	return headings;
-};
 
-const getFrontmatter = (yaml: string, frontmatterSchema: z.ZodSchema) => {
-	const loaded = load(yaml);
+		return headings;
+	}
 
-	const parsed = frontmatterSchema.safeParse(loaded);
+	/**
+	 * Extracts and validates frontmatter using the provided Zod schema.
+	 * If frontmatter is invalid, throws an error.
+	 */
+	getFrontmatter(yaml: string, frontmatterSchema: z.ZodSchema) {
+		const parsed = frontmatterSchema.safeParse(load(yaml));
+
+		if (!parsed.success) {
+			throw new Error(
+				`Invalid frontmatter, please correct or update schema:\n\n${JSON.stringify(
+					parsed.error.issues[0],
+					null,
+					4,
+				)}`,
+			);
+		}
 
-	if (!parsed.success) {
-		throw new Error(
-			`Invalid frontmatter, please correct or update schema:\n\n${JSON.stringify(
-				parsed.error.issues[0],
-				null,
-				4,
-			)}`,
-		);
+		return parsed.data;
 	}
-
-	return parsed.data;
-};
+}
diff --git a/packages/md/src/test.html b/packages/md/src/test.html
index 36c7665..5b64984 100644
--- a/packages/md/src/test.html
+++ b/packages/md/src/test.html
@@ -13,3 +13,6 @@ 

Head return a + b; }

<hello attr="hi">test</hello>
+
function hello_world()
+  print("Hello, World!")
+end