Skip to content

Commit

Permalink
Add Support for Custom Language Grammars in processMarkdown (Fixes #11)…
Browse files Browse the repository at this point in the history
… (#12)

* feat: add support for custom language grammars in processMarkdown

- Introduced langConfig option to allow custom language grammars and aliases.
- Updated test cases to validate custom language support.

* Removed .skip from test

* Removed the frontmatter flag, already accounted for

* refactor to a class

* add changeset

* Added an alternative idea

* Updated `createProcessMarkdown` to fall back to defaults instead of merging

* refactor class

---------

Co-authored-by: rossrobino <rbrobino@gmail.com>
  • Loading branch information
jasonnathan and rossrobino authored Sep 23, 2024
1 parent 7d5f8ca commit e28349e
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 123 deletions.
7 changes: 7 additions & 0 deletions .changeset/tender-emus-try.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@robino/md": major
---

v1.0.0

- This package follows semantic versioning, so it will no longer have breaking changes on minor releases.
44 changes: 34 additions & 10 deletions packages/md/src/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
import { processMarkdown } from "./index.js";
import { MarkdownProcessor } from "./index.js";
import fs from "node:fs/promises";
import path from "node:path";
import langHtml from "shiki/langs/html.mjs";
import langLua from "shiki/langs/lua.mjs";
import langMd from "shiki/langs/md.mjs";
import langTsx from "shiki/langs/tsx.mjs";
import { expect, test } from "vitest";
import { z } from "zod";

// Shared processor used by the tests in this file. It registers the HTML,
// Markdown, TSX, and Lua grammars and maps the `ts` alias to the `tsx` grammar.
const processor = new MarkdownProcessor({
highlighter: {
langs: [langHtml, langMd, langTsx, langLua],
langAlias: {
ts: "tsx",
},
},
});

const frontmatterSchema = z
.object({
title: z.string(),
Expand Down Expand Up @@ -46,12 +59,17 @@ const add = (a: number, b: number): number => {
\`\`\`html
<hello attr="hi">test</hello>
\`\`\`
\`\`\`lua
function hello_world()
print("Hello, World!")
end
\`\`\`
`;

test("processMarkdown", () => {
const { article, headings, html, frontmatter } = processMarkdown({
md,
});
test("process", () => {
const { article, headings, html, frontmatter } = processor.process(md);

expect(article).toBeTypeOf("string");
expect(article.at(0)).toBe("-");
expect(headings).toBeInstanceOf(Array);
Expand All @@ -63,13 +81,19 @@ test("processMarkdown", () => {
});

test("with frontmatter", async () => {
const { frontmatter, html } = processMarkdown({
md,
frontmatterSchema,
});
const { frontmatter, html } = processor.process(md, frontmatterSchema);

await fs.writeFile(path.join(import.meta.dirname, "test.html"), html);
console.log(html);

expect(frontmatter.title).toBeTypeOf("string");
expect(frontmatter.description).toBeTypeOf("string");
expect(frontmatter.keywords).toBeInstanceOf(Array);
});

test("check Lua language support", async () => {
	// Render the shared fixture, which contains a fenced `lua` block.
	const rendered = processor.process(md).html;

	// Both the shiki wrapper and the per-language class on `<code>` must be
	// present for the Lua block to count as highlighted.
	const expectedMarkers = ['<pre class="shiki', '<code class="language-lua">'];

	for (const marker of expectedMarkers) {
		expect(rendered).toContain(marker);
	}
});
270 changes: 157 additions & 113 deletions packages/md/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,15 @@ import { fromHighlighter } from "@shikijs/markdown-it/core";
import { transformerMetaHighlight } from "@shikijs/transformers";
import { load } from "js-yaml";
import MarkdownIt from "markdown-it";
import type { Options as MarkdownItOptions } from "markdown-it";
import Anchor from "markdown-it-anchor";
import {
createCssVariablesTheme,
createHighlighterCoreSync,
type HighlighterGeneric,
createJavaScriptRegexEngine,
type HighlighterCoreOptions,
} from "shiki/core";
import langBash from "shiki/langs/bash.mjs";
import langCss from "shiki/langs/css.mjs";
import langHtml from "shiki/langs/html.mjs";
import langJson from "shiki/langs/json.mjs";
import langMd from "shiki/langs/md.mjs";
import langTsx from "shiki/langs/tsx.mjs";
import type { z } from "zod";

export interface MdHeading {
Expand Down Expand Up @@ -42,137 +38,185 @@ export interface MdData<T extends z.ZodTypeAny> {
frontmatter: z.infer<T>;
}

const mdIt = MarkdownIt({ typographer: true, linkify: true, html: true });

const variableTheme = createCssVariablesTheme();

const highlighter = createHighlighterCoreSync({
themes: [variableTheme],
langs: [langHtml, langCss, langTsx, langMd, langBash, langJson],
engine: createJavaScriptRegexEngine(),
langAlias: {
svelte: "html",
js: "tsx",
jsx: "tsx",
ts: "tsx",
},
}) as HighlighterGeneric<any, any>;

mdIt.use(
fromHighlighter(highlighter, {
theme: "css-variables",
transformers: [transformerMetaHighlight()],
}),
);

mdIt.use(Anchor, { permalink: Anchor.permalink.headerLink() });
/** Options accepted by the `MarkdownProcessor` constructor. */
export type MarkdownProcessorOptions = {
/**
* Options passed to the `MarkdownIt` constructor. When omitted, the
* defaults below are used instead; they are not merged with a provided
* object.
*
* @default
*
* ```ts
* {
* typographer: true,
* linkify: true,
* html: true,
* }
* ```
*/
markdownIt?: MarkdownItOptions;

/** Shiki highlighter options. */
highlighter: {
/**
* Shiki grammars to load, for example the default exports of
* `"shiki/langs/html.mjs"`. This list is required; the processor does
* not add any grammars of its own.
*/
langs: HighlighterCoreOptions<true>["langs"];

/**
* Optional aliases forwarded to the shiki highlighter, mapping an alias
* to the language it should be highlighted as, e.g. `{ ts: "tsx" }`.
*
* @default undefined
*/
langAlias?: HighlighterCoreOptions<true>["langAlias"];
};
};

/**
* - processes markdown strings, pass in a zod schema for frontmatter parsing
* - uses `shiki` to syntax highlight
* - Processes markdown strings with optional frontmatter and syntax highlighting.
* - Pass custom language options for syntax highlighting, import from `"shiki/langs/..."`.
*
* ```ts
* import { processMarkdown } from "robino/util/md";
* import { MarkdownProcessor } from "@robino/md";
* import langHtml from "shiki/langs/html.mjs";
*
* const frontmatterSchema = z.object({
* title: z.string(),
* description: z.string(),
* keywords: z.string().transform((val) => val.split(",").map((s) => s.trim().toLowerCase())),
* date: z.string(),
* }).strict();
*
* const frontmatterSchema = z
* .object({
* title: z.string(),
* description: z.string(),
* keywords: z
* .string()
* .transform((val) => val.split(",").map((s) => s.trim().toLowerCase())),
* date: z.string(),
* })
* .strict();
* const processor = new MarkdownProcessor({
* highlighter: {
* langs: [langHtml],
* langAlias: {
* svelte: "html",
* },
* },
* });
*
* const data = processMarkdown({ md, frontmatterSchema });
* const data = processor.process(md, frontmatterSchema);
* ```
* @param options
* @returns headings, article, frontmatter, html
*/
export const processMarkdown = <T extends z.ZodTypeAny>(options: {
/** String of markdown to process. */
md: string;

/** An optional zod schema */
frontmatterSchema?: T;
}) => {
const { md, frontmatterSchema } = options;

const split = md.split("---");

const yaml = split.at(1);

const shouldProcessFrontmatter = yaml && frontmatterSchema;
/**
 * Renders markdown to HTML with `shiki` syntax highlighting and heading
 * anchors, and optionally validates YAML frontmatter against a Zod schema.
 *
 * The highlighter is created in the constructor, so one instance can be
 * reused to process many documents.
 */
export class MarkdownProcessor {
	/** MarkdownIt instance used by `process` to render HTML. */
	markdownIt: MarkdownIt;

	/**
	 * @param options MarkdownIt options plus the shiki grammars and aliases to
	 *   load. The object is only read, never modified.
	 */
	constructor(options: MarkdownProcessorOptions) {
		// Fall back to the defaults without writing them into the caller's
		// `options` object, which may be shared between processors.
		const markdownItOptions: MarkdownItOptions = options.markdownIt ?? {
			typographer: true,
			linkify: true,
			html: true,
		};

		this.markdownIt = MarkdownIt(markdownItOptions);

		// Configure MarkdownIt with syntax highlighting and heading anchors
		this.markdownIt
			.use(
				fromHighlighter(
					// Create the highlighter core with provided languages
					createHighlighterCoreSync({
						themes: [createCssVariablesTheme()],
						langs: options.highlighter.langs,
						engine: createJavaScriptRegexEngine(),
						langAlias: options.highlighter.langAlias,
					}) as HighlighterGeneric<any, any>,
					{
						theme: "css-variables",
						transformers: [transformerMetaHighlight()],
					},
				),
			)
			.use(Anchor, { permalink: Anchor.permalink.headerLink() });
	}

	/**
	 * Processes a markdown document.
	 *
	 * Frontmatter is only looked for when `frontmatterSchema` is provided. It
	 * must be the first thing in the document: an opening `---` line, the
	 * YAML, and a closing `---` line. A `---` that appears elsewhere (for
	 * example a thematic break) is left as part of the article.
	 *
	 * @param md Markdown string to process.
	 * @param frontmatterSchema Optional Zod frontmatter schema
	 * @returns headings, article, frontmatter, html
	 * @throws Error when a frontmatter block is found and fails validation.
	 */
	process<T extends z.ZodTypeAny>(
		md: string,
		frontmatterSchema?: T,
	): MdData<T> {
		let yaml: string | undefined;
		let article = md;

		if (frontmatterSchema) {
			// The opening delimiter must be the first non-whitespace line.
			const opening = /^\s*---[ \t]*\r?\n/.exec(md);

			if (opening) {
				const rest = md.slice(opening[0].length);
				// The closing delimiter is the first line consisting only of `---`.
				const closing = /^---[ \t]*$/m.exec(rest);

				if (closing) {
					yaml = rest.slice(0, closing.index);
					// Keep everything after the closing delimiter, including its
					// line break, so the article matches the text that followed
					// the second `---` in the source.
					article = rest.slice(closing.index + closing[0].length);
				}
			}
		}

		const frontmatter =
			yaml !== undefined && frontmatterSchema
				? this.getFrontmatter(yaml, frontmatterSchema)
				: {};

		const headings = this.getHeadings(article);

		// Render markdown to HTML
		const html = this.markdownIt.render(article);

		return { article, headings, html, frontmatter };
	}

	/**
	 * Extracts ATX headings (`#` to `######` followed by whitespace and text)
	 * from markdown content, skipping fenced code blocks.
	 *
	 * Both backtick and tilde fences are recognised. A fence is closed only by
	 * a line made of the same fence character, at least as long as the opening
	 * run, so a shorter fence nested inside a longer one does not end it.
	 *
	 * NOTE(review): the `id` values are computed here, independently of the
	 * ids markdown-it-anchor writes into the HTML. Confirm both agree for
	 * headings that contain punctuation or are duplicated.
	 *
	 * @param md Markdown content to scan.
	 * @returns The headings in document order.
	 */
	getHeadings(md: string): MdHeading[] {
		// Whitespace after the hashes is required: `#tag` is not a heading.
		const headingRegex = /^(#{1,6})\s+(.+)/;
		const fenceRegex = /^(`{3,}|~{3,})/;

		// The fence run that opened the current code block, if any.
		let openFence: string | undefined;
		const headings: MdHeading[] = [];

		for (const rawLine of md.split("\n")) {
			const line = rawLine.trim();

			const marker = fenceRegex.exec(line)?.[1];
			if (marker !== undefined) {
				if (openFence === undefined) {
					openFence = marker;
				} else if (
					marker.charAt(0) === openFence.charAt(0) &&
					marker.length >= openFence.length &&
					// a closing fence carries no info string
					line.length === marker.length
				) {
					openFence = undefined;
				}
				continue;
			}

			// Skip headings within code fences
			if (openFence !== undefined) continue;

			const match = headingRegex.exec(line);
			const level = match?.at(1)?.length;
			const name = match?.at(2);

			if (level && name) {
				const id = name
					.trim()
					.toLowerCase()
					.replace(/\s+/g, "-")
					.replace(/[^\w-]+/g, "");

				headings.push({ id, level, name });
			}
		}

		return headings;
	}

	/**
	 * Parses a YAML string and validates it using the provided Zod schema.
	 *
	 * @param yaml YAML source of the frontmatter, without the `---` delimiters.
	 * @param frontmatterSchema Schema the parsed YAML must satisfy.
	 * @returns The parsed (and, if the schema transforms, transformed) data.
	 * @throws Error describing the first schema issue when validation fails.
	 */
	getFrontmatter(yaml: string, frontmatterSchema: z.ZodSchema) {
		const parsed = frontmatterSchema.safeParse(load(yaml));

		if (!parsed.success) {
			throw new Error(
				`Invalid frontmatter, please correct or update schema:\n\n${JSON.stringify(
					parsed.error.issues[0],
					null,
					4,
				)}`,
			);
		}

		return parsed.data;
	}
}
Loading

0 comments on commit e28349e

Please sign in to comment.