refactor(api): 🎨 Improve Markdown parsing with markdown-it instead of marked

This commit is contained in:
Jesse Wierzbinski 2024-04-22 11:02:09 -10:00
parent 436e805789
commit abc8f1ae16
No known key found for this signature in database
9 changed files with 71 additions and 65 deletions

BIN
bun.lockb

Binary file not shown.

View file

@ -24,7 +24,6 @@ import {
maybe, maybe,
oneOrMore, oneOrMore,
} from "magic-regexp"; } from "magic-regexp";
import { parse } from "marked";
import { db } from "~drizzle/db"; import { db } from "~drizzle/db";
import { import {
Attachments, Attachments,
@ -60,6 +59,11 @@ import {
userExtrasTemplate, userExtrasTemplate,
userRelations, userRelations,
} from "./User"; } from "./User";
import MarkdownIt from "markdown-it";
import markdownItTocDoneRight from "markdown-it-toc-done-right";
import markdownItContainer from "markdown-it-container";
import markdownItAnchor from "markdown-it-anchor";
import markdownItTaskLists from "@hackmd/markdown-it-task-lists";
export type Status = InferSelectModel<typeof Notes>; export type Status = InferSelectModel<typeof Notes>;
@ -577,9 +581,9 @@ export const contentToHtml = async (
htmlContent = content["text/html"].content; htmlContent = content["text/html"].content;
} else if (content["text/markdown"]) { } else if (content["text/markdown"]) {
htmlContent = await sanitizeHtml( htmlContent = await sanitizeHtml(
await parse(content["text/markdown"].content), await markdownParse(content["text/markdown"].content),
); );
} else if (content["text/plain"]) { } else if (content["text/plain"]?.content) {
// Split by newline and add <p> tags // Split by newline and add <p> tags
htmlContent = content["text/plain"].content htmlContent = content["text/plain"].content
.split("\n") .split("\n")
@ -605,6 +609,39 @@ export const contentToHtml = async (
return htmlContent; return htmlContent;
}; };
/**
 * Renders a Markdown string to HTML with the renderer produced by
 * `getMarkdownRenderer`.
 *
 * The output is returned exactly as rendered; no sanitization happens here.
 *
 * @param content Markdown source text
 * @returns Promise resolving to the rendered HTML string
 */
export const markdownParse = async (content: string) => {
    const markdownRenderer = await getMarkdownRenderer();
    return markdownRenderer.render(content);
};
/**
 * Builds a markdown-it renderer with the `html` and `linkify` options enabled
 * and the anchor, table-of-contents, task-list and container plugins
 * registered, in that order.
 *
 * A fresh renderer is constructed on every call; nothing is cached.
 *
 * NOTE(review): markdown-it-container is registered without a container name.
 * The plugin's documented usage passes a name as the second argument, so this
 * registration may have no visible effect — confirm the intent.
 *
 * @returns Promise resolving to the configured markdown-it instance
 */
export const getMarkdownRenderer = async () =>
    MarkdownIt({
        html: true,
        linkify: true,
    })
        // Heading anchors: aria-hidden permalink with an empty symbol, placed before the heading
        .use(markdownItAnchor, {
            permalink: markdownItAnchor.permalink.ariaHidden({
                symbol: "",
                placement: "before",
            }),
        })
        // Table of contents built from heading levels 1-4, rendered as an unordered list
        .use(markdownItTocDoneRight, {
            containerClass: "toc",
            level: [1, 2, 3, 4],
            listType: "ul",
            listClass: "toc-list",
            itemClass: "toc-item",
            linkClass: "toc-link",
        })
        .use(markdownItTaskLists)
        .use(markdownItContainer);
export const federateNote = async (note: Note) => { export const federateNote = async (note: Note) => {
for (const user of await note.getUsersToFederateTo()) { for (const user of await note.getUsersToFederateTo()) {
// TODO: Add queue system // TODO: Add queue system

View file

@ -56,6 +56,7 @@
"@types/html-to-text": "^9.0.4", "@types/html-to-text": "^9.0.4",
"@types/ioredis": "^5.0.0", "@types/ioredis": "^5.0.0",
"@types/jsonld": "^1.5.13", "@types/jsonld": "^1.5.13",
"@types/markdown-it-container": "^2.0.10",
"@types/mime-types": "^2.1.4", "@types/mime-types": "^2.1.4",
"@types/pg": "^8.11.5", "@types/pg": "^8.11.5",
"bun-types": "latest", "bun-types": "latest",
@ -66,7 +67,9 @@
"typescript": "^5.3.2" "typescript": "^5.3.2"
}, },
"dependencies": { "dependencies": {
"@hackmd/markdown-it-task-lists": "^2.1.4",
"@json2csv/plainjs": "^7.0.6", "@json2csv/plainjs": "^7.0.6",
"@shikijs/markdown-it": "^1.3.0",
"blurhash": "^2.0.5", "blurhash": "^2.0.5",
"bullmq": "^5.7.1", "bullmq": "^5.7.1",
"chalk": "^5.3.0", "chalk": "^5.3.0",
@ -86,7 +89,10 @@
"linkifyjs": "^4.1.3", "linkifyjs": "^4.1.3",
"log-manager": "workspace:*", "log-manager": "workspace:*",
"magic-regexp": "^0.8.0", "magic-regexp": "^0.8.0",
"marked": "^12.0.1", "markdown-it": "^14.1.0",
"markdown-it-anchor": "^8.6.7",
"markdown-it-container": "^4.0.0",
"markdown-it-toc-done-right": "^4.2.0",
"media-manager": "workspace:*", "media-manager": "workspace:*",
"megalodon": "^10.0.0", "megalodon": "^10.0.0",
"meilisearch": "^0.38.0", "meilisearch": "^0.38.0",

View file

@ -20,8 +20,10 @@ export const meta = applyConfig({
}); });
export const schema = z.object({ export const schema = z.object({
"user[email]": z.string().email(), user: z.object({
"user[password]": z.string().max(100).min(3), email: z.string().email(),
password: z.string().max(100).min(3),
}),
}); });
/** /**
@ -29,8 +31,9 @@ export const schema = z.object({
*/ */
export default apiRoute<typeof meta, typeof schema>( export default apiRoute<typeof meta, typeof schema>(
async (req, matchedRoute, extraData) => { async (req, matchedRoute, extraData) => {
const { "user[email]": email, "user[password]": password } = const {
extraData.parsedRequest; user: { email, password },
} = extraData.parsedRequest;
const redirectToLogin = (error: string) => const redirectToLogin = (error: string) =>
Response.redirect( Response.redirect(

View file

@ -1,5 +1,4 @@
import { apiRoute, applyConfig } from "@api"; import { apiRoute, applyConfig } from "@api";
import { convertTextToHtml } from "@formatting";
import { errorResponse, jsonResponse } from "@response"; import { errorResponse, jsonResponse } from "@response";
import { sanitizeHtml } from "@sanitization"; import { sanitizeHtml } from "@sanitization";
import { config } from "config-manager"; import { config } from "config-manager";
@ -11,10 +10,10 @@ import { LocalMediaBackend, S3MediaBackend } from "media-manager";
import { z } from "zod"; import { z } from "zod";
import { getUrl } from "~database/entities/Attachment"; import { getUrl } from "~database/entities/Attachment";
import { parseEmojis } from "~database/entities/Emoji"; import { parseEmojis } from "~database/entities/Emoji";
import { contentToHtml } from "~database/entities/Status";
import { findFirstUser, userToAPI } from "~database/entities/User"; import { findFirstUser, userToAPI } from "~database/entities/User";
import { db } from "~drizzle/db"; import { db } from "~drizzle/db";
import { EmojiToUser, Users } from "~drizzle/schema"; import { EmojiToUser, Users } from "~drizzle/schema";
import type { Source as APISource } from "~types/mastodon/source";
export const meta = applyConfig({ export const meta = applyConfig({
allowedMethods: ["PATCH"], allowedMethods: ["PATCH"],
@ -125,20 +124,24 @@ export default apiRoute<typeof meta, typeof schema>(
return errorResponse("Bio contains blocked words", 422); return errorResponse("Bio contains blocked words", 422);
} }
(self.source as APISource).note = sanitizedNote; self.source.note = sanitizedNote;
self.note = await convertTextToHtml(sanitizedNote); self.note = await contentToHtml({
"text/markdown": {
content: sanitizedNote,
},
});
} }
if (source_privacy && self.source) { if (source_privacy && self.source) {
(self.source as APISource).privacy = source_privacy; self.source.privacy = source_privacy;
} }
if (source_sensitive && self.source) { if (source_sensitive && self.source) {
(self.source as APISource).sensitive = source_sensitive; self.source.sensitive = source_sensitive;
} }
if (source_language && self.source) { if (source_language && self.source) {
(self.source as APISource).language = source_language; self.source.language = source_language;
} }
if (avatar) { if (avatar) {

View file

@ -1,7 +1,7 @@
import { apiRoute, applyConfig } from "@api"; import { apiRoute, applyConfig } from "@api";
import { dualLogger } from "@loggers"; import { dualLogger } from "@loggers";
import { jsonResponse } from "@response"; import { jsonResponse } from "@response";
import { parse } from "marked"; import { getMarkdownRenderer } from "~database/entities/Status";
import { LogLevel } from "~packages/log-manager"; import { LogLevel } from "~packages/log-manager";
export const meta = applyConfig({ export const meta = applyConfig({
@ -19,7 +19,7 @@ export const meta = applyConfig({
export default apiRoute(async (req, matchedRoute, extraData) => { export default apiRoute(async (req, matchedRoute, extraData) => {
const config = await extraData.configManager.getConfig(); const config = await extraData.configManager.getConfig();
let extended_description = parse( let extended_description = (await getMarkdownRenderer()).render(
"This is a [Lysand](https://lysand.org) server with the default extended description.", "This is a [Lysand](https://lysand.org) server with the default extended description.",
); );
let lastModified = new Date(2024, 0, 0); let lastModified = new Date(2024, 0, 0);
@ -30,13 +30,13 @@ export default apiRoute(async (req, matchedRoute, extraData) => {
if (await extended_description_file.exists()) { if (await extended_description_file.exists()) {
extended_description = extended_description =
(await parse( (await getMarkdownRenderer()).render(
(await extended_description_file.text().catch(async (e) => { (await extended_description_file.text().catch(async (e) => {
await dualLogger.logError(LogLevel.ERROR, "Routes", e); await dualLogger.logError(LogLevel.ERROR, "Routes", e);
return ""; return "";
})) || })) ||
"This is a [Lysand](https://lysand.org) server with the default extended description.", "This is a [Lysand](https://lysand.org) server with the default extended description.",
)) || ""; ) || "";
lastModified = new Date(extended_description_file.lastModified); lastModified = new Date(extended_description_file.lastModified);
} }

View file

@ -1,11 +1,8 @@
import { apiRoute, applyConfig, idValidator } from "@api"; import { apiRoute, applyConfig, idValidator } from "@api";
import { errorResponse, jsonResponse } from "@response"; import { errorResponse, jsonResponse } from "@response";
import { sanitizeHtml } from "@sanitization";
import { config } from "config-manager"; import { config } from "config-manager";
import ISO6391 from "iso-639-1"; import ISO6391 from "iso-639-1";
import { parse } from "marked";
import { z } from "zod"; import { z } from "zod";
import type { StatusWithRelations } from "~database/entities/Status";
import { federateNote, parseTextMentions } from "~database/entities/Status"; import { federateNote, parseTextMentions } from "~database/entities/Status";
import { db } from "~drizzle/db"; import { db } from "~drizzle/db";
import { Note } from "~packages/database-interface/note"; import { Note } from "~packages/database-interface/note";
@ -106,18 +103,6 @@ export default apiRoute<typeof meta, typeof schema>(
} }
} }
let sanitizedStatus: string;
if (content_type === "text/markdown") {
sanitizedStatus = await sanitizeHtml(parse(status ?? "") as string);
} else if (content_type === "text/x.misskeymarkdown") {
// Parse as MFM
// TODO: Parse as MFM
sanitizedStatus = await sanitizeHtml(parse(status ?? "") as string);
} else {
sanitizedStatus = await sanitizeHtml(status ?? "");
}
// Check if status body doesn't match filters // Check if status body doesn't match filters
if ( if (
config.filters.note_content.some((filter) => status?.match(filter)) config.filters.note_content.some((filter) => status?.match(filter))
@ -152,13 +137,13 @@ export default apiRoute<typeof meta, typeof schema>(
} }
} }
const mentions = await parseTextMentions(sanitizedStatus); const mentions = await parseTextMentions(status ?? "");
const newNote = await Note.fromData( const newNote = await Note.fromData(
user, user,
{ {
[content_type]: { [content_type]: {
content: sanitizedStatus ?? "", content: status ?? "",
}, },
}, },
visibility, visibility,

1
types.d.ts vendored
View file

@ -0,0 +1 @@
declare module "@hackmd/markdown-it-task-lists";

View file

@ -1,29 +0,0 @@
import { sanitizeHtml } from "@sanitization";
import linkifyHtml from "linkify-html";
import linkifyStr from "linkify-string";
import { parse } from "marked";
/**
 * Converts plaintext, MFM or Markdown to HTML
 *
 * - `text/markdown`: parsed with `marked`, sanitized, then linkified.
 * - `text/x.misskeymarkdown`: MFM parsing is not implemented yet, so the text
 *   is only passed through the HTML sanitizer instead of being returned raw.
 * - anything else: treated as plaintext, linkified, and each line wrapped in
 *   its own `<p>` element.
 *
 * @param text Text to convert
 * @param content_type Content type of the text (optional, defaults to plaintext)
 * @returns HTML
 */
export const convertTextToHtml = async (
    text: string,
    content_type?: string,
) => {
    if (content_type === "text/markdown") {
        return linkifyHtml(await sanitizeHtml(await parse(text)));
    }

    if (content_type === "text/x.misskeymarkdown") {
        // Parse as MFM
        // TODO: Implement MFM
        // Until MFM is implemented, never hand caller-supplied text back as
        // HTML unmodified: run it through the sanitizer so markup embedded in
        // the input cannot reach the page as-is.
        return await sanitizeHtml(text);
    }

    // Parse as plaintext: linkify first, then wrap every line in a paragraph
    return linkifyStr(text)
        .split("\n")
        .map((line) => `<p>${line}</p>`)
        .join("\n");
};