refactor(api): 🎨 Improve Markdown parsing with markdown-it instead of marked

This commit is contained in:
Jesse Wierzbinski 2024-04-22 11:02:09 -10:00
parent 436e805789
commit abc8f1ae16
No known key found for this signature in database
9 changed files with 71 additions and 65 deletions

BIN
bun.lockb

Binary file not shown.

View file

@ -24,7 +24,6 @@ import {
maybe,
oneOrMore,
} from "magic-regexp";
import { parse } from "marked";
import { db } from "~drizzle/db";
import {
Attachments,
@ -60,6 +59,11 @@ import {
userExtrasTemplate,
userRelations,
} from "./User";
import MarkdownIt from "markdown-it";
import markdownItTocDoneRight from "markdown-it-toc-done-right";
import markdownItContainer from "markdown-it-container";
import markdownItAnchor from "markdown-it-anchor";
import markdownItTaskLists from "@hackmd/markdown-it-task-lists";
export type Status = InferSelectModel<typeof Notes>;
@ -577,9 +581,9 @@ export const contentToHtml = async (
htmlContent = content["text/html"].content;
} else if (content["text/markdown"]) {
htmlContent = await sanitizeHtml(
await parse(content["text/markdown"].content),
await markdownParse(content["text/markdown"].content),
);
} else if (content["text/plain"]) {
} else if (content["text/plain"]?.content) {
// Split by newline and add <p> tags
htmlContent = content["text/plain"].content
.split("\n")
@ -605,6 +609,39 @@ export const contentToHtml = async (
return htmlContent;
};
/**
 * Renders a Markdown string to HTML using the renderer returned by
 * `getMarkdownRenderer`.
 *
 * No sanitization happens in this function: the result is exactly what the
 * renderer produces.
 *
 * @param content Markdown source text
 * @returns The rendered HTML
 */
export const markdownParse = async (content: string) => {
    const markdownRenderer = await getMarkdownRenderer();
    return markdownRenderer.render(content);
};
/**
 * Builds a markdown-it renderer with the plugin set used for rendering
 * Markdown content.
 *
 * A new renderer instance is constructed on every call. The function is
 * declared `async` although nothing inside it is awaited, so callers receive
 * a promise of the renderer.
 *
 * Configuration applied:
 * - raw HTML in the source is allowed (`html: true`) and bare URLs are turned
 *   into links (`linkify: true`);
 * - heading anchors, with an aria-hidden permalink that has an empty symbol
 *   and is placed before the heading text;
 * - a table of contents for heading levels 1-4, rendered as a `<ul>` with the
 *   `toc` / `toc-list` / `toc-item` / `toc-link` class names;
 * - task lists;
 * - the container plugin.
 *
 * NOTE(review): markdown-it-container is documented as taking the container
 * name as its first parameter; none is passed here. Confirm whether a named
 * container was intended.
 *
 * @returns The configured markdown-it instance
 */
export const getMarkdownRenderer = async () => {
    // Option objects are kept inline so their literal values stay
    // contextually typed against each plugin's option type.
    return MarkdownIt({
        html: true,
        linkify: true,
    })
        .use(markdownItAnchor, {
            permalink: markdownItAnchor.permalink.ariaHidden({
                symbol: "",
                placement: "before",
            }),
        })
        .use(markdownItTocDoneRight, {
            containerClass: "toc",
            level: [1, 2, 3, 4],
            listType: "ul",
            listClass: "toc-list",
            itemClass: "toc-item",
            linkClass: "toc-link",
        })
        .use(markdownItTaskLists)
        .use(markdownItContainer);
};
export const federateNote = async (note: Note) => {
for (const user of await note.getUsersToFederateTo()) {
// TODO: Add queue system

View file

@ -56,6 +56,7 @@
"@types/html-to-text": "^9.0.4",
"@types/ioredis": "^5.0.0",
"@types/jsonld": "^1.5.13",
"@types/markdown-it-container": "^2.0.10",
"@types/mime-types": "^2.1.4",
"@types/pg": "^8.11.5",
"bun-types": "latest",
@ -66,7 +67,9 @@
"typescript": "^5.3.2"
},
"dependencies": {
"@hackmd/markdown-it-task-lists": "^2.1.4",
"@json2csv/plainjs": "^7.0.6",
"@shikijs/markdown-it": "^1.3.0",
"blurhash": "^2.0.5",
"bullmq": "^5.7.1",
"chalk": "^5.3.0",
@ -86,7 +89,10 @@
"linkifyjs": "^4.1.3",
"log-manager": "workspace:*",
"magic-regexp": "^0.8.0",
"marked": "^12.0.1",
"markdown-it": "^14.1.0",
"markdown-it-anchor": "^8.6.7",
"markdown-it-container": "^4.0.0",
"markdown-it-toc-done-right": "^4.2.0",
"media-manager": "workspace:*",
"megalodon": "^10.0.0",
"meilisearch": "^0.38.0",

View file

@ -20,8 +20,10 @@ export const meta = applyConfig({
});
/**
 * Validation schema for the parsed request, built with the `z` schema builder.
 *
 * Email fields must be valid email addresses; password fields must be between
 * 3 and 100 characters long.
 *
 * NOTE(review): the credentials appear both as flat "user[email]" /
 * "user[password]" keys and as a nested `user` object. Confirm whether both
 * forms are meant to be required at the same time.
 */
export const schema = z.object({
"user[email]": z.string().email(),
"user[password]": z.string().max(100).min(3),
user: z.object({
email: z.string().email(),
password: z.string().max(100).min(3),
}),
});
/**
@ -29,8 +31,9 @@ export const schema = z.object({
*/
export default apiRoute<typeof meta, typeof schema>(
async (req, matchedRoute, extraData) => {
const { "user[email]": email, "user[password]": password } =
extraData.parsedRequest;
const {
user: { email, password },
} = extraData.parsedRequest;
const redirectToLogin = (error: string) =>
Response.redirect(

View file

@ -1,5 +1,4 @@
import { apiRoute, applyConfig } from "@api";
import { convertTextToHtml } from "@formatting";
import { errorResponse, jsonResponse } from "@response";
import { sanitizeHtml } from "@sanitization";
import { config } from "config-manager";
@ -11,10 +10,10 @@ import { LocalMediaBackend, S3MediaBackend } from "media-manager";
import { z } from "zod";
import { getUrl } from "~database/entities/Attachment";
import { parseEmojis } from "~database/entities/Emoji";
import { contentToHtml } from "~database/entities/Status";
import { findFirstUser, userToAPI } from "~database/entities/User";
import { db } from "~drizzle/db";
import { EmojiToUser, Users } from "~drizzle/schema";
import type { Source as APISource } from "~types/mastodon/source";
export const meta = applyConfig({
allowedMethods: ["PATCH"],
@ -125,20 +124,24 @@ export default apiRoute<typeof meta, typeof schema>(
return errorResponse("Bio contains blocked words", 422);
}
(self.source as APISource).note = sanitizedNote;
self.note = await convertTextToHtml(sanitizedNote);
self.source.note = sanitizedNote;
self.note = await contentToHtml({
"text/markdown": {
content: sanitizedNote,
},
});
}
if (source_privacy && self.source) {
(self.source as APISource).privacy = source_privacy;
self.source.privacy = source_privacy;
}
if (source_sensitive && self.source) {
(self.source as APISource).sensitive = source_sensitive;
self.source.sensitive = source_sensitive;
}
if (source_language && self.source) {
(self.source as APISource).language = source_language;
self.source.language = source_language;
}
if (avatar) {

View file

@ -1,7 +1,7 @@
import { apiRoute, applyConfig } from "@api";
import { dualLogger } from "@loggers";
import { jsonResponse } from "@response";
import { parse } from "marked";
import { getMarkdownRenderer } from "~database/entities/Status";
import { LogLevel } from "~packages/log-manager";
export const meta = applyConfig({
@ -19,7 +19,7 @@ export const meta = applyConfig({
export default apiRoute(async (req, matchedRoute, extraData) => {
const config = await extraData.configManager.getConfig();
let extended_description = parse(
let extended_description = (await getMarkdownRenderer()).render(
"This is a [Lysand](https://lysand.org) server with the default extended description.",
);
let lastModified = new Date(2024, 0, 0);
@ -30,13 +30,13 @@ export default apiRoute(async (req, matchedRoute, extraData) => {
if (await extended_description_file.exists()) {
extended_description =
(await parse(
(await getMarkdownRenderer()).render(
(await extended_description_file.text().catch(async (e) => {
await dualLogger.logError(LogLevel.ERROR, "Routes", e);
return "";
})) ||
"This is a [Lysand](https://lysand.org) server with the default extended description.",
)) || "";
) || "";
lastModified = new Date(extended_description_file.lastModified);
}

View file

@ -1,11 +1,8 @@
import { apiRoute, applyConfig, idValidator } from "@api";
import { errorResponse, jsonResponse } from "@response";
import { sanitizeHtml } from "@sanitization";
import { config } from "config-manager";
import ISO6391 from "iso-639-1";
import { parse } from "marked";
import { z } from "zod";
import type { StatusWithRelations } from "~database/entities/Status";
import { federateNote, parseTextMentions } from "~database/entities/Status";
import { db } from "~drizzle/db";
import { Note } from "~packages/database-interface/note";
@ -106,18 +103,6 @@ export default apiRoute<typeof meta, typeof schema>(
}
}
let sanitizedStatus: string;
if (content_type === "text/markdown") {
sanitizedStatus = await sanitizeHtml(parse(status ?? "") as string);
} else if (content_type === "text/x.misskeymarkdown") {
// Parse as MFM
// TODO: Parse as MFM
sanitizedStatus = await sanitizeHtml(parse(status ?? "") as string);
} else {
sanitizedStatus = await sanitizeHtml(status ?? "");
}
// Check if status body doesn't match filters
if (
config.filters.note_content.some((filter) => status?.match(filter))
@ -152,13 +137,13 @@ export default apiRoute<typeof meta, typeof schema>(
}
}
const mentions = await parseTextMentions(sanitizedStatus);
const mentions = await parseTextMentions(status ?? "");
const newNote = await Note.fromData(
user,
{
[content_type]: {
content: sanitizedStatus ?? "",
content: status ?? "",
},
},
visibility,

1
types.d.ts vendored
View file

@ -0,0 +1 @@
declare module "@hackmd/markdown-it-task-lists";

View file

@ -1,29 +0,0 @@
import { sanitizeHtml } from "@sanitization";
import linkifyHtml from "linkify-html";
import linkifyStr from "linkify-string";
import { parse } from "marked";
/**
 * Converts plaintext, MFM or Markdown to HTML.
 *
 * - `text/markdown`: the text is parsed as Markdown, the result is sanitized,
 *   and links in the sanitized HTML are linkified.
 * - `text/x.misskeymarkdown`: the text is returned unchanged (MFM parsing is
 *   not implemented yet).
 * - any other value, or no content type: the text is treated as plaintext;
 *   it is linkified and each line is wrapped in its own `<p>` element.
 *
 * @param text Text to convert
 * @param content_type Content type of the text (optional, defaults to plaintext)
 * @returns HTML
 */
export const convertTextToHtml = async (
    text: string,
    content_type?: string,
) => {
    switch (content_type) {
        case "text/markdown": {
            const renderedMarkdown = await parse(text);
            const safeHtml = await sanitizeHtml(renderedMarkdown);
            return linkifyHtml(safeHtml);
        }
        case "text/x.misskeymarkdown":
            // Parse as MFM
            // TODO: Implement MFM
            return text;
        default: {
            // Parse as plaintext
            const paragraphs: string[] = [];
            for (const line of linkifyStr(text).split("\n")) {
                paragraphs.push(`<p>${line}</p>`);
            }
            return paragraphs.join("\n");
        }
    }
};