2025-06-15 04:38:20 +02:00
|
|
|
import type { ContentFormatSchema } from "@versia/sdk/schemas";
|
|
|
|
|
import { config } from "@versia-server/config";
|
2025-01-02 04:52:30 +01:00
|
|
|
import { htmlToText as htmlToTextLib } from "html-to-text";
|
2024-04-10 04:05:02 +02:00
|
|
|
import { lookup } from "mime-types";
|
2025-11-21 08:31:02 +01:00
|
|
|
import type { z } from "zod";
|
2023-11-05 00:59:55 +01:00
|
|
|
|
2024-11-02 00:43:33 +01:00
|
|
|
/**
 * Picks the preferred representation out of a multi-format content object.
 *
 * Formats are tried in a fixed preference order
 * (`text/x.misskeymarkdown`, `text/html`, `text/markdown`, `text/plain`);
 * the first one that has an entry in `content` wins. Any other MIME types
 * present in `content` are ignored.
 *
 * @param content - Content entries keyed by MIME type, or `null`/`undefined`.
 * @returns The chosen body together with its MIME type. When `content` is
 * missing or contains none of the ranked formats, an empty `text/plain`
 * result is returned.
 */
export const getBestContentType = (
    content?: z.infer<typeof ContentFormatSchema> | null,
): {
    content: string;
    format: string;
} => {
    if (!content) {
        return { content: "", format: "text/plain" };
    }

    // Ordered from most to least preferred.
    const bestFormatsRanked = [
        "text/x.misskeymarkdown",
        "text/html",
        "text/markdown",
        "text/plain",
    ];

    for (const format of bestFormatsRanked) {
        // An entry whose `content` is an empty string still counts as present:
        // only the entry object itself is checked for truthiness.
        const entry = content[format];

        if (entry) {
            return { content: entry.content, format };
        }
    }

    return { content: "", format: "text/plain" };
};
|
|
|
|
|
|
2024-12-16 23:57:21 +01:00
|
|
|
/**
 * Wraps a remote URL in a single-entry content-format object keyed by MIME type.
 *
 * The MIME type is resolved in this order:
 * 1. URLs under `https://api.dicebear.com/` are always tagged `image/svg+xml`,
 *    regardless of `contentType` or the file extension.
 * 2. The caller-supplied `contentType`, when it is a non-empty string.
 * 3. An extension-based lookup on the URL's path.
 * 4. `application/octet-stream` as the last resort.
 *
 * @param url - Remote location of the resource.
 * @param contentType - Optional MIME type already known to the caller.
 * @returns An object with exactly one key (the MIME type) whose value holds
 * the URL string and `remote: true`.
 */
export const urlToContentFormat = (
    url: URL,
    contentType?: string,
): z.infer<typeof ContentFormatSchema> | null => {
    if (url.href.startsWith("https://api.dicebear.com/")) {
        return {
            "image/svg+xml": {
                content: url.toString(),
                remote: true,
            },
        };
    }

    const mimeType =
        contentType ||
        // Only the path is inspected: feeding the whole URL to the lookup
        // lets a `#fragment` corrupt the extension and, for URLs without a
        // path, lets the host's TLD be mistaken for a file extension.
        lookup(url.pathname) ||
        "application/octet-stream";

    return {
        [mimeType]: {
            content: url.toString(),
            remote: true,
        },
    };
};
|
2024-05-12 03:27:28 +02:00
|
|
|
|
2025-01-28 18:06:33 +01:00
|
|
|
/**
 * Resolves the MIME type of a remote resource.
 *
 * An extension-based lookup on the URL's path is tried first. Only when that
 * yields nothing is a `HEAD` request sent (through the configured HTTP proxy)
 * and the response's `content-type` header used.
 *
 * @param url - Location of the resource.
 * @returns The detected MIME type. Falls back to `application/octet-stream`
 * when the header is missing/empty or the request fails; this function does
 * not reject on network errors.
 */
export const mimeLookup = async (url: URL): Promise<string> => {
    // Use the path only so that neither the query string, the fragment nor
    // the host name can be mistaken for (or corrupt) the file extension.
    const naiveLookup = lookup(url.pathname);

    if (naiveLookup) {
        return naiveLookup;
    }

    try {
        const response = await fetch(url, {
            method: "HEAD",
            // @ts-expect-error Proxy is a Bun-specific feature
            proxy: config.http.proxy_address,
        });

        // NOTE: the header value is returned verbatim, so it may carry
        // parameters such as `; charset=utf-8`.
        return (
            response.headers.get("content-type") || "application/octet-stream"
        );
    } catch {
        // Best effort: any failure is reported as a generic binary type.
        return "application/octet-stream";
    }
};
|
2025-01-02 04:49:36 +01:00
|
|
|
|
|
|
|
|
/**
 * Converts an HTML fragment to plain text using the `html-to-text` library.
 *
 * Anchor (`<a>`) elements are configured with `ignoreHref` and
 * `hideLinkHrefIfSameAsText`, so links are intended to be rendered by their
 * text rather than followed by their target URL.
 *
 * @param html - HTML markup to convert.
 * @returns The plain-text rendering produced by the library.
 */
export const htmlToText = (html: string): string => {
    return htmlToTextLib(html, {
        selectors: [
            {
                selector: "a",
                options: {
                    hideLinkHrefIfSameAsText: true,
                    ignoreHref: true,
                },
            },
        ],
    });
};
|