feat: Add Meilisearch integration, begin work on search endpoint

This commit is contained in:
Jesse Wierzbinski 2023-12-02 18:11:30 -10:00
parent d9f428eed6
commit aa0813fef8
No known key found for this signature in database
14 changed files with 605 additions and 7 deletions

BIN
bun.lockb

Binary file not shown.

77
cli.ts
View file

@ -3,6 +3,7 @@ import chalk from "chalk";
import { client } from "~database/datasource"; import { client } from "~database/datasource";
import { createNewLocalUser } from "~database/entities/User"; import { createNewLocalUser } from "~database/entities/User";
import Table from "cli-table"; import Table from "cli-table";
import { rebuildSearchIndexes, SonicIndexType } from "@meilisearch";
const args = process.argv; const args = process.argv;
@ -86,7 +87,20 @@ ${chalk.bold("Commands:")}
${chalk.bold("Example:")} ${chalk.bgGray( ${chalk.bold("Example:")} ${chalk.bgGray(
`bun cli note search hello` `bun cli note search hello`
)} )}
${alignDots(chalk.blue("index"), 24)} Manage user and status indexes
${alignDots(chalk.blue("rebuild"))} Rebuild the index
${alignDotsSmall(
chalk.green("batch-size")
)} The number of items to index at once (optional, default 100)
${alignDotsSmall(
chalk.yellow("--statuses")
)} Only rebuild the statuses index (optional)
${alignDotsSmall(
chalk.yellow("--users")
)} Only rebuild the users index (optional)
${chalk.bold("Example:")} ${chalk.bgGray(
`bun cli index rebuild --users 200`
)}
`; `;
if (args.length < 3) { if (args.length < 3) {
@ -504,10 +518,71 @@ switch (command) {
console.log(`Unknown command ${chalk.blue(command)}`); console.log(`Unknown command ${chalk.blue(command)}`);
break; break;
} }
break;
}
case "index": {
switch (args[3]) {
case "rebuild": {
const statuses = args.includes("--statuses");
const users = args.includes("--users");
const argsWithoutFlags = args.filter(
arg => !arg.startsWith("--")
);
const batchSize = Number(argsWithoutFlags[4]) || 100;
const neither = !statuses && !users;
if (statuses || neither) {
console.log(
`${chalk.yellow(``)} ${chalk.bold(
`Rebuilding Meilisearch index for statuses`
)}`
);
await rebuildSearchIndexes(
[SonicIndexType.Statuses],
batchSize
);
console.log(
`${chalk.green(``)} ${chalk.bold(
`Meilisearch index for statuses rebuilt`
)}`
);
}
if (users || neither) {
console.log(
`${chalk.yellow(``)} ${chalk.bold(
`Rebuilding Meilisearch index for users`
)}`
);
await rebuildSearchIndexes(
[SonicIndexType.Accounts],
batchSize
);
console.log(
`${chalk.green(``)} ${chalk.bold(
`Meilisearch index for users rebuilt`
)}`
);
}
break; break;
} }
default: default:
console.log(`Unknown command ${chalk.blue(command)}`); console.log(`Unknown command ${chalk.blue(command)}`);
break; break;
}
break;
}
default:
console.log(`Unknown command ${chalk.blue(command)}`);
break;
} }
process.exit(0);

View file

@ -18,6 +18,12 @@ password = ""
database = 1 database = 1
enabled = false enabled = false
[meilisearch]
host = "localhost"
port = 40007
api_key = ""
enabled = true
[http] [http]
base_url = "https://lysand.social" base_url = "https://lysand.social"
bind = "http://localhost" bind = "http://localhost"

View file

@ -13,7 +13,7 @@ export const notificationToAPI = async (
): Promise<APINotification> => { ): Promise<APINotification> => {
return { return {
account: userToAPI(notification.account), account: userToAPI(notification.account),
created_at: notification.createdAt.toISOString(), created_at: new Date(notification.createdAt).toISOString(),
id: notification.id, id: notification.id,
type: notification.type, type: notification.type,
status: notification.status status: notification.status

View file

@ -24,6 +24,10 @@ import type { APIStatus } from "~types/entities/status";
import { applicationToAPI } from "./Application"; import { applicationToAPI } from "./Application";
import { attachmentToAPI } from "./Attachment"; import { attachmentToAPI } from "./Attachment";
import type { APIAttachment } from "~types/entities/attachment"; import type { APIAttachment } from "~types/entities/attachment";
import { sanitizeHtml } from "@sanitization";
import { parse } from "marked";
import linkifyStr from "linkify-string";
import linkifyHtml from "linkify-html";
const config = getConfig(); const config = getConfig();
@ -303,7 +307,7 @@ export const createNewStatus = async (data: {
visibility: APIStatus["visibility"]; visibility: APIStatus["visibility"];
sensitive: boolean; sensitive: boolean;
spoiler_text: string; spoiler_text: string;
emojis: Emoji[]; emojis?: Emoji[];
content_type?: string; content_type?: string;
uri?: string; uri?: string;
mentions?: User[]; mentions?: User[];
@ -320,6 +324,8 @@ export const createNewStatus = async (data: {
let mentions = data.mentions || []; let mentions = data.mentions || [];
// TODO: Parse emojis
// Get list of mentioned users // Get list of mentioned users
if (mentions.length === 0) { if (mentions.length === 0) {
mentions = await client.user.findMany({ mentions = await client.user.findMany({
@ -335,17 +341,36 @@ export const createNewStatus = async (data: {
}); });
} }
let formattedContent;
// Get HTML version of content
if (data.content_type === "text/markdown") {
formattedContent = linkifyHtml(await sanitizeHtml(parse(data.content)));
} else if (data.content_type === "text/x.misskeymarkdown") {
// Parse as MFM
} else {
// Parse as plaintext
formattedContent = linkifyStr(data.content);
// Split by newline and add <p> tags
formattedContent = formattedContent
.split("\n")
.map(line => `<p>${line}</p>`)
.join("\n");
}
let status = await client.status.create({ let status = await client.status.create({
data: { data: {
authorId: data.account.id, authorId: data.account.id,
applicationId: data.application?.id, applicationId: data.application?.id,
content: data.content, content: formattedContent,
contentSource: data.content,
contentType: data.content_type, contentType: data.content_type,
visibility: data.visibility, visibility: data.visibility,
sensitive: data.sensitive, sensitive: data.sensitive,
spoilerText: data.spoiler_text, spoilerText: data.spoiler_text,
emojis: { emojis: {
connect: data.emojis.map(emoji => { connect: data.emojis?.map(emoji => {
return { return {
id: emoji.id, id: emoji.id,
}; };
@ -405,6 +430,102 @@ export const createNewStatus = async (data: {
return status; return status;
}; };
/**
 * Updates an existing status with new content and metadata.
 *
 * Resolves the mentioned users (unless `data.mentions` is supplied), renders
 * `data.content` to HTML according to `data.content_type`, and persists the
 * result. The unrendered `data.content` is stored in `contentSource`.
 *
 * @param status The status to edit; only its `id` is used to locate the record.
 * @param data New values for the status. `content` is the raw (unrendered) text.
 * @returns The updated status, loaded with `statusAndUserRelations`.
 */
export const editStatus = async (
    status: StatusWithRelations,
    data: {
        content: string;
        visibility?: APIStatus["visibility"];
        sensitive: boolean;
        spoiler_text: string;
        emojis?: Emoji[];
        content_type?: string;
        uri?: string;
        mentions?: User[];
        media_attachments?: string[];
    }
) => {
    // Get people mentioned in the content (match @username or @username@domain.com mentions).
    // The domain part accepts dot-separated labels so "@user@domain.com" is captured whole;
    // a trailing "." (end of sentence) is not included in the match.
    const mentionedPeople =
        data.content.match(
            /@[a-zA-Z0-9_]+(@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*)?/g
        ) ?? [];

    let mentions = data.mentions || [];

    // TODO: Parse emojis

    // Get list of mentioned users
    if (mentions.length === 0) {
        mentions = await client.user.findMany({
            where: {
                // "@user@domain".split("@") => ["", "user", "domain"]
                OR: mentionedPeople.map(person => ({
                    username: person.split("@")[1],
                    instance: {
                        base_url: person.split("@")[2],
                    },
                })),
            },
            include: userRelations,
        });
    }

    let formattedContent: string | undefined;

    // Get HTML version of content
    if (data.content_type === "text/markdown") {
        formattedContent = linkifyHtml(await sanitizeHtml(parse(data.content)));
    } else if (data.content_type === "text/x.misskeymarkdown") {
        // Parse as MFM
        // NOTE(review): not implemented yet, so formattedContent stays undefined here;
        // presumably the update below then leaves `content` untouched — confirm.
    } else {
        // Parse as plaintext
        formattedContent = linkifyStr(data.content);

        // Split by newline and add <p> tags
        formattedContent = formattedContent
            .split("\n")
            .map(line => `<p>${line}</p>`)
            .join("\n");
    }

    // NOTE(review): relations below use `connect`, which looks like it only adds links;
    // previously attached emojis/attachments/mentions are presumably kept — confirm.
    const newStatus = await client.status.update({
        where: {
            id: status.id,
        },
        data: {
            content: formattedContent,
            contentSource: data.content,
            contentType: data.content_type,
            visibility: data.visibility,
            sensitive: data.sensitive,
            spoilerText: data.spoiler_text,
            emojis: {
                connect: data.emojis?.map(emoji => {
                    return {
                        id: emoji.id,
                    };
                }),
            },
            attachments: data.media_attachments
                ? {
                      connect: data.media_attachments.map(attachment => {
                          return {
                              id: attachment,
                          };
                      }),
                  }
                : undefined,
            mentions: {
                connect: mentions.map(mention => {
                    return {
                        id: mention.id,
                    };
                }),
            },
        },
        include: statusAndUserRelations,
    });

    return newStatus;
};
export const isFavouritedBy = async (status: Status, user: User) => { export const isFavouritedBy = async (status: Status, user: User) => {
return !!(await client.like.findFirst({ return !!(await client.like.findFirst({
where: { where: {

View file

@ -12,6 +12,7 @@ import { client } from "~database/datasource";
import type { PrismaClientInitializationError } from "@prisma/client/runtime/library"; import type { PrismaClientInitializationError } from "@prisma/client/runtime/library";
import { HookTypes, Server } from "~plugins/types"; import { HookTypes, Server } from "~plugins/types";
import { initializeRedisCache } from "@redis"; import { initializeRedisCache } from "@redis";
import { connectMeili } from "@meilisearch";
const timeAtStart = performance.now(); const timeAtStart = performance.now();
const server = new Server(); const server = new Server();
@ -36,6 +37,10 @@ if (!(await requests_log.exists())) {
const redisCache = await initializeRedisCache(); const redisCache = await initializeRedisCache();
if (config.meilisearch.enabled) {
await connectMeili();
}
if (redisCache) { if (redisCache) {
client.$use(redisCache); client.$use(redisCache);
} }

View file

@ -84,7 +84,11 @@
"iso-639-1": "^3.1.0", "iso-639-1": "^3.1.0",
"isomorphic-dompurify": "^1.10.0", "isomorphic-dompurify": "^1.10.0",
"jsonld": "^8.3.1", "jsonld": "^8.3.1",
"linkify-html": "^4.1.3",
"linkify-string": "^4.1.3",
"linkifyjs": "^4.1.3",
"marked": "^9.1.2", "marked": "^9.1.2",
"meilisearch": "^0.36.0",
"prisma": "^5.6.0", "prisma": "^5.6.0",
"prisma-redis-middleware": "^4.8.0", "prisma-redis-middleware": "^4.8.0",
"semver": "^7.5.4", "semver": "^7.5.4",

View file

@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE "Status" ADD COLUMN "contentSource" TEXT NOT NULL DEFAULT '';

View file

@ -103,6 +103,7 @@ model Status {
isReblog Boolean isReblog Boolean
content String @default("") content String @default("")
contentType String @default("text/plain") contentType String @default("text/plain")
contentSource String @default("")
visibility String visibility String
inReplyToPost Status? @relation("StatusToStatusReply", fields: [inReplyToPostId], references: [id], onDelete: SetNull) inReplyToPost Status? @relation("StatusToStatusReply", fields: [inReplyToPostId], references: [id], onDelete: SetNull)
inReplyToPostId String? @db.Uuid inReplyToPostId String? @db.Uuid

View file

@ -1,8 +1,13 @@
import { applyConfig } from "@api"; import { applyConfig } from "@api";
import { getConfig } from "@config";
import { parseRequest } from "@request";
import { errorResponse, jsonResponse } from "@response"; import { errorResponse, jsonResponse } from "@response";
import { sanitizeHtml } from "@sanitization";
import type { MatchedRoute } from "bun"; import type { MatchedRoute } from "bun";
import { parse } from "marked";
import { client } from "~database/datasource"; import { client } from "~database/datasource";
import { import {
editStatus,
isViewableByUser, isViewableByUser,
statusAndUserRelations, statusAndUserRelations,
statusToAPI, statusToAPI,
@ -11,7 +16,7 @@ import { getFromRequest } from "~database/entities/User";
import type { APIRouteMeta } from "~types/api"; import type { APIRouteMeta } from "~types/api";
export const meta: APIRouteMeta = applyConfig({ export const meta: APIRouteMeta = applyConfig({
allowedMethods: ["GET", "DELETE"], allowedMethods: ["GET", "DELETE", "PUT"],
ratelimits: { ratelimits: {
max: 100, max: 100,
duration: 60, duration: 60,
@ -19,7 +24,7 @@ export const meta: APIRouteMeta = applyConfig({
route: "/api/v1/statuses/:id", route: "/api/v1/statuses/:id",
auth: { auth: {
required: false, required: false,
requiredOnMethods: ["DELETE"], requiredOnMethods: ["DELETE", "PUT"],
}, },
}); });
@ -39,6 +44,8 @@ export default async (
include: statusAndUserRelations, include: statusAndUserRelations,
}); });
const config = getConfig();
// Check if user is authorized to view this status (if it's private) // Check if user is authorized to view this status (if it's private)
if (!status || !isViewableByUser(status, user)) if (!status || !isViewableByUser(status, user))
return errorResponse("Record not found", 404); return errorResponse("Record not found", 404);
@ -69,6 +76,150 @@ export default async (
}, },
200 200
); );
} else if (req.method == "PUT") {
if (status.authorId !== user?.id) {
return errorResponse("Unauthorized", 401);
}
const {
status: statusText,
content_type,
"poll[expires_in]": expires_in,
"poll[options][]": options,
"media_ids[]": media_ids,
spoiler_text,
sensitive,
} = await parseRequest<{
status?: string;
spoiler_text?: string;
sensitive?: boolean;
language?: string;
content_type?: string;
"media_ids[]"?: string[];
"poll[options][]"?: string[];
"poll[expires_in]"?: number;
"poll[multiple]"?: boolean;
"poll[hide_totals]"?: boolean;
}>(req);
// TODO: Add Poll support
// Validate status
if (!statusText && !(media_ids && media_ids.length > 0)) {
return errorResponse(
"Status is required unless media is attached",
422
);
}
// Validate media_ids
if (media_ids && !Array.isArray(media_ids)) {
return errorResponse("Media IDs must be an array", 422);
}
// Validate poll options
if (options && !Array.isArray(options)) {
return errorResponse("Poll options must be an array", 422);
}
if (options && options.length > 4) {
return errorResponse("Poll options must be less than 5", 422);
}
if (media_ids && media_ids.length > 0) {
// Disallow poll
if (options) {
return errorResponse("Cannot attach poll to media", 422);
}
if (media_ids.length > 4) {
return errorResponse("Media IDs must be less than 5", 422);
}
}
if (options && options.length > config.validation.max_poll_options) {
return errorResponse(
`Poll options must be less than ${config.validation.max_poll_options}`,
422
);
}
if (
options &&
options.some(
option => option.length > config.validation.max_poll_option_size
)
) {
return errorResponse(
`Poll options must be less than ${config.validation.max_poll_option_size} characters`,
422
);
}
if (expires_in && expires_in < config.validation.min_poll_duration) {
return errorResponse(
`Poll duration must be greater than ${config.validation.min_poll_duration} seconds`,
422
);
}
if (expires_in && expires_in > config.validation.max_poll_duration) {
return errorResponse(
`Poll duration must be less than ${config.validation.max_poll_duration} seconds`,
422
);
}
let sanitizedStatus: string;
if (content_type === "text/markdown") {
sanitizedStatus = await sanitizeHtml(parse(statusText ?? ""));
} else if (content_type === "text/x.misskeymarkdown") {
// Parse as MFM
// TODO: Parse as MFM
sanitizedStatus = await sanitizeHtml(parse(statusText ?? ""));
} else {
sanitizedStatus = await sanitizeHtml(statusText ?? "");
}
if (sanitizedStatus.length > config.validation.max_note_size) {
return errorResponse(
`Status must be less than ${config.validation.max_note_size} characters`,
400
);
}
// Check if status body doesnt match filters
if (
config.filters.note_filters.some(
filter => statusText?.match(filter)
)
) {
return errorResponse("Status contains blocked words", 422);
}
// Check if media attachments are all valid
const foundAttachments = await client.attachment.findMany({
where: {
id: {
in: media_ids ?? [],
},
},
});
if (foundAttachments.length !== (media_ids ?? []).length) {
return errorResponse("Invalid media IDs", 422);
}
// Update status
const newStatus = await editStatus(status, {
content: sanitizedStatus,
content_type,
media_attachments: media_ids,
spoiler_text: spoiler_text ?? "",
sensitive: sensitive ?? false,
});
return jsonResponse(await statusToAPI(newStatus, user));
} }
return jsonResponse({}); return jsonResponse({});

View file

@ -0,0 +1,49 @@
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
import { applyConfig } from "@api";
import { errorResponse, jsonResponse } from "@response";
import type { MatchedRoute } from "bun";
import { client } from "~database/datasource";
import { createLike } from "~database/entities/Like";
import {
isViewableByUser,
statusAndUserRelations,
statusToAPI,
} from "~database/entities/Status";
import { getFromRequest } from "~database/entities/User";
import type { APIRouteMeta } from "~types/api";
import type { APIStatus } from "~types/entities/status";
/**
 * Route configuration for the status source endpoint
 * (`/api/v1/statuses/:id/source`): GET only, authentication required.
 */
export const meta: APIRouteMeta = applyConfig({
allowedMethods: ["GET"],
// NOTE(review): presumably 100 requests per 60-second window — confirm units
ratelimits: {
max: 100,
duration: 60,
},
route: "/api/v1/statuses/:id/source",
auth: {
required: true,
},
});
/**
 * Get the source of a status
 *
 * Responds with the unrendered text of the status (as stored in
 * `contentSource`) together with its id and spoiler text.
 * Responds 401 when the request is unauthenticated and 404 when the status
 * does not exist or is not viewable by the requesting user.
 */
export default async (
    req: Request,
    matchedRoute: MatchedRoute
): Promise<Response> => {
    const id = matchedRoute.params.id;

    const { user } = await getFromRequest(req);

    if (!user) return errorResponse("Unauthorized", 401);

    const status = await client.status.findUnique({
        where: { id },
        include: statusAndUserRelations,
    });

    // Check if user is authorized to view this status (if it's private)
    if (!status || !isViewableByUser(status, user))
        return errorResponse("Record not found", 404);

    // Previously the handler fell off the end here and resolved to undefined
    return jsonResponse({
        id: status.id,
        text: status.contentSource,
        spoiler_text: status.spoilerText,
    });
};

View file

@ -0,0 +1,60 @@
import { applyConfig } from "@api";
import { parseRequest } from "@request";
import { errorResponse, jsonResponse } from "@response";
import { getFromRequest } from "~database/entities/User";
import type { APIRouteMeta } from "~types/api";
/**
 * Route configuration for the search endpoint (`/api/v2/search`): GET only.
 * Authentication is optional; the `read:search` OAuth permission is listed
 * for authenticated requests.
 */
export const meta: APIRouteMeta = applyConfig({
allowedMethods: ["GET"],
// NOTE(review): presumably 10 requests per 60-second window — confirm units
ratelimits: {
max: 10,
duration: 60,
},
route: "/api/v2/search",
auth: {
required: false,
oauthPermissions: ["read:search"],
},
});
/**
 * Search for accounts, statuses and hashtags
 *
 * Work in progress: the request parameters are parsed and the
 * authentication rule for `resolve`/`offset` is enforced, but no search is
 * performed yet and every result list is returned empty.
 */
export default async (req: Request): Promise<Response> => {
    const { user } = await getFromRequest(req);

    // The full parameter shape is kept in the type; only `resolve` and
    // `offset` are read so far.
    const { resolve, offset } = await parseRequest<{
        q?: string;
        type?: string;
        resolve?: boolean;
        following?: boolean;
        account_id?: string;
        max_id?: string;
        min_id?: string;
        limit?: number;
        offset?: number;
    }>(req);

    // `resolve` and `offset` are only available to authenticated users
    if (!user && (resolve || offset)) {
        return errorResponse(
            "Cannot use resolve or offset without being authenticated",
            401
        );
    }

    return jsonResponse({
        accounts: [],
        statuses: [],
        hashtags: [],
    });
};

View file

@ -25,6 +25,13 @@ export interface ConfigType {
}; };
}; };
meilisearch: {
host: string;
port: number;
api_key: string;
enabled: boolean;
};
http: { http: {
base_url: string; base_url: string;
bind: string; bind: string;
@ -176,6 +183,12 @@ export const configDefaults: ConfigType = {
enabled: false, enabled: false,
}, },
}, },
meilisearch: {
host: "localhost",
port: 1491,
api_key: "",
enabled: false,
},
instance: { instance: {
banner: "", banner: "",
description: "", description: "",

111
utils/meilisearch.ts Normal file
View file

@ -0,0 +1,111 @@
import { getConfig } from "@config";
import chalk from "chalk";
import { client } from "~database/datasource";
import { Meilisearch } from "meilisearch";
const config = getConfig();
/**
 * Shared Meilisearch client, created at module load from the `meilisearch`
 * section of the config. The host is built as `<host>:<port>`.
 */
export const meilisearch = new Meilisearch({
host: `${config.meilisearch.host}:${config.meilisearch.port}`,
apiKey: config.meilisearch.api_key,
});
/**
 * Verifies that the configured Meilisearch instance is reachable.
 *
 * Does nothing when Meilisearch is disabled in the config. Otherwise logs a
 * success message when the health check passes, or logs an error and exits
 * the process with code 1 when it does not.
 */
export const connectMeili = async () => {
    const { enabled } = config.meilisearch;
    if (!enabled) return;

    const healthy = await meilisearch.isHealthy();

    if (!healthy) {
        console.error(
            `${chalk.red(``)} ${chalk.bold(
                `Error while connecting to Meilisearch`
            )}`
        );
        process.exit(1);
    } else {
        console.log(
            `${chalk.green(``)} ${chalk.bold(`Connected to Meilisearch`)}`
        );
    }
};
/**
 * Names of the search indexes. The string values are used as the index
 * identifiers passed to the Meilisearch client.
 *
 * NOTE(review): the name refers to Sonic although this module talks to
 * Meilisearch — presumably a leftover; renaming would affect importers.
 */
export enum SonicIndexType {
Accounts = "accounts",
Statuses = "statuses",
}
/**
 * Fetches the n-th page of accounts from the database for indexing.
 *
 * @param n Zero-based page number.
 * @param batchSize Number of accounts per page (default 1000).
 * @returns The selected fields (`id`, `username`, `displayName`, `note`) of
 * each account on that page.
 */
export const getNthDatabaseAccountBatch = (
    n: number,
    batchSize = 1000
): Promise<Record<string, string>[]> => {
    return client.user.findMany({
        skip: n * batchSize,
        take: batchSize,
        // Offset paging needs a deterministic order; without it consecutive
        // pages may overlap or miss rows.
        orderBy: {
            id: "asc",
        },
        select: {
            id: true,
            username: true,
            displayName: true,
            note: true,
        },
    });
};
/**
 * Fetches the n-th page of statuses from the database for indexing.
 *
 * @param n Zero-based page number.
 * @param batchSize Number of statuses per page (default 1000).
 * @returns The selected fields (`id`, `authorId`, `content`) of each status
 * on that page.
 */
export const getNthDatabaseStatusBatch = (
    n: number,
    batchSize = 1000
): Promise<Record<string, string>[]> => {
    return client.status.findMany({
        skip: n * batchSize,
        take: batchSize,
        // Offset paging needs a deterministic order; without it consecutive
        // pages may overlap or miss rows.
        orderBy: {
            id: "asc",
        },
        select: {
            id: true,
            authorId: true,
            content: true,
        },
    });
};
/**
 * Pushes database rows into the requested Meilisearch indexes, one batch at
 * a time, logging a percentage after each batch is fetched.
 *
 * Accounts are processed before statuses when both are requested.
 *
 * @param indexes Which indexes to fill.
 * @param batchSize Number of rows fetched and submitted per batch (default 100).
 */
export const rebuildSearchIndexes = async (
    indexes: SonicIndexType[],
    batchSize = 100
) => {
    // Walks every batch of one index and submits it to Meilisearch
    const syncIndex = async (
        index: SonicIndexType,
        total: number,
        fetchBatch: (
            n: number,
            batchSize: number
        ) => Promise<Record<string, string>[]>
    ) => {
        // Fractional on purpose: the loop runs while the page number is below it
        const batches = total / batchSize;

        for (let page = 0; page < batches; page++) {
            const documents = await fetchBatch(page, batchSize);

            const percent = Math.round((page / batches) * 100);
            console.log(`${chalk.green(``)} ${percent}%`);

            // Sync with Meilisearch
            await meilisearch.index(index).addDocuments(documents);
        }

        console.log(`${chalk.green(``)} ${chalk.bold(`Done!`)}`);
    };

    if (indexes.includes(SonicIndexType.Accounts)) {
        await syncIndex(
            SonicIndexType.Accounts,
            await client.user.count(),
            getNthDatabaseAccountBatch
        );
    }

    if (indexes.includes(SonicIndexType.Statuses)) {
        await syncIndex(
            SonicIndexType.Statuses,
            await client.status.count(),
            getNthDatabaseStatusBatch
        );
    }
};