mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-14 05:14:54 +00:00
feat(server): workspace file embedding & ignored docs model impl (#11804)
fix AI-30 fix AI-31
This commit is contained in:
@@ -105,3 +105,17 @@ export type FileChunkSimilarity = ChunkSimilarity & {
|
||||
export type DocChunkSimilarity = ChunkSimilarity & {
|
||||
docId: string;
|
||||
};
|
||||
|
||||
/**
 * Zod schema for the caller-supplied metadata of a workspace file:
 * its name, MIME type and size.
 *
 * NOTE(review): `size` is presumably a byte count — confirm against callers.
 */
export const CopilotWorkspaceFileSchema = z.object({
  fileName: z.string(),
  mimeType: z.string(),
  size: z.number(),
});

/**
 * A workspace file record: the validated metadata from
 * {@link CopilotWorkspaceFileSchema} plus the identifying and bookkeeping
 * fields (`workspaceId`, `fileId`, `createdAt`).
 */
export type CopilotWorkspaceFile = z.infer<typeof CopilotWorkspaceFileSchema> & {
  workspaceId: string;
  fileId: string;
  createdAt: Date;
};
|
||||
|
||||
@@ -177,12 +177,12 @@ export class CopilotContextModel extends BaseModel {
|
||||
const similarityChunks = await this.db.$queryRaw<
|
||||
Array<FileChunkSimilarity>
|
||||
>`
|
||||
SELECT "file_id" as "fileId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
|
||||
FROM "ai_context_embeddings"
|
||||
WHERE context_id = ${contextId}
|
||||
ORDER BY "distance" ASC
|
||||
LIMIT ${topK};
|
||||
`;
|
||||
SELECT "file_id" as "fileId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
|
||||
FROM "ai_context_embeddings"
|
||||
WHERE context_id = ${contextId}
|
||||
ORDER BY "distance" ASC
|
||||
LIMIT ${topK};
|
||||
`;
|
||||
return similarityChunks.filter(c => Number(c.distance) <= threshold);
|
||||
}
|
||||
|
||||
@@ -198,11 +198,11 @@ export class CopilotContextModel extends BaseModel {
|
||||
false
|
||||
);
|
||||
await this.db.$executeRaw`
|
||||
INSERT INTO "ai_workspace_embeddings"
|
||||
("workspace_id", "doc_id", "chunk", "content", "embedding", "updated_at") VALUES ${values}
|
||||
ON CONFLICT (workspace_id, doc_id, chunk) DO UPDATE SET
|
||||
embedding = EXCLUDED.embedding, updated_at = excluded.updated_at;
|
||||
`;
|
||||
INSERT INTO "ai_workspace_embeddings"
|
||||
("workspace_id", "doc_id", "chunk", "content", "embedding", "updated_at") VALUES ${values}
|
||||
ON CONFLICT (workspace_id, doc_id, chunk) DO UPDATE SET
|
||||
embedding = EXCLUDED.embedding, updated_at = excluded.updated_at;
|
||||
`;
|
||||
}
|
||||
|
||||
async matchWorkspaceEmbedding(
|
||||
@@ -212,12 +212,12 @@ export class CopilotContextModel extends BaseModel {
|
||||
threshold: number
|
||||
): Promise<DocChunkSimilarity[]> {
|
||||
const similarityChunks = await this.db.$queryRaw<Array<DocChunkSimilarity>>`
|
||||
SELECT "doc_id" as "docId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
|
||||
FROM "ai_workspace_embeddings"
|
||||
WHERE "workspace_id" = ${workspaceId}
|
||||
ORDER BY "distance" ASC
|
||||
LIMIT ${topK};
|
||||
`;
|
||||
SELECT "doc_id" as "docId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
|
||||
FROM "ai_workspace_embeddings"
|
||||
WHERE "workspace_id" = ${workspaceId}
|
||||
ORDER BY "distance" ASC
|
||||
LIMIT ${topK};
|
||||
`;
|
||||
return similarityChunks.filter(c => Number(c.distance) <= threshold);
|
||||
}
|
||||
|
||||
|
||||
156
packages/backend/server/src/models/copilot-workspace.ts
Normal file
156
packages/backend/server/src/models/copilot-workspace.ts
Normal file
@@ -0,0 +1,156 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Transactional } from '@nestjs-cls/transactional';
|
||||
import { Prisma } from '@prisma/client';
|
||||
|
||||
import { BaseModel } from './base';
|
||||
import {
|
||||
type CopilotWorkspaceFile,
|
||||
type Embedding,
|
||||
FileChunkSimilarity,
|
||||
} from './common';
|
||||
|
||||
/**
 * Data-access model for per-workspace copilot configuration.
 *
 * Covers two concerns:
 * - the list of doc ids a workspace has marked as ignored, and
 * - files attached to a workspace together with their chunk embeddings.
 */
@Injectable()
export class CopilotWorkspaceConfigModel extends BaseModel {
  /**
   * Apply a batch of additions and removals to a workspace's ignored-doc list.
   *
   * Ids in `add` that are already ignored are skipped, repeated ids are
   * inserted only once, and an id that appears in both `add` and `remove`
   * ends up removed (the removal wins).
   *
   * @param workspaceId workspace whose ignored list is updated
   * @param add doc ids to mark as ignored
   * @param remove doc ids to stop ignoring
   * @returns the number of ignored docs in the workspace after the update
   */
  @Transactional()
  async updateIgnoredDocs(
    workspaceId: string,
    add: string[] = [],
    remove: string[] = []
  ): Promise<number> {
    const removed = new Set(remove);
    const existing = await this.listIgnoredDocs(workspaceId);
    // Ids that remain ignored once the removals have been applied.
    const ignored = new Set(existing.filter(id => !removed.has(id)));
    // Rows to insert: deduplicated, not already ignored, and not also scheduled
    // for removal. Inserting an id that the delete below drops again would at
    // best be wasted work and at worst re-insert an existing row, and it would
    // inflate the returned count.
    const added = Array.from(new Set(add)).filter(
      id => !ignored.has(id) && !removed.has(id)
    );

    if (added.length) {
      await this.db.aiWorkspaceIgnoredDocs.createMany({
        data: added.map(docId => ({ workspaceId, docId })),
      });
    }

    if (removed.size) {
      await this.db.aiWorkspaceIgnoredDocs.deleteMany({
        where: {
          workspaceId,
          docId: { in: Array.from(removed) },
        },
      });
    }

    return added.length + ignored.size;
  }

  /**
   * List the ids of all docs currently ignored in a workspace.
   *
   * @param workspaceId workspace to look up
   * @returns the ignored doc ids (empty when none are ignored)
   */
  async listIgnoredDocs(workspaceId: string): Promise<string[]> {
    const rows = await this.db.aiWorkspaceIgnoredDocs.findMany({
      where: { workspaceId },
      select: { docId: true },
    });
    return rows.map(row => row.docId);
  }

  /**
   * Return the subset of `docIds` that is ignored in the workspace.
   *
   * The order (and any repetition) of `docIds` is preserved in the result.
   *
   * @param workspaceId workspace to look up
   * @param docIds candidate doc ids
   * @returns the candidates that are on the workspace's ignored list
   */
  @Transactional()
  async checkIgnoredDocs(workspaceId: string, docIds: string[]) {
    const ignored = new Set(await this.listIgnoredDocs(workspaceId));
    return docIds.filter(id => ignored.has(id));
  }

  // ================ embeddings ================

  /**
   * Report whether the `ai_workspace_file_embeddings` table is listed in
   * `pg_tables`.
   *
   * @returns `true` when exactly one matching table entry is found
   */
  async checkEmbeddingAvailable(): Promise<boolean> {
    const [{ count }] = await this.db.$queryRaw<
      { count: number }[]
    >`SELECT count(1) FROM pg_tables WHERE tablename in ('ai_workspace_file_embeddings')`;
    // The raw driver value may not be a plain JS number (e.g. a bigint),
    // so normalise it before comparing.
    return Number(count) === 1;
  }

  /**
   * Build the `VALUES` tuples for inserting file chunk embeddings.
   *
   * Each embedding becomes one
   * `(workspaceId, fileId, index, content, '[v1,v2,...]')` tuple.
   *
   * NOTE(review): the vector literal is inlined with `Prisma.raw`, so it is
   * only safe while `embedding` really contains numbers — confirm callers
   * never pass unvalidated data here.
   */
  private processEmbeddings(
    workspaceId: string,
    fileId: string,
    embeddings: Embedding[]
  ) {
    const rows = embeddings.map(e => {
      const vectorLiteral = Prisma.raw(`'[${e.embedding.join(',')}]'`);
      return [workspaceId, fileId, e.index, e.content, vectorLiteral].filter(
        v => v !== undefined
      );
    });
    return Prisma.join(rows.map(row => Prisma.sql`(${Prisma.join(row)})`));
  }

  /**
   * Register a file in a workspace and store its chunk embeddings.
   *
   * A fresh UUID is generated for the file. When `embeddings` is empty the
   * file record is still created and the embedding insert is skipped.
   *
   * @param workspaceId workspace that owns the file
   * @param file file metadata (name, MIME type, size)
   * @param embeddings chunk embeddings computed for the file
   * @returns the id generated for the new file
   */
  @Transactional()
  async addWorkspaceFile(
    workspaceId: string,
    file: Pick<CopilotWorkspaceFile, 'fileName' | 'mimeType' | 'size'>,
    embeddings: Embedding[]
  ): Promise<string> {
    const fileId = randomUUID();
    await this.db.aiWorkspaceFiles.create({
      data: { ...file, workspaceId, fileId },
    });

    // An empty list cannot form a VALUES clause (and joining zero fragments
    // throws), so there is nothing to insert for a file without chunks.
    if (embeddings.length === 0) {
      return fileId;
    }

    const values = this.processEmbeddings(workspaceId, fileId, embeddings);
    await this.db.$executeRaw`
      INSERT INTO "ai_workspace_file_embeddings"
      ("workspace_id", "file_id", "chunk", "content", "embedding") VALUES ${values}
      ON CONFLICT (workspace_id, file_id, chunk) DO NOTHING;
    `;
    return fileId;
  }

  /**
   * List all files registered in a workspace.
   *
   * @param workspaceId workspace to look up
   * @returns the workspace's file records
   */
  async listWorkspaceFiles(
    workspaceId: string
  ): Promise<CopilotWorkspaceFile[]> {
    return await this.db.aiWorkspaceFiles.findMany({
      where: { workspaceId },
    });
  }

  /**
   * Find the file chunks in a workspace that are closest to an embedding.
   *
   * The query orders chunks by the `<=>` distance (pgvector's cosine-distance
   * operator) and keeps the first `topK`; the threshold is applied afterwards,
   * so fewer than `topK` chunks may be returned.
   *
   * @param workspaceId workspace to search in
   * @param embedding query vector
   * @param topK maximum number of rows fetched from the database
   * @param threshold maximum distance (inclusive) for a chunk to be returned
   * @returns matching chunks, nearest first
   */
  async matchWorkspaceFileEmbedding(
    workspaceId: string,
    embedding: number[],
    topK: number,
    threshold: number
  ): Promise<FileChunkSimilarity[]> {
    const nearest = await this.db.$queryRaw<Array<FileChunkSimilarity>>`
      SELECT "file_id" as "fileId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
      FROM "ai_workspace_file_embeddings"
      WHERE workspace_id = ${workspaceId}
      ORDER BY "distance" ASC
      LIMIT ${topK};
    `;
    return nearest.filter(chunk => Number(chunk.distance) <= threshold);
  }

  /**
   * Delete a file from a workspace.
   *
   * @param workspaceId workspace that owns the file
   * @param fileId id of the file to delete
   */
  async removeWorkspaceFile(workspaceId: string, fileId: string) {
    // embeddings will be removed by foreign key constraint
    await this.db.aiWorkspaceFiles.deleteMany({
      where: { workspaceId, fileId },
    });
  }
}
|
||||
@@ -11,6 +11,7 @@ import { AppConfigModel } from './config';
|
||||
import { CopilotContextModel } from './copilot-context';
|
||||
import { CopilotJobModel } from './copilot-job';
|
||||
import { CopilotSessionModel } from './copilot-session';
|
||||
import { CopilotWorkspaceConfigModel } from './copilot-workspace';
|
||||
import { DocModel } from './doc';
|
||||
import { DocUserModel } from './doc-user';
|
||||
import { FeatureModel } from './feature';
|
||||
@@ -44,6 +45,7 @@ const MODELS = {
|
||||
userSettings: UserSettingsModel,
|
||||
copilotSession: CopilotSessionModel,
|
||||
copilotContext: CopilotContextModel,
|
||||
copilotWorkspace: CopilotWorkspaceConfigModel,
|
||||
copilotJob: CopilotJobModel,
|
||||
appConfig: AppConfigModel,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user