feat(server): workspace file embedding & ignored docs model impl (#11804)

fix AI-30
fix AI-31
This commit is contained in:
darkskygit
2025-04-21 05:34:10 +00:00
parent 93b7c288cb
commit f2adb9f72c
7 changed files with 473 additions and 22 deletions

View File

@@ -105,3 +105,17 @@ export type FileChunkSimilarity = ChunkSimilarity & {
/**
 * A chunk-similarity result that additionally carries the id of the doc the
 * chunk belongs to (`docId`).
 */
export type DocChunkSimilarity = ChunkSimilarity & {
docId: string;
};
/**
 * Zod schema describing the metadata of a workspace file: its name, MIME type
 * and size. Only the primitive type of each field is validated here.
 */
export const CopilotWorkspaceFileSchema = z.object({
fileName: z.string(),
mimeType: z.string(),
// NOTE(review): presumably the size in bytes; any number is accepted — confirm.
size: z.number(),
});
/**
 * A workspace file record: the fields inferred from
 * `CopilotWorkspaceFileSchema` plus the identifiers and creation timestamp of
 * the record.
 */
export type CopilotWorkspaceFile = z.infer<
typeof CopilotWorkspaceFileSchema
> & {
workspaceId: string;
fileId: string;
createdAt: Date;
};

View File

@@ -177,12 +177,12 @@ export class CopilotContextModel extends BaseModel {
const similarityChunks = await this.db.$queryRaw<
Array<FileChunkSimilarity>
>`
SELECT "file_id" as "fileId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
FROM "ai_context_embeddings"
WHERE context_id = ${contextId}
ORDER BY "distance" ASC
LIMIT ${topK};
`;
SELECT "file_id" as "fileId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
FROM "ai_context_embeddings"
WHERE context_id = ${contextId}
ORDER BY "distance" ASC
LIMIT ${topK};
`;
return similarityChunks.filter(c => Number(c.distance) <= threshold);
}
@@ -198,11 +198,11 @@ export class CopilotContextModel extends BaseModel {
false
);
await this.db.$executeRaw`
INSERT INTO "ai_workspace_embeddings"
("workspace_id", "doc_id", "chunk", "content", "embedding", "updated_at") VALUES ${values}
ON CONFLICT (workspace_id, doc_id, chunk) DO UPDATE SET
embedding = EXCLUDED.embedding, updated_at = excluded.updated_at;
`;
INSERT INTO "ai_workspace_embeddings"
("workspace_id", "doc_id", "chunk", "content", "embedding", "updated_at") VALUES ${values}
ON CONFLICT (workspace_id, doc_id, chunk) DO UPDATE SET
embedding = EXCLUDED.embedding, updated_at = excluded.updated_at;
`;
}
async matchWorkspaceEmbedding(
@@ -212,12 +212,12 @@ export class CopilotContextModel extends BaseModel {
threshold: number
): Promise<DocChunkSimilarity[]> {
const similarityChunks = await this.db.$queryRaw<Array<DocChunkSimilarity>>`
SELECT "doc_id" as "docId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
FROM "ai_workspace_embeddings"
WHERE "workspace_id" = ${workspaceId}
ORDER BY "distance" ASC
LIMIT ${topK};
`;
SELECT "doc_id" as "docId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
FROM "ai_workspace_embeddings"
WHERE "workspace_id" = ${workspaceId}
ORDER BY "distance" ASC
LIMIT ${topK};
`;
return similarityChunks.filter(c => Number(c.distance) <= threshold);
}

View File

@@ -0,0 +1,156 @@
import { randomUUID } from 'node:crypto';
import { Injectable } from '@nestjs/common';
import { Transactional } from '@nestjs-cls/transactional';
import { Prisma } from '@prisma/client';
import { BaseModel } from './base';
import {
type CopilotWorkspaceFile,
type Embedding,
FileChunkSimilarity,
} from './common';
/**
 * Data-access model for per-workspace copilot configuration.
 *
 * It manages two things:
 * - the list of doc ids a workspace has marked as ignored, and
 * - files attached to a workspace together with their chunk embeddings.
 */
@Injectable()
export class CopilotWorkspaceConfigModel extends BaseModel {
  /**
   * Applies a batch of additions and removals to a workspace's ignored docs.
   *
   * Ids in `add` that are already ignored are skipped, duplicates in `add` are
   * collapsed, and an id that appears in both `add` and `remove` ends up
   * removed (removal wins).
   *
   * @param workspaceId - workspace whose ignore list is updated
   * @param add - doc ids to start ignoring
   * @param remove - doc ids to stop ignoring
   * @returns the number of ignored docs in the workspace after the update
   */
  @Transactional()
  async updateIgnoredDocs(
    workspaceId: string,
    add: string[] = [],
    remove: string[] = []
  ): Promise<number> {
    const removed = new Set(remove);
    const existing = await this.listIgnoredDocs(workspaceId);
    // Ignored docs that survive this update.
    const ignored = new Set(existing.filter(id => !removed.has(id)));
    // Deduplicate first so one id is never inserted twice, then drop ids that
    // are already ignored or that this very call is about to remove.
    const added = Array.from(new Set(add)).filter(
      id => !ignored.has(id) && !removed.has(id)
    );

    if (added.length) {
      await this.db.aiWorkspaceIgnoredDocs.createMany({
        data: added.map(docId => ({
          workspaceId,
          docId,
        })),
      });
    }

    if (removed.size) {
      await this.db.aiWorkspaceIgnoredDocs.deleteMany({
        where: {
          workspaceId,
          docId: {
            in: Array.from(removed),
          },
        },
      });
    }

    return added.length + ignored.size;
  }

  /**
   * Lists the ids of all docs ignored in a workspace.
   *
   * @param workspaceId - workspace to query
   * @returns the ignored doc ids
   */
  async listIgnoredDocs(workspaceId: string): Promise<string[]> {
    const rows = await this.db.aiWorkspaceIgnoredDocs.findMany({
      where: {
        workspaceId,
      },
      select: {
        docId: true,
      },
    });
    return rows.map(r => r.docId);
  }

  /**
   * Returns the subset of `docIds` that is ignored in the workspace.
   *
   * The order (and any duplicates) of `docIds` is preserved in the result.
   *
   * @param workspaceId - workspace to query
   * @param docIds - candidate doc ids
   * @returns the candidates that are currently ignored
   */
  @Transactional()
  async checkIgnoredDocs(
    workspaceId: string,
    docIds: string[]
  ): Promise<string[]> {
    const ignored = new Set(await this.listIgnoredDocs(workspaceId));
    return docIds.filter(id => ignored.has(id));
  }

  // ================ embeddings ================

  /**
   * Checks whether the `ai_workspace_file_embeddings` table exists by counting
   * matching rows in `pg_tables`.
   *
   * @returns `true` when exactly one such table is found
   */
  async checkEmbeddingAvailable(): Promise<boolean> {
    const [{ count }] = await this.db.$queryRaw<
      { count: number }[]
    >`SELECT count(1) FROM pg_tables WHERE tablename in ('ai_workspace_file_embeddings')`;
    // Number() normalises whatever numeric representation the driver returns.
    return Number(count) === 1;
  }

  /**
   * Builds the SQL `VALUES` tuples for inserting a file's chunk embeddings.
   *
   * Each embedding becomes one
   * `(workspaceId, fileId, index, content, '[v1,v2,...]')` tuple. The vector is
   * inlined as a raw SQL literal; the other cells are bound as parameters.
   *
   * Must not be called with an empty `embeddings` array, because `Prisma.join`
   * rejects empty input.
   */
  private processEmbeddings(
    workspaceId: string,
    fileId: string,
    embeddings: Embedding[]
  ) {
    const groups = embeddings.map(e =>
      [
        workspaceId,
        fileId,
        e.index,
        e.content,
        // assumes e.embedding is an array of numbers — it is spliced into the
        // statement unescaped
        Prisma.raw(`'[${e.embedding.join(',')}]'`),
        // NOTE(review): dropping an undefined cell would shift the remaining
        // columns of that tuple — confirm these fields are always defined.
      ].filter(v => v !== undefined)
    );
    return Prisma.join(groups.map(row => Prisma.sql`(${Prisma.join(row)})`));
  }

  /**
   * Registers a file in a workspace and stores its chunk embeddings.
   *
   * A fresh UUID is generated as the file id. When `embeddings` is empty only
   * the file record is created and the embedding insert is skipped.
   *
   * @param workspaceId - workspace that owns the file
   * @param file - file metadata (name, MIME type, size)
   * @param embeddings - chunk embeddings computed for the file
   * @returns the generated file id
   */
  @Transactional()
  async addWorkspaceFile(
    workspaceId: string,
    file: Pick<CopilotWorkspaceFile, 'fileName' | 'mimeType' | 'size'>,
    embeddings: Embedding[]
  ): Promise<string> {
    const fileId = randomUUID();
    await this.db.aiWorkspaceFiles.create({
      data: { ...file, workspaceId, fileId },
    });

    // `Prisma.join([])` throws, and `VALUES` without tuples is invalid SQL, so
    // only run the insert when there is at least one embedding.
    if (embeddings.length) {
      const values = this.processEmbeddings(workspaceId, fileId, embeddings);
      await this.db.$executeRaw`
        INSERT INTO "ai_workspace_file_embeddings"
        ("workspace_id", "file_id", "chunk", "content", "embedding") VALUES ${values}
        ON CONFLICT (workspace_id, file_id, chunk) DO NOTHING;
      `;
    }

    return fileId;
  }

  /**
   * Lists all file records of a workspace.
   *
   * @param workspaceId - workspace to query
   * @returns the workspace's file records
   */
  async listWorkspaceFiles(
    workspaceId: string
  ): Promise<CopilotWorkspaceFile[]> {
    const files = await this.db.aiWorkspaceFiles.findMany({
      where: {
        workspaceId,
      },
    });
    return files;
  }

  /**
   * Finds the file chunks of a workspace that are closest to `embedding`.
   *
   * The query orders chunks by the `<=>` distance to `embedding` and keeps the
   * `topK` nearest; chunks whose distance exceeds `threshold` are then dropped,
   * so fewer than `topK` results may be returned.
   *
   * @param workspaceId - workspace to search in
   * @param embedding - query vector
   * @param topK - maximum number of rows fetched from the database
   * @param threshold - maximum accepted distance (inclusive)
   * @returns matching chunks, nearest first
   */
  async matchWorkspaceFileEmbedding(
    workspaceId: string,
    embedding: number[],
    topK: number,
    threshold: number
  ): Promise<FileChunkSimilarity[]> {
    const similarityChunks = await this.db.$queryRaw<
      Array<FileChunkSimilarity>
    >`
      SELECT "file_id" as "fileId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
      FROM "ai_workspace_file_embeddings"
      WHERE workspace_id = ${workspaceId}
      ORDER BY "distance" ASC
      LIMIT ${topK};
    `;
    return similarityChunks.filter(c => Number(c.distance) <= threshold);
  }

  /**
   * Deletes a file record from a workspace.
   *
   * @param workspaceId - workspace that owns the file
   * @param fileId - id of the file to delete
   */
  async removeWorkspaceFile(workspaceId: string, fileId: string) {
    // embeddings will be removed by foreign key constraint
    await this.db.aiWorkspaceFiles.deleteMany({
      where: {
        workspaceId,
        fileId,
      },
    });
  }
}

View File

@@ -11,6 +11,7 @@ import { AppConfigModel } from './config';
import { CopilotContextModel } from './copilot-context';
import { CopilotJobModel } from './copilot-job';
import { CopilotSessionModel } from './copilot-session';
import { CopilotWorkspaceConfigModel } from './copilot-workspace';
import { DocModel } from './doc';
import { DocUserModel } from './doc-user';
import { FeatureModel } from './feature';
@@ -44,6 +45,7 @@ const MODELS = {
userSettings: UserSettingsModel,
copilotSession: CopilotSessionModel,
copilotContext: CopilotContextModel,
copilotWorkspace: CopilotWorkspaceConfigModel,
copilotJob: CopilotJobModel,
appConfig: AppConfigModel,
};