fix(server): embedding chunks primary key (#12416)

fix AI-131

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **Refactor**
  - Updated database schema to consolidate unique constraints into composite primary keys for embedding-related data, improving consistency.
  - Changed the relation in the Snapshot model to allow multiple embeddings.
  - Improved filtering logic for documents and snapshots based on embedding existence.
  - Reformatted SQL queries and schema attributes for improved readability; no changes to functionality.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
darkskygit
2025-05-21 10:51:35 +00:00
parent c9b296c896
commit 7fd3ee957f
5 changed files with 41 additions and 20 deletions

View File

@@ -0,0 +1,20 @@
/*
Purpose:
Replace the primary key of both embedding tables with a composite key that
includes "chunk", and drop the standalone unique indexes whose names cover
the same (workspace_id, doc_id/file_id, chunk) columns.

Warnings:
- The primary key for the `ai_workspace_embeddings` table will be changed. If it partially fails, the table could be left without primary key constraint.
- The primary key for the `ai_workspace_file_embeddings` table will be changed. If it partially fails, the table could be left without primary key constraint.

NOTE(review): this script has no IF EXISTS guards, so it presumably expects both
indexes and both "_pkey" constraints to exist when it runs - confirm against the
previous migration state.
*/
-- DropIndex: unique index on doc embeddings; the new primary key below is declared on (workspace_id, doc_id, chunk).
DROP INDEX "ai_workspace_embeddings_workspace_id_doc_id_chunk_key";
-- DropIndex: unique index on file embeddings; the new primary key below is declared on (workspace_id, file_id, chunk).
DROP INDEX "ai_workspace_file_embeddings_workspace_id_file_id_chunk_key";
-- AlterTable: drop the existing primary key and add the new one in a single ALTER TABLE statement.
ALTER TABLE "ai_workspace_embeddings" DROP CONSTRAINT "ai_workspace_embeddings_pkey",
ADD CONSTRAINT "ai_workspace_embeddings_pkey" PRIMARY KEY ("workspace_id", "doc_id", "chunk");
-- AlterTable: same primary-key swap for the file embeddings table, keyed by file_id instead of doc_id.
ALTER TABLE "ai_workspace_file_embeddings" DROP CONSTRAINT "ai_workspace_file_embeddings_pkey",
ADD CONSTRAINT "ai_workspace_file_embeddings_pkey" PRIMARY KEY ("workspace_id", "file_id", "chunk");

View File

@@ -312,7 +312,7 @@ model Snapshot {
// we need to clear all hanging updates and snapshots before enabling the foreign key on workspaceId
// workspace Workspace @relation(fields: [workspaceId], references: [id], onDelete: Cascade)
embedding AiWorkspaceEmbedding?
embedding AiWorkspaceEmbedding[]
@@id([workspaceId, id])
@@index([workspaceId, updatedAt])
@@ -394,18 +394,18 @@ model AiPromptMessage {
}
model AiPrompt {
id Int @id @default(autoincrement()) @db.Integer
name String @unique @db.VarChar(32)
id Int @id @default(autoincrement()) @db.Integer
name String @unique @db.VarChar(32)
// a mark identifying which view to use to display the session
// it is only used in the frontend and does not affect the backend
action String? @db.VarChar
model String @db.VarChar
optionalModels String[] @default([]) @db.VarChar @map("optional_models")
config Json? @db.Json
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz(3)
updatedAt DateTime @default(now()) @map("updated_at") @db.Timestamptz(3)
action String? @db.VarChar
model String @db.VarChar
optionalModels String[] @default([]) @map("optional_models") @db.VarChar
config Json? @db.Json
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz(3)
updatedAt DateTime @default(now()) @map("updated_at") @db.Timestamptz(3)
// whether the prompt is modified by the admin panel
modified Boolean @default(false)
modified Boolean @default(false)
messages AiPromptMessage[]
sessions AiSession[]
@@ -500,8 +500,7 @@ model AiWorkspaceEmbedding {
// so we can match this record with the snapshot one by one
snapshot Snapshot @relation(fields: [workspaceId, docId], references: [workspaceId, id], onDelete: Cascade)
@@id([workspaceId, docId])
@@unique([workspaceId, docId, chunk])
@@id([workspaceId, docId, chunk])
@@index([embedding], map: "ai_workspace_embeddings_idx")
@@map("ai_workspace_embeddings")
}
@@ -521,7 +520,7 @@ model AiWorkspaceIgnoredDocs {
model AiWorkspaceFiles {
workspaceId String @map("workspace_id") @db.VarChar
fileId String @map("file_id") @db.VarChar
blobId String @map("blob_id") @db.VarChar @default("")
blobId String @default("") @map("blob_id") @db.VarChar
fileName String @map("file_name") @db.VarChar
mimeType String @map("mime_type") @db.VarChar
size Int @db.Integer
@@ -548,8 +547,7 @@ model AiWorkspaceFileEmbedding {
file AiWorkspaceFiles @relation(fields: [workspaceId, fileId], references: [workspaceId, fileId], onDelete: Cascade)
@@id([workspaceId, fileId])
@@unique([workspaceId, fileId, chunk])
@@id([workspaceId, fileId, chunk])
@@index([embedding], map: "ai_workspace_file_embeddings_idx")
@@map("ai_workspace_file_embeddings")
}

View File

@@ -210,9 +210,12 @@ export class CopilotContextModel extends BaseModel {
);
await this.db.$executeRaw`
INSERT INTO "ai_workspace_embeddings"
("workspace_id", "doc_id", "chunk", "content", "embedding", "updated_at") VALUES ${values}
ON CONFLICT (workspace_id, doc_id, chunk) DO UPDATE SET
embedding = EXCLUDED.embedding, updated_at = excluded.updated_at;
("workspace_id", "doc_id", "chunk", "content", "embedding", "updated_at")
VALUES ${values}
ON CONFLICT (workspace_id, doc_id, chunk)
DO UPDATE SET
embedding = EXCLUDED.embedding,
updated_at = excluded.updated_at;
`;
}

View File

@@ -47,7 +47,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
where: {
workspaceId,
embedding: {
is: null,
none: {},
},
},
select: { id: true },

View File

@@ -371,7 +371,7 @@ export class CopilotContextRootResolver {
if (this.context.canEmbedding) {
const total = await this.db.snapshot.count({ where: { workspaceId } });
const embedded = await this.db.snapshot.count({
where: { workspaceId, embedding: { isNot: null } },
where: { workspaceId, embedding: { some: {} } },
});
return { total, embedded };
}