From 6224344a4fbdd82948faa75a84c434004fb697ae Mon Sep 17 00:00:00 2001 From: darkskygit Date: Thu, 15 May 2025 09:36:28 +0000 Subject: [PATCH] chore(server): improve ignored docs list & match (#12307) ## Summary by CodeRabbit - **Bug Fixes** - Improved the accuracy of document matching by excluding ignored documents from search results. - **Chores** - Updated internal handling of ignored document lists for better consistency and reliability. --- .../copilot-workspace.spec.ts.md | 6 +++- .../copilot-workspace.spec.ts.snap | Bin 560 -> 579 bytes .../models/copilot-workspace.spec.ts | 2 +- .../server/src/models/copilot-context.ts | 32 ++++++++---------- .../server/src/models/copilot-workspace.ts | 11 +++--- 5 files changed, 25 insertions(+), 26 deletions(-) diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md index 211f3b9a2f..ef8b1f4487 100644 --- a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md +++ b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md @@ -24,6 +24,10 @@ Generated by [AVA](https://avajs.dev). 'doc1', ] +> should not change if ignored doc exists + + 0 + > should not add ignored doc again [ @@ -34,7 +38,7 @@ Generated by [AVA](https://avajs.dev). > should add new ignored doc - 2 + 1 > should add ignored doc diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap index 8447bb15c58eb1e0a8be480fe96b0dc6b9a1155d..fae9d358b16b2cbe76aa3ed289e17b9b5afb816f 100644 GIT binary patch literal 579 zcmV-J0=)e}RzVKoEVCZ{2}%r=x*tg@kBMz)?_9K!p$>C@7#p>)lDb z#MzC!n~SfYfC>qTo(kRl4>~G-2PHoOIf)&|3PcCZx$%xYZ)V=xncdStlsGl=AI>eB-7)vXMER>s=#04>P>Bvu{Vqu2XrL2W^${TOx#K$i9k7ruTAfGkSMgC9= zM4ZToljCJXsdW{+0KEECB6`#~1aKF?0d@AN{V+}C2~n}?_xqq75P^1y?w241z!ad) z^XM(BK#g*_x3>qnJcsphK#eQZ1hx06t(|(T3#+A_I$G2_Th^O$n5H%_d#&I{|5EU! zJTo>V%Wke`tEwxEUFp8a{HUK2YkKj&Y`Zf6?*N)^nKf&Um!ztR33ZgReBxb8 zUT%T!{~wD800000000BcRI!fJKoEUn$4PVtN=`=u)d~sGoPeXCqJRn^grJ~+3axkN zk|p+TWN$9Hg5s{BK%%EYcmF|0#dlEh5y*+{BvwRp(3~6Z*zfJkdpkSlM{(lRC_lSk zZ+ybDgM8>Fi597}a==2lu}Pd4QBn7}! zr1q=$11nEKz3lh4yIJy&^b#0N@dT zC)-2^bI~sr#9gASc+*_}R{-C(=W*wH*{q4BSyob_ydGj71T+T>nZ#hZ8OrY7Y# yCu6QY?0BB}`{P6!+km#LOlw$lWD>3}?VWnedo^5{?geIfAb$WkLUMK<1^@tTk`!S8 diff --git a/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts b/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts index 6e1da27d34..5d60878361 100644 --- a/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts +++ b/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts @@ -74,7 +74,7 @@ test('should manage copilot workspace ignored docs', async t => { workspace.id, [docId] ); - t.is(count, 1, 'should not add ignored doc again'); + t.snapshot(count, 'should not change if ignored doc exists'); const ret = await t.context.copilotWorkspace.listIgnoredDocs(workspace.id); t.snapshot(cleanObject(ret), 'should not add ignored doc again'); diff --git a/packages/backend/server/src/models/copilot-context.ts b/packages/backend/server/src/models/copilot-context.ts index a241e0a788..6c868c61db 100644 --- a/packages/backend/server/src/models/copilot-context.ts +++ b/packages/backend/server/src/models/copilot-context.ts @@ -1,7 +1,6 @@ import { randomUUID } from 'node:crypto'; import { Injectable } from '@nestjs/common'; -import { Transactional } from '@nestjs-cls/transactional'; import { Prisma } from '@prisma/client'; import { CopilotSessionNotFound } from '../base'; @@ -218,7 +217,6 @@ export class CopilotContextModel extends BaseModel { }); } - @Transactional() async matchWorkspaceEmbedding( embedding: number[], workspaceId: string, @@ -227,25 +225,23 @@ export class CopilotContextModel extends BaseModel { matchDocIds?: string[] ): Promise { const similarityChunks = await this.db.$queryRaw>` - SELECT "doc_id" as "docId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance" - FROM "ai_workspace_embeddings" - WHERE "workspace_id" = ${workspaceId} - ${matchDocIds?.length ? Prisma.sql`AND "doc_id" IN (${Prisma.join(matchDocIds)})` : Prisma.empty} + SELECT + w."doc_id" as "docId", + w."chunk", + w."content", + w."embedding" <=> ${embedding}::vector as "distance" + FROM "ai_workspace_embeddings" w + LEFT JOIN "ai_workspace_ignored_docs" i + ON i."workspace_id" = w."workspace_id" + AND i."doc_id" = w."doc_id" + ${matchDocIds?.length ? Prisma.sql`AND w."doc_id" NOT IN (${Prisma.join(matchDocIds)})` : Prisma.empty} + WHERE + w."workspace_id" = ${workspaceId} + AND i."doc_id" IS NULL ORDER BY "distance" ASC LIMIT ${topK}; `; - const matchedChunks = similarityChunks.filter( - c => Number(c.distance) <= threshold - ); - const matchedDocIds = Array.from(new Set(matchedChunks.map(c => c.docId))); - if (!matchDocIds?.length && matchedDocIds.length) { - const ignoredDocs = await this.models.copilotWorkspace.checkIgnoredDocs( - workspaceId, - matchedDocIds - ); - return matchedChunks.filter(c => !ignoredDocs.includes(c.docId)); - } - return matchedChunks; + return similarityChunks.filter(c => Number(c.distance) <= threshold); } } diff --git a/packages/backend/server/src/models/copilot-workspace.ts b/packages/backend/server/src/models/copilot-workspace.ts index 5d0c1d7c2c..eebeda344d 100644 --- a/packages/backend/server/src/models/copilot-workspace.ts +++ b/packages/backend/server/src/models/copilot-workspace.ts @@ -16,7 +16,6 @@ import type { @Injectable() export class CopilotWorkspaceConfigModel extends BaseModel { - @Transactional() private async listIgnoredDocIds( workspaceId: string, options?: PaginationInput @@ -47,16 +46,15 @@ export class CopilotWorkspaceConfigModel extends BaseModel { ); const added = add.filter(id => !ignored.has(id)); - if (added.length) { + const { count: addedCount } = await this.db.aiWorkspaceIgnoredDocs.createMany({ data: added.map(docId => ({ workspaceId, docId, })), }); - } - if (removed.size) { + const { count: removedCount } = await this.db.aiWorkspaceIgnoredDocs.deleteMany({ where: { workspaceId, @@ -65,11 +63,11 @@ export class CopilotWorkspaceConfigModel extends BaseModel { }, }, }); - } - return added.length + ignored.size; + return addedCount + removedCount; } + @Transactional() async listIgnoredDocs( workspaceId: string, options?: PaginationInput @@ -100,6 +98,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel { }); } + @Transactional() async countIgnoredDocs(workspaceId: string): Promise { const count = await this.db.aiWorkspaceIgnoredDocs.count({ where: {