chore(server): improve ignored docs list & match (#12307)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **Bug Fixes**
	- Improved the accuracy of document matching by excluding ignored documents from search results.
- **Chores**
	- Updated internal handling of ignored document lists for better consistency and reliability.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
darkskygit
2025-05-15 09:36:28 +00:00
parent 393458871d
commit 6224344a4f
5 changed files with 25 additions and 26 deletions

View File

@@ -24,6 +24,10 @@ Generated by [AVA](https://avajs.dev).
'doc1',
]
> should not change if ignored doc exists
0
> should not add ignored doc again
[
@@ -34,7 +38,7 @@ Generated by [AVA](https://avajs.dev).
> should add new ignored doc
2
1
> should add ignored doc

View File

@@ -74,7 +74,7 @@ test('should manage copilot workspace ignored docs', async t => {
workspace.id,
[docId]
);
t.is(count, 1, 'should not add ignored doc again');
t.snapshot(count, 'should not change if ignored doc exists');
const ret = await t.context.copilotWorkspace.listIgnoredDocs(workspace.id);
t.snapshot(cleanObject(ret), 'should not add ignored doc again');

View File

@@ -1,7 +1,6 @@
import { randomUUID } from 'node:crypto';
import { Injectable } from '@nestjs/common';
import { Transactional } from '@nestjs-cls/transactional';
import { Prisma } from '@prisma/client';
import { CopilotSessionNotFound } from '../base';
@@ -218,7 +217,6 @@ export class CopilotContextModel extends BaseModel {
});
}
@Transactional()
async matchWorkspaceEmbedding(
embedding: number[],
workspaceId: string,
@@ -227,25 +225,23 @@ export class CopilotContextModel extends BaseModel {
matchDocIds?: string[]
): Promise<DocChunkSimilarity[]> {
const similarityChunks = await this.db.$queryRaw<Array<DocChunkSimilarity>>`
SELECT "doc_id" as "docId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
FROM "ai_workspace_embeddings"
WHERE "workspace_id" = ${workspaceId}
${matchDocIds?.length ? Prisma.sql`AND "doc_id" IN (${Prisma.join(matchDocIds)})` : Prisma.empty}
SELECT
w."doc_id" as "docId",
w."chunk",
w."content",
w."embedding" <=> ${embedding}::vector as "distance"
FROM "ai_workspace_embeddings" w
LEFT JOIN "ai_workspace_ignored_docs" i
ON i."workspace_id" = w."workspace_id"
AND i."doc_id" = w."doc_id"
${matchDocIds?.length ? Prisma.sql`AND w."doc_id" NOT IN (${Prisma.join(matchDocIds)})` : Prisma.empty}
WHERE
w."workspace_id" = ${workspaceId}
AND i."doc_id" IS NULL
ORDER BY "distance" ASC
LIMIT ${topK};
`;
const matchedChunks = similarityChunks.filter(
c => Number(c.distance) <= threshold
);
const matchedDocIds = Array.from(new Set(matchedChunks.map(c => c.docId)));
if (!matchDocIds?.length && matchedDocIds.length) {
const ignoredDocs = await this.models.copilotWorkspace.checkIgnoredDocs(
workspaceId,
matchedDocIds
);
return matchedChunks.filter(c => !ignoredDocs.includes(c.docId));
}
return matchedChunks;
return similarityChunks.filter(c => Number(c.distance) <= threshold);
}
}

View File

@@ -16,7 +16,6 @@ import type {
@Injectable()
export class CopilotWorkspaceConfigModel extends BaseModel {
@Transactional()
private async listIgnoredDocIds(
workspaceId: string,
options?: PaginationInput
@@ -47,16 +46,15 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
);
const added = add.filter(id => !ignored.has(id));
if (added.length) {
const { count: addedCount } =
await this.db.aiWorkspaceIgnoredDocs.createMany({
data: added.map(docId => ({
workspaceId,
docId,
})),
});
}
if (removed.size) {
const { count: removedCount } =
await this.db.aiWorkspaceIgnoredDocs.deleteMany({
where: {
workspaceId,
@@ -65,11 +63,11 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
},
},
});
}
return added.length + ignored.size;
return addedCount + removedCount;
}
@Transactional()
async listIgnoredDocs(
workspaceId: string,
options?: PaginationInput
@@ -100,6 +98,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
});
}
@Transactional()
async countIgnoredDocs(workspaceId: string): Promise<number> {
const count = await this.db.aiWorkspaceIgnoredDocs.count({
where: {