mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-14 05:14:54 +00:00
chore(server): improve ignored docs list & match (#12307)
<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **Bug Fixes**
  - Improved the accuracy of document matching by excluding ignored documents from search results.
- **Chores**
  - Updated internal handling of ignored document lists for better consistency and reliability.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -24,6 +24,10 @@ Generated by [AVA](https://avajs.dev).
|
||||
'doc1',
|
||||
]
|
||||
|
||||
> should not change if ignored doc exists
|
||||
|
||||
0
|
||||
|
||||
> should not add ignored doc again
|
||||
|
||||
[
|
||||
@@ -34,7 +38,7 @@ Generated by [AVA](https://avajs.dev).
|
||||
|
||||
> should add new ignored doc
|
||||
|
||||
2
|
||||
1
|
||||
|
||||
> should add ignored doc
|
||||
|
||||
|
||||
Binary file not shown.
@@ -74,7 +74,7 @@ test('should manage copilot workspace ignored docs', async t => {
|
||||
workspace.id,
|
||||
[docId]
|
||||
);
|
||||
t.is(count, 1, 'should not add ignored doc again');
|
||||
t.snapshot(count, 'should not change if ignored doc exists');
|
||||
|
||||
const ret = await t.context.copilotWorkspace.listIgnoredDocs(workspace.id);
|
||||
t.snapshot(cleanObject(ret), 'should not add ignored doc again');
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Transactional } from '@nestjs-cls/transactional';
|
||||
import { Prisma } from '@prisma/client';
|
||||
|
||||
import { CopilotSessionNotFound } from '../base';
|
||||
@@ -218,7 +217,6 @@ export class CopilotContextModel extends BaseModel {
|
||||
});
|
||||
}
|
||||
|
||||
@Transactional()
|
||||
async matchWorkspaceEmbedding(
|
||||
embedding: number[],
|
||||
workspaceId: string,
|
||||
@@ -227,25 +225,23 @@ export class CopilotContextModel extends BaseModel {
|
||||
matchDocIds?: string[]
|
||||
): Promise<DocChunkSimilarity[]> {
|
||||
const similarityChunks = await this.db.$queryRaw<Array<DocChunkSimilarity>>`
|
||||
SELECT "doc_id" as "docId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance"
|
||||
FROM "ai_workspace_embeddings"
|
||||
WHERE "workspace_id" = ${workspaceId}
|
||||
${matchDocIds?.length ? Prisma.sql`AND "doc_id" IN (${Prisma.join(matchDocIds)})` : Prisma.empty}
|
||||
SELECT
|
||||
w."doc_id" as "docId",
|
||||
w."chunk",
|
||||
w."content",
|
||||
w."embedding" <=> ${embedding}::vector as "distance"
|
||||
FROM "ai_workspace_embeddings" w
|
||||
LEFT JOIN "ai_workspace_ignored_docs" i
|
||||
ON i."workspace_id" = w."workspace_id"
|
||||
AND i."doc_id" = w."doc_id"
|
||||
${matchDocIds?.length ? Prisma.sql`AND w."doc_id" NOT IN (${Prisma.join(matchDocIds)})` : Prisma.empty}
|
||||
WHERE
|
||||
w."workspace_id" = ${workspaceId}
|
||||
AND i."doc_id" IS NULL
|
||||
ORDER BY "distance" ASC
|
||||
LIMIT ${topK};
|
||||
`;
|
||||
|
||||
const matchedChunks = similarityChunks.filter(
|
||||
c => Number(c.distance) <= threshold
|
||||
);
|
||||
const matchedDocIds = Array.from(new Set(matchedChunks.map(c => c.docId)));
|
||||
if (!matchDocIds?.length && matchedDocIds.length) {
|
||||
const ignoredDocs = await this.models.copilotWorkspace.checkIgnoredDocs(
|
||||
workspaceId,
|
||||
matchedDocIds
|
||||
);
|
||||
return matchedChunks.filter(c => !ignoredDocs.includes(c.docId));
|
||||
}
|
||||
return matchedChunks;
|
||||
return similarityChunks.filter(c => Number(c.distance) <= threshold);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ import type {
|
||||
|
||||
@Injectable()
|
||||
export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
@Transactional()
|
||||
private async listIgnoredDocIds(
|
||||
workspaceId: string,
|
||||
options?: PaginationInput
|
||||
@@ -47,16 +46,15 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
);
|
||||
const added = add.filter(id => !ignored.has(id));
|
||||
|
||||
if (added.length) {
|
||||
const { count: addedCount } =
|
||||
await this.db.aiWorkspaceIgnoredDocs.createMany({
|
||||
data: added.map(docId => ({
|
||||
workspaceId,
|
||||
docId,
|
||||
})),
|
||||
});
|
||||
}
|
||||
|
||||
if (removed.size) {
|
||||
const { count: removedCount } =
|
||||
await this.db.aiWorkspaceIgnoredDocs.deleteMany({
|
||||
where: {
|
||||
workspaceId,
|
||||
@@ -65,11 +63,11 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
return added.length + ignored.size;
|
||||
return addedCount + removedCount;
|
||||
}
|
||||
|
||||
@Transactional()
|
||||
async listIgnoredDocs(
|
||||
workspaceId: string,
|
||||
options?: PaginationInput
|
||||
@@ -100,6 +98,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
});
|
||||
}
|
||||
|
||||
@Transactional()
|
||||
async countIgnoredDocs(workspaceId: string): Promise<number> {
|
||||
const count = await this.db.aiWorkspaceIgnoredDocs.count({
|
||||
where: {
|
||||
|
||||
Reference in New Issue
Block a user