mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-13 21:05:19 +00:00
fix(server): exclude outdated doc id style in embedding count (#13269)
fix AI-392
fix AI-393

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Improved filtering of outdated document ID styles in embedding status reporting, ensuring more accurate counts of embedded documents.
  * Stricter rate limiting applied to workspace embedding status queries for enhanced system reliability.
* **Bug Fixes**
  * Resolved issues with duplicate or outdated document IDs affecting embedding status totals.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -89,3 +89,19 @@ Generated by [AVA](https://avajs.dev).
|
||||
> should not find docs to embed
|
||||
|
||||
0
|
||||
|
||||
## should filter outdated doc id style in embedding status
|
||||
|
||||
> should include modern doc format
|
||||
|
||||
{
|
||||
embedded: 0,
|
||||
total: 1,
|
||||
}
|
||||
|
||||
> should count docs after filtering outdated
|
||||
|
||||
{
|
||||
embedded: 1,
|
||||
total: 1,
|
||||
}
|
||||
|
||||
Binary file not shown.
@@ -306,3 +306,50 @@ test('should check embedding table', async t => {
|
||||
// t.false(ret, 'should return false when embedding table is not available');
|
||||
// }
|
||||
});
|
||||
|
||||
// Upserts the same doc under both its plain id and the outdated
// `<workspaceId>:space:<docId>` id, then snapshots getEmbeddingStatus before
// and after a single embedding is inserted for the plain id.
test('should filter outdated doc id style in embedding status', async t => {
|
||||
const docId = randomUUID();
|
||||
// outdated id style: the plain doc id prefixed with `<workspaceId>:space:`
const outdatedDocId = `${workspace.id}:space:${docId}`;
|
||||
|
||||
// upsert the doc under its plain id
await t.context.doc.upsert({
|
||||
spaceId: workspace.id,
|
||||
docId,
|
||||
blob: Uint8Array.from([1, 2, 3]),
|
||||
timestamp: Date.now(),
|
||||
editorId: user.id,
|
||||
});
|
||||
|
||||
// upsert identical content under the outdated-style id built above
await t.context.doc.upsert({
|
||||
spaceId: workspace.id,
|
||||
docId: outdatedDocId,
|
||||
blob: Uint8Array.from([1, 2, 3]),
|
||||
timestamp: Date.now(),
|
||||
editorId: user.id,
|
||||
});
|
||||
|
||||
{
|
||||
// status before any embedding has been inserted for docId
const status = await t.context.copilotWorkspace.getEmbeddingStatus(
|
||||
workspace.id
|
||||
);
|
||||
t.snapshot(status, 'should include modern doc format');
|
||||
}
|
||||
|
||||
{
|
||||
// insert one chunk (index 0) with a 1024-element embedding of ones for the plain doc id
await t.context.copilotContext.insertWorkspaceEmbedding(
|
||||
workspace.id,
|
||||
docId,
|
||||
[
|
||||
{
|
||||
index: 0,
|
||||
content: 'content',
|
||||
embedding: Array.from({ length: 1024 }, () => 1),
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
// status again, now that an embedding exists for docId
const status = await t.context.copilotWorkspace.getEmbeddingStatus(
|
||||
workspace.id
|
||||
);
|
||||
t.snapshot(status, 'should count docs after filtering outdated');
|
||||
}
|
||||
});
|
||||
|
||||
@@ -152,7 +152,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
}
|
||||
|
||||
@Transactional()
|
||||
async getWorkspaceEmbeddingStatus(workspaceId: string) {
|
||||
async getEmbeddingStatus(workspaceId: string) {
|
||||
const ignoredDocIds = (await this.listIgnoredDocIds(workspaceId)).map(
|
||||
d => d.docId
|
||||
);
|
||||
@@ -168,9 +168,13 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
};
|
||||
|
||||
const [docTotal, docEmbedded, fileTotal, fileEmbedded] = await Promise.all([
|
||||
this.db.snapshot.count({ where: snapshotCondition }),
|
||||
this.db.snapshot.count({
|
||||
this.db.snapshot.findMany({
|
||||
where: snapshotCondition,
|
||||
select: { id: true },
|
||||
}),
|
||||
this.db.snapshot.findMany({
|
||||
where: { ...snapshotCondition, embedding: { some: {} } },
|
||||
select: { id: true },
|
||||
}),
|
||||
this.db.aiWorkspaceFiles.count({ where: { workspaceId } }),
|
||||
this.db.aiWorkspaceFiles.count({
|
||||
@@ -178,9 +182,23 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
}),
|
||||
]);
|
||||
|
||||
const docTotalIds = docTotal.map(d => d.id);
|
||||
const docTotalSet = new Set(docTotalIds);
|
||||
const outdatedDocPrefix = `${workspaceId}:space:`;
|
||||
const duplicateOutdatedDocSet = new Set(
|
||||
docTotalIds
|
||||
.filter(id => id.startsWith(outdatedDocPrefix))
|
||||
.filter(id => docTotalSet.has(id.slice(outdatedDocPrefix.length)))
|
||||
);
|
||||
|
||||
return {
|
||||
total: docTotal + fileTotal,
|
||||
embedded: docEmbedded + fileEmbedded,
|
||||
total:
|
||||
docTotalIds.filter(id => !duplicateOutdatedDocSet.has(id)).length +
|
||||
fileTotal,
|
||||
embedded:
|
||||
docEmbedded
|
||||
.map(d => d.id)
|
||||
.filter(id => !duplicateOutdatedDocSet.has(id)).length + fileEmbedded,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -356,6 +356,7 @@ export class CopilotContextRootResolver {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Throttle('strict')
|
||||
@Query(() => ContextWorkspaceEmbeddingStatus, {
|
||||
description: 'query workspace embedding status',
|
||||
})
|
||||
@@ -372,9 +373,7 @@ export class CopilotContextRootResolver {
|
||||
|
||||
if (this.context.canEmbedding) {
|
||||
const { total, embedded } =
|
||||
await this.models.copilotWorkspace.getWorkspaceEmbeddingStatus(
|
||||
workspaceId
|
||||
);
|
||||
await this.models.copilotWorkspace.getEmbeddingStatus(workspaceId);
|
||||
return { total, embedded };
|
||||
}
|
||||
|
||||
|
||||
@@ -103,6 +103,7 @@ export class CopilotWorkspaceEmbeddingConfigResolver {
|
||||
|
||||
return ignoredDocs;
|
||||
}
|
||||
|
||||
@Mutation(() => Number, {
|
||||
name: 'updateWorkspaceEmbeddingIgnoredDocs',
|
||||
complexity: 2,
|
||||
|
||||
Reference in New Issue
Block a user