mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-25 02:13:00 +08:00
feat(server): skip embedding for deprecated doc ids & empty docs (#13211)
fix AI-367

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Bug Fixes**
  * Improved document filtering to exclude settings documents and empty blobs from embedding and status calculations.
  * Enhanced embedding jobs to skip processing deprecated documents if a newer version exists, ensuring only up-to-date documents are embedded.
* **New Features**
  * Added a mutation to trigger the cron job for generating missing titles.
* **Tests**
  * Added a test to verify exclusion of documents with empty content from embedding.
  * Updated embedding-related tests to toggle embedding state during attachment upload under simulated network conditions.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -214,6 +214,21 @@ test('should insert and search embedding', async t => {
|
||||
);
|
||||
t.false(results.includes(docId), 'docs containing `$` should be excluded');
|
||||
}
|
||||
|
||||
{
|
||||
const docId = 'empty_doc';
|
||||
await t.context.doc.upsert({
|
||||
spaceId: workspace.id,
|
||||
docId: docId,
|
||||
blob: Uint8Array.from([0, 0]),
|
||||
timestamp: Date.now(),
|
||||
editorId: user.id,
|
||||
});
|
||||
const results = await t.context.copilotWorkspace.findDocsToEmbed(
|
||||
workspace.id
|
||||
);
|
||||
t.false(results.includes(docId), 'empty documents should be excluded');
|
||||
}
|
||||
});
|
||||
|
||||
test('should check need to be embedded', async t => {
|
||||
|
||||
@@ -58,10 +58,12 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
ON id.workspace_id = s.workspace_id
|
||||
AND id.doc_id = s.guid
|
||||
WHERE s.workspace_id = ${workspaceId}
|
||||
AND s.guid != s.workspace_id
|
||||
AND s.guid <> s.workspace_id
|
||||
AND s.guid NOT LIKE '%$%'
|
||||
AND s.guid NOT LIKE '%:settings:%'
|
||||
AND e.doc_id IS NULL
|
||||
AND id.doc_id IS NULL;`;
|
||||
AND id.doc_id IS NULL
|
||||
AND s.blob <> E'\\\\x0000';`;
|
||||
|
||||
return docIds.map(r => r.id);
|
||||
}
|
||||
@@ -160,6 +162,8 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
|
||||
{ id: { notIn: ignoredDocIds } },
|
||||
{ id: { not: workspaceId } },
|
||||
{ id: { not: { contains: '$' } } },
|
||||
{ id: { not: { contains: ':settings:' } } },
|
||||
{ blob: { not: new Uint8Array([0, 0]) } },
|
||||
],
|
||||
};
|
||||
|
||||
|
||||
@@ -337,6 +337,10 @@ export class CopilotEmbeddingJob {
|
||||
const signal = this.getWorkspaceSignal(workspaceId);
|
||||
|
||||
try {
|
||||
const hasNewDoc = await this.models.doc.exists(
|
||||
workspaceId,
|
||||
docId.split(':space:')[1] || ''
|
||||
);
|
||||
const needEmbedding =
|
||||
await this.models.copilotWorkspace.checkDocNeedEmbedded(
|
||||
workspaceId,
|
||||
@@ -352,8 +356,11 @@ export class CopilotEmbeddingJob {
|
||||
);
|
||||
return;
|
||||
}
|
||||
const fragment = await this.getDocFragment(workspaceId, docId);
|
||||
if (fragment) {
|
||||
// if doc id deprecated, skip embedding and fulfill empty embedding
|
||||
const fragment = !hasNewDoc
|
||||
? await this.getDocFragment(workspaceId, docId)
|
||||
: undefined;
|
||||
if (!hasNewDoc && fragment) {
|
||||
// fast fall for empty doc, journal is easily to create a empty doc
|
||||
if (fragment.summary.trim()) {
|
||||
const embeddings = await this.embeddingClient.getFileEmbeddings(
|
||||
@@ -382,7 +389,7 @@ export class CopilotEmbeddingJob {
|
||||
);
|
||||
await this.fulfillEmptyEmbedding(workspaceId, docId);
|
||||
}
|
||||
} else if (contextId) {
|
||||
} else {
|
||||
this.logger.warn(
|
||||
`Doc ${docId} in workspace ${workspaceId} has no fragment, fulfilling empty embedding.`
|
||||
);
|
||||
|
||||
@@ -844,7 +844,7 @@ export class PromptsManagementResolver {
|
||||
private readonly promptService: PromptService
|
||||
) {}
|
||||
|
||||
@Query(() => Boolean, {
|
||||
@Mutation(() => Boolean, {
|
||||
description: 'Trigger generate missing titles cron job',
|
||||
})
|
||||
async triggerGenerateTitleCron() {
|
||||
|
||||
@@ -1297,6 +1297,9 @@ type Mutation {
|
||||
setBlob(blob: Upload!, workspaceId: String!): String!
|
||||
submitAudioTranscription(blob: Upload, blobId: String!, blobs: [Upload!], workspaceId: String!): TranscriptionResultType
|
||||
|
||||
"""Trigger generate missing titles cron job"""
|
||||
triggerGenerateTitleCron: Boolean!
|
||||
|
||||
"""update app configuration"""
|
||||
updateAppConfig(updates: [UpdateAppConfigInput!]!): JSONObject!
|
||||
|
||||
|
||||
Reference in New Issue
Block a user