feat(server): increase embedding jobs concurrency & handle empty content after trim (#12574)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **Improvements**
  - Increased the default concurrency for background tasks, enhancing processing efficiency.
  - Improved handling of empty or unsupported documents to ensure consistent processing.
  - Optimized document filtering to exclude certain documents from processing, improving performance.

- **Bug Fixes**
  - Enhanced detection of empty document summaries, reducing errors during processing.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
darkskygit
2025-05-27 14:28:34 +00:00
parent 7eb6b268a6
commit 9220b973c7
6 changed files with 54 additions and 23 deletions

View File

@@ -4,6 +4,7 @@ import {
AFFiNELogger,
BlobNotFound,
Config,
CopilotContextFileNotSupported,
DocNotFound,
EventBus,
JobQueue,
@@ -300,6 +301,19 @@ export class CopilotContextDocJob {
return controller.signal;
}
/**
 * Stores a placeholder embedding for a document that has nothing to embed.
 *
 * Writes exactly one entry (index 0, empty content, all-zero vector of
 * EMBEDDING_DIMENSIONS entries) through
 * `models.copilotContext.insertWorkspaceEmbedding`.
 *
 * @param workspaceId - workspace that owns the document
 * @param docId - document receiving the placeholder embedding
 */
private async fulfillEmptyEmbedding(workspaceId: string, docId: string) {
  // zero-filled vector with one slot per embedding dimension
  const zeroVector = Array.from({ length: EMBEDDING_DIMENSIONS }, () => 0);
  const placeholder = { index: 0, content: '', embedding: zeroVector };

  await this.models.copilotContext.insertWorkspaceEmbedding(workspaceId, docId, [
    placeholder,
  ]);
}
@OnJob('copilot.embedding.docs')
async embedPendingDocs({
contextId,
@@ -321,7 +335,7 @@ export class CopilotContextDocJob {
const fragment = await this.getDocFragment(workspaceId, docId);
if (fragment) {
// fast path for empty docs: a journal can easily produce an empty doc
if (fragment.summary) {
if (fragment.summary.trim()) {
const embeddings = await this.embeddingClient.getFileEmbeddings(
new File(
[fragment.summary],
@@ -340,16 +354,7 @@ export class CopilotContextDocJob {
}
} else {
// for empty doc, insert empty embedding
const emptyEmbedding = {
index: 0,
content: '',
embedding: Array.from({ length: EMBEDDING_DIMENSIONS }, () => 0),
};
await this.models.copilotContext.insertWorkspaceEmbedding(
workspaceId,
docId,
[emptyEmbedding]
);
await this.fulfillEmptyEmbedding(workspaceId, docId);
}
} else if (contextId) {
throw new DocNotFound({ spaceId: workspaceId, docId });
@@ -362,6 +367,14 @@ export class CopilotContextDocJob {
docId,
});
}
if (
error instanceof CopilotContextFileNotSupported &&
error.message.includes('no content found')
) {
// if the doc is empty, we still need to fulfill the embedding
await this.fulfillEmptyEmbedding(workspaceId, docId);
return;
}
// passthrough error to job queue
throw error;