mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-09 11:03:43 +00:00
fix(server): process empty doc embedding (#12417)
fix CLOUD-219

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **Bug Fixes**
  - Ensured that documents without content now receive a placeholder embedding, improving consistency in document processing.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -7,7 +7,12 @@ import { chunk } from 'lodash-es';
|
||||
|
||||
import { ChunkSimilarity, Embedding } from '../../../models';
|
||||
import { OpenAIConfig } from '../providers/openai';
|
||||
import { EmbeddingClient, getReRankSchema, ReRankResult } from './types';
|
||||
import {
|
||||
EMBEDDING_DIMENSIONS,
|
||||
EmbeddingClient,
|
||||
getReRankSchema,
|
||||
ReRankResult,
|
||||
} from './types';
|
||||
|
||||
const RERANK_MODEL = 'gpt-4.1-mini';
|
||||
|
||||
@@ -24,7 +29,7 @@ export class OpenAIEmbeddingClient extends EmbeddingClient {
|
||||
|
||||
async getEmbeddings(input: string[]): Promise<Embedding[]> {
|
||||
const modelInstance = this.#instance.embedding('text-embedding-3-large', {
|
||||
dimensions: 1024,
|
||||
dimensions: EMBEDDING_DIMENSIONS,
|
||||
});
|
||||
|
||||
const { embeddings } = await embedMany({
|
||||
@@ -124,7 +129,9 @@ export class MockEmbeddingClient extends EmbeddingClient {
|
||||
return input.map((_, i) => ({
|
||||
index: i,
|
||||
content: input[i],
|
||||
embedding: Array.from({ length: 1024 }, () => Math.random()),
|
||||
embedding: Array.from({ length: EMBEDDING_DIMENSIONS }, () =>
|
||||
Math.random()
|
||||
),
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ import { Models } from '../../../models';
|
||||
import { CopilotStorage } from '../storage';
|
||||
import { readStream } from '../utils';
|
||||
import { OpenAIEmbeddingClient } from './embedding';
|
||||
import { EmbeddingClient } from './types';
|
||||
import { EMBEDDING_DIMENSIONS, EmbeddingClient } from './types';
|
||||
|
||||
@Injectable()
|
||||
export class CopilotContextDocJob {
|
||||
@@ -225,16 +225,29 @@ export class CopilotContextDocJob {
|
||||
const content = await this.doc.getFullDocContent(workspaceId, docId);
|
||||
if (content) {
|
||||
// fast fall for empty doc, journal is easily to create a empty doc
|
||||
if (!content.summary) return;
|
||||
const embeddings = await this.embeddingClient.getFileEmbeddings(
|
||||
new File([content.summary], `${content.title || 'Untitled'}.md`)
|
||||
);
|
||||
if (content.summary) {
|
||||
const embeddings = await this.embeddingClient.getFileEmbeddings(
|
||||
new File([content.summary], `${content.title || 'Untitled'}.md`)
|
||||
);
|
||||
|
||||
for (const chunks of embeddings) {
|
||||
for (const chunks of embeddings) {
|
||||
await this.models.copilotContext.insertWorkspaceEmbedding(
|
||||
workspaceId,
|
||||
docId,
|
||||
chunks
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// for empty doc, insert empty embedding
|
||||
const emptyEmbedding = {
|
||||
index: 0,
|
||||
content: '',
|
||||
embedding: Array.from({ length: EMBEDDING_DIMENSIONS }, () => 0),
|
||||
};
|
||||
await this.models.copilotContext.insertWorkspaceEmbedding(
|
||||
workspaceId,
|
||||
docId,
|
||||
chunks
|
||||
[emptyEmbedding]
|
||||
);
|
||||
}
|
||||
} else if (contextId) {
|
||||
|
||||
@@ -57,6 +57,8 @@ export type Chunk = {
|
||||
content: string;
|
||||
};
|
||||
|
||||
export const EMBEDDING_DIMENSIONS = 1024;
|
||||
|
||||
export abstract class EmbeddingClient {
|
||||
async getFileEmbeddings(
|
||||
file: File,
|
||||
|
||||
Reference in New Issue
Block a user