mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-14 13:25:12 +00:00
feat(server): attachment embedding (#13348)
<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Added support for managing "blobs" in Copilot context, including adding and removing blobs via new GraphQL mutations and UI fields.
  * Introduced tracking and querying of blob embeddings within workspaces, enabling search and similarity matching for blob content.
  * Extended Copilot context and workspace APIs, schema, and UI to display and manage blobs alongside existing documents and files.
* **Bug Fixes**
  * Updated context and embedding status logic to handle blobs, ensuring accurate status reporting and embedding management.
* **Tests**
  * Added and updated test cases and snapshots to cover blob embedding insertion, matching, and removal scenarios.
* **Documentation**
  * Updated GraphQL schema and TypeScript types to reflect new blob-related fields and mutations.
* **Chores**
  * Refactored and cleaned up code to support new blob entity and embedding logic, including renaming and updating internal methods and types.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -20,6 +20,7 @@ import { SafeIntResolver } from 'graphql-scalars';
|
||||
import GraphQLUpload from 'graphql-upload/GraphQLUpload.mjs';
|
||||
|
||||
import {
|
||||
BlobNotFound,
|
||||
BlobQuotaExceeded,
|
||||
CallMetric,
|
||||
CopilotEmbeddingUnavailable,
|
||||
@@ -37,6 +38,7 @@ import {
|
||||
import { CurrentUser } from '../../../core/auth';
|
||||
import { AccessController } from '../../../core/permission';
|
||||
import {
|
||||
ContextBlob,
|
||||
ContextCategories,
|
||||
ContextCategory,
|
||||
ContextDoc,
|
||||
@@ -118,6 +120,24 @@ class RemoveContextFileInput {
|
||||
fileId!: string;
|
||||
}
|
||||
|
||||
/** GraphQL input object accepted as `options` by the `addContextBlob` mutation. */
@InputType()
class AddContextBlobInput {
  /** Id of the context session the blob is added to. */
  @Field(() => String)
  contextId!: string;

  /** Id of the blob to add to that context. */
  @Field(() => String)
  blobId!: string;
}
|
||||
|
||||
/** GraphQL input object accepted as `options` by the `removeContextBlob` mutation. */
@InputType()
class RemoveContextBlobInput {
  /** Id of the context session the blob is removed from. */
  @Field(() => String)
  contextId!: string;

  /** Id of the blob to remove from that context. */
  @Field(() => String)
  blobId!: string;
}
|
||||
|
||||
@ObjectType('CopilotContext')
|
||||
export class CopilotContextType {
|
||||
@Field(() => ID, { nullable: true })
|
||||
@@ -130,7 +150,24 @@ export class CopilotContextType {
|
||||
registerEnumType(ContextCategories, { name: 'ContextCategories' });
|
||||
|
||||
@ObjectType()
|
||||
class CopilotDocType implements Omit<ContextDoc, 'status'> {
|
||||
class CopilotContextCategory implements Omit<ContextCategory, 'docs'> {
|
||||
@Field(() => ID)
|
||||
id!: string;
|
||||
|
||||
@Field(() => ContextCategories)
|
||||
type!: ContextCategories;
|
||||
|
||||
@Field(() => [CopilotContextDoc])
|
||||
docs!: CopilotContextDoc[];
|
||||
|
||||
@Field(() => SafeIntResolver)
|
||||
createdAt!: number;
|
||||
}
|
||||
|
||||
registerEnumType(ContextEmbedStatus, { name: 'ContextEmbedStatus' });
|
||||
|
||||
@ObjectType()
|
||||
class CopilotContextBlob implements Omit<ContextBlob, 'status'> {
|
||||
@Field(() => ID)
|
||||
id!: string;
|
||||
|
||||
@@ -142,28 +179,17 @@ class CopilotDocType implements Omit<ContextDoc, 'status'> {
|
||||
}
|
||||
|
||||
@ObjectType()
|
||||
class CopilotContextCategory implements Omit<ContextCategory, 'docs'> {
|
||||
class CopilotContextDoc implements Omit<ContextDoc, 'status'> {
|
||||
@Field(() => ID)
|
||||
id!: string;
|
||||
|
||||
@Field(() => ContextCategories)
|
||||
type!: ContextCategories;
|
||||
|
||||
@Field(() => [CopilotDocType])
|
||||
docs!: CopilotDocType[];
|
||||
@Field(() => ContextEmbedStatus, { nullable: true })
|
||||
status!: ContextEmbedStatus | null;
|
||||
|
||||
@Field(() => SafeIntResolver)
|
||||
createdAt!: number;
|
||||
}
|
||||
|
||||
registerEnumType(ContextEmbedStatus, { name: 'ContextEmbedStatus' });
|
||||
|
||||
@ObjectType()
|
||||
class CopilotContextDoc extends CopilotDocType {
|
||||
@Field(() => String, { nullable: true })
|
||||
error!: string | null;
|
||||
}
|
||||
|
||||
@ObjectType()
|
||||
class CopilotContextFile implements ContextFile {
|
||||
@Field(() => ID)
|
||||
@@ -433,11 +459,33 @@ export class CopilotContextResolver {
|
||||
return tags;
|
||||
}
|
||||
|
||||
/**
 * Resolves the `blobs` field of a Copilot context.
 *
 * A parent context without an id yields an empty list. Otherwise the context
 * session is loaded, its blob records are handed to
 * `copilotContext.mergeBlobStatus` together with the session's workspace id,
 * and the records are returned with any falsy `status` replaced by `null`.
 *
 * @param context parent context object being resolved
 * @returns the blob records of the context
 */
@ResolveField(() => [CopilotContextBlob], {
  description: 'list blobs in context',
})
@CallMetric('ai', 'context_blob_list')
async blobs(
  @Parent() context: CopilotContextType
): Promise<CopilotContextBlob[]> {
  const contextId = context.id;
  if (!contextId) return [];

  const session = await this.context.get(contextId);
  const records = session.blobs;

  // NOTE(review): the return value is ignored, so mergeBlobStatus presumably
  // updates `records` in place — confirm against the model implementation.
  await this.models.copilotContext.mergeBlobStatus(
    session.workspaceId,
    records
  );

  return records.map(record => ({
    ...record,
    status: record.status || null,
  }));
}
|
||||
|
||||
@ResolveField(() => [CopilotContextDoc], {
|
||||
description: 'list files in context',
|
||||
})
|
||||
@CallMetric('ai', 'context_file_list')
|
||||
async docs(@Parent() context: CopilotContextType): Promise<CopilotDocType[]> {
|
||||
async docs(
|
||||
@Parent() context: CopilotContextType
|
||||
): Promise<CopilotContextDoc[]> {
|
||||
if (!context.id) {
|
||||
return [];
|
||||
}
|
||||
@@ -538,7 +586,7 @@ export class CopilotContextResolver {
|
||||
async addContextDoc(
|
||||
@Args({ name: 'options', type: () => AddContextDocInput })
|
||||
options: AddContextDocInput
|
||||
): Promise<CopilotDocType> {
|
||||
): Promise<CopilotContextDoc> {
|
||||
const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
|
||||
await using lock = await this.mutex.acquire(lockFlag);
|
||||
if (!lock) {
|
||||
@@ -674,6 +722,85 @@ export class CopilotContextResolver {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Adds a blob to a Copilot context and enqueues a job to embed it.
 *
 * The context is locked under `${COPILOT_LOCKER}:context:<contextId>` for the
 * duration of the call. The blob record is created through the context
 * session, then a `copilot.embedding.blobs` job is queued for it.
 *
 * @param user current user; its id is attached to the embedding job
 * @param options target context id and blob id
 * @returns the blob record, with a falsy `status` replaced by `null`
 * @throws CopilotEmbeddingUnavailable when `this.context.canEmbedding` is falsy
 * @throws TooManyRequest when the context lock cannot be acquired
 * @throws BlobNotFound when the session returns no record for the blob id
 * @throws CopilotFailedToModifyContext when anything inside the guarded
 *   section fails with an error that is not a `UserFriendlyError`
 */
@Mutation(() => CopilotContextBlob, {
  description: 'add a blob to context',
})
@CallMetric('ai', 'context_blob_add')
async addContextBlob(
  @CurrentUser() user: CurrentUser,
  @Args({ name: 'options', type: () => AddContextBlobInput })
  options: AddContextBlobInput
): Promise<CopilotContextBlob> {
  if (!this.context.canEmbedding) {
    throw new CopilotEmbeddingUnavailable();
  }

  const { contextId, blobId } = options;

  // One lock per context id; a falsy lock means it could not be acquired.
  await using lock = await this.mutex.acquire(
    `${COPILOT_LOCKER}:context:${contextId}`
  );
  if (!lock) {
    throw new TooManyRequest('Server is busy');
  }

  const session = await this.context.get(contextId);

  try {
    const record = await session.addBlobRecord(blobId);
    if (!record) {
      throw new BlobNotFound({ spaceId: session.workspaceId, blobId });
    }

    await this.jobs.addBlobEmbeddingQueue({
      userId: user.id,
      workspaceId: session.workspaceId,
      contextId: session.id,
      blobId,
    });

    return { ...record, status: record.status || null };
  } catch (e: any) {
    // Only errors that are not already user-friendly get wrapped; the rest
    // are rethrown untouched.
    if (!(e instanceof UserFriendlyError)) {
      throw new CopilotFailedToModifyContext({
        contextId,
        message: e.message,
      });
    }
    throw e;
  }
}
|
||||
|
||||
/**
 * Removes a blob from a Copilot context.
 *
 * The context is locked under `${COPILOT_LOCKER}:context:<contextId>` while
 * the session's blob record is removed.
 *
 * @param options target context id and blob id
 * @returns whatever the session's `removeBlobRecord` resolves to
 * @throws CopilotEmbeddingUnavailable when `this.context.canEmbedding` is falsy
 * @throws TooManyRequest when the context lock cannot be acquired
 * @throws CopilotFailedToModifyContext when removing the record fails; every
 *   error raised by the removal is wrapped, whatever its type
 */
@Mutation(() => Boolean, {
  description: 'remove a blob from context',
})
@CallMetric('ai', 'context_blob_remove')
async removeContextBlob(
  @Args({ name: 'options', type: () => RemoveContextBlobInput })
  options: RemoveContextBlobInput
): Promise<boolean> {
  if (!this.context.canEmbedding) {
    throw new CopilotEmbeddingUnavailable();
  }

  const { contextId, blobId } = options;

  // One lock per context id; a falsy lock means it could not be acquired.
  await using lock = await this.mutex.acquire(
    `${COPILOT_LOCKER}:context:${contextId}`
  );
  if (!lock) {
    throw new TooManyRequest('Server is busy');
  }

  const session = await this.context.get(contextId);

  try {
    const removed = await session.removeBlobRecord(blobId);
    return removed;
  } catch (e: any) {
    throw new CopilotFailedToModifyContext({
      contextId,
      message: e.message,
    });
  }
}
|
||||
|
||||
@ResolveField(() => [ContextMatchedFileChunk], {
|
||||
description: 'match file in context',
|
||||
})
|
||||
|
||||
@@ -147,6 +147,28 @@ export class CopilotContextService implements OnApplicationBootstrap {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Finds blob chunks in a workspace that match the given text.
 *
 * The text is embedded with the embedding client, `topK * 2` candidates are
 * requested from `copilotWorkspace.matchBlobEmbedding`, and the candidates
 * are then passed to the client's `reRank` with a limit of `topK`.
 *
 * @param workspaceId workspace whose blob embeddings are searched
 * @param content query text
 * @param topK number of results requested from the re-ranker (default 5)
 * @param signal optional abort signal forwarded to the embedding client
 * @param threshold value forwarded to `matchBlobEmbedding` (default 0.5)
 * @returns the re-ranked chunks, or an empty array when there is no embedding
 *   client, no embedding could be produced, or nothing matched
 */
async matchWorkspaceBlobs(
  workspaceId: string,
  content: string,
  topK: number = 5,
  signal?: AbortSignal,
  threshold: number = 0.5
) {
  if (!this.embeddingClient) {
    return [];
  }

  const queryEmbedding = await this.embeddingClient.getEmbedding(
    content,
    signal
  );
  if (!queryEmbedding) {
    return [];
  }

  // Over-fetch (twice topK) so the re-ranker has more candidates to pick from.
  const candidates = await this.models.copilotWorkspace.matchBlobEmbedding(
    workspaceId,
    queryEmbedding,
    topK * 2,
    threshold
  );
  if (candidates.length === 0) {
    return [];
  }

  const ranked = await this.embeddingClient.reRank(
    content,
    candidates,
    topK,
    signal
  );
  return ranked;
}
|
||||
|
||||
async matchWorkspaceFiles(
|
||||
workspaceId: string,
|
||||
content: string,
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import { nanoid } from 'nanoid';
|
||||
|
||||
import {
|
||||
ContextBlob,
|
||||
ContextCategories,
|
||||
ContextCategory,
|
||||
ContextConfig,
|
||||
ContextDoc,
|
||||
ContextEmbedStatus,
|
||||
ContextFile,
|
||||
ContextList,
|
||||
FileChunkSimilarity,
|
||||
Models,
|
||||
} from '../../../models';
|
||||
@@ -47,6 +47,10 @@ export class ContextSession implements AsyncDisposable {
|
||||
return categories.filter(c => c.type === ContextCategories.Collection);
|
||||
}
|
||||
|
||||
/** Shallow copies of the blob records stored in this session's config. */
get blobs(): ContextBlob[] {
  const { blobs } = this.config;
  return blobs.map(blob => ({ ...blob }));
}
|
||||
|
||||
/** Shallow copies of the doc records stored in this session's config. */
get docs(): ContextDoc[] {
  const { docs } = this.config;
  return docs.map(doc => ({ ...doc }));
}
|
||||
@@ -65,13 +69,6 @@ export class ContextSession implements AsyncDisposable {
|
||||
);
|
||||
}
|
||||
|
||||
get sortedList(): ContextList {
|
||||
const { docs, files } = this.config;
|
||||
return [...docs, ...files].toSorted(
|
||||
(a, b) => a.createdAt - b.createdAt
|
||||
) as ContextList;
|
||||
}
|
||||
|
||||
async addCategoryRecord(type: ContextCategories, id: string, docs: string[]) {
|
||||
const category = this.config.categories.find(
|
||||
c => c.type === type && c.id === id
|
||||
@@ -120,6 +117,33 @@ export class ContextSession implements AsyncDisposable {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Registers a blob in this context session.
 *
 * If the config already holds a record with this id, that record is returned
 * as-is. Otherwise the blob is looked up through `models.blob.get` in the
 * session's workspace; when it exists, a new record with status `processing`
 * is appended to the config and the session is saved.
 *
 * @param blobId id of the blob to register
 * @returns the existing or newly created record, or `null` when the blob
 *   lookup returns nothing
 */
async addBlobRecord(blobId: string): Promise<ContextBlob | null> {
  const known = this.config.blobs.find(entry => entry.id === blobId);
  if (known) return known;

  const stored = await this.models.blob.get(this.config.workspaceId, blobId);
  if (!stored) {
    return null;
  }

  const record: ContextBlob = {
    id: blobId,
    createdAt: Date.now(),
    status: ContextEmbedStatus.processing,
  };
  this.config.blobs.push(record);
  await this.save();

  return record;
}
|
||||
|
||||
/**
 * Drops the first blob record with the given id from this session's config
 * and saves the session. Nothing is saved when no record matches.
 *
 * @param blobId id of the blob record to remove
 * @returns always `true`, whether or not a record was found
 */
async removeBlobRecord(blobId: string): Promise<boolean> {
  const position = this.config.blobs.findIndex(entry => entry.id === blobId);
  if (position < 0) {
    return true;
  }

  this.config.blobs.splice(position, 1);
  await this.save();
  return true;
}
|
||||
|
||||
async addDocRecord(docId: string): Promise<ContextDoc> {
|
||||
const doc = this.config.docs.find(f => f.id === docId);
|
||||
if (doc) {
|
||||
|
||||
@@ -65,15 +65,14 @@ export class CopilotEmbeddingJob {
|
||||
async addFileEmbeddingQueue(file: Jobs['copilot.embedding.files']) {
|
||||
if (!this.supportEmbedding) return;
|
||||
|
||||
const { userId, workspaceId, contextId, blobId, fileId, fileName } = file;
|
||||
await this.queue.add('copilot.embedding.files', {
|
||||
userId,
|
||||
workspaceId,
|
||||
contextId,
|
||||
blobId,
|
||||
fileId,
|
||||
fileName,
|
||||
});
|
||||
await this.queue.add('copilot.embedding.files', file);
|
||||
}
|
||||
|
||||
/**
 * Enqueues a `copilot.embedding.blobs` job with the given payload.
 * Does nothing when `supportEmbedding` is falsy.
 *
 * @param blob job payload, forwarded to the queue unchanged
 */
@CallMetric('ai', 'addBlobEmbeddingQueue')
async addBlobEmbeddingQueue(blob: Jobs['copilot.embedding.blobs']) {
  if (this.supportEmbedding) {
    await this.queue.add('copilot.embedding.blobs', blob);
  }
}
|
||||
|
||||
@OnEvent('workspace.doc.embedding')
|
||||
@@ -288,6 +287,55 @@ export class CopilotEmbeddingJob {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handler for `copilot.embedding.blobs` jobs: embeds one blob and stores the
 * result for its workspace.
 *
 * The blob is read via `readCopilotBlob`, split into chunk groups by the
 * embedding client, and each group is embedded and inserted through
 * `copilotWorkspace.insertBlobEmbeddings` in turn. When the job carries a
 * `contextId`, `workspace.blob.embed.finished` is emitted on success and
 * `workspace.blob.embed.failed` on failure. The original error is always
 * rethrown after the failure event.
 *
 * Returns without doing anything when embedding is unsupported or no
 * embedding client is available.
 */
@OnJob('copilot.embedding.blobs')
async embedPendingBlob({
  userId,
  workspaceId,
  contextId,
  blobId,
}: Jobs['copilot.embedding.blobs']) {
  if (!this.supportEmbedding || !this.embeddingClient) return;

  try {
    const file = await this.readCopilotBlob(
      userId,
      workspaceId,
      blobId,
      'blob'
    );
    const chunkGroups = await this.embeddingClient.getFileChunks(file);

    // Sum of the group lengths, reported as `chunkSize` once embedding is done.
    let total = 0;
    for (const group of chunkGroups) {
      total += group.length;
    }

    // Groups are processed sequentially, one insert per group.
    for (const group of chunkGroups) {
      const embeddings = await this.embeddingClient.generateEmbeddings(group);
      await this.models.copilotWorkspace.insertBlobEmbeddings(
        workspaceId,
        blobId,
        embeddings
      );
    }

    if (contextId) {
      this.event.emit('workspace.blob.embed.finished', {
        contextId,
        blobId,
        chunkSize: total,
      });
    }
  } catch (error: any) {
    // Only jobs tied to a context emit the failure event.
    if (contextId) {
      const { message } = mapAnyError(error);
      this.event.emit('workspace.blob.embed.failed', {
        contextId,
        blobId,
        error: message,
      });
    }

    throw error;
  }
}
|
||||
|
||||
private async getDocFragment(
|
||||
workspaceId: string,
|
||||
docId: string
|
||||
@@ -465,7 +513,7 @@ export class CopilotEmbeddingJob {
|
||||
|
||||
const docIdsInWorkspace = readAllDocIdsFromWorkspaceSnapshot(snapshot.blob);
|
||||
const docIdsInEmbedding =
|
||||
await this.models.copilotContext.listWorkspaceEmbedding(workspaceId);
|
||||
await this.models.copilotContext.listWorkspaceDocEmbedding(workspaceId);
|
||||
const docIdsInWorkspaceSet = new Set(docIdsInWorkspace);
|
||||
|
||||
const deletedDocIds = docIdsInEmbedding.filter(
|
||||
|
||||
@@ -14,6 +14,18 @@ declare global {
|
||||
enableDocEmbedding?: boolean;
|
||||
};
|
||||
|
||||
'workspace.blob.embed.finished': {
|
||||
contextId: string;
|
||||
blobId: string;
|
||||
chunkSize: number;
|
||||
};
|
||||
|
||||
'workspace.blob.embed.failed': {
|
||||
contextId: string;
|
||||
blobId: string;
|
||||
error: string;
|
||||
};
|
||||
|
||||
'workspace.doc.embedding': Array<{
|
||||
workspaceId: string;
|
||||
docId: string;
|
||||
@@ -62,6 +74,13 @@ declare global {
|
||||
fileName: string;
|
||||
};
|
||||
|
||||
'copilot.embedding.blobs': {
|
||||
contextId?: string;
|
||||
userId: string;
|
||||
workspaceId: string;
|
||||
blobId: string;
|
||||
};
|
||||
|
||||
'copilot.embedding.cleanupTrashedDocEmbeddings': {
|
||||
workspaceId: string;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user