feat(server): attachment embedding (#13348)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Added support for managing "blobs" in Copilot context, including
adding and removing blobs via new GraphQL mutations and UI fields.
* Introduced tracking and querying of blob embeddings within workspaces,
enabling search and similarity matching for blob content.
* Extended Copilot context and workspace APIs, schema, and UI to display
and manage blobs alongside existing documents and files.

* **Bug Fixes**
* Updated context and embedding status logic to handle blobs, ensuring
accurate status reporting and embedding management.

* **Tests**
* Added and updated test cases and snapshots to cover blob embedding
insertion, matching, and removal scenarios.

* **Documentation**
* Updated GraphQL schema and TypeScript types to reflect new
blob-related fields and mutations.

* **Chores**
* Refactored and cleaned up code to support new blob entity and
embedding logic, including renaming and updating internal methods and
types.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
DarkSky
2025-07-31 06:07:28 +08:00
committed by GitHub
parent b6a5bc052e
commit feb42e34be
24 changed files with 689 additions and 84 deletions

View File

@@ -20,6 +20,7 @@ import { SafeIntResolver } from 'graphql-scalars';
import GraphQLUpload from 'graphql-upload/GraphQLUpload.mjs';
import {
BlobNotFound,
BlobQuotaExceeded,
CallMetric,
CopilotEmbeddingUnavailable,
@@ -37,6 +38,7 @@ import {
import { CurrentUser } from '../../../core/auth';
import { AccessController } from '../../../core/permission';
import {
ContextBlob,
ContextCategories,
ContextCategory,
ContextDoc,
@@ -118,6 +120,24 @@ class RemoveContextFileInput {
fileId!: string;
}
@InputType()
class AddContextBlobInput {
@Field(() => String)
contextId!: string;
@Field(() => String)
blobId!: string;
}
@InputType()
class RemoveContextBlobInput {
@Field(() => String)
contextId!: string;
@Field(() => String)
blobId!: string;
}
@ObjectType('CopilotContext')
export class CopilotContextType {
@Field(() => ID, { nullable: true })
@@ -130,7 +150,24 @@ export class CopilotContextType {
registerEnumType(ContextCategories, { name: 'ContextCategories' });
@ObjectType()
class CopilotDocType implements Omit<ContextDoc, 'status'> {
class CopilotContextCategory implements Omit<ContextCategory, 'docs'> {
@Field(() => ID)
id!: string;
@Field(() => ContextCategories)
type!: ContextCategories;
@Field(() => [CopilotContextDoc])
docs!: CopilotContextDoc[];
@Field(() => SafeIntResolver)
createdAt!: number;
}
registerEnumType(ContextEmbedStatus, { name: 'ContextEmbedStatus' });
@ObjectType()
class CopilotContextBlob implements Omit<ContextBlob, 'status'> {
@Field(() => ID)
id!: string;
@@ -142,28 +179,17 @@ class CopilotDocType implements Omit<ContextDoc, 'status'> {
}
@ObjectType()
class CopilotContextCategory implements Omit<ContextCategory, 'docs'> {
class CopilotContextDoc implements Omit<ContextDoc, 'status'> {
@Field(() => ID)
id!: string;
@Field(() => ContextCategories)
type!: ContextCategories;
@Field(() => [CopilotDocType])
docs!: CopilotDocType[];
@Field(() => ContextEmbedStatus, { nullable: true })
status!: ContextEmbedStatus | null;
@Field(() => SafeIntResolver)
createdAt!: number;
}
registerEnumType(ContextEmbedStatus, { name: 'ContextEmbedStatus' });
@ObjectType()
class CopilotContextDoc extends CopilotDocType {
@Field(() => String, { nullable: true })
error!: string | null;
}
@ObjectType()
class CopilotContextFile implements ContextFile {
@Field(() => ID)
@@ -433,11 +459,33 @@ export class CopilotContextResolver {
return tags;
}
@ResolveField(() => [CopilotContextBlob], {
description: 'list blobs in context',
})
@CallMetric('ai', 'context_blob_list')
async blobs(
@Parent() context: CopilotContextType
): Promise<CopilotContextBlob[]> {
if (!context.id) {
return [];
}
const session = await this.context.get(context.id);
const blobs = session.blobs;
await this.models.copilotContext.mergeBlobStatus(
session.workspaceId,
blobs
);
return blobs.map(blob => ({ ...blob, status: blob.status || null }));
}
@ResolveField(() => [CopilotContextDoc], {
description: 'list files in context',
})
@CallMetric('ai', 'context_file_list')
async docs(@Parent() context: CopilotContextType): Promise<CopilotDocType[]> {
async docs(
@Parent() context: CopilotContextType
): Promise<CopilotContextDoc[]> {
if (!context.id) {
return [];
}
@@ -538,7 +586,7 @@ export class CopilotContextResolver {
async addContextDoc(
@Args({ name: 'options', type: () => AddContextDocInput })
options: AddContextDocInput
): Promise<CopilotDocType> {
): Promise<CopilotContextDoc> {
const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
await using lock = await this.mutex.acquire(lockFlag);
if (!lock) {
@@ -674,6 +722,85 @@ export class CopilotContextResolver {
}
}
@Mutation(() => CopilotContextBlob, {
description: 'add a blob to context',
})
@CallMetric('ai', 'context_blob_add')
async addContextBlob(
@CurrentUser() user: CurrentUser,
@Args({ name: 'options', type: () => AddContextBlobInput })
options: AddContextBlobInput
): Promise<CopilotContextBlob> {
if (!this.context.canEmbedding) {
throw new CopilotEmbeddingUnavailable();
}
const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
await using lock = await this.mutex.acquire(lockFlag);
if (!lock) {
throw new TooManyRequest('Server is busy');
}
const contextSession = await this.context.get(options.contextId);
try {
const blob = await contextSession.addBlobRecord(options.blobId);
if (!blob) {
throw new BlobNotFound({
spaceId: contextSession.workspaceId,
blobId: options.blobId,
});
}
await this.jobs.addBlobEmbeddingQueue({
userId: user.id,
workspaceId: contextSession.workspaceId,
contextId: contextSession.id,
blobId: options.blobId,
});
return { ...blob, status: blob.status || null };
} catch (e: any) {
if (e instanceof UserFriendlyError) {
throw e;
}
throw new CopilotFailedToModifyContext({
contextId: options.contextId,
message: e.message,
});
}
}
@Mutation(() => Boolean, {
description: 'remove a blob from context',
})
@CallMetric('ai', 'context_blob_remove')
async removeContextBlob(
@Args({ name: 'options', type: () => RemoveContextBlobInput })
options: RemoveContextBlobInput
): Promise<boolean> {
if (!this.context.canEmbedding) {
throw new CopilotEmbeddingUnavailable();
}
const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
await using lock = await this.mutex.acquire(lockFlag);
if (!lock) {
throw new TooManyRequest('Server is busy');
}
const contextSession = await this.context.get(options.contextId);
try {
return await contextSession.removeBlobRecord(options.blobId);
} catch (e: any) {
throw new CopilotFailedToModifyContext({
contextId: options.contextId,
message: e.message,
});
}
}
@ResolveField(() => [ContextMatchedFileChunk], {
description: 'match file in context',
})

View File

@@ -147,6 +147,28 @@ export class CopilotContextService implements OnApplicationBootstrap {
return null;
}
async matchWorkspaceBlobs(
workspaceId: string,
content: string,
topK: number = 5,
signal?: AbortSignal,
threshold: number = 0.5
) {
if (!this.embeddingClient) return [];
const embedding = await this.embeddingClient.getEmbedding(content, signal);
if (!embedding) return [];
const blobChunks = await this.models.copilotWorkspace.matchBlobEmbedding(
workspaceId,
embedding,
topK * 2,
threshold
);
if (!blobChunks.length) return [];
return await this.embeddingClient.reRank(content, blobChunks, topK, signal);
}
async matchWorkspaceFiles(
workspaceId: string,
content: string,

View File

@@ -1,13 +1,13 @@
import { nanoid } from 'nanoid';
import {
ContextBlob,
ContextCategories,
ContextCategory,
ContextConfig,
ContextDoc,
ContextEmbedStatus,
ContextFile,
ContextList,
FileChunkSimilarity,
Models,
} from '../../../models';
@@ -47,6 +47,10 @@ export class ContextSession implements AsyncDisposable {
return categories.filter(c => c.type === ContextCategories.Collection);
}
get blobs(): ContextBlob[] {
return this.config.blobs.map(d => ({ ...d }));
}
get docs(): ContextDoc[] {
return this.config.docs.map(d => ({ ...d }));
}
@@ -65,13 +69,6 @@ export class ContextSession implements AsyncDisposable {
);
}
get sortedList(): ContextList {
const { docs, files } = this.config;
return [...docs, ...files].toSorted(
(a, b) => a.createdAt - b.createdAt
) as ContextList;
}
async addCategoryRecord(type: ContextCategories, id: string, docs: string[]) {
const category = this.config.categories.find(
c => c.type === type && c.id === id
@@ -120,6 +117,33 @@ export class ContextSession implements AsyncDisposable {
return true;
}
async addBlobRecord(blobId: string): Promise<ContextBlob | null> {
const existsBlob = this.config.blobs.find(b => b.id === blobId);
if (existsBlob) {
return existsBlob;
}
const blob = await this.models.blob.get(this.config.workspaceId, blobId);
if (!blob) return null;
const record: ContextBlob = {
id: blobId,
createdAt: Date.now(),
status: ContextEmbedStatus.processing,
};
this.config.blobs.push(record);
await this.save();
return record;
}
async removeBlobRecord(blobId: string): Promise<boolean> {
const index = this.config.blobs.findIndex(b => b.id === blobId);
if (index >= 0) {
this.config.blobs.splice(index, 1);
await this.save();
}
return true;
}
async addDocRecord(docId: string): Promise<ContextDoc> {
const doc = this.config.docs.find(f => f.id === docId);
if (doc) {

View File

@@ -65,15 +65,14 @@ export class CopilotEmbeddingJob {
async addFileEmbeddingQueue(file: Jobs['copilot.embedding.files']) {
if (!this.supportEmbedding) return;
const { userId, workspaceId, contextId, blobId, fileId, fileName } = file;
await this.queue.add('copilot.embedding.files', {
userId,
workspaceId,
contextId,
blobId,
fileId,
fileName,
});
await this.queue.add('copilot.embedding.files', file);
}
@CallMetric('ai', 'addBlobEmbeddingQueue')
async addBlobEmbeddingQueue(blob: Jobs['copilot.embedding.blobs']) {
if (!this.supportEmbedding) return;
await this.queue.add('copilot.embedding.blobs', blob);
}
@OnEvent('workspace.doc.embedding')
@@ -288,6 +287,55 @@ export class CopilotEmbeddingJob {
}
}
@OnJob('copilot.embedding.blobs')
async embedPendingBlob({
userId,
workspaceId,
contextId,
blobId,
}: Jobs['copilot.embedding.blobs']) {
if (!this.supportEmbedding || !this.embeddingClient) return;
try {
const file = await this.readCopilotBlob(
userId,
workspaceId,
blobId,
'blob'
);
const chunks = await this.embeddingClient.getFileChunks(file);
const total = chunks.reduce((acc, c) => acc + c.length, 0);
for (const chunk of chunks) {
const embeddings = await this.embeddingClient.generateEmbeddings(chunk);
await this.models.copilotWorkspace.insertBlobEmbeddings(
workspaceId,
blobId,
embeddings
);
}
if (contextId) {
this.event.emit('workspace.blob.embed.finished', {
contextId,
blobId,
chunkSize: total,
});
}
} catch (error: any) {
if (contextId) {
this.event.emit('workspace.blob.embed.failed', {
contextId,
blobId,
error: mapAnyError(error).message,
});
}
throw error;
}
}
private async getDocFragment(
workspaceId: string,
docId: string
@@ -465,7 +513,7 @@ export class CopilotEmbeddingJob {
const docIdsInWorkspace = readAllDocIdsFromWorkspaceSnapshot(snapshot.blob);
const docIdsInEmbedding =
await this.models.copilotContext.listWorkspaceEmbedding(workspaceId);
await this.models.copilotContext.listWorkspaceDocEmbedding(workspaceId);
const docIdsInWorkspaceSet = new Set(docIdsInWorkspace);
const deletedDocIds = docIdsInEmbedding.filter(

View File

@@ -14,6 +14,18 @@ declare global {
enableDocEmbedding?: boolean;
};
'workspace.blob.embed.finished': {
contextId: string;
blobId: string;
chunkSize: number;
};
'workspace.blob.embed.failed': {
contextId: string;
blobId: string;
error: string;
};
'workspace.doc.embedding': Array<{
workspaceId: string;
docId: string;
@@ -62,6 +74,13 @@ declare global {
fileName: string;
};
'copilot.embedding.blobs': {
contextId?: string;
userId: string;
workspaceId: string;
blobId: string;
};
'copilot.embedding.cleanupTrashedDocEmbeddings': {
workspaceId: string;
};