feat(server): attachment embedding (#13348)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

* **New Features**
  * Added support for managing "blobs" in Copilot context, including adding and removing blobs via new GraphQL mutations and UI fields.
  * Introduced tracking and querying of blob embeddings within workspaces, enabling search and similarity matching for blob content.
  * Extended Copilot context and workspace APIs, schema, and UI to display and manage blobs alongside existing documents and files.

* **Bug Fixes**
  * Updated context and embedding status logic to handle blobs, ensuring accurate status reporting and embedding management.

* **Tests**
  * Added and updated test cases and snapshots to cover blob embedding insertion, matching, and removal scenarios.

* **Documentation**
  * Updated the GraphQL schema and TypeScript types to reflect the new blob-related fields and mutations.

* **Chores**
  * Refactored and cleaned up code to support the new blob entity and embedding logic, including renaming and updating internal methods and types.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
DarkSky
2025-07-31 06:07:28 +08:00
committed by GitHub
parent b6a5bc052e
commit feb42e34be
24 changed files with 689 additions and 84 deletions
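
As a quick orientation before the diffs, here is a minimal client-side sketch of the two new mutations. It is illustrative only and not part of this commit: it assumes the generated `addContextBlobMutation` / `removeContextBlobMutation` helpers are exported from `@affine/graphql` (like the existing generated types), that the server exposes a standard `/graphql` HTTP endpoint, and that the `contextId` / `blobId` values are placeholders.

```ts
import {
  addContextBlobMutation,
  removeContextBlobMutation,
} from '@affine/graphql';

// Tiny illustrative GraphQL-over-HTTP helper; a real client would reuse the app's fetcher.
async function gql<T>(query: string, variables: object): Promise<T> {
  const res = await fetch('/graphql', {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ query, variables }),
  });
  const { data, errors } = await res.json();
  if (errors?.length) throw new Error(errors[0].message);
  return data as T;
}

// Attach a blob to a copilot context: the server records it in the context config,
// queues it for embedding, and returns { id, createdAt, status } with status `processing`.
await gql(addContextBlobMutation.query, {
  options: { contextId: 'context-id', blobId: 'blob-id' },
});

// Detach the blob from the context config again (returns a boolean).
await gql(removeContextBlobMutation.query, {
  options: { contextId: 'context-id', blobId: 'blob-id' },
});
```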

View File

@@ -0,0 +1,37 @@
/*
  Warnings:

  - The primary key for the `ai_workspace_embeddings` table will be changed. If it partially fails, the table could be left without primary key constraint.
  - The primary key for the `ai_workspace_file_embeddings` table will be changed. If it partially fails, the table could be left without primary key constraint.
*/
DO $$
BEGIN
  IF EXISTS (SELECT 1 FROM pg_tables WHERE tablename = 'ai_workspace_embeddings') AND
     EXISTS (SELECT 1 FROM pg_tables WHERE tablename = 'ai_workspace_file_embeddings') THEN
    -- CreateTable
    CREATE TABLE "ai_workspace_blob_embeddings" (
      "workspace_id" VARCHAR NOT NULL,
      "blob_id" VARCHAR NOT NULL,
      "chunk" INTEGER NOT NULL,
      "content" VARCHAR NOT NULL,
      "embedding" vector(1024) NOT NULL,
      "created_at" TIMESTAMPTZ(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,

      CONSTRAINT "ai_workspace_blob_embeddings_pkey" PRIMARY KEY ("workspace_id","blob_id","chunk")
    );

    -- CreateIndex
    CREATE INDEX "ai_workspace_blob_embeddings_idx" ON "ai_workspace_blob_embeddings"
      USING hnsw (embedding vector_cosine_ops);

    -- AddForeignKey
    ALTER TABLE "ai_workspace_blob_embeddings"
      ADD CONSTRAINT "ai_workspace_blob_embeddings_workspace_id_blob_id_fkey"
      FOREIGN KEY ("workspace_id", "blob_id")
      REFERENCES "blobs"("workspace_id", "key")
      ON DELETE CASCADE ON UPDATE CASCADE;
  END IF;
END
$$;

View File

@@ -568,6 +568,23 @@ model AiWorkspaceFileEmbedding {
@@map("ai_workspace_file_embeddings")
}
model AiWorkspaceBlobEmbedding {
  workspaceId String   @map("workspace_id") @db.VarChar
  blobId      String   @map("blob_id") @db.VarChar
  // a blob can be divided into multiple chunks and embedded separately.
  chunk       Int      @db.Integer
  content     String   @db.VarChar
  embedding   Unsupported("vector(1024)")
  createdAt   DateTime @default(now()) @map("created_at") @db.Timestamptz(3)

  blob Blob @relation(fields: [workspaceId, blobId], references: [workspaceId, key], onDelete: Cascade)

  @@id([workspaceId, blobId, chunk])
  @@index([embedding], map: "ai_workspace_blob_embeddings_idx")
  @@map("ai_workspace_blob_embeddings")
}
enum AiJobStatus {
pending
running
@@ -807,7 +824,8 @@ model Blob {
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz(3)
deletedAt DateTime? @map("deleted_at") @db.Timestamptz(3)
workspace Workspace @relation(fields: [workspaceId], references: [id], onDelete: Cascade)
AiWorkspaceBlobEmbedding AiWorkspaceBlobEmbedding[]
@@id([workspaceId, key])
@@map("blobs")

View File

@@ -1520,14 +1520,17 @@ test('TextStreamParser should process a sequence of message chunks', t => {
// ==================== context ====================
test('should be able to manage context', async t => {
const { context, prompt, session, event, jobs, storage } = t.context;
const { context, db, event, jobs, prompt, session, storage, workspace } =
t.context;
const ws = await workspace.create(userId);
await prompt.set(promptName, 'model', [
{ role: 'system', content: 'hello {{word}}' },
]);
const chatSession = await session.create({
docId: 'test',
workspaceId: 'test',
workspaceId: ws.id,
userId,
promptName,
pinned: false,
@@ -1608,6 +1611,36 @@ test('should be able to manage context', async t => {
t.is(result[0].fileId, file.id, 'should match file id');
}
// blob record
{
const blobId = 'test-blob';
await storage.put(userId, session.workspaceId, blobId, buffer);
await db.blob.create({
data: {
workspaceId: session.workspaceId,
key: blobId,
size: buffer.length,
mime: 'application/pdf',
},
});
await jobs.embedPendingBlob({
userId,
workspaceId: session.workspaceId,
blobId,
});
const result = await t.context.context.matchWorkspaceBlobs(
session.workspaceId,
'test',
1,
undefined,
1
);
t.is(result.length, 1, 'should match blob embedding');
t.is(result[0].blobId, blobId, 'should match blob id');
}
// doc record
const addDoc = async () => {

View File

@@ -74,6 +74,17 @@ Generated by [AVA](https://avajs.dev).
},
]
> should match workspace blob embedding
[
{
blobId: 'blob-test',
chunk: 0,
content: 'blob content',
distance: 0,
},
]
> should find docs to embed
1

View File

@@ -89,13 +89,14 @@ test('should get null for non-exist job', async t => {
test('should update context', async t => {
const { id: contextId } = await t.context.copilotContext.create(sessionId);
const config = await t.context.copilotContext.getConfig(contextId);
const config = (await t.context.copilotContext.getConfig(contextId))!;
t.assert(config, 'should get context config');
const doc = {
id: docId,
createdAt: Date.now(),
};
config?.docs.push(doc);
config.docs.push(doc);
await t.context.copilotContext.update(contextId, { config });
const config1 = await t.context.copilotContext.getConfig(contextId);
@@ -164,7 +165,7 @@ test('should insert embedding by doc id', async t => {
);
{
const ret = await t.context.copilotContext.listWorkspaceEmbedding(
const ret = await t.context.copilotContext.listWorkspaceDocEmbedding(
workspace.id,
[docId]
);
@@ -320,7 +321,7 @@ test('should merge doc status correctly', async t => {
const hasEmbeddingStub = Sinon.stub(
t.context.copilotContext,
'listWorkspaceEmbedding'
'listWorkspaceDocEmbedding'
).resolves([]);
const stubResult = await t.context.copilotContext.mergeDocStatus(

View File

@@ -145,6 +145,52 @@ test('should insert and search embedding', async t => {
}
}
{
await t.context.db.blob.create({
data: {
workspaceId: workspace.id,
key: 'blob-test',
mime: 'text/plain',
size: 1,
},
});
const blobId = 'blob-test';
await t.context.copilotWorkspace.insertBlobEmbeddings(
workspace.id,
blobId,
[
{
index: 0,
content: 'blob content',
embedding: Array.from({ length: 1024 }, () => 1),
},
]
);
{
const ret = await t.context.copilotWorkspace.matchBlobEmbedding(
workspace.id,
Array.from({ length: 1024 }, () => 0.9),
1,
1
);
t.snapshot(cleanObject(ret), 'should match workspace blob embedding');
}
await t.context.copilotWorkspace.removeBlob(workspace.id, blobId);
{
const ret = await t.context.copilotWorkspace.matchBlobEmbedding(
workspace.id,
Array.from({ length: 1024 }, () => 0.9),
1,
1
);
t.deepEqual(ret, [], 'should not match after removal');
}
}
{
const docId = randomUUID();
await t.context.doc.upsert({

View File

@@ -369,7 +369,6 @@ export async function listContextDocAndFiles(
docs {
id
status
error
createdAt
}
files {

View File

@@ -37,6 +37,11 @@ const ContextEmbedStatusSchema = z.enum([
ContextEmbedStatus.failed,
]);
const ContextBlobSchema = z.object({
id: z.string(),
createdAt: z.number(),
});
const ContextDocSchema = z.object({
id: z.string(),
createdAt: z.number(),
@@ -64,6 +69,9 @@ export const ContextCategorySchema = z.object({
export const ContextConfigSchema = z.object({
workspaceId: z.string(),
blobs: ContextBlobSchema.merge(
z.object({ status: ContextEmbedStatusSchema.optional() })
).array(),
files: ContextFileSchema.array(),
docs: ContextDocSchema.merge(
z.object({ status: ContextEmbedStatusSchema.optional() })
@@ -77,10 +85,9 @@ export const MinimalContextConfigSchema = ContextConfigSchema.pick({
export type ContextCategory = z.infer<typeof ContextCategorySchema>;
export type ContextConfig = z.infer<typeof ContextConfigSchema>;
export type ContextBlob = z.infer<typeof ContextConfigSchema>['blobs'][number];
export type ContextDoc = z.infer<typeof ContextConfigSchema>['docs'][number];
export type ContextFile = z.infer<typeof ContextConfigSchema>['files'][number];
export type ContextListItem = ContextDoc | ContextFile;
export type ContextList = ContextListItem[];
// embeddings
@@ -106,6 +113,10 @@ export type FileChunkSimilarity = ChunkSimilarity & {
mimeType: string;
};
export type BlobChunkSimilarity = ChunkSimilarity & {
blobId: string;
};
export type DocChunkSimilarity = ChunkSimilarity & {
docId: string;
};

View File

@@ -6,6 +6,7 @@ import { Prisma } from '@prisma/client';
import { CopilotSessionNotFound } from '../base';
import { BaseModel } from './base';
import {
ContextBlob,
ContextConfigSchema,
ContextDoc,
ContextEmbedStatus,
@@ -39,6 +40,7 @@ export class CopilotContextModel extends BaseModel {
sessionId,
config: {
workspaceId: session.workspaceId,
blobs: [],
docs: [],
files: [],
categories: [],
@@ -66,10 +68,11 @@ export class CopilotContextModel extends BaseModel {
if (minimalConfig.success) {
// fulfill the missing fields
return {
...minimalConfig.data,
blobs: [],
docs: [],
files: [],
categories: [],
...minimalConfig.data,
};
}
}
@@ -83,10 +86,35 @@ export class CopilotContextModel extends BaseModel {
return row;
}
async mergeBlobStatus(
workspaceId: string,
blobs: ContextBlob[]
): Promise<ContextBlob[]> {
const canEmbedding = await this.checkEmbeddingAvailable();
const finishedBlobs = canEmbedding
? await this.listWorkspaceBlobEmbedding(
workspaceId,
Array.from(new Set(blobs.map(blob => blob.id)))
)
: [];
const finishedBlobSet = new Set(finishedBlobs);
for (const blob of blobs) {
const status = finishedBlobSet.has(blob.id)
? ContextEmbedStatus.finished
: undefined;
// NOTE: when the blob has not yet been synchronized to the server or is still in the embedding
// queue, the status will be empty; fall back to `processing` if no status is provided
blob.status = status || blob.status || ContextEmbedStatus.processing;
}
return blobs;
}
async mergeDocStatus(workspaceId: string, docs: ContextDoc[]) {
const canEmbedding = await this.checkEmbeddingAvailable();
const finishedDoc = canEmbedding
? await this.listWorkspaceEmbedding(
? await this.listWorkspaceDocEmbedding(
workspaceId,
Array.from(new Set(docs.map(doc => doc.id)))
)
@@ -126,7 +154,23 @@ export class CopilotContextModel extends BaseModel {
return Number(count) === 2;
}
async listWorkspaceEmbedding(workspaceId: string, docIds?: string[]) {
async listWorkspaceBlobEmbedding(
workspaceId: string,
blobIds?: string[]
): Promise<string[]> {
const existsIds = await this.db.aiWorkspaceBlobEmbedding
.groupBy({
where: {
workspaceId,
blobId: blobIds ? { in: blobIds } : undefined,
},
by: ['blobId'],
})
.then(r => r.map(r => r.blobId));
return existsIds;
}
async listWorkspaceDocEmbedding(workspaceId: string, docIds?: string[]) {
const existsIds = await this.db.aiWorkspaceEmbedding
.groupBy({
where: {

View File

@@ -7,6 +7,7 @@ import { Prisma, PrismaClient } from '@prisma/client';
import { PaginationInput } from '../base';
import { BaseModel } from './base';
import type {
BlobChunkSimilarity,
CopilotWorkspaceFile,
CopilotWorkspaceFileMetadata,
Embedding,
@@ -256,19 +257,19 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
async checkEmbeddingAvailable(): Promise<boolean> {
const [{ count }] = await this.db.$queryRaw<
{ count: number }[]
>`SELECT count(1) FROM pg_tables WHERE tablename in ('ai_workspace_embeddings', 'ai_workspace_file_embeddings')`;
return Number(count) === 2;
>`SELECT count(1) FROM pg_tables WHERE tablename in ('ai_workspace_embeddings', 'ai_workspace_file_embeddings', 'ai_workspace_blob_embeddings')`;
return Number(count) === 3;
}
private processEmbeddings(
workspaceId: string,
fileId: string,
fileOrBlobId: string,
embeddings: Embedding[]
) {
const groups = embeddings.map(e =>
[
workspaceId,
fileId,
fileOrBlobId,
e.index,
e.content,
Prisma.raw(`'[${e.embedding.join(',')}]'`),
@@ -378,6 +379,61 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
return similarityChunks.filter(c => Number(c.distance) <= threshold);
}
@Transactional()
async insertBlobEmbeddings(
workspaceId: string,
blobId: string,
embeddings: Embedding[]
) {
if (embeddings.length === 0) {
this.logger.warn(
`No embeddings provided for workspaceId: ${workspaceId}, blobId: ${blobId}. Skipping insertion.`
);
return;
}
const values = this.processEmbeddings(workspaceId, blobId, embeddings);
await this.db.$executeRaw`
INSERT INTO "ai_workspace_blob_embeddings"
("workspace_id", "blob_id", "chunk", "content", "embedding") VALUES ${values}
ON CONFLICT (workspace_id, blob_id, chunk) DO NOTHING;
`;
}
async matchBlobEmbedding(
workspaceId: string,
embedding: number[],
topK: number,
threshold: number
): Promise<BlobChunkSimilarity[]> {
if (!(await this.allowEmbedding(workspaceId))) {
return [];
}
const similarityChunks = await this.db.$queryRaw<
Array<BlobChunkSimilarity>
>`
SELECT
e."blob_id" as "blobId",
e."chunk",
e."content",
e."embedding" <=> ${embedding}::vector as "distance"
FROM "ai_workspace_blob_embeddings" e
WHERE e.workspace_id = ${workspaceId}
ORDER BY "distance" ASC
LIMIT ${topK};
`;
return similarityChunks.filter(c => Number(c.distance) <= threshold);
}
async removeBlob(workspaceId: string, blobId: string) {
await this.db.$executeRaw`
DELETE FROM "ai_workspace_blob_embeddings"
WHERE workspace_id = ${workspaceId} AND blob_id = ${blobId};
`;
return true;
}
async removeFile(workspaceId: string, fileId: string) {
// embeddings will be removed by foreign key constraint
await this.db.aiWorkspaceFiles.deleteMany({

View File

@@ -20,6 +20,7 @@ import { SafeIntResolver } from 'graphql-scalars';
import GraphQLUpload from 'graphql-upload/GraphQLUpload.mjs';
import {
BlobNotFound,
BlobQuotaExceeded,
CallMetric,
CopilotEmbeddingUnavailable,
@@ -37,6 +38,7 @@ import {
import { CurrentUser } from '../../../core/auth';
import { AccessController } from '../../../core/permission';
import {
ContextBlob,
ContextCategories,
ContextCategory,
ContextDoc,
@@ -118,6 +120,24 @@ class RemoveContextFileInput {
fileId!: string;
}
@InputType()
class AddContextBlobInput {
@Field(() => String)
contextId!: string;
@Field(() => String)
blobId!: string;
}
@InputType()
class RemoveContextBlobInput {
@Field(() => String)
contextId!: string;
@Field(() => String)
blobId!: string;
}
@ObjectType('CopilotContext')
export class CopilotContextType {
@Field(() => ID, { nullable: true })
@@ -130,7 +150,24 @@ export class CopilotContextType {
registerEnumType(ContextCategories, { name: 'ContextCategories' });
@ObjectType()
class CopilotDocType implements Omit<ContextDoc, 'status'> {
class CopilotContextCategory implements Omit<ContextCategory, 'docs'> {
@Field(() => ID)
id!: string;
@Field(() => ContextCategories)
type!: ContextCategories;
@Field(() => [CopilotContextDoc])
docs!: CopilotContextDoc[];
@Field(() => SafeIntResolver)
createdAt!: number;
}
registerEnumType(ContextEmbedStatus, { name: 'ContextEmbedStatus' });
@ObjectType()
class CopilotContextBlob implements Omit<ContextBlob, 'status'> {
@Field(() => ID)
id!: string;
@@ -142,28 +179,17 @@ class CopilotDocType implements Omit<ContextDoc, 'status'> {
}
@ObjectType()
class CopilotContextCategory implements Omit<ContextCategory, 'docs'> {
class CopilotContextDoc implements Omit<ContextDoc, 'status'> {
@Field(() => ID)
id!: string;
@Field(() => ContextCategories)
type!: ContextCategories;
@Field(() => [CopilotDocType])
docs!: CopilotDocType[];
@Field(() => ContextEmbedStatus, { nullable: true })
status!: ContextEmbedStatus | null;
@Field(() => SafeIntResolver)
createdAt!: number;
}
registerEnumType(ContextEmbedStatus, { name: 'ContextEmbedStatus' });
@ObjectType()
class CopilotContextDoc extends CopilotDocType {
@Field(() => String, { nullable: true })
error!: string | null;
}
@ObjectType()
class CopilotContextFile implements ContextFile {
@Field(() => ID)
@@ -433,11 +459,33 @@ export class CopilotContextResolver {
return tags;
}
@ResolveField(() => [CopilotContextBlob], {
description: 'list blobs in context',
})
@CallMetric('ai', 'context_blob_list')
async blobs(
@Parent() context: CopilotContextType
): Promise<CopilotContextBlob[]> {
if (!context.id) {
return [];
}
const session = await this.context.get(context.id);
const blobs = session.blobs;
await this.models.copilotContext.mergeBlobStatus(
session.workspaceId,
blobs
);
return blobs.map(blob => ({ ...blob, status: blob.status || null }));
}
@ResolveField(() => [CopilotContextDoc], {
description: 'list files in context',
})
@CallMetric('ai', 'context_file_list')
async docs(@Parent() context: CopilotContextType): Promise<CopilotDocType[]> {
async docs(
@Parent() context: CopilotContextType
): Promise<CopilotContextDoc[]> {
if (!context.id) {
return [];
}
@@ -538,7 +586,7 @@ export class CopilotContextResolver {
async addContextDoc(
@Args({ name: 'options', type: () => AddContextDocInput })
options: AddContextDocInput
): Promise<CopilotDocType> {
): Promise<CopilotContextDoc> {
const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
await using lock = await this.mutex.acquire(lockFlag);
if (!lock) {
@@ -674,6 +722,85 @@ export class CopilotContextResolver {
}
}
@Mutation(() => CopilotContextBlob, {
description: 'add a blob to context',
})
@CallMetric('ai', 'context_blob_add')
async addContextBlob(
@CurrentUser() user: CurrentUser,
@Args({ name: 'options', type: () => AddContextBlobInput })
options: AddContextBlobInput
): Promise<CopilotContextBlob> {
if (!this.context.canEmbedding) {
throw new CopilotEmbeddingUnavailable();
}
const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
await using lock = await this.mutex.acquire(lockFlag);
if (!lock) {
throw new TooManyRequest('Server is busy');
}
const contextSession = await this.context.get(options.contextId);
try {
const blob = await contextSession.addBlobRecord(options.blobId);
if (!blob) {
throw new BlobNotFound({
spaceId: contextSession.workspaceId,
blobId: options.blobId,
});
}
await this.jobs.addBlobEmbeddingQueue({
userId: user.id,
workspaceId: contextSession.workspaceId,
contextId: contextSession.id,
blobId: options.blobId,
});
return { ...blob, status: blob.status || null };
} catch (e: any) {
if (e instanceof UserFriendlyError) {
throw e;
}
throw new CopilotFailedToModifyContext({
contextId: options.contextId,
message: e.message,
});
}
}
@Mutation(() => Boolean, {
description: 'remove a blob from context',
})
@CallMetric('ai', 'context_blob_remove')
async removeContextBlob(
@Args({ name: 'options', type: () => RemoveContextBlobInput })
options: RemoveContextBlobInput
): Promise<boolean> {
if (!this.context.canEmbedding) {
throw new CopilotEmbeddingUnavailable();
}
const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
await using lock = await this.mutex.acquire(lockFlag);
if (!lock) {
throw new TooManyRequest('Server is busy');
}
const contextSession = await this.context.get(options.contextId);
try {
return await contextSession.removeBlobRecord(options.blobId);
} catch (e: any) {
throw new CopilotFailedToModifyContext({
contextId: options.contextId,
message: e.message,
});
}
}
@ResolveField(() => [ContextMatchedFileChunk], {
description: 'match file in context',
})

View File

@@ -147,6 +147,28 @@ export class CopilotContextService implements OnApplicationBootstrap {
return null;
}
async matchWorkspaceBlobs(
workspaceId: string,
content: string,
topK: number = 5,
signal?: AbortSignal,
threshold: number = 0.5
) {
if (!this.embeddingClient) return [];
const embedding = await this.embeddingClient.getEmbedding(content, signal);
if (!embedding) return [];
const blobChunks = await this.models.copilotWorkspace.matchBlobEmbedding(
workspaceId,
embedding,
topK * 2,
threshold
);
if (!blobChunks.length) return [];
return await this.embeddingClient.reRank(content, blobChunks, topK, signal);
}
async matchWorkspaceFiles(
workspaceId: string,
content: string,

View File

@@ -1,13 +1,13 @@
import { nanoid } from 'nanoid';
import {
ContextBlob,
ContextCategories,
ContextCategory,
ContextConfig,
ContextDoc,
ContextEmbedStatus,
ContextFile,
ContextList,
FileChunkSimilarity,
Models,
} from '../../../models';
@@ -47,6 +47,10 @@ export class ContextSession implements AsyncDisposable {
return categories.filter(c => c.type === ContextCategories.Collection);
}
get blobs(): ContextBlob[] {
return this.config.blobs.map(d => ({ ...d }));
}
get docs(): ContextDoc[] {
return this.config.docs.map(d => ({ ...d }));
}
@@ -65,13 +69,6 @@ export class ContextSession implements AsyncDisposable {
);
}
get sortedList(): ContextList {
const { docs, files } = this.config;
return [...docs, ...files].toSorted(
(a, b) => a.createdAt - b.createdAt
) as ContextList;
}
async addCategoryRecord(type: ContextCategories, id: string, docs: string[]) {
const category = this.config.categories.find(
c => c.type === type && c.id === id
@@ -120,6 +117,33 @@ export class ContextSession implements AsyncDisposable {
return true;
}
async addBlobRecord(blobId: string): Promise<ContextBlob | null> {
const existsBlob = this.config.blobs.find(b => b.id === blobId);
if (existsBlob) {
return existsBlob;
}
const blob = await this.models.blob.get(this.config.workspaceId, blobId);
if (!blob) return null;
const record: ContextBlob = {
id: blobId,
createdAt: Date.now(),
status: ContextEmbedStatus.processing,
};
this.config.blobs.push(record);
await this.save();
return record;
}
async removeBlobRecord(blobId: string): Promise<boolean> {
const index = this.config.blobs.findIndex(b => b.id === blobId);
if (index >= 0) {
this.config.blobs.splice(index, 1);
await this.save();
}
return true;
}
async addDocRecord(docId: string): Promise<ContextDoc> {
const doc = this.config.docs.find(f => f.id === docId);
if (doc) {

View File

@@ -65,15 +65,14 @@ export class CopilotEmbeddingJob {
async addFileEmbeddingQueue(file: Jobs['copilot.embedding.files']) {
if (!this.supportEmbedding) return;
const { userId, workspaceId, contextId, blobId, fileId, fileName } = file;
await this.queue.add('copilot.embedding.files', {
userId,
workspaceId,
contextId,
blobId,
fileId,
fileName,
});
await this.queue.add('copilot.embedding.files', file);
}
@CallMetric('ai', 'addBlobEmbeddingQueue')
async addBlobEmbeddingQueue(blob: Jobs['copilot.embedding.blobs']) {
if (!this.supportEmbedding) return;
await this.queue.add('copilot.embedding.blobs', blob);
}
@OnEvent('workspace.doc.embedding')
@@ -288,6 +287,55 @@ export class CopilotEmbeddingJob {
}
}
@OnJob('copilot.embedding.blobs')
async embedPendingBlob({
userId,
workspaceId,
contextId,
blobId,
}: Jobs['copilot.embedding.blobs']) {
if (!this.supportEmbedding || !this.embeddingClient) return;
try {
const file = await this.readCopilotBlob(
userId,
workspaceId,
blobId,
'blob'
);
const chunks = await this.embeddingClient.getFileChunks(file);
const total = chunks.reduce((acc, c) => acc + c.length, 0);
for (const chunk of chunks) {
const embeddings = await this.embeddingClient.generateEmbeddings(chunk);
await this.models.copilotWorkspace.insertBlobEmbeddings(
workspaceId,
blobId,
embeddings
);
}
if (contextId) {
this.event.emit('workspace.blob.embed.finished', {
contextId,
blobId,
chunkSize: total,
});
}
} catch (error: any) {
if (contextId) {
this.event.emit('workspace.blob.embed.failed', {
contextId,
blobId,
error: mapAnyError(error).message,
});
}
throw error;
}
}
private async getDocFragment(
workspaceId: string,
docId: string
@@ -465,7 +513,7 @@ export class CopilotEmbeddingJob {
const docIdsInWorkspace = readAllDocIdsFromWorkspaceSnapshot(snapshot.blob);
const docIdsInEmbedding =
await this.models.copilotContext.listWorkspaceEmbedding(workspaceId);
await this.models.copilotContext.listWorkspaceDocEmbedding(workspaceId);
const docIdsInWorkspaceSet = new Set(docIdsInWorkspace);
const deletedDocIds = docIdsInEmbedding.filter(

View File

@@ -14,6 +14,18 @@ declare global {
enableDocEmbedding?: boolean;
};
'workspace.blob.embed.finished': {
contextId: string;
blobId: string;
chunkSize: number;
};
'workspace.blob.embed.failed': {
contextId: string;
blobId: string;
error: string;
};
'workspace.doc.embedding': Array<{
workspaceId: string;
docId: string;
@@ -62,6 +74,13 @@ declare global {
fileName: string;
};
'copilot.embedding.blobs': {
contextId?: string;
userId: string;
workspaceId: string;
blobId: string;
};
'copilot.embedding.cleanupTrashedDocEmbeddings': {
workspaceId: string;
};
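
The `workspace.blob.embed.finished` / `workspace.blob.embed.failed` events declared above are emitted by `embedPendingBlob`, but no consumer appears in this diff. A hypothetical listener, wired the same way as the existing `@OnEvent` handlers in the copilot module, might look like the sketch below; the class, method names, import path, and suggested reactions are assumptions for illustration only.

```ts
import { OnEvent } from '../../base'; // assumed import path for the event decorator

// Illustrative only: in practice these handlers would live in an existing injectable service.
class CopilotBlobEmbedListener {
  @OnEvent('workspace.blob.embed.finished')
  async onFinished(payload: { contextId: string; blobId: string; chunkSize: number }) {
    // e.g. flip the blob's status to `finished` in the context identified by payload.contextId
  }

  @OnEvent('workspace.blob.embed.failed')
  async onFailed(payload: { contextId: string; blobId: string; error: string }) {
    // e.g. record payload.error so the UI can explain why embedding failed
  }
}
```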

View File

@@ -2,6 +2,11 @@
# THIS FILE WAS AUTOMATICALLY GENERATED (DO NOT MODIFY)
# ------------------------------------------------------
input AddContextBlobInput {
blobId: String!
contextId: String!
}
input AddContextCategoryInput {
categoryId: String!
contextId: String!
@@ -226,6 +231,9 @@ type Copilot {
}
type CopilotContext {
"""list blobs in context"""
blobs: [CopilotContextBlob!]!
"""list collections in context"""
collections: [CopilotContextCategory!]!
@@ -247,16 +255,21 @@ type CopilotContext {
workspaceId: String!
}
type CopilotContextBlob {
createdAt: SafeInt!
id: ID!
status: ContextEmbedStatus
}
type CopilotContextCategory {
createdAt: SafeInt!
docs: [CopilotDocType!]!
docs: [CopilotContextDoc!]!
id: ID!
type: ContextCategories!
}
type CopilotContextDoc {
createdAt: SafeInt!
error: String
id: ID!
status: ContextEmbedStatus
}
@@ -281,12 +294,6 @@ type CopilotDocNotFoundDataType {
docId: String!
}
type CopilotDocType {
createdAt: SafeInt!
id: ID!
status: ContextEmbedStatus
}
type CopilotFailedToAddWorkspaceFileEmbeddingDataType {
message: String!
}
@@ -1163,6 +1170,9 @@ type Mutation {
acceptInviteById(inviteId: String!, sendAcceptMail: Boolean @deprecated(reason: "never used"), workspaceId: String @deprecated(reason: "never used")): Boolean!
activateLicense(license: String!, workspaceId: String!): License!
"""add a blob to context"""
addContextBlob(options: AddContextBlobInput!): CopilotContextBlob!
"""add a category to context"""
addContextCategory(options: AddContextCategoryInput!): CopilotContextCategory!
@@ -1266,6 +1276,9 @@ type Mutation {
"""Remove user avatar"""
removeAvatar: RemoveAvatar!
"""remove a blob from context"""
removeContextBlob(options: RemoveContextBlobInput!): Boolean!
"""remove a category from context"""
removeContextCategory(options: RemoveContextCategoryInput!): Boolean!
@@ -1617,6 +1630,11 @@ type RemoveAvatar {
success: Boolean!
}
input RemoveContextBlobInput {
blobId: String!
contextId: String!
}
input RemoveContextCategoryInput {
categoryId: String!
contextId: String!

View File

@@ -0,0 +1,7 @@
mutation addContextBlob($options: AddContextBlobInput!) {
addContextBlob(options: $options) {
id
createdAt
status
}
}

View File

@@ -0,0 +1,3 @@
mutation removeContextBlob($options: RemoveContextBlobInput!) {
removeContextBlob(options: $options)
}

View File

@@ -3,6 +3,5 @@ mutation addContextDoc($options: AddContextDocInput!) {
id
createdAt
status
error
}
}

View File

@@ -6,10 +6,14 @@ query listContextObject(
currentUser {
copilot(workspaceId: $workspaceId) {
contexts(sessionId: $sessionId, contextId: $contextId) {
blobs {
id
status
createdAt
}
docs {
id
status
error
createdAt
}
files {

View File

@@ -568,6 +568,26 @@ export const applyDocUpdatesQuery = {
}`,
};
export const addContextBlobMutation = {
id: 'addContextBlobMutation' as const,
op: 'addContextBlob',
query: `mutation addContextBlob($options: AddContextBlobInput!) {
addContextBlob(options: $options) {
id
createdAt
status
}
}`,
};
export const removeContextBlobMutation = {
id: 'removeContextBlobMutation' as const,
op: 'removeContextBlob',
query: `mutation removeContextBlob($options: RemoveContextBlobInput!) {
removeContextBlob(options: $options)
}`,
};
export const addContextCategoryMutation = {
id: 'addContextCategoryMutation' as const,
op: 'addContextCategory',
@@ -609,7 +629,6 @@ export const addContextDocMutation = {
id
createdAt
status
error
}
}`,
};
@@ -655,10 +674,14 @@ export const listContextObjectQuery = {
currentUser {
copilot(workspaceId: $workspaceId) {
contexts(sessionId: $sessionId, contextId: $contextId) {
blobs {
id
status
createdAt
}
docs {
id
status
error
createdAt
}
files {

View File

@@ -37,6 +37,11 @@ export interface Scalars {
Upload: { input: File; output: File };
}
export interface AddContextBlobInput {
blobId: Scalars['String']['input'];
contextId: Scalars['String']['input'];
}
export interface AddContextCategoryInput {
categoryId: Scalars['String']['input'];
contextId: Scalars['String']['input'];
@@ -294,6 +299,8 @@ export interface CopilotSessionsArgs {
export interface CopilotContext {
__typename?: 'CopilotContext';
/** list blobs in context */
blobs: Array<CopilotContextBlob>;
/** list collections in context */
collections: Array<CopilotContextCategory>;
/** list files in context */
@@ -324,10 +331,17 @@ export interface CopilotContextMatchWorkspaceDocsArgs {
threshold?: InputMaybe<Scalars['Float']['input']>;
}
export interface CopilotContextBlob {
__typename?: 'CopilotContextBlob';
createdAt: Scalars['SafeInt']['output'];
id: Scalars['ID']['output'];
status: Maybe<ContextEmbedStatus>;
}
export interface CopilotContextCategory {
__typename?: 'CopilotContextCategory';
createdAt: Scalars['SafeInt']['output'];
docs: Array<CopilotDocType>;
docs: Array<CopilotContextDoc>;
id: Scalars['ID']['output'];
type: ContextCategories;
}
@@ -335,7 +349,6 @@ export interface CopilotContextCategory {
export interface CopilotContextDoc {
__typename?: 'CopilotContextDoc';
createdAt: Scalars['SafeInt']['output'];
error: Maybe<Scalars['String']['output']>;
id: Scalars['ID']['output'];
status: Maybe<ContextEmbedStatus>;
}
@@ -363,13 +376,6 @@ export interface CopilotDocNotFoundDataType {
docId: Scalars['String']['output'];
}
export interface CopilotDocType {
__typename?: 'CopilotDocType';
createdAt: Scalars['SafeInt']['output'];
id: Scalars['ID']['output'];
status: Maybe<ContextEmbedStatus>;
}
export interface CopilotFailedToAddWorkspaceFileEmbeddingDataType {
__typename?: 'CopilotFailedToAddWorkspaceFileEmbeddingDataType';
message: Scalars['String']['output'];
@@ -1333,6 +1339,8 @@ export interface Mutation {
__typename?: 'Mutation';
acceptInviteById: Scalars['Boolean']['output'];
activateLicense: License;
/** add a blob to context */
addContextBlob: CopilotContextBlob;
/** add a category to context */
addContextCategory: CopilotContextCategory;
/** add a doc to context */
@@ -1412,6 +1420,8 @@ export interface Mutation {
releaseDeletedBlobs: Scalars['Boolean']['output'];
/** Remove user avatar */
removeAvatar: RemoveAvatar;
/** remove a blob from context */
removeContextBlob: Scalars['Boolean']['output'];
/** remove a category from context */
removeContextCategory: Scalars['Boolean']['output'];
/** remove a doc from context */
@@ -1489,6 +1499,10 @@ export interface MutationActivateLicenseArgs {
workspaceId: Scalars['String']['input'];
}
export interface MutationAddContextBlobArgs {
options: AddContextBlobInput;
}
export interface MutationAddContextCategoryArgs {
options: AddContextCategoryInput;
}
@@ -1707,6 +1721,10 @@ export interface MutationReleaseDeletedBlobsArgs {
workspaceId: Scalars['String']['input'];
}
export interface MutationRemoveContextBlobArgs {
options: RemoveContextBlobInput;
}
export interface MutationRemoveContextCategoryArgs {
options: RemoveContextCategoryInput;
}
@@ -2221,6 +2239,11 @@ export interface RemoveAvatar {
success: Scalars['Boolean']['output'];
}
export interface RemoveContextBlobInput {
blobId: Scalars['String']['input'];
contextId: Scalars['String']['input'];
}
export interface RemoveContextCategoryInput {
categoryId: Scalars['String']['input'];
contextId: Scalars['String']['input'];
@@ -3535,6 +3558,29 @@ export type ApplyDocUpdatesQuery = {
applyDocUpdates: string;
};
export type AddContextBlobMutationVariables = Exact<{
options: AddContextBlobInput;
}>;
export type AddContextBlobMutation = {
__typename?: 'Mutation';
addContextBlob: {
__typename?: 'CopilotContextBlob';
id: string;
createdAt: number;
status: ContextEmbedStatus | null;
};
};
export type RemoveContextBlobMutationVariables = Exact<{
options: RemoveContextBlobInput;
}>;
export type RemoveContextBlobMutation = {
__typename?: 'Mutation';
removeContextBlob: boolean;
};
export type AddContextCategoryMutationVariables = Exact<{
options: AddContextCategoryInput;
}>;
@@ -3547,7 +3593,7 @@ export type AddContextCategoryMutation = {
createdAt: number;
type: ContextCategories;
docs: Array<{
__typename?: 'CopilotDocType';
__typename?: 'CopilotContextDoc';
id: string;
createdAt: number;
status: ContextEmbedStatus | null;
@@ -3585,7 +3631,6 @@ export type AddContextDocMutation = {
id: string;
createdAt: number;
status: ContextEmbedStatus | null;
error: string | null;
};
};
@@ -3641,11 +3686,16 @@ export type ListContextObjectQuery = {
__typename?: 'Copilot';
contexts: Array<{
__typename?: 'CopilotContext';
blobs: Array<{
__typename?: 'CopilotContextBlob';
id: string;
status: ContextEmbedStatus | null;
createdAt: number;
}>;
docs: Array<{
__typename?: 'CopilotContextDoc';
id: string;
status: ContextEmbedStatus | null;
error: string | null;
createdAt: number;
}>;
files: Array<{
@@ -3665,7 +3715,7 @@ export type ListContextObjectQuery = {
id: string;
createdAt: number;
docs: Array<{
__typename?: 'CopilotDocType';
__typename?: 'CopilotContextDoc';
id: string;
status: ContextEmbedStatus | null;
createdAt: number;
@@ -3677,7 +3727,7 @@ export type ListContextObjectQuery = {
id: string;
createdAt: number;
docs: Array<{
__typename?: 'CopilotDocType';
__typename?: 'CopilotContextDoc';
id: string;
status: ContextEmbedStatus | null;
createdAt: number;
@@ -6616,6 +6666,16 @@ export type Mutations =
variables: UploadCommentAttachmentMutationVariables;
response: UploadCommentAttachmentMutation;
}
| {
name: 'addContextBlobMutation';
variables: AddContextBlobMutationVariables;
response: AddContextBlobMutation;
}
| {
name: 'removeContextBlobMutation';
variables: RemoveContextBlobMutationVariables;
response: RemoveContextBlobMutation;
}
| {
name: 'addContextCategoryMutation';
variables: AddContextCategoryMutationVariables;

View File

@@ -11,7 +11,6 @@ import type {
CopilotChatHistoryFragment,
CopilotContextDoc,
CopilotContextFile,
CopilotDocType,
} from '@affine/graphql';
import { SignalWatcher, WithDisposable } from '@blocksuite/affine/global/lit';
import type { EditorHost } from '@blocksuite/affine/std';
@@ -270,7 +269,6 @@ export class AIChatComposer extends SignalWatcher(
const docChips: DocChip[] = docs.map(doc => ({
docId: doc.id,
state: doc.status || 'processing',
tooltip: doc.error,
createdAt: doc.createdAt,
}));
@@ -583,10 +581,7 @@ export class AIChatComposer extends SignalWatcher(
...tags.flatMap(tag => tag.docs),
...collections.flatMap(collection => collection.docs),
];
const hashMap = new Map<
string,
CopilotContextDoc | CopilotDocType | CopilotContextFile
>();
const hashMap = new Map<string, CopilotContextDoc | CopilotContextFile>();
const count: Record<ContextEmbedStatus, number> = {
finished: 0,
processing: 0,