feat(server): attachment embedding (#13348)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Added support for managing "blobs" in Copilot context, including
adding and removing blobs via new GraphQL mutations and UI fields.
* Introduced tracking and querying of blob embeddings within workspaces,
enabling search and similarity matching for blob content.
* Extended Copilot context and workspace APIs, schema, and UI to display
and manage blobs alongside existing documents and files.

* **Bug Fixes**
* Updated context and embedding status logic to handle blobs, ensuring
accurate status reporting and embedding management.

* **Tests**
* Added and updated test cases and snapshots to cover blob embedding
insertion, matching, and removal scenarios.

* **Documentation**
* Updated GraphQL schema and TypeScript types to reflect new
blob-related fields and mutations.

* **Chores**
* Refactored and cleaned up code to support new blob entity and
embedding logic, including renaming and updating internal methods and
types.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
DarkSky
2025-07-31 06:07:28 +08:00
committed by GitHub
parent b6a5bc052e
commit feb42e34be
24 changed files with 689 additions and 84 deletions
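
Before the file-by-file diff, here is a minimal sketch of how a client might drive the new blob context API added in this change. The operation, argument, and field names (`addContextBlob`, `removeContextBlob`, `AddContextBlobInput`, `status`) are taken from the GraphQL schema below; the `/graphql` endpoint, bearer-token auth, and the example `contextId`/`blobId` values are assumptions, not part of the commit.

```ts
// Hedged sketch of calling the new blob-context mutations from a GraphQL client.
// Only the operation/field names come from this commit's schema; the endpoint,
// token handling, and ids are placeholders.
const ADD_CONTEXT_BLOB = /* GraphQL */ `
  mutation AddContextBlob($options: AddContextBlobInput!) {
    addContextBlob(options: $options) {
      id
      createdAt
      # ContextEmbedStatus (nullable): processing | finished | failed
      status
    }
  }
`;

const REMOVE_CONTEXT_BLOB = /* GraphQL */ `
  mutation RemoveContextBlob($options: RemoveContextBlobInput!) {
    removeContextBlob(options: $options)
  }
`;

async function gql<T>(
  query: string,
  variables: Record<string, unknown>,
  token = 'YOUR_TOKEN' // placeholder; real auth depends on the deployment
): Promise<T> {
  const res = await fetch('/graphql', {
    method: 'POST',
    headers: {
      'content-type': 'application/json',
      authorization: `Bearer ${token}`,
    },
    body: JSON.stringify({ query, variables }),
  });
  const { data, errors } = await res.json();
  if (errors?.length) throw new Error(errors[0].message);
  return data as T;
}

// Attaching a blob enqueues a `copilot.embedding.blobs` job; the returned status
// starts as `processing` and is reported as `finished` once embeddings exist.
const { addContextBlob } = await gql<{
  addContextBlob: { id: string; createdAt: number; status: string | null };
}>(ADD_CONTEXT_BLOB, {
  options: { contextId: 'context-id', blobId: 'blob-key' },
});
console.log(addContextBlob.status); // 'processing' right after the call

// Detaching the blob from the context returns a boolean.
await gql<{ removeContextBlob: boolean }>(REMOVE_CONTEXT_BLOB, {
  options: { contextId: 'context-id', blobId: 'blob-key' },
});
```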

View File

@@ -0,0 +1,37 @@
/*
  Warnings:

  - The primary key for the `ai_workspace_embeddings` table will be changed. If it partially fails, the table could be left without primary key constraint.
  - The primary key for the `ai_workspace_file_embeddings` table will be changed. If it partially fails, the table could be left without primary key constraint.
*/
DO $$
BEGIN
  IF EXISTS (SELECT 1 FROM pg_tables WHERE tablename = 'ai_workspace_embeddings') AND
     EXISTS (SELECT 1 FROM pg_tables WHERE tablename = 'ai_workspace_file_embeddings') THEN
    -- CreateTable
    CREATE TABLE "ai_workspace_blob_embeddings" (
      "workspace_id" VARCHAR NOT NULL,
      "blob_id" VARCHAR NOT NULL,
      "chunk" INTEGER NOT NULL,
      "content" VARCHAR NOT NULL,
      "embedding" vector(1024) NOT NULL,
      "created_at" TIMESTAMPTZ(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,

      CONSTRAINT "ai_workspace_blob_embeddings_pkey" PRIMARY KEY ("workspace_id","blob_id","chunk")
    );

    -- CreateIndex
    CREATE INDEX "ai_workspace_blob_embeddings_idx" ON "ai_workspace_blob_embeddings"
      USING hnsw (embedding vector_cosine_ops);

    -- AddForeignKey
    ALTER TABLE "ai_workspace_blob_embeddings"
      ADD CONSTRAINT "ai_workspace_blob_embeddings_workspace_id_blob_id_fkey"
      FOREIGN KEY ("workspace_id", "blob_id")
      REFERENCES "blobs"("workspace_id", "key")
      ON DELETE CASCADE ON UPDATE CASCADE;
  END IF;
END
$$;

View File

@@ -568,6 +568,23 @@ model AiWorkspaceFileEmbedding {
@@map("ai_workspace_file_embeddings")
}
model AiWorkspaceBlobEmbedding {
workspaceId String @map("workspace_id") @db.VarChar
blobId String @map("blob_id") @db.VarChar
// a file can be divided into multiple chunks and embedded separately.
chunk Int @db.Integer
content String @db.VarChar
embedding Unsupported("vector(1024)")
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz(3)
blob Blob @relation(fields: [workspaceId, blobId], references: [workspaceId, key], onDelete: Cascade)
@@id([workspaceId, blobId, chunk])
@@index([embedding], map: "ai_workspace_blob_embeddings_idx")
@@map("ai_workspace_blob_embeddings")
}
enum AiJobStatus {
pending
running
@@ -807,7 +824,8 @@ model Blob {
  createdAt DateTime  @default(now()) @map("created_at") @db.Timestamptz(3)
  deletedAt DateTime? @map("deleted_at") @db.Timestamptz(3)

  workspace                Workspace                  @relation(fields: [workspaceId], references: [id], onDelete: Cascade)
  AiWorkspaceBlobEmbedding AiWorkspaceBlobEmbedding[]

  @@id([workspaceId, key])
  @@map("blobs")

View File

@@ -1520,14 +1520,17 @@ test('TextStreamParser should process a sequence of message chunks', t => {
// ==================== context ====================
test('should be able to manage context', async t => {
  const { context, prompt, session, event, jobs, storage } = t.context;
  const { context, db, event, jobs, prompt, session, storage, workspace } =
    t.context;
  const ws = await workspace.create(userId);
  await prompt.set(promptName, 'model', [
    { role: 'system', content: 'hello {{word}}' },
  ]);
  const chatSession = await session.create({
    docId: 'test',
    workspaceId: 'test',
    workspaceId: ws.id,
    userId,
    promptName,
    pinned: false,
@@ -1608,6 +1611,36 @@ test('should be able to manage context', async t => {
    t.is(result[0].fileId, file.id, 'should match file id');
  }

  // blob record
  {
    const blobId = 'test-blob';
    await storage.put(userId, session.workspaceId, blobId, buffer);
    await db.blob.create({
      data: {
        workspaceId: session.workspaceId,
        key: blobId,
        size: buffer.length,
        mime: 'application/pdf',
      },
    });
    await jobs.embedPendingBlob({
      userId,
      workspaceId: session.workspaceId,
      blobId,
    });

    const result = await t.context.context.matchWorkspaceBlobs(
      session.workspaceId,
      'test',
      1,
      undefined,
      1
    );
    t.is(result.length, 1, 'should match blob embedding');
    t.is(result[0].blobId, blobId, 'should match blob id');
  }

  // doc record
  const addDoc = async () => {

View File

@@ -74,6 +74,17 @@ Generated by [AVA](https://avajs.dev).
      },
    ]

> should match workspace blob embedding

    [
      {
        blobId: 'blob-test',
        chunk: 0,
        content: 'blob content',
        distance: 0,
      },
    ]

> should find docs to embed

    1

View File

@@ -89,13 +89,14 @@ test('should get null for non-exist job', async t => {
test('should update context', async t => {
const { id: contextId } = await t.context.copilotContext.create(sessionId);
const config = await t.context.copilotContext.getConfig(contextId);
const config = (await t.context.copilotContext.getConfig(contextId))!;
t.assert(config, 'should get context config');
const doc = {
id: docId,
createdAt: Date.now(),
};
config?.docs.push(doc);
config.docs.push(doc);
await t.context.copilotContext.update(contextId, { config });
const config1 = await t.context.copilotContext.getConfig(contextId);
@@ -164,7 +165,7 @@ test('should insert embedding by doc id', async t => {
);
{
const ret = await t.context.copilotContext.listWorkspaceEmbedding(
const ret = await t.context.copilotContext.listWorkspaceDocEmbedding(
workspace.id,
[docId]
);
@@ -320,7 +321,7 @@ test('should merge doc status correctly', async t => {
const hasEmbeddingStub = Sinon.stub(
t.context.copilotContext,
'listWorkspaceEmbedding'
'listWorkspaceDocEmbedding'
).resolves([]);
const stubResult = await t.context.copilotContext.mergeDocStatus(

View File

@@ -145,6 +145,52 @@ test('should insert and search embedding', async t => {
}
}
  {
    await t.context.db.blob.create({
      data: {
        workspaceId: workspace.id,
        key: 'blob-test',
        mime: 'text/plain',
        size: 1,
      },
    });

    const blobId = 'blob-test';
    await t.context.copilotWorkspace.insertBlobEmbeddings(
      workspace.id,
      blobId,
      [
        {
          index: 0,
          content: 'blob content',
          embedding: Array.from({ length: 1024 }, () => 1),
        },
      ]
    );

    {
      const ret = await t.context.copilotWorkspace.matchBlobEmbedding(
        workspace.id,
        Array.from({ length: 1024 }, () => 0.9),
        1,
        1
      );
      t.snapshot(cleanObject(ret), 'should match workspace blob embedding');
    }

    await t.context.copilotWorkspace.removeBlob(workspace.id, blobId);

    {
      const ret = await t.context.copilotWorkspace.matchBlobEmbedding(
        workspace.id,
        Array.from({ length: 1024 }, () => 0.9),
        1,
        1
      );
      t.deepEqual(ret, [], 'should not match after removal');
    }
  }
{
const docId = randomUUID();
await t.context.doc.upsert({

View File

@@ -369,7 +369,6 @@ export async function listContextDocAndFiles(
docs {
id
status
error
createdAt
}
files {

View File

@@ -37,6 +37,11 @@ const ContextEmbedStatusSchema = z.enum([
ContextEmbedStatus.failed,
]);
const ContextBlobSchema = z.object({
  id: z.string(),
  createdAt: z.number(),
});

const ContextDocSchema = z.object({
  id: z.string(),
  createdAt: z.number(),
@@ -64,6 +69,9 @@ export const ContextCategorySchema = z.object({
export const ContextConfigSchema = z.object({
  workspaceId: z.string(),
  blobs: ContextBlobSchema.merge(
    z.object({ status: ContextEmbedStatusSchema.optional() })
  ).array(),
  files: ContextFileSchema.array(),
  docs: ContextDocSchema.merge(
    z.object({ status: ContextEmbedStatusSchema.optional() })
@@ -77,10 +85,9 @@ export const MinimalContextConfigSchema = ContextConfigSchema.pick({
export type ContextCategory = z.infer<typeof ContextCategorySchema>;
export type ContextConfig = z.infer<typeof ContextConfigSchema>;
export type ContextBlob = z.infer<typeof ContextConfigSchema>['blobs'][number];
export type ContextDoc = z.infer<typeof ContextConfigSchema>['docs'][number];
export type ContextFile = z.infer<typeof ContextConfigSchema>['files'][number];
export type ContextListItem = ContextDoc | ContextFile;
export type ContextList = ContextListItem[];
// embeddings
@@ -106,6 +113,10 @@ export type FileChunkSimilarity = ChunkSimilarity & {
mimeType: string;
};
export type BlobChunkSimilarity = ChunkSimilarity & {
  blobId: string;
};

export type DocChunkSimilarity = ChunkSimilarity & {
  docId: string;
};

View File

@@ -6,6 +6,7 @@ import { Prisma } from '@prisma/client';
import { CopilotSessionNotFound } from '../base';
import { BaseModel } from './base';
import {
ContextBlob,
ContextConfigSchema,
ContextDoc,
ContextEmbedStatus,
@@ -39,6 +40,7 @@ export class CopilotContextModel extends BaseModel {
sessionId,
config: {
workspaceId: session.workspaceId,
blobs: [],
docs: [],
files: [],
categories: [],
@@ -66,10 +68,11 @@ export class CopilotContextModel extends BaseModel {
if (minimalConfig.success) {
// fulfill the missing fields
return {
...minimalConfig.data,
blobs: [],
docs: [],
files: [],
categories: [],
...minimalConfig.data,
};
}
}
@@ -83,10 +86,35 @@ export class CopilotContextModel extends BaseModel {
return row;
}
  async mergeBlobStatus(
    workspaceId: string,
    blobs: ContextBlob[]
  ): Promise<ContextBlob[]> {
    const canEmbedding = await this.checkEmbeddingAvailable();
    const finishedBlobs = canEmbedding
      ? await this.listWorkspaceBlobEmbedding(
          workspaceId,
          Array.from(new Set(blobs.map(blob => blob.id)))
        )
      : [];
    const finishedBlobSet = new Set(finishedBlobs);

    for (const blob of blobs) {
      const status = finishedBlobSet.has(blob.id)
        ? ContextEmbedStatus.finished
        : undefined;
      // NOTE: when the blob has not been synchronized to the server or is in the embedding queue
      // the status will be empty, fallback to processing if no status is provided
      blob.status = status || blob.status || ContextEmbedStatus.processing;
    }
    return blobs;
  }
async mergeDocStatus(workspaceId: string, docs: ContextDoc[]) {
const canEmbedding = await this.checkEmbeddingAvailable();
const finishedDoc = canEmbedding
? await this.listWorkspaceEmbedding(
? await this.listWorkspaceDocEmbedding(
workspaceId,
Array.from(new Set(docs.map(doc => doc.id)))
)
@@ -126,7 +154,23 @@ export class CopilotContextModel extends BaseModel {
return Number(count) === 2;
}
async listWorkspaceEmbedding(workspaceId: string, docIds?: string[]) {
  async listWorkspaceBlobEmbedding(
    workspaceId: string,
    blobIds?: string[]
  ): Promise<string[]> {
    const existsIds = await this.db.aiWorkspaceBlobEmbedding
      .groupBy({
        where: {
          workspaceId,
          blobId: blobIds ? { in: blobIds } : undefined,
        },
        by: ['blobId'],
      })
      .then(r => r.map(r => r.blobId));
    return existsIds;
  }
async listWorkspaceDocEmbedding(workspaceId: string, docIds?: string[]) {
const existsIds = await this.db.aiWorkspaceEmbedding
.groupBy({
where: {

View File

@@ -7,6 +7,7 @@ import { Prisma, PrismaClient } from '@prisma/client';
import { PaginationInput } from '../base';
import { BaseModel } from './base';
import type {
BlobChunkSimilarity,
CopilotWorkspaceFile,
CopilotWorkspaceFileMetadata,
Embedding,
@@ -256,19 +257,19 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
async checkEmbeddingAvailable(): Promise<boolean> {
const [{ count }] = await this.db.$queryRaw<
{ count: number }[]
>`SELECT count(1) FROM pg_tables WHERE tablename in ('ai_workspace_embeddings', 'ai_workspace_file_embeddings')`;
return Number(count) === 2;
>`SELECT count(1) FROM pg_tables WHERE tablename in ('ai_workspace_embeddings', 'ai_workspace_file_embeddings', 'ai_workspace_blob_embeddings')`;
return Number(count) === 3;
}
private processEmbeddings(
workspaceId: string,
fileId: string,
fileOrBlobId: string,
embeddings: Embedding[]
) {
const groups = embeddings.map(e =>
[
workspaceId,
fileId,
fileOrBlobId,
e.index,
e.content,
Prisma.raw(`'[${e.embedding.join(',')}]'`),
@@ -378,6 +379,61 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
return similarityChunks.filter(c => Number(c.distance) <= threshold);
}
  @Transactional()
  async insertBlobEmbeddings(
    workspaceId: string,
    blobId: string,
    embeddings: Embedding[]
  ) {
    if (embeddings.length === 0) {
      this.logger.warn(
        `No embeddings provided for workspaceId: ${workspaceId}, blobId: ${blobId}. Skipping insertion.`
      );
      return;
    }
    const values = this.processEmbeddings(workspaceId, blobId, embeddings);
    await this.db.$executeRaw`
      INSERT INTO "ai_workspace_blob_embeddings"
      ("workspace_id", "blob_id", "chunk", "content", "embedding") VALUES ${values}
      ON CONFLICT (workspace_id, blob_id, chunk) DO NOTHING;
    `;
  }

  async matchBlobEmbedding(
    workspaceId: string,
    embedding: number[],
    topK: number,
    threshold: number
  ): Promise<BlobChunkSimilarity[]> {
    if (!(await this.allowEmbedding(workspaceId))) {
      return [];
    }
    const similarityChunks = await this.db.$queryRaw<
      Array<BlobChunkSimilarity>
    >`
      SELECT
        e."blob_id" as "blobId",
        e."chunk",
        e."content",
        e."embedding" <=> ${embedding}::vector as "distance"
      FROM "ai_workspace_blob_embeddings" e
      WHERE e.workspace_id = ${workspaceId}
      ORDER BY "distance" ASC
      LIMIT ${topK};
    `;
    return similarityChunks.filter(c => Number(c.distance) <= threshold);
  }

  async removeBlob(workspaceId: string, blobId: string) {
    await this.db.$executeRaw`
      DELETE FROM "ai_workspace_blob_embeddings"
      WHERE workspace_id = ${workspaceId} AND blob_id = ${blobId};
    `;
    return true;
  }
async removeFile(workspaceId: string, fileId: string) {
// embeddings will be removed by foreign key constraint
await this.db.aiWorkspaceFiles.deleteMany({

View File

@@ -20,6 +20,7 @@ import { SafeIntResolver } from 'graphql-scalars';
import GraphQLUpload from 'graphql-upload/GraphQLUpload.mjs';
import {
BlobNotFound,
BlobQuotaExceeded,
CallMetric,
CopilotEmbeddingUnavailable,
@@ -37,6 +38,7 @@ import {
import { CurrentUser } from '../../../core/auth';
import { AccessController } from '../../../core/permission';
import {
ContextBlob,
ContextCategories,
ContextCategory,
ContextDoc,
@@ -118,6 +120,24 @@ class RemoveContextFileInput {
fileId!: string;
}
@InputType()
class AddContextBlobInput {
  @Field(() => String)
  contextId!: string;

  @Field(() => String)
  blobId!: string;
}

@InputType()
class RemoveContextBlobInput {
  @Field(() => String)
  contextId!: string;

  @Field(() => String)
  blobId!: string;
}
@ObjectType('CopilotContext')
export class CopilotContextType {
@Field(() => ID, { nullable: true })
@@ -130,7 +150,24 @@ export class CopilotContextType {
registerEnumType(ContextCategories, { name: 'ContextCategories' });
@ObjectType()
class CopilotDocType implements Omit<ContextDoc, 'status'> {
class CopilotContextCategory implements Omit<ContextCategory, 'docs'> {
@Field(() => ID)
id!: string;
@Field(() => ContextCategories)
type!: ContextCategories;
@Field(() => [CopilotContextDoc])
docs!: CopilotContextDoc[];
@Field(() => SafeIntResolver)
createdAt!: number;
}
registerEnumType(ContextEmbedStatus, { name: 'ContextEmbedStatus' });
@ObjectType()
class CopilotContextBlob implements Omit<ContextBlob, 'status'> {
@Field(() => ID)
id!: string;
@@ -142,28 +179,17 @@ class CopilotDocType implements Omit<ContextDoc, 'status'> {
}
@ObjectType()
class CopilotContextCategory implements Omit<ContextCategory, 'docs'> {
class CopilotContextDoc implements Omit<ContextDoc, 'status'> {
@Field(() => ID)
id!: string;
@Field(() => ContextCategories)
type!: ContextCategories;
@Field(() => [CopilotDocType])
docs!: CopilotDocType[];
@Field(() => ContextEmbedStatus, { nullable: true })
status!: ContextEmbedStatus | null;
@Field(() => SafeIntResolver)
createdAt!: number;
}
registerEnumType(ContextEmbedStatus, { name: 'ContextEmbedStatus' });
@ObjectType()
class CopilotContextDoc extends CopilotDocType {
@Field(() => String, { nullable: true })
error!: string | null;
}
@ObjectType()
class CopilotContextFile implements ContextFile {
@Field(() => ID)
@@ -433,11 +459,33 @@ export class CopilotContextResolver {
return tags;
}
  @ResolveField(() => [CopilotContextBlob], {
    description: 'list blobs in context',
  })
  @CallMetric('ai', 'context_blob_list')
  async blobs(
    @Parent() context: CopilotContextType
  ): Promise<CopilotContextBlob[]> {
    if (!context.id) {
      return [];
    }
    const session = await this.context.get(context.id);
    const blobs = session.blobs;
    await this.models.copilotContext.mergeBlobStatus(
      session.workspaceId,
      blobs
    );
    return blobs.map(blob => ({ ...blob, status: blob.status || null }));
  }
@ResolveField(() => [CopilotContextDoc], {
description: 'list files in context',
})
@CallMetric('ai', 'context_file_list')
async docs(@Parent() context: CopilotContextType): Promise<CopilotDocType[]> {
async docs(
@Parent() context: CopilotContextType
): Promise<CopilotContextDoc[]> {
if (!context.id) {
return [];
}
@@ -538,7 +586,7 @@ export class CopilotContextResolver {
async addContextDoc(
@Args({ name: 'options', type: () => AddContextDocInput })
options: AddContextDocInput
): Promise<CopilotDocType> {
): Promise<CopilotContextDoc> {
const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
await using lock = await this.mutex.acquire(lockFlag);
if (!lock) {
@@ -674,6 +722,85 @@ export class CopilotContextResolver {
}
}
  @Mutation(() => CopilotContextBlob, {
    description: 'add a blob to context',
  })
  @CallMetric('ai', 'context_blob_add')
  async addContextBlob(
    @CurrentUser() user: CurrentUser,
    @Args({ name: 'options', type: () => AddContextBlobInput })
    options: AddContextBlobInput
  ): Promise<CopilotContextBlob> {
    if (!this.context.canEmbedding) {
      throw new CopilotEmbeddingUnavailable();
    }

    const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
    await using lock = await this.mutex.acquire(lockFlag);
    if (!lock) {
      throw new TooManyRequest('Server is busy');
    }
    const contextSession = await this.context.get(options.contextId);

    try {
      const blob = await contextSession.addBlobRecord(options.blobId);
      if (!blob) {
        throw new BlobNotFound({
          spaceId: contextSession.workspaceId,
          blobId: options.blobId,
        });
      }

      await this.jobs.addBlobEmbeddingQueue({
        userId: user.id,
        workspaceId: contextSession.workspaceId,
        contextId: contextSession.id,
        blobId: options.blobId,
      });

      return { ...blob, status: blob.status || null };
    } catch (e: any) {
      if (e instanceof UserFriendlyError) {
        throw e;
      }
      throw new CopilotFailedToModifyContext({
        contextId: options.contextId,
        message: e.message,
      });
    }
  }

  @Mutation(() => Boolean, {
    description: 'remove a blob from context',
  })
  @CallMetric('ai', 'context_blob_remove')
  async removeContextBlob(
    @Args({ name: 'options', type: () => RemoveContextBlobInput })
    options: RemoveContextBlobInput
  ): Promise<boolean> {
    if (!this.context.canEmbedding) {
      throw new CopilotEmbeddingUnavailable();
    }

    const lockFlag = `${COPILOT_LOCKER}:context:${options.contextId}`;
    await using lock = await this.mutex.acquire(lockFlag);
    if (!lock) {
      throw new TooManyRequest('Server is busy');
    }
    const contextSession = await this.context.get(options.contextId);

    try {
      return await contextSession.removeBlobRecord(options.blobId);
    } catch (e: any) {
      throw new CopilotFailedToModifyContext({
        contextId: options.contextId,
        message: e.message,
      });
    }
  }
@ResolveField(() => [ContextMatchedFileChunk], {
description: 'match file in context',
})

View File

@@ -147,6 +147,28 @@ export class CopilotContextService implements OnApplicationBootstrap {
return null;
}
  async matchWorkspaceBlobs(
    workspaceId: string,
    content: string,
    topK: number = 5,
    signal?: AbortSignal,
    threshold: number = 0.5
  ) {
    if (!this.embeddingClient) return [];
    const embedding = await this.embeddingClient.getEmbedding(content, signal);
    if (!embedding) return [];

    const blobChunks = await this.models.copilotWorkspace.matchBlobEmbedding(
      workspaceId,
      embedding,
      topK * 2,
      threshold
    );
    if (!blobChunks.length) return [];

    return await this.embeddingClient.reRank(content, blobChunks, topK, signal);
  }
async matchWorkspaceFiles(
workspaceId: string,
content: string,

View File

@@ -1,13 +1,13 @@
import { nanoid } from 'nanoid';
import {
ContextBlob,
ContextCategories,
ContextCategory,
ContextConfig,
ContextDoc,
ContextEmbedStatus,
ContextFile,
ContextList,
FileChunkSimilarity,
Models,
} from '../../../models';
@@ -47,6 +47,10 @@ export class ContextSession implements AsyncDisposable {
return categories.filter(c => c.type === ContextCategories.Collection);
}
get blobs(): ContextBlob[] {
return this.config.blobs.map(d => ({ ...d }));
}
get docs(): ContextDoc[] {
return this.config.docs.map(d => ({ ...d }));
}
@@ -65,13 +69,6 @@ export class ContextSession implements AsyncDisposable {
);
}
get sortedList(): ContextList {
const { docs, files } = this.config;
return [...docs, ...files].toSorted(
(a, b) => a.createdAt - b.createdAt
) as ContextList;
}
async addCategoryRecord(type: ContextCategories, id: string, docs: string[]) {
const category = this.config.categories.find(
c => c.type === type && c.id === id
@@ -120,6 +117,33 @@ export class ContextSession implements AsyncDisposable {
return true;
}
  async addBlobRecord(blobId: string): Promise<ContextBlob | null> {
    const existsBlob = this.config.blobs.find(b => b.id === blobId);
    if (existsBlob) {
      return existsBlob;
    }

    const blob = await this.models.blob.get(this.config.workspaceId, blobId);
    if (!blob) return null;

    const record: ContextBlob = {
      id: blobId,
      createdAt: Date.now(),
      status: ContextEmbedStatus.processing,
    };
    this.config.blobs.push(record);
    await this.save();
    return record;
  }

  async removeBlobRecord(blobId: string): Promise<boolean> {
    const index = this.config.blobs.findIndex(b => b.id === blobId);
    if (index >= 0) {
      this.config.blobs.splice(index, 1);
      await this.save();
    }
    return true;
  }
async addDocRecord(docId: string): Promise<ContextDoc> {
const doc = this.config.docs.find(f => f.id === docId);
if (doc) {

View File

@@ -65,15 +65,14 @@ export class CopilotEmbeddingJob {
  async addFileEmbeddingQueue(file: Jobs['copilot.embedding.files']) {
    if (!this.supportEmbedding) return;
    const { userId, workspaceId, contextId, blobId, fileId, fileName } = file;
    await this.queue.add('copilot.embedding.files', {
      userId,
      workspaceId,
      contextId,
      blobId,
      fileId,
      fileName,
    });
    await this.queue.add('copilot.embedding.files', file);
  }

  @CallMetric('ai', 'addBlobEmbeddingQueue')
  async addBlobEmbeddingQueue(blob: Jobs['copilot.embedding.blobs']) {
    if (!this.supportEmbedding) return;
    await this.queue.add('copilot.embedding.blobs', blob);
  }
@OnEvent('workspace.doc.embedding')
@@ -288,6 +287,55 @@ export class CopilotEmbeddingJob {
}
}
  @OnJob('copilot.embedding.blobs')
  async embedPendingBlob({
    userId,
    workspaceId,
    contextId,
    blobId,
  }: Jobs['copilot.embedding.blobs']) {
    if (!this.supportEmbedding || !this.embeddingClient) return;

    try {
      const file = await this.readCopilotBlob(
        userId,
        workspaceId,
        blobId,
        'blob'
      );
      const chunks = await this.embeddingClient.getFileChunks(file);
      const total = chunks.reduce((acc, c) => acc + c.length, 0);

      for (const chunk of chunks) {
        const embeddings = await this.embeddingClient.generateEmbeddings(chunk);
        await this.models.copilotWorkspace.insertBlobEmbeddings(
          workspaceId,
          blobId,
          embeddings
        );
      }

      if (contextId) {
        this.event.emit('workspace.blob.embed.finished', {
          contextId,
          blobId,
          chunkSize: total,
        });
      }
    } catch (error: any) {
      if (contextId) {
        this.event.emit('workspace.blob.embed.failed', {
          contextId,
          blobId,
          error: mapAnyError(error).message,
        });
      }
      throw error;
    }
  }
private async getDocFragment(
workspaceId: string,
docId: string
@@ -465,7 +513,7 @@ export class CopilotEmbeddingJob {
const docIdsInWorkspace = readAllDocIdsFromWorkspaceSnapshot(snapshot.blob);
const docIdsInEmbedding =
await this.models.copilotContext.listWorkspaceEmbedding(workspaceId);
await this.models.copilotContext.listWorkspaceDocEmbedding(workspaceId);
const docIdsInWorkspaceSet = new Set(docIdsInWorkspace);
const deletedDocIds = docIdsInEmbedding.filter(

View File

@@ -14,6 +14,18 @@ declare global {
enableDocEmbedding?: boolean;
};
    'workspace.blob.embed.finished': {
      contextId: string;
      blobId: string;
      chunkSize: number;
    };

    'workspace.blob.embed.failed': {
      contextId: string;
      blobId: string;
      error: string;
    };
'workspace.doc.embedding': Array<{
workspaceId: string;
docId: string;
@@ -62,6 +74,13 @@ declare global {
fileName: string;
};
    'copilot.embedding.blobs': {
      contextId?: string;
      userId: string;
      workspaceId: string;
      blobId: string;
    };
'copilot.embedding.cleanupTrashedDocEmbeddings': {
workspaceId: string;
};

View File

@@ -2,6 +2,11 @@
# THIS FILE WAS AUTOMATICALLY GENERATED (DO NOT MODIFY)
# ------------------------------------------------------
input AddContextBlobInput {
  blobId: String!
  contextId: String!
}

input AddContextCategoryInput {
  categoryId: String!
  contextId: String!
@@ -226,6 +231,9 @@ type Copilot {
}
type CopilotContext {
"""list blobs in context"""
blobs: [CopilotContextBlob!]!
"""list collections in context"""
collections: [CopilotContextCategory!]!
@@ -247,16 +255,21 @@ type CopilotContext {
workspaceId: String!
}
type CopilotContextBlob {
  createdAt: SafeInt!
  id: ID!
  status: ContextEmbedStatus
}

type CopilotContextCategory {
  createdAt: SafeInt!
  docs: [CopilotDocType!]!
  docs: [CopilotContextDoc!]!
  id: ID!
  type: ContextCategories!
}

type CopilotContextDoc {
  createdAt: SafeInt!
  error: String
  id: ID!
  status: ContextEmbedStatus
}
@@ -281,12 +294,6 @@ type CopilotDocNotFoundDataType {
docId: String!
}
type CopilotDocType {
createdAt: SafeInt!
id: ID!
status: ContextEmbedStatus
}
type CopilotFailedToAddWorkspaceFileEmbeddingDataType {
message: String!
}
@@ -1163,6 +1170,9 @@ type Mutation {
acceptInviteById(inviteId: String!, sendAcceptMail: Boolean @deprecated(reason: "never used"), workspaceId: String @deprecated(reason: "never used")): Boolean!
activateLicense(license: String!, workspaceId: String!): License!
"""add a blob to context"""
addContextBlob(options: AddContextBlobInput!): CopilotContextBlob!
"""add a category to context"""
addContextCategory(options: AddContextCategoryInput!): CopilotContextCategory!
@@ -1266,6 +1276,9 @@ type Mutation {
"""Remove user avatar"""
removeAvatar: RemoveAvatar!
"""remove a blob from context"""
removeContextBlob(options: RemoveContextBlobInput!): Boolean!
"""remove a category from context"""
removeContextCategory(options: RemoveContextCategoryInput!): Boolean!
@@ -1617,6 +1630,11 @@ type RemoveAvatar {
success: Boolean!
}
input RemoveContextBlobInput {
  blobId: String!
  contextId: String!
}

input RemoveContextCategoryInput {
  categoryId: String!
  contextId: String!