From c846c57a1228af4e8b627df8696df9aabf20fb1a Mon Sep 17 00:00:00 2001 From: darkskygit Date: Tue, 10 Jun 2025 02:35:09 +0000 Subject: [PATCH] fix(server): list context status (#12771) ## Summary by CodeRabbit - **Bug Fixes** - Improved handling of document statuses to ensure documents without a finished or existing status are now explicitly marked as "processing" instead of remaining undefined. - **Tests** - Added comprehensive new tests and snapshot entries to verify document status merging, including edge cases and concurrent operations, ensuring robust and consistent behavior. - **Enhancements** - Updated context document listings to display the processing status for relevant documents. --- .../__tests__/__snapshots__/copilot.e2e.ts.md | 1 + .../__snapshots__/copilot.e2e.ts.snap | Bin 955 -> 961 bytes .../__snapshots__/copilot-context.spec.ts.md | 192 ++++++++++++++++++ .../copilot-context.spec.ts.snap | Bin 428 -> 1617 bytes .../__tests__/models/copilot-context.spec.ts | 172 ++++++++++++++++ .../server/src/models/copilot-context.ts | 4 +- 6 files changed, 368 insertions(+), 1 deletion(-) diff --git a/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.md b/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.md index 9c1693aefe..f5b84bfb9a 100644 --- a/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.md +++ b/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.md @@ -52,6 +52,7 @@ Generated by [AVA](https://avajs.dev). [ { id: 'docId1', + status: 'processing', }, ] diff --git a/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.snap b/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.snap index 3bd449d93d0d253da344c5daad6400edf8b9f6d3..f69a3815f81fd489f5669dc85db574c7a11fc6ee 100644 GIT binary patch literal 961 zcmV;y13vsgRzVuB{&_u;e462U>UhhcdXT`qdg@T z2VT}#^B;={00000000B+l~0HqMHI%rSKU23yF;=Y5<$U0fqIc;ok?O2OHkq|IVlL@ zA3Rid*UZ#Tch}HWo$TmM1EPol@uY$#2wucPJa`aqUOWjB1h0a3!Gq|@OmBC`>Dg>g z*bND?bD659UiJ5W@4c_D_5DZ(BlGrkX_bxSHaA0^MIrYk7k(tU)jX9po$$-b4!IaB zWfCzqLv77l*TYa)@d`-?OPKzsO>7;&DFDZ*b&S08X(}c}1MT8RQHA z!~otY16$AgTQac3s^!|+8t4d!fsPV=0pJ8VAnzEpV`+>SNCR5Mabo53menJfn0)K* zi2D)1Pc?DQ?&5x}#JL2l5b!M3#JRhPn~|^-=)}szmivbWGLfmUl3TfLr*NnLwIDau zkt{dM!Wd<&NUV&6FQe@z=L-}mWBE`StJ4V=eJj(QnjO~X0k6-UXD$|r7)YMW+qS#R zTxY*E^)!GCz$XB{1MpjOZQXhW{Yd*4!-{5KMe<_UtDRa7hFLOtQN1CnQqS^CL|pM) zF(MvEa$_9!>n}4_*vwQ1$EgmaF)A4pc+F*WA*@NW5qIxuA)Jv!eQ*9xsWx+0R#GPs zL_);zq*Dv=8r{i}Ni}lqZg;cW-8}R3 z9NMuIBbklf5Iw?mPP_(W&CuT#FI)pR1c9ZDWcY4f!X#bl!x(#MM-%^P;_8U0`QjQ9GX z&)id=@!lukLjpb};7bB-H2IC2^ZmZ^n?DM_=@31}Q2NnJ)1{NY;avt?WxzKKxXFNj znq8>t0B0TGRR?&}0j@Q;P@B9z9pG;VIOzhlR9(cQB-T_n36=U`Y4w|5xQ-bcLRRvk(9PUE9z7 literal 955 zcmV;s14R5mRzV=8|p85Z2K2lE>K;%0S zhwd-d-ye$z00000000B+l~0HqMHI%rSKU23yF;=YqM%@)K)uMa&X}0P5|nt#o)iS} z4<4$!Yi4SvyKCsGjyrnOfEa{;cv3+V1TW$t9>tp%Pl5!&tKePmAbK*>+uh03teFj) zuo8AIQ`OX~{@(As_x06&nCNI^-n%BPvWeW`W~hrK=Aq;wOeD9O=hEgA{+6;sF2+ik zOpMJ?Tl4O z0Poj>8Chb@a&2u5bPU8m$BDiK@DO>R-~_c(X^a?116svNV)gTu)gzgia_jGk z`w_rTjyP{`alba=d;(Soc%B?_{$Ao{BrHWbvof>w{^5blWG<}aR_@p--08m)+|0;mx@dbBroOd*ga;h zbI_Uw8bATyQvlxq_-%1*eYb*PqQkAYq1ji7+=_e7spV)`WTThW+p;P3EYD1&4bK%L z(s3d;#&O?$nX$qara3sybtH{Z*`Ol-Xoa!FU6JEcNoE(?tCtVTj70#{Vou~pXR2Y; zigmOV_bSPc>?JwYk*QnbvH8}pxyVeEtMP4ZajDAi%3)>b6lWo+Jaoplby&5wrw^J$%K(@F_y)iY0LuiNbKPM+slasy5lyw3yK)DYM3e}X%GkZy z6lp5*$z5F?ls&O1$F#s{S;_TtD!F!uekPQ#+Y|&0 z=rG`E2E4?8i|o$k+os@a23%*rFAVsL0jE4CBv@!Tp3Is_YInPv-R|a@XXnt4r5MS) z-qU#(qp>2l&NZjI&Z${$_0zQrGD-A0mltI|(V5)XpUmE7&%J+6<_r6gdFJfCWUgOO z>)Z}SHsTXq@V?IZwpP=L_`vTz& should return true when embedding table is available true + +## should merge doc status correctly + +> basic doc status merge + + [ + { + id: 'doc1', + status: 'processing', + }, + { + id: 'doc2', + status: 'processing', + }, + { + id: 'doc3', + status: 'failed', + }, + { + id: 'doc4', + status: 'processing', + }, + ] + +> mixed doc status merge + + [ + { + id: 'doc5', + status: 'finished', + }, + { + id: 'doc5', + status: 'finished', + }, + { + id: 'doc6', + status: 'processing', + }, + { + id: 'doc6', + status: 'failed', + }, + { + id: 'doc7', + status: 'processing', + }, + ] + +> edge cases results + + [ + { + case: 0, + length: 1, + statuses: [ + 'processing', + ], + }, + { + case: 1, + length: 1, + statuses: [ + 'processing', + ], + }, + { + case: 2, + length: 100, + statuses: [ + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + 'processing', + ], + }, + ] + +## should handle concurrent mergeDocStatus calls + +> concurrent calls results + + [ + { + call: 1, + status: 'finished', + }, + { + call: 2, + status: 'finished', + }, + { + call: 3, + status: 'processing', + }, + ] diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.snap b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.snap index 91e2cc47e2041b5c9a3ae947dc539084657dc5b6..661d0e2d3444f4f6c1cda3b7003be21bf57a2d87 100644 GIT binary patch literal 1617 zcmV-X2Cn%*RzVQH@8hvOp4Z(ZkOBefKpJRDQnGo2KvPIVO42le zq`V|;10MU?T_;{oWP8Jg15i++mP$pc5QiY45)cOtsO^CwRMCn92P8Of$RS8bZ~zGj zkWfK_s))|p&d%)ae>QZbvX>p}^Z5TfexBc0^E^+F)}p$9G(CAnXJHoUsg>z?eKHE% zF`c>bWE8oh^~8i&%L7kC=*`hkGl=)K2Ce&S9V7}?||FYHuTP-o;a4BaJ@zw_r7r* z&zPw&bpsvgOb6{=e*gcPA1kQ)-RUudy1%~tG;Yt_+*ePIrpG-0f104R116}Mp41J; z;(GFJo|=`t&#@?TbM1n|(@9SI>v(DVMt@xUM`tw4yjrA%Z@)pO>y5o#XUU}Q@3UE~ zu{PduFN_)&{cnQ_os8+`3Z|KtO{T72PZI5C(TVOD9%+r?ps{Plnp)QZ+kjhv1Hj=y z>uWaGH*6IsDfM{ce{0eI(R#AR#}8YtIOV9^g$h3i}s~M@O>Bv3P zjoh2v$bHd{TuG@_jTJYWi@#679SX9+>3dSa(+d7MIDH=|_((yyKT@;Px5W(r$`0uyoBfwFxdSI~4n-uI+P*d>G!1twj zTEU+b{9VDh{^qlznCqK6)Av%{yfEpX)=0-=*?0>Zk5yIbUkawP)S6744k|qRm*yOm zFE+kO<^H7fRt}n#x6NJIQN|}Z_$=4E$hn?Hc9bz}!g8~>k=55ouQy?f3HwcmO?cFV z=S+Cjgm+B%vQfl>5exQOaKwUB7M!u*6${Q;@VNy`ZCGc+ZW~5zc-V%gY|s_=0YibK5%mY?H?Z@Cd1=6stiq(LAyXp&Nb z8G_{mGX=8*vjxiw<_K00tSDGXu(Dtk!K#7{2{tU)62X=VwoI@K1iMhM<$`@ju!{t{ zSg=b3TOrt`f?X!q<$`@zFjugZf?Xllm4dAj?0bT(7VIj)t`_Y3f~^tk2ZCKA*jmBX z3HC$5t`%&(U?YNU5NxAhn*`e|*cQRA6YP4ywhDHGU^fc(Bf+)__G7_r66|KdwhQ(X z!FC9Ci(oqi`>9~J3bsqI-Gcp0uswqPT(G@@?Gx-a!EP69zhDOh`-Nb42zIAncL{d4 zUv1m* zBJKM1*q=-i9cQiW_Kv!LpncrwMNvBIxb>iVY0);~C%YT*KTmJOb8iFh0{-)md5%8d%uJB4B9`%Wb9UM&@B93tLJ?-v`l3p9hh$>}+8tc7C+z zd}G70+QlkG>JP$%@$3#8rDT>!DbFNn3M8Y-%U|&+Nh=n3wxktj(G6Q+GqSGd2VDFJ zQ0ogov+40nb&PfY8BKH5$FHzC{r2$WlM}z=VN4V$iQdw+z!OcM{Qd&u(wD2!!3}9$ Wb5<;S8~(tR&VqM$@rcBW1ONc=Ny%;i diff --git a/packages/backend/server/src/__tests__/models/copilot-context.spec.ts b/packages/backend/server/src/__tests__/models/copilot-context.spec.ts index 397fd7dfa8..e189f9a9ac 100644 --- a/packages/backend/server/src/__tests__/models/copilot-context.spec.ts +++ b/packages/backend/server/src/__tests__/models/copilot-context.spec.ts @@ -2,8 +2,10 @@ import { randomUUID } from 'node:crypto'; import { AiSession, PrismaClient, User, Workspace } from '@prisma/client'; import ava, { TestFn } from 'ava'; +import Sinon from 'sinon'; import { Config } from '../../base'; +import { ContextEmbedStatus } from '../../models/common/copilot'; import { CopilotContextModel } from '../../models/copilot-context'; import { CopilotSessionModel } from '../../models/copilot-session'; import { CopilotWorkspaceConfigModel } from '../../models/copilot-workspace'; @@ -236,3 +238,173 @@ test('should check embedding table', async t => { // t.false(ret, 'should return false when embedding table is not available'); // } }); + +test('should merge doc status correctly', async t => { + const createDoc = (id: string, status?: string) => ({ + id, + createdAt: Date.now(), + ...(status && { status: status as any }), + }); + + const createDocWithEmbedding = async (docId: string) => { + await t.context.db.snapshot.create({ + data: { + workspaceId: workspace.id, + id: docId, + blob: Buffer.from([1, 1]), + state: Buffer.from([1, 1]), + updatedAt: new Date(), + createdAt: new Date(), + }, + }); + + await t.context.copilotContext.insertWorkspaceEmbedding( + workspace.id, + docId, + [ + { + index: 0, + content: 'content', + embedding: Array.from({ length: 1024 }, () => 1), + }, + ] + ); + }; + + const emptyResult = await t.context.copilotContext.mergeDocStatus( + workspace.id, + [] + ); + t.deepEqual(emptyResult, []); + + const basicDocs = [ + createDoc('doc1'), + createDoc('doc2'), + createDoc('doc3', 'failed'), + createDoc('doc4', 'processing'), + ]; + const basicResult = await t.context.copilotContext.mergeDocStatus( + workspace.id, + basicDocs + ); + t.snapshot( + basicResult.map(d => ({ id: d.id, status: d.status })), + 'basic doc status merge' + ); + + { + await createDocWithEmbedding('doc5'); + + const mixedDocs = [ + createDoc('doc5'), + createDoc('doc5', 'processing'), + createDoc('doc6'), + createDoc('doc6', 'failed'), + createDoc('doc7'), + ]; + const mixedResult = await t.context.copilotContext.mergeDocStatus( + workspace.id, + mixedDocs + ); + t.snapshot( + mixedResult.map(d => ({ id: d.id, status: d.status })), + 'mixed doc status merge' + ); + + const hasEmbeddingStub = Sinon.stub( + t.context.copilotContext, + 'hasWorkspaceEmbedding' + ).resolves(new Set()); + + const stubResult = await t.context.copilotContext.mergeDocStatus( + workspace.id, + [createDoc('doc5')] + ); + t.is(stubResult[0].status, ContextEmbedStatus.processing); + + hasEmbeddingStub.restore(); + } + + { + const testCases = [ + { + workspaceId: 'invalid-workspace', + docs: [{ id: 'doc1', createdAt: Date.now() }], + }, + { + workspaceId: workspace.id, + docs: [{ id: 'doc1', createdAt: Date.now(), status: undefined as any }], + }, + { + workspaceId: workspace.id, + docs: Array.from({ length: 100 }, (_, i) => ({ + id: `doc-${i}`, + createdAt: Date.now() + i, + })), + }, + ]; + + const results = await Promise.all( + testCases.map(testCase => + t.context.copilotContext.mergeDocStatus( + testCase.workspaceId, + testCase.docs + ) + ) + ); + + t.snapshot( + results.map((result, index) => ({ + case: index, + length: result.length, + statuses: result.map(d => d.status), + })), + 'edge cases results' + ); + } +}); + +test('should handle concurrent mergeDocStatus calls', async t => { + await t.context.db.snapshot.create({ + data: { + workspaceId: workspace.id, + id: 'concurrent-doc', + blob: Buffer.from([1, 1]), + state: Buffer.from([1, 1]), + updatedAt: new Date(), + createdAt: new Date(), + }, + }); + + await t.context.copilotContext.insertWorkspaceEmbedding( + workspace.id, + 'concurrent-doc', + [ + { + index: 0, + content: 'content', + embedding: Array.from({ length: 1024 }, () => 1), + }, + ] + ); + + const concurrentDocs = [ + [{ id: 'concurrent-doc', createdAt: Date.now() }], + [{ id: 'concurrent-doc', createdAt: Date.now() + 1000 }], + [{ id: 'non-existent-doc', createdAt: Date.now() }], + ]; + + const results = await Promise.all( + concurrentDocs.map(docs => + t.context.copilotContext.mergeDocStatus(workspace.id, docs) + ) + ); + + t.snapshot( + results.map((result, index) => ({ + call: index + 1, + status: result[0].status, + })), + 'concurrent calls results' + ); +}); diff --git a/packages/backend/server/src/models/copilot-context.ts b/packages/backend/server/src/models/copilot-context.ts index f346f9679b..ef9213afc4 100644 --- a/packages/backend/server/src/models/copilot-context.ts +++ b/packages/backend/server/src/models/copilot-context.ts @@ -91,7 +91,9 @@ export class CopilotContextModel extends BaseModel { const status = finishedDoc.has(doc.id) ? ContextEmbedStatus.finished : undefined; - doc.status = status || doc.status; + // NOTE: when the document has not been synchronized to the server or is in the embedding queue + // the status will be empty, fallback to processing if no status is provided + doc.status = status || doc.status || ContextEmbedStatus.processing; } return docs;