From 1e9cbdb65daa17bd7b80270a4370303391c14f41 Mon Sep 17 00:00:00 2001 From: darkskygit Date: Tue, 27 May 2025 11:36:47 +0000 Subject: [PATCH] feat(server): use generative ai api for transcript (#12569) fix AI-151 --- .../__tests__/__snapshots__/copilot.e2e.ts.md | 12 +++---- .../__snapshots__/copilot.e2e.ts.snap | Bin 1011 -> 955 bytes .../src/__tests__/copilot-provider.spec.ts | 33 +++++++++++++++--- .../server/src/__tests__/copilot.e2e.ts | 8 +++-- .../src/plugins/copilot/transcript/service.ts | 33 ++++++++++-------- 5 files changed, 59 insertions(+), 27 deletions(-) diff --git a/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.md b/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.md index 448c65cc1e..9c1693aefe 100644 --- a/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.md +++ b/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.md @@ -69,10 +69,10 @@ Generated by [AVA](https://avajs.dev). [ { - actions: '[{"a":"A","s":30,"e":45,"t":"Hello, everyone."},{"a":"B","s":46,"e":70,"t":"Hi, thank you for joining the meeting today."}]', + actions: 'generate text to text', status: 'claimed', - summary: '[{"a":"A","s":30,"e":45,"t":"Hello, everyone."},{"a":"B","s":46,"e":70,"t":"Hi, thank you for joining the meeting today."}]', - title: '[{"a":"A","s":30,"e":45,"t":"Hello, everyone."},{"a":"B","s":46,"e":70,"t":"Hi, thank you for joining the meeting today."}]', + summary: 'generate text to text', + title: 'generate text to text', transcription: [ { end: '00:00:45', @@ -102,10 +102,10 @@ Generated by [AVA](https://avajs.dev). [ { - actions: '[{"a":"A","s":30,"e":45,"t":"Hello, everyone."},{"a":"B","s":46,"e":70,"t":"Hi, thank you for joining the meeting today."}]', + actions: 'generate text to text', status: 'claimed', - summary: '[{"a":"A","s":30,"e":45,"t":"Hello, everyone."},{"a":"B","s":46,"e":70,"t":"Hi, thank you for joining the meeting today."}]', - title: '[{"a":"A","s":30,"e":45,"t":"Hello, everyone."},{"a":"B","s":46,"e":70,"t":"Hi, thank you for joining the meeting today."}]', + summary: 'generate text to text', + title: 'generate text to text', transcription: [ { end: '00:00:45', diff --git a/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.snap b/packages/backend/server/src/__tests__/__snapshots__/copilot.e2e.ts.snap index 53e83ab6981c6c8c3e959e6dc342f9929271c841..3bd449d93d0d253da344c5daad6400edf8b9f6d3 100644 GIT binary patch literal 955 zcmV;s14R5mRzV=8|p85Z2K2lE>K;%0S zhwd-d-ye$z00000000B+l~0HqMHI%rSKU23yF;=YqM%@)K)uMa&X}0P5|nt#o)iS} z4<4$!Yi4SvyKCsGjyrnOfEa{;cv3+V1TW$t9>tp%Pl5!&tKePmAbK*>+uh03teFj) zuo8AIQ`OX~{@(As_x06&nCNI^-n%BPvWeW`W~hrK=Aq;wOeD9O=hEgA{+6;sF2+ik zOpMJ?Tl4O z0Poj>8Chb@a&2u5bPU8m$BDiK@DO>R-~_c(X^a?116svNV)gTu)gzgia_jGk z`w_rTjyP{`alba=d;(Soc%B?_{$Ao{BrHWbvof>w{^5blWG<}aR_@p--08m)+|0;mx@dbBroOd*ga;h zbI_Uw8bATyQvlxq_-%1*eYb*PqQkAYq1ji7+=_e7spV)`WTThW+p;P3EYD1&4bK%L z(s3d;#&O?$nX$qara3sybtH{Z*`Ol-Xoa!FU6JEcNoE(?tCtVTj70#{Vou~pXR2Y; zigmOV_bSPc>?JwYk*QnbvH8}pxyVeEtMP4ZajDAi%3)>b6lWo+Jaoplby&5wrw^J$%K(@F_y)iY0LuiNbKPM+slasy5lyw3yK)DYM3e}X%GkZy z6lp5*$z5F?ls&O1$F#s{S;_TtD!F!uekPQ#+Y|&0 z=rG`E2E4?8i|o$k+os@a23%*rFAVsL0jE4CBv@!Tp3Is_YInPv-R|a@XXnt4r5MS) z-qU#(qp>2l&NZjI&Z${$_0zQrGD-A0mltI|(V5)XpUmE7&%J+6<_r6gdFJfCWUgOO z>)Z}SHsTXq@V?IZwpP=L_`vTz&ANH_tLXJ)KO=npxSE zU1aAnubTI|zxTbbzIVGDBr+VRx9qP7_)t5yQu|O~$B!X$la-s7Pdo$L3#)omN zGCov&snxAJQN%TWjf9OQjDOT1s|nx&fOF)XC3iE=`G{y5t*)+umO%tsA$nn2WcL81 z0N$MzdfUN{2sK%CxVE+iIt{WwXNW!p@EF-3_bfG1p%m{41zN>9vZl{HEe9e~<AX_4?iB)iYe4=9P1X8l-cnU5InYLQQ7b~2x1KA(TSb1BbwPp}f+uHNI$ ztsHcwt^`m3_yoXR0KYGutz%9wNMx`T)oga-L~KQEBkNSyFS5bQ@mr!E^(>zmPisDx z6;FqWSRY1R^J7YLU8p*7n9EQo6=%JQ`KKxxOS&uO*cg(UkoM~P2j$EP0mO@WA_KY= z*Bp6~47Z|o1^LOnAV)G(Q)xUsFAbgZOoe$od`K4WR2_bFR2>?_S;~tndoY>vRTruV z(pWQIMDa`?GGjY3sHE-OK4=r30x&8a26cuqoGc2Of^G)m`0q{f5u|j=$+Q zedTv9wwk{1I~$jpzV|a(}ad z`!>*UfM*?G+c8Sqg%)j#toEKptJP_>IvbbfWHJjpn&oZ! zbj_UJ{$RE{ZS(zeFt6UIDC diff --git a/packages/backend/server/src/__tests__/copilot-provider.spec.ts b/packages/backend/server/src/__tests__/copilot-provider.spec.ts index 3281c64f40..6ca04e26d7 100644 --- a/packages/backend/server/src/__tests__/copilot-provider.spec.ts +++ b/packages/backend/server/src/__tests__/copilot-provider.spec.ts @@ -6,7 +6,10 @@ import { AuthService } from '../core/auth'; import { QuotaModule } from '../core/quota'; import { CopilotModule } from '../plugins/copilot'; import { prompts, PromptService } from '../plugins/copilot/prompt'; -import { CopilotProviderFactory } from '../plugins/copilot/providers'; +import { + CopilotProviderFactory, + CopilotProviderType, +} from '../plugins/copilot/providers'; import { TranscriptionResponseSchema } from '../plugins/copilot/transcript/types'; import { CopilotChatTextExecutor, @@ -183,11 +186,18 @@ const checkUrl = (url: string) => { const retry = async ( action: string, t: ExecutionContext, - callback: (t: ExecutionContext) => void + callback: (t: ExecutionContext) => Promise ) => { let i = 3; while (i--) { - const ret = await t.try(callback); + const ret = await t.try(async t => { + try { + await callback(t); + } catch (e) { + t.log(`Error during ${action}:`, e); + throw e; + } + }); if (ret.passed) { return ret.commit(); } else { @@ -343,6 +353,7 @@ const actions = [ }); }, type: 'structured' as const, + prefer: CopilotProviderType.Gemini, }, { name: 'Should transcribe middle audio', @@ -365,6 +376,7 @@ const actions = [ }); }, type: 'structured' as const, + prefer: CopilotProviderType.Gemini, }, { name: 'Should transcribe long audio', @@ -387,6 +399,7 @@ const actions = [ }); }, type: 'structured' as const, + prefer: CopilotProviderType.Gemini, }, { promptName: [ @@ -554,7 +567,15 @@ const actions = [ }, ]; -for (const { name, promptName, messages, verifier, type, config } of actions) { +for (const { + name, + promptName, + messages, + verifier, + type, + config, + prefer, +} of actions) { const prompts = Array.isArray(promptName) ? promptName : [promptName]; for (const promptName of prompts) { test( @@ -564,7 +585,9 @@ for (const { name, promptName, messages, verifier, type, config } of actions) { const { factory, prompt: promptService } = t.context; const prompt = (await promptService.get(promptName))!; t.truthy(prompt, 'should have prompt'); - const provider = (await factory.getProviderByModel(prompt.model))!; + const provider = (await factory.getProviderByModel(prompt.model, { + prefer, + }))!; t.truthy(provider, 'should have provider'); await retry(`action: ${promptName}`, t, async t => { switch (type) { diff --git a/packages/backend/server/src/__tests__/copilot.e2e.ts b/packages/backend/server/src/__tests__/copilot.e2e.ts index a526862782..68cb33541a 100644 --- a/packages/backend/server/src/__tests__/copilot.e2e.ts +++ b/packages/backend/server/src/__tests__/copilot.e2e.ts @@ -19,6 +19,7 @@ import { MockEmbeddingClient } from '../plugins/copilot/context/embedding'; import { prompts, PromptService } from '../plugins/copilot/prompt'; import { CopilotProviderFactory, + CopilotProviderType, GeminiGenerativeProvider, OpenAIProvider, } from '../plugins/copilot/providers'; @@ -79,7 +80,7 @@ test.before(async t => { providers: { openai: { apiKey: '1' }, fal: {}, - perplexity: {}, + gemini: { apiKey: '1' }, }, unsplash: { key: process.env.UNSPLASH_ACCESS_KEY || '1', @@ -101,7 +102,10 @@ test.before(async t => { }); m.overrideProvider(OpenAIProvider).useClass(MockCopilotProvider); m.overrideProvider(GeminiGenerativeProvider).useClass( - MockCopilotProvider + class MockGenerativeProvider extends MockCopilotProvider { + // @ts-expect-error + override type: CopilotProviderType = CopilotProviderType.Gemini; + } ); }, }); diff --git a/packages/backend/server/src/plugins/copilot/transcript/service.ts b/packages/backend/server/src/plugins/copilot/transcript/service.ts index 67ff4f8090..885bfac5e1 100644 --- a/packages/backend/server/src/plugins/copilot/transcript/service.ts +++ b/packages/backend/server/src/plugins/copilot/transcript/service.ts @@ -18,6 +18,7 @@ import { PromptService } from '../prompt'; import { CopilotProvider, CopilotProviderFactory, + CopilotProviderType, ModelOutputType, PromptMessage, } from '../providers'; @@ -156,14 +157,18 @@ export class CopilotTranscriptionService { private async getProvider( modelId: string, - structured: boolean + structured: boolean, + prefer?: CopilotProviderType ): Promise { - let provider = await this.providerFactory.getProvider({ - outputType: structured - ? ModelOutputType.Structured - : ModelOutputType.Text, - modelId, - }); + let provider = await this.providerFactory.getProvider( + { + outputType: structured + ? ModelOutputType.Structured + : ModelOutputType.Text, + modelId, + }, + { prefer } + ); if (!provider) { throw new NoCopilotProviderAvailable(); @@ -175,7 +180,8 @@ export class CopilotTranscriptionService { private async chatWithPrompt( promptName: string, message: Partial, - schema?: ZodType + schema?: ZodType, + prefer?: CopilotProviderType ): Promise { const prompt = await this.prompt.get(promptName); if (!prompt) { @@ -186,7 +192,7 @@ export class CopilotTranscriptionService { const msg = { role: 'user' as const, content: '', ...message }; const config = Object.assign({}, prompt.config); if (schema) { - const provider = await this.getProvider(prompt.model, true); + const provider = await this.getProvider(prompt.model, true, prefer); return provider.structure( cond, [...prompt.finish({ schema }), msg], @@ -226,13 +232,12 @@ export class CopilotTranscriptionService { } private async callTranscript(url: string, mimeType: string, offset: number) { + // NOTE: Vertex provider not support transcription yet, we always use Gemini here const result = await this.chatWithPrompt( 'Transcript audio', - { - attachments: [url], - params: { mimetype: mimeType }, - }, - TranscriptionResponseSchema + { attachments: [url], params: { mimetype: mimeType } }, + TranscriptionResponseSchema, + CopilotProviderType.Gemini ); const transcription = TranscriptionResponseSchema.parse(