diff --git a/packages/backend/server/package.json b/packages/backend/server/package.json index 91209eb1b1..2476bd800c 100644 --- a/packages/backend/server/package.json +++ b/packages/backend/server/package.json @@ -29,7 +29,7 @@ "@affine/reader": "workspace:*", "@affine/server-native": "workspace:*", "@ai-sdk/anthropic": "^1.2.10", - "@ai-sdk/google": "^1.2.10", + "@ai-sdk/google": "^1.2.18", "@ai-sdk/openai": "^1.3.21", "@ai-sdk/perplexity": "^1.1.6", "@apollo/server": "^4.11.3", diff --git a/packages/backend/server/src/__tests__/copilot-provider.spec.ts b/packages/backend/server/src/__tests__/copilot-provider.spec.ts index e0ac21c0a1..29cea6ced7 100644 --- a/packages/backend/server/src/__tests__/copilot-provider.spec.ts +++ b/packages/backend/server/src/__tests__/copilot-provider.spec.ts @@ -328,7 +328,7 @@ const actions = [ messages: [ { role: 'user' as const, - content: '', + content: 'transcript the audio', attachments: [ 'https://cdn.affine.pro/copilot-test/MP9qDGuYgnY+ILoEAmHpp3h9Npuw2403EAYMEA.mp3', ], @@ -350,7 +350,7 @@ const actions = [ messages: [ { role: 'user' as const, - content: '', + content: 'transcript the audio', attachments: [ 'https://cdn.affine.pro/copilot-test/2ed05eo1KvZ2tWB_BAjFo67EAPZZY-w4LylUAw.m4a', ], @@ -372,7 +372,7 @@ const actions = [ messages: [ { role: 'user' as const, - content: '', + content: 'transcript the audio', attachments: [ 'https://cdn.affine.pro/copilot-test/nC9-e7P85PPI2rU29QWwf8slBNRMy92teLIIMw.opus', ], diff --git a/packages/backend/server/src/__tests__/mocks/copilot.mock.ts b/packages/backend/server/src/__tests__/mocks/copilot.mock.ts index a4cb5bd93f..2f30df562f 100644 --- a/packages/backend/server/src/__tests__/mocks/copilot.mock.ts +++ b/packages/backend/server/src/__tests__/mocks/copilot.mock.ts @@ -111,7 +111,7 @@ export class MockCopilotProvider extends OpenAIProvider { ], }, { - id: 'gemini-2.5-pro-preview-03-25', + id: 'gemini-2.5-pro-preview-05-06', capabilities: [ { input: [ModelInputType.Text, ModelInputType.Image], diff --git a/packages/backend/server/src/plugins/copilot/prompt/prompts.ts b/packages/backend/server/src/plugins/copilot/prompt/prompts.ts index 6c7a1f5a7c..21df22fa99 100644 --- a/packages/backend/server/src/plugins/copilot/prompt/prompts.ts +++ b/packages/backend/server/src/plugins/copilot/prompt/prompts.ts @@ -350,7 +350,7 @@ const actions: Prompt[] = [ { name: 'Transcript audio', action: 'Transcript audio', - model: 'gemini-2.5-pro-preview-03-25', + model: 'gemini-2.5-pro-preview-05-06', messages: [ { role: 'system', @@ -1096,6 +1096,8 @@ const chat: Prompt[] = [ 'o4-mini', 'claude-3-7-sonnet-20250219', 'claude-3-5-sonnet-20241022', + 'gemini-2.5-flash-preview-04-17', + 'gemini-2.5-pro-preview-05-06', ], messages: [ { @@ -1122,11 +1124,12 @@ When referencing information from the provided documents, files or web search re 1. Use markdown footnote format for citations 2. Add citations immediately after the relevant sentence or paragraph 3. Required format: [^reference_index] where reference_index is an increasing positive integer -4. You MUST include citations at the end of your response in this exact format: +4. When a single sentence needs multiple citations, write each marker in its own pair of brackets and place them consecutively. Correct: [^2][^4][^12], Incorrect: [^2, 4, 12]. +5. You MUST include citations at the end of your response in this exact format: - For documents: [^reference_index]:{"type":"doc","docId":"document_id"} - For files: [^reference_index]:{"type":"attachment","blobId":"blob_id","fileName":"file_name","fileType":"file_type"} - For web search results: [^reference_index]:{"type":"url","url":"url_path"} -5. Ensure citations adhere strictly to the required format. Do not add extra spaces in citations like [^ reference_index] or [ ^reference_index]. +6. Ensure citations adhere strictly to the required format. Do not add extra spaces in citations like [^ reference_index] or [ ^reference_index]. ### Citations Structure Your response MUST follow this structure: @@ -1136,6 +1139,7 @@ Your response MUST follow this structure: Example Output with Citations: This is my response with a document citation[^1]. Here is more content with another file citation[^2]. And here is a web search result citation[^3]. +Here is multiple citations: [^1][^2][^3]. [^1]:{"type":"doc","docId":"abc123"} [^2]:{"type":"attachment","blobId":"xyz789","fileName":"example.txt","fileType":"text"} diff --git a/packages/backend/server/src/plugins/copilot/providers/anthropic.ts b/packages/backend/server/src/plugins/copilot/providers/anthropic.ts index cba0b3b598..a94f440159 100644 --- a/packages/backend/server/src/plugins/copilot/providers/anthropic.ts +++ b/packages/backend/server/src/plugins/copilot/providers/anthropic.ts @@ -232,7 +232,7 @@ export class AnthropicProvider extends CopilotProvider { private getAnthropicOptions(options: CopilotChatOptions, model: string) { const result: AnthropicProviderOptions = {}; - if (options?.reasoning && this.isThinkingModel(model)) { + if (options?.reasoning && this.isReasoningModel(model)) { result.thinking = { type: 'enabled', budgetTokens: 12000, @@ -257,7 +257,8 @@ export class AnthropicProvider extends CopilotProvider { return text.replaceAll('\n', '\n> '); } - private isThinkingModel(model: string) { + private isReasoningModel(model: string) { + // only claude 3.7 sonnet supports reasoning config return model.startsWith('claude-3-7-sonnet'); } } diff --git a/packages/backend/server/src/plugins/copilot/providers/gemini.ts b/packages/backend/server/src/plugins/copilot/providers/gemini.ts index d0d19397b2..5604363418 100644 --- a/packages/backend/server/src/plugins/copilot/providers/gemini.ts +++ b/packages/backend/server/src/plugins/copilot/providers/gemini.ts @@ -1,6 +1,7 @@ import { createGoogleGenerativeAI, type GoogleGenerativeAIProvider, + type GoogleGenerativeAIProviderOptions, } from '@ai-sdk/google'; import { AISDKError, @@ -52,9 +53,23 @@ export class GeminiProvider extends CopilotProvider { }, ], }, + { + name: 'Gemini 2.5 Flash', + id: 'gemini-2.5-flash-preview-04-17', + capabilities: [ + { + input: [ + ModelInputType.Text, + ModelInputType.Image, + ModelInputType.Audio, + ], + output: [ModelOutputType.Text, ModelOutputType.Structured], + }, + ], + }, { name: 'Gemini 2.5 Pro', - id: 'gemini-2.5-pro-preview-03-25', + id: 'gemini-2.5-pro-preview-05-06', capabilities: [ { input: [ @@ -78,6 +93,10 @@ export class GeminiProvider extends CopilotProvider { }, ]; + private readonly MAX_STEPS = 20; + + private readonly CALLOUT_PREFIX = '\n> [!]\n> '; + #instance!: GoogleGenerativeAIProvider; override configured(): boolean { @@ -203,20 +222,55 @@ export class GeminiProvider extends CopilotProvider { metrics.ai.counter('chat_text_stream_calls').add(1, { model: model.id }); const [system, msgs] = await chatToGPTMessage(messages); - const { textStream } = streamText({ - model: this.#instance(model.id), + const { fullStream } = streamText({ + model: this.#instance(model.id, { + useSearchGrounding: this.useSearchGrounding(options), + }), system, messages: msgs, abortSignal: options.signal, + maxSteps: this.MAX_STEPS, + providerOptions: { + google: this.getGeminiOptions(options, model.id), + }, }); - for await (const message of textStream) { - if (message) { - yield message; + let lastType; + // reasoning, tool-call, tool-result need to mark as callout + let prefix: string | null = this.CALLOUT_PREFIX; + for await (const chunk of fullStream) { + if (chunk) { + switch (chunk.type) { + case 'text-delta': { + let result = chunk.textDelta; + if (lastType !== chunk.type) { + result = '\n\n' + result; + } + yield result; + break; + } + case 'reasoning': { + if (prefix) { + yield prefix; + prefix = null; + } + let result = chunk.textDelta; + if (lastType !== chunk.type) { + result = '\n\n' + result; + } + yield this.markAsCallout(result); + break; + } + case 'error': { + const error = chunk.error as { type: string; message: string }; + throw new Error(error.message); + } + } if (options.signal?.aborted) { - await textStream.cancel(); + await fullStream.cancel(); break; } + lastType = chunk.type; } } } catch (e: any) { @@ -224,4 +278,27 @@ export class GeminiProvider extends CopilotProvider { throw this.handleError(e); } } + + private getGeminiOptions(options: CopilotChatOptions, model: string) { + const result: GoogleGenerativeAIProviderOptions = {}; + if (options?.reasoning && this.isReasoningModel(model)) { + result.thinkingConfig = { + thinkingBudget: 12000, + includeThoughts: true, + }; + } + return result; + } + + private markAsCallout(text: string) { + return text.replaceAll('\n', '\n> '); + } + + private isReasoningModel(model: string) { + return model.startsWith('gemini-2.5'); + } + + private useSearchGrounding(options: CopilotChatOptions) { + return options?.tools?.includes('webSearch'); + } } diff --git a/yarn.lock b/yarn.lock index a4494e69f7..dfa162efff 100644 --- a/yarn.lock +++ b/yarn.lock @@ -908,7 +908,7 @@ __metadata: "@affine/reader": "workspace:*" "@affine/server-native": "workspace:*" "@ai-sdk/anthropic": "npm:^1.2.10" - "@ai-sdk/google": "npm:^1.2.10" + "@ai-sdk/google": "npm:^1.2.18" "@ai-sdk/openai": "npm:^1.3.21" "@ai-sdk/perplexity": "npm:^1.1.6" "@apollo/server": "npm:^4.11.3" @@ -1083,7 +1083,7 @@ __metadata: languageName: node linkType: hard -"@ai-sdk/google@npm:^1.2.10": +"@ai-sdk/google@npm:^1.2.18": version: 1.2.18 resolution: "@ai-sdk/google@npm:1.2.18" dependencies: