feat(core): add gemini callout syntax highlighting (#12413)

Close [AI-125](https://linear.app/affine-design/issue/AI-125)

What Changed?
- Add `gemini-2.5-flash-preview-04-17` model
- Add `thinkingConfig` provider options
- Add callout syntax highlighting
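For context, the callout in question is the blockquote-style `> [!]` block: streamed reasoning output is wrapped in it so the editor can highlight it separately from the final answer. Roughly, the markdown now emitted looks like this (illustrative text; only the prefix string comes from the gemini provider diff below):

```ts
// Illustrative only: shape of a streamed response once reasoning is wrapped
// with the callout prefix used in the gemini provider diff further down.
const streamedMarkdown =
  '\n> [!]\n' + // callout opener
  '> Comparing the cited documents to find the newer revision...\n' + // reasoning, quoted line by line
  '\n' +
  'Document A is the newer revision.'; // regular text output, outside the callout
```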

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Added support for the "Gemini 2.5 Flash" model and updated the "Gemini 2.5 Pro" model to a newer version.
  - Enhanced streaming responses to better format reasoning outputs and provide clearer callouts in AI-generated content.

- **Bug Fixes**
  - Improved audio transcription prompts in test cases for more accurate and explicit testing.

- **Documentation**
  - Expanded citation instructions for AI chat responses, including examples for multiple citations.

- **Chores**
  - Updated the "@ai-sdk/google" dependency to a newer version.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
Author: akumatus
Date: 2025-05-23 01:58:01 +00:00
Parent: 0ce05ca96e
Commit: 119cf9442b
7 changed files with 101 additions and 19 deletions

View File

@@ -328,7 +328,7 @@ const actions = [
messages: [
{
role: 'user' as const,
- content: '',
+ content: 'transcript the audio',
attachments: [
'https://cdn.affine.pro/copilot-test/MP9qDGuYgnY+ILoEAmHpp3h9Npuw2403EAYMEA.mp3',
],
@@ -350,7 +350,7 @@ const actions = [
messages: [
{
role: 'user' as const,
- content: '',
+ content: 'transcript the audio',
attachments: [
'https://cdn.affine.pro/copilot-test/2ed05eo1KvZ2tWB_BAjFo67EAPZZY-w4LylUAw.m4a',
],
@@ -372,7 +372,7 @@ const actions = [
messages: [
{
role: 'user' as const,
- content: '',
+ content: 'transcript the audio',
attachments: [
'https://cdn.affine.pro/copilot-test/nC9-e7P85PPI2rU29QWwf8slBNRMy92teLIIMw.opus',
],

View File

@@ -111,7 +111,7 @@ export class MockCopilotProvider extends OpenAIProvider {
],
},
{
- id: 'gemini-2.5-pro-preview-03-25',
+ id: 'gemini-2.5-pro-preview-05-06',
capabilities: [
{
input: [ModelInputType.Text, ModelInputType.Image],

View File

@@ -350,7 +350,7 @@ const actions: Prompt[] = [
{
name: 'Transcript audio',
action: 'Transcript audio',
- model: 'gemini-2.5-pro-preview-03-25',
+ model: 'gemini-2.5-pro-preview-05-06',
messages: [
{
role: 'system',
@@ -1096,6 +1096,8 @@ const chat: Prompt[] = [
'o4-mini',
'claude-3-7-sonnet-20250219',
'claude-3-5-sonnet-20241022',
'gemini-2.5-flash-preview-04-17',
'gemini-2.5-pro-preview-05-06',
],
messages: [
{
@@ -1122,11 +1124,12 @@ When referencing information from the provided documents, files or web search re
1. Use markdown footnote format for citations
2. Add citations immediately after the relevant sentence or paragraph
3. Required format: [^reference_index] where reference_index is an increasing positive integer
- 4. You MUST include citations at the end of your response in this exact format:
+ 4. When a single sentence needs multiple citations, write each marker in its own pair of brackets and place them consecutively. Correct: [^2][^4][^12], Incorrect: [^2, 4, 12].
+ 5. You MUST include citations at the end of your response in this exact format:
- For documents: [^reference_index]:{"type":"doc","docId":"document_id"}
- For files: [^reference_index]:{"type":"attachment","blobId":"blob_id","fileName":"file_name","fileType":"file_type"}
- For web search results: [^reference_index]:{"type":"url","url":"url_path"}
- 5. Ensure citations adhere strictly to the required format. Do not add extra spaces in citations like [^ reference_index] or [ ^reference_index].
+ 6. Ensure citations adhere strictly to the required format. Do not add extra spaces in citations like [^ reference_index] or [ ^reference_index].
### Citations Structure
Your response MUST follow this structure:
@@ -1136,6 +1139,7 @@ Your response MUST follow this structure:
Example Output with Citations:
This is my response with a document citation[^1]. Here is more content with another file citation[^2]. And here is a web search result citation[^3].
Here is multiple citations: [^1][^2][^3].
[^1]:{"type":"doc","docId":"abc123"}
[^2]:{"type":"attachment","blobId":"xyz789","fileName":"example.txt","fileType":"text"}
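The citation rules above are strict enough to check mechanically. As a hypothetical illustration (none of these names exist in this commit), a validator for the footnote definitions could look like:

```ts
// Hypothetical helper, not part of this PR: extracts the footnote-style
// citation definitions described by the prompt above.
type Citation =
  | { type: 'doc'; docId: string }
  | { type: 'attachment'; blobId: string; fileName: string; fileType: string }
  | { type: 'url'; url: string };

function extractCitations(response: string): Map<number, Citation> {
  const citations = new Map<number, Citation>();
  // Matches definition lines such as: [^1]:{"type":"doc","docId":"abc123"}
  const definition = /^\[\^(\d+)\]:(\{.*\})\s*$/gm;
  for (const match of response.matchAll(definition)) {
    citations.set(Number(match[1]), JSON.parse(match[2]) as Citation);
  }
  return citations;
}

// Inline markers must stay in separate consecutive brackets ([^2][^4][^12]);
// a merged form like [^2, 4, 12] would simply not match this pattern.
const inlineMarker = /\[\^(\d+)\]/g;
```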

View File

@@ -232,7 +232,7 @@ export class AnthropicProvider extends CopilotProvider<AnthropicConfig> {
private getAnthropicOptions(options: CopilotChatOptions, model: string) {
const result: AnthropicProviderOptions = {};
- if (options?.reasoning && this.isThinkingModel(model)) {
+ if (options?.reasoning && this.isReasoningModel(model)) {
result.thinking = {
type: 'enabled',
budgetTokens: 12000,
@@ -257,7 +257,8 @@ export class AnthropicProvider extends CopilotProvider<AnthropicConfig> {
return text.replaceAll('\n', '\n> ');
}
- private isThinkingModel(model: string) {
+ private isReasoningModel(model: string) {
// only claude 3.7 sonnet supports reasoning config
return model.startsWith('claude-3-7-sonnet');
}
}
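The rename from `isThinkingModel` to `isReasoningModel` keeps the behavior: Anthropic's extended-thinking budget is only attached for Claude 3.7 Sonnet. A rough standalone illustration (simplified; not the provider's actual call path, values copied from the diff):

```ts
// Simplified stand-in for the gating above.
const isReasoningModel = (model: string) => model.startsWith('claude-3-7-sonnet');

function buildThinkingOptions(model: string, reasoning: boolean | undefined) {
  return reasoning && isReasoningModel(model)
    ? { thinking: { type: 'enabled' as const, budgetTokens: 12000 } }
    : {};
}

buildThinkingOptions('claude-3-7-sonnet-20250219', true);
// -> { thinking: { type: 'enabled', budgetTokens: 12000 } }
buildThinkingOptions('claude-3-5-sonnet-20241022', true);
// -> {} (the reasoning flag is ignored for models without extended thinking)
```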

View File

@@ -1,6 +1,7 @@
import {
createGoogleGenerativeAI,
type GoogleGenerativeAIProvider,
type GoogleGenerativeAIProviderOptions,
} from '@ai-sdk/google';
import {
AISDKError,
@@ -52,9 +53,23 @@ export class GeminiProvider extends CopilotProvider<GeminiConfig> {
},
],
},
{
name: 'Gemini 2.5 Flash',
id: 'gemini-2.5-flash-preview-04-17',
capabilities: [
{
input: [
ModelInputType.Text,
ModelInputType.Image,
ModelInputType.Audio,
],
output: [ModelOutputType.Text, ModelOutputType.Structured],
},
],
},
{
name: 'Gemini 2.5 Pro',
- id: 'gemini-2.5-pro-preview-03-25',
+ id: 'gemini-2.5-pro-preview-05-06',
capabilities: [
{
input: [
@@ -78,6 +93,10 @@ export class GeminiProvider extends CopilotProvider<GeminiConfig> {
},
];
private readonly MAX_STEPS = 20;
private readonly CALLOUT_PREFIX = '\n> [!]\n> ';
#instance!: GoogleGenerativeAIProvider;
override configured(): boolean {
@@ -203,20 +222,55 @@ export class GeminiProvider extends CopilotProvider<GeminiConfig> {
metrics.ai.counter('chat_text_stream_calls').add(1, { model: model.id });
const [system, msgs] = await chatToGPTMessage(messages);
- const { textStream } = streamText({
- model: this.#instance(model.id),
+ const { fullStream } = streamText({
+ model: this.#instance(model.id, {
+ useSearchGrounding: this.useSearchGrounding(options),
+ }),
system,
messages: msgs,
abortSignal: options.signal,
maxSteps: this.MAX_STEPS,
providerOptions: {
google: this.getGeminiOptions(options, model.id),
},
});
- for await (const message of textStream) {
- if (message) {
- yield message;
let lastType;
// reasoning, tool-call, tool-result need to mark as callout
let prefix: string | null = this.CALLOUT_PREFIX;
for await (const chunk of fullStream) {
if (chunk) {
switch (chunk.type) {
case 'text-delta': {
let result = chunk.textDelta;
if (lastType !== chunk.type) {
result = '\n\n' + result;
}
yield result;
break;
}
case 'reasoning': {
if (prefix) {
yield prefix;
prefix = null;
}
let result = chunk.textDelta;
if (lastType !== chunk.type) {
result = '\n\n' + result;
}
yield this.markAsCallout(result);
break;
}
case 'error': {
const error = chunk.error as { type: string; message: string };
throw new Error(error.message);
}
}
if (options.signal?.aborted) {
- await textStream.cancel();
+ await fullStream.cancel();
break;
}
lastType = chunk.type;
}
}
} catch (e: any) {
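Switching from `textStream` to `fullStream` is what enables the callout treatment: the AI SDK's `fullStream` yields typed parts (`text-delta`, `reasoning`, `error`, tool events, ...) rather than plain text, so reasoning can be routed into the callout while ordinary text passes through untouched. A minimal, self-contained sketch of that dispatch, using hand-made chunks instead of the SDK stream:

```ts
// Simplified stand-in for the loop above, driven by hand-made chunks rather
// than the SDK's real fullStream; error and tool handling are omitted.
type Chunk =
  | { type: 'text-delta'; textDelta: string }
  | { type: 'reasoning'; textDelta: string };

const CALLOUT_PREFIX = '\n> [!]\n> ';
const markAsCallout = (text: string) => text.replaceAll('\n', '\n> ');

function* renderChunks(chunks: Chunk[]): Generator<string> {
  let lastType: Chunk['type'] | undefined;
  let prefix: string | null = CALLOUT_PREFIX;
  for (const chunk of chunks) {
    let result = chunk.textDelta;
    if (lastType !== chunk.type) result = '\n\n' + result; // paragraph break on type change
    if (chunk.type === 'reasoning') {
      if (prefix) {
        yield prefix; // emit the callout opener only once
        prefix = null;
      }
      yield markAsCallout(result); // keep every reasoning line quoted
    } else {
      yield result; // answer text stays outside the blockquote
    }
    lastType = chunk.type;
  }
}

const rendered = [
  ...renderChunks([
    { type: 'reasoning', textDelta: 'Comparing the cited documents...' },
    { type: 'text-delta', textDelta: 'Document A is the newer revision.' },
  ]),
].join('');
// rendered starts with "\n> [!]\n> " followed by the quoted reasoning,
// then a blank line and the plain answer text.
```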
@@ -224,4 +278,27 @@ export class GeminiProvider extends CopilotProvider<GeminiConfig> {
throw this.handleError(e);
}
}
private getGeminiOptions(options: CopilotChatOptions, model: string) {
const result: GoogleGenerativeAIProviderOptions = {};
if (options?.reasoning && this.isReasoningModel(model)) {
result.thinkingConfig = {
thinkingBudget: 12000,
includeThoughts: true,
};
}
return result;
}
private markAsCallout(text: string) {
return text.replaceAll('\n', '\n> ');
}
private isReasoningModel(model: string) {
return model.startsWith('gemini-2.5');
}
private useSearchGrounding(options: CopilotChatOptions) {
return options?.tools?.includes('webSearch');
}
}
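For completeness, a hypothetical caller-side view of what these helpers feed into `@ai-sdk/google` (standalone sketch, not AFFiNE's actual wiring; the API key environment variable name is made up):

```ts
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { streamText } from 'ai';

// Hypothetical standalone wiring that mirrors the provider code above.
const google = createGoogleGenerativeAI({ apiKey: process.env.GEMINI_API_KEY });

const { fullStream } = streamText({
  // useSearchGrounding is switched on when the caller requested the webSearch tool.
  model: google('gemini-2.5-flash-preview-04-17', { useSearchGrounding: true }),
  system: 'You are a helpful assistant.',
  messages: [{ role: 'user', content: 'Summarize the attached document.' }],
  providerOptions: {
    // Only applied for reasoning-capable gemini-2.5 models (see isReasoningModel above).
    google: { thinkingConfig: { thinkingBudget: 12000, includeThoughts: true } },
  },
});
```

Iterating `fullStream` then yields the typed chunks handled by the streaming loop earlier in this file.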