feat(server): switch i2i to gpt (#12238)

fix AI-14
fix AI-17
fix AI-39
fix AI-112

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Expanded and reorganized prompt options for text and image actions, adding new prompts for image generation, style conversions, upscaling, background removal, and sticker creation.
  - Enhanced image editing capabilities with direct support for image attachments in prompts.

- **Improvements**
  - Updated prompt names and descriptions to be clearer and more user-friendly.
  - Simplified and clarified prompt selection and image processing workflows with improved default behaviors.
  - Better organization of prompts through clear grouping and categorization.

- **Bug Fixes**
  - Improved validation and handling of image attachments during editing requests.

- **Refactor**
  - Internal code restructuring of prompts and provider logic for clarity and maintainability without affecting user workflows.
  - Refined message handling and content merging logic to ensure consistent prompt processing.
  - Adjusted image attachment rendering logic for improved display consistency.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
darkskygit
2025-05-27 11:36:47 +00:00
parent 1e9cbdb65d
commit 3c0fa429c5
10 changed files with 342 additions and 145 deletions

View File

@@ -518,12 +518,7 @@ const actions = [
type: 'text' as const,
},
{
promptName: [
'debug:action:fal-face-to-sticker',
'debug:action:fal-remove-bg',
'debug:action:fal-sd15',
'debug:action:fal-upscaler',
],
promptName: ['Convert to sticker', 'Remove background', 'Upscale image'],
messages: [
{
role: 'user' as const,
@@ -590,6 +585,8 @@ for (const {
}))!;
t.truthy(provider, 'should have provider');
await retry(`action: ${promptName}`, t, async t => {
const finalConfig = Object.assign({}, prompt.config, config);
switch (type) {
case 'text': {
const result = await provider.text(
@@ -604,7 +601,7 @@ for (const {
),
...messages,
],
Object.assign({}, prompt.config, config)
finalConfig
);
t.truthy(result, 'should return result');
verifier?.(t, result);
@@ -622,23 +619,39 @@ for (const {
),
...messages,
],
Object.assign({}, prompt.config, config)
finalConfig
);
t.truthy(result, 'should return result');
verifier?.(t, result);
break;
}
case 'image': {
const stream = provider.streamImages({ modelId: prompt.model }, [
...prompt.finish(
messages.reduce(
// @ts-expect-error
(acc, m) => Object.assign(acc, m.params),
{}
)
),
...messages,
]);
const finalMessage = [...messages];
const params = {};
if (finalMessage.length === 1) {
const latestMessage = finalMessage.pop()!;
Object.assign(params, {
content: latestMessage.content,
attachments:
'attachments' in latestMessage
? latestMessage.attachments
: undefined,
});
}
const stream = provider.streamImages(
{ modelId: prompt.model },
[
...prompt.finish(
finalMessage.reduce(
// @ts-expect-error
(acc, m) => Object.assign(acc, m.params),
params
)
),
...finalMessage,
],
finalConfig
);
const result = [];
for await (const attachment of stream) {

View File

@@ -543,12 +543,19 @@ test('should be able to chat with special image model', async t => {
);
};
await testWithModel('debug:action:fal-sd15', 'some-tag');
await testWithModel('Generate image', 'some-tag');
await testWithModel(
'debug:action:fal-upscaler',
'best quality, 8K resolution, highres, clarity, some-tag'
'Convert to sticker',
'convert this image to sticker. you need to identify the subject matter and warp a circle of white stroke around the subject matter and with transparent background. some-tag'
);
await testWithModel(
'Upscale image',
'make the image more detailed. some-tag'
);
await testWithModel(
'Remove background',
'Keep the subject and remove other non-subject items. Transparent background. some-tag'
);
await testWithModel('debug:action:fal-remove-bg', 'some-tag');
Sinon.restore();
});

View File

@@ -84,29 +84,12 @@ export class MockCopilotProvider extends OpenAIProvider {
],
},
{
id: 'lcm-sd15-i2i',
id: 'gpt-image-1',
capabilities: [
{
input: [ModelInputType.Image],
output: [ModelOutputType.Image],
},
],
},
{
id: 'clarity-upscaler',
capabilities: [
{
input: [ModelInputType.Image],
output: [ModelOutputType.Image],
},
],
},
{
id: 'imageutils/rembg',
capabilities: [
{
input: [ModelInputType.Image],
input: [ModelInputType.Text, ModelInputType.Image],
output: [ModelOutputType.Image],
defaultForOutputType: true,
},
],
},

View File

@@ -20,12 +20,6 @@ type Prompt = Omit<
};
const workflows: Prompt[] = [
{
name: 'debug:action:fal-teed',
action: 'fal-teed',
model: 'workflowutils/teed',
messages: [{ role: 'user', content: '{{content}}' }],
},
{
name: 'workflow:presentation',
action: 'workflow:presentation',
@@ -305,48 +299,7 @@ const workflows: Prompt[] = [
},
];
const actions: Prompt[] = [
{
name: 'debug:action:dalle3',
action: 'image',
model: 'dall-e-3',
messages: [],
},
{
name: 'debug:action:gpt-image-1',
action: 'image',
model: 'gpt-image-1',
messages: [],
},
{
name: 'debug:action:fal-sd15',
action: 'image',
model: 'lcm-sd15-i2i',
messages: [],
},
{
name: 'debug:action:fal-upscaler',
action: 'Clearer',
model: 'clarity-upscaler',
messages: [
{
role: 'user',
content: 'best quality, 8K resolution, highres, clarity, {{content}}',
},
],
},
{
name: 'debug:action:fal-remove-bg',
action: 'Remove background',
model: 'imageutils/rembg',
messages: [],
},
{
name: 'debug:action:fal-face-to-sticker',
action: 'Convert to sticker',
model: 'face-to-sticker',
messages: [],
},
const textActions: Prompt[] = [
{
name: 'Transcript audio',
action: 'Transcript audio',
@@ -1449,6 +1402,161 @@ When sent new notes, respond ONLY with the contents of the html file.`,
},
];
// Image-generation prompt presets. All current user-facing image actions are
// backed by `gpt-image-1`; the entries after the TODO marker are legacy
// fal.ai / dall-e-3 debug prompts kept only for backwards compatibility.
const imageActions: Prompt[] = [
  // --- current gpt-image-1 actions ---
  {
    name: 'Generate image',
    action: 'image',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content: '{{content}}',
      },
    ],
  },
  {
    name: 'Convert to Clay style',
    action: 'Convert to Clay style',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content:
          'Migration style. Migrates the style from the first image to the second. turn to clay/claymation style. {{content}}',
      },
    ],
  },
  {
    name: 'Convert to Sketch style',
    action: 'Convert to Sketch style',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content: 'turn to mono-color sketch style. {{content}}',
      },
    ],
  },
  {
    name: 'Convert to Anime style',
    action: 'Convert to Anime style',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content: 'turn to Suzume style like anime style. {{content}}',
      },
    ],
  },
  {
    name: 'Convert to Pixel style',
    action: 'Convert to Pixel style',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content: 'turn to kairosoft pixel art. {{content}}',
      },
    ],
  },
  {
    name: 'Convert to sticker',
    action: 'Convert to sticker',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content:
          'convert this image to sticker. you need to identify the subject matter and warp a circle of white stroke around the subject matter and with transparent background. {{content}}',
      },
    ],
  },
  {
    name: 'Upscale image',
    action: 'Upscale image',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content: 'make the image more detailed. {{content}}',
      },
    ],
  },
  {
    name: 'Remove background',
    action: 'Remove background',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content:
          'Keep the subject and remove other non-subject items. Transparent background. {{content}}',
      },
    ],
  },
  // --- legacy debug prompts below ---
  // TODO(@darkskygit): deprecated, remove it after <0.22 version is outdated
  {
    name: 'debug:action:fal-remove-bg',
    action: 'Remove background',
    model: 'imageutils/rembg',
    messages: [],
  },
  {
    name: 'debug:action:fal-face-to-sticker',
    action: 'Convert to sticker',
    model: 'face-to-sticker',
    messages: [],
  },
  {
    name: 'debug:action:fal-teed',
    action: 'fal-teed',
    model: 'workflowutils/teed',
    messages: [{ role: 'user', content: '{{content}}' }],
  },
  {
    name: 'debug:action:dalle3',
    action: 'image',
    model: 'dall-e-3',
    messages: [
      {
        role: 'user',
        content: '{{content}}',
      },
    ],
  },
  {
    name: 'debug:action:gpt-image-1',
    action: 'image',
    model: 'gpt-image-1',
    messages: [
      {
        role: 'user',
        content: '{{content}}',
      },
    ],
    // presumably allows pure image generation without user-provided text —
    // TODO confirm against the config consumer
    config: {
      requireContent: false,
    },
  },
  {
    name: 'debug:action:fal-sd15',
    action: 'image',
    model: 'lcm-sd15-i2i',
    messages: [],
  },
  {
    name: 'debug:action:fal-upscaler',
    action: 'Clearer',
    model: 'clarity-upscaler',
    messages: [
      {
        role: 'user',
        content: 'best quality, 8K resolution, highres, clarity, {{content}}',
      },
    ],
  },
];
const CHAT_PROMPT: Omit<Prompt, 'name'> = {
model: 'gpt-4.1',
optionalModels: [
@@ -1622,7 +1730,12 @@ const chat: Prompt[] = [
},
];
export const prompts: Prompt[] = [...actions, ...chat, ...workflows];
// All prompt presets exposed by the server, grouped by category
// (text actions, image actions, chat presets, workflows).
export const prompts: Prompt[] = [
  ...textActions,
  ...imageActions,
  ...chat,
  ...workflows,
];
export async function refreshPrompts(db: PrismaClient) {
const needToSkip = await db.aiPrompt

View File

@@ -13,6 +13,7 @@ import {
streamText,
ToolSet,
} from 'ai';
import { z } from 'zod';
import {
CopilotPromptInvalid,
@@ -40,6 +41,20 @@ export type OpenAIConfig = {
baseUrl?: string;
};
// Success payload from the OpenAI images endpoint: a list of base64 images.
const ImageSuccessSchema = z.object({
  data: z.array(z.object({ b64_json: z.string() })),
});

// Error payload from the OpenAI images endpoint.
const ImageErrorSchema = z.object({
  error: z.object({
    message: z.string(),
    type: z.string().nullish(),
    param: z.any().nullish(),
    code: z.union([z.string(), z.number()]).nullish(),
  }),
});

// A response is either a success or an error envelope.
const ImageResponseSchema = z.union([ImageSuccessSchema, ImageErrorSchema]);
export class OpenAIProvider extends CopilotProvider<OpenAIConfig> {
readonly type = CopilotProviderType.OpenAI;
@@ -389,6 +404,63 @@ export class OpenAIProvider extends CopilotProvider<OpenAIConfig> {
}
}
// ====== text to image ======
private async *generateImageWithAttachments(
model: string,
prompt: string,
attachments: NonNullable<PromptMessage['attachments']>
): AsyncGenerator<string> {
const form = new FormData();
form.set('model', model);
form.set('prompt', prompt);
form.set('output_format', 'webp');
for (const [idx, entry] of attachments.entries()) {
const url = typeof entry === 'string' ? entry : entry.attachment;
const resp = await fetch(url);
if (resp.ok) {
const type = resp.headers.get('content-type');
if (type && type.startsWith('image/')) {
const buffer = new Uint8Array(await resp.arrayBuffer());
const file = new File([buffer], `${idx}.png`, { type });
form.append('image[]', file);
}
}
}
if (!form.getAll('image[]').length) {
throw new CopilotPromptInvalid(
'No valid image attachments found. Please attach images.'
);
}
const url = `${this.config.baseUrl || 'https://api.openai.com'}/v1/images/edits`;
const res = await fetch(url, {
method: 'POST',
headers: { Authorization: `Bearer ${this.config.apiKey}` },
body: form,
});
if (!res.ok) {
throw new Error(`OpenAI API error ${res.status}: ${await res.text()}`);
}
const json = await res.json();
const imageResponse = ImageResponseSchema.safeParse(json);
if (imageResponse.success) {
const data = imageResponse.data;
if ('error' in data) {
throw new Error(data.error.message);
} else {
for (const image of data.data) {
yield `data:image/webp;base64,${image.b64_json}`;
}
}
} else {
throw new Error(imageResponse.error.message);
}
}
override async *streamImages(
cond: ModelConditions,
messages: PromptMessage[],
@@ -402,30 +474,33 @@ export class OpenAIProvider extends CopilotProvider<OpenAIConfig> {
.counter('generate_images_stream_calls')
.add(1, { model: model.id });
const { content: prompt } = [...messages].pop() || {};
const { content: prompt, attachments } = [...messages].pop() || {};
if (!prompt) throw new CopilotPromptInvalid('Prompt is required');
try {
const modelInstance = this.#instance.image(model.id);
const result = await generateImage({
model: modelInstance,
prompt,
providerOptions: {
openai: {
quality: options.quality || null,
if (attachments && attachments.length > 0) {
yield* this.generateImageWithAttachments(model.id, prompt, attachments);
} else {
const modelInstance = this.#instance.image(model.id);
const result = await generateImage({
model: modelInstance,
prompt,
providerOptions: {
openai: {
quality: options.quality || null,
},
},
},
});
});
const imageUrls = result.images.map(
image => `data:image/png;base64,${image.base64}`
);
const imageUrls = result.images.map(
image => `data:image/png;base64,${image.base64}`
);
for (const imageUrl of imageUrls) {
yield imageUrl;
if (options.signal?.aborted) {
break;
for (const imageUrl of imageUrls) {
yield imageUrl;
if (options.signal?.aborted) {
break;
}
}
}
return;

View File

@@ -39,7 +39,7 @@ const FORMAT_INFER_MAP: Record<string, string> = {
flv: 'video/flv',
};
async function inferMimeType(url: string) {
export async function inferMimeType(url: string) {
if (url.startsWith('data:')) {
return url.split(';')[0].split(':')[1];
}

View File

@@ -141,16 +141,13 @@ export class ChatSession implements AsyncDisposable {
return ret;
}
finish(params: PromptParams): PromptMessage[] {
const messages = this.takeMessages();
private mergeUserContent(params: PromptParams) {
const messages = this.stashMessages;
const firstMessage = messages.at(0);
// TODO: refactor this {{content}} keyword agreement
// if the message in prompt config contains {{content}},
// we should combine it with the user message in the prompt
if (
messages.length === 1 &&
firstMessage &&
this.state.prompt.paramKeys.includes('content')
this.state.prompt.paramKeys.includes('content') &&
!messages.some(m => m.role === AiPromptRole.assistant) &&
firstMessage
) {
const normalizedParams = {
...params,
@@ -178,7 +175,18 @@ export class ChatSession implements AsyncDisposable {
return finished;
}
return;
}
finish(params: PromptParams): PromptMessage[] {
// if the message in prompt config contains {{content}},
// we should combine it with the user message in the prompt
const mergedMessage = this.mergeUserContent(params);
if (mergedMessage) {
return mergedMessage;
}
const messages = this.takeMessages();
const lastMessage = messages.at(-1);
return [
...this.state.prompt.finish(