Mirror of https://github.com/toeverything/AFFiNE.git (synced 2026-02-14 13:25:12 +00:00)
feat(server): switch i2i to gpt (#12238)
fix AI-14 fix AI-17 fix AI-39 fix AI-112

## Summary by CodeRabbit

- **New Features**
  - Expanded and reorganized prompt options for text and image actions, adding new prompts for image generation, style conversions, upscaling, background removal, and sticker creation.
  - Enhanced image editing capabilities with direct support for image attachments in prompts.
- **Improvements**
  - Updated prompt names and descriptions to be more user-friendly and descriptive.
  - Simplified and clarified prompt selection and image processing workflows with improved default behaviors.
  - Better organization of prompts through clear grouping and categorization.
- **Bug Fixes**
  - Improved validation and handling of image attachments during editing requests.
- **Refactor**
  - Internal code restructuring of prompts and provider logic for clarity and maintainability without affecting user workflows.
  - Refined message handling and content merging logic to ensure consistent prompt processing.
  - Adjusted image attachment rendering logic for improved display consistency.
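As a rough illustration of the image-editing flow described above, here is a minimal TypeScript sketch of how a caller could drive it through the provider API that appears in this diff: `streamImages` takes a model condition, the prompt messages, and a config object, and yields image data URLs. The structural provider type, the attachment URL, and the empty config below are placeholders rather than code from this PR.

```ts
// Sketch only: `provider` stands for an already-configured OpenAIProvider-like
// instance from this codebase; the attachment URL and empty config are placeholders.
async function removeBackground(provider: {
  streamImages(
    cond: { modelId: string },
    messages: Array<{ role: 'user'; content: string; attachments?: string[] }>,
    config?: Record<string, unknown>
  ): AsyncIterable<string>;
}): Promise<string[]> {
  const results: string[] = [];
  for await (const dataUrl of provider.streamImages(
    { modelId: 'gpt-image-1' },
    [
      {
        role: 'user',
        content:
          'Keep the subject and remove other non-subject items. Transparent background.',
        attachments: ['https://example.com/source.png'], // placeholder URL
      },
    ],
    {} // placeholder config
  )) {
    results.push(dataUrl); // yields data URLs such as 'data:image/webp;base64,...'
  }
  return results;
}
```

When the last message carries no attachments, the same call falls through to the plain text-to-image path, as the provider diff further down shows.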
@@ -518,12 +518,7 @@ const actions = [
     type: 'text' as const,
   },
   {
-    promptName: [
-      'debug:action:fal-face-to-sticker',
-      'debug:action:fal-remove-bg',
-      'debug:action:fal-sd15',
-      'debug:action:fal-upscaler',
-    ],
+    promptName: ['Convert to sticker', 'Remove background', 'Upscale image'],
     messages: [
       {
         role: 'user' as const,
@@ -590,6 +585,8 @@ for (const {
       }))!;
       t.truthy(provider, 'should have provider');
       await retry(`action: ${promptName}`, t, async t => {
+        const finalConfig = Object.assign({}, prompt.config, config);
+
         switch (type) {
           case 'text': {
             const result = await provider.text(
@@ -604,7 +601,7 @@ for (const {
                 ),
                 ...messages,
               ],
-              Object.assign({}, prompt.config, config)
+              finalConfig
             );
             t.truthy(result, 'should return result');
             verifier?.(t, result);
@@ -622,23 +619,39 @@ for (const {
                 ),
                 ...messages,
               ],
-              Object.assign({}, prompt.config, config)
+              finalConfig
             );
             t.truthy(result, 'should return result');
             verifier?.(t, result);
             break;
           }
           case 'image': {
-            const stream = provider.streamImages({ modelId: prompt.model }, [
-              ...prompt.finish(
-                messages.reduce(
-                  // @ts-expect-error
-                  (acc, m) => Object.assign(acc, m.params),
-                  {}
-                )
-              ),
-              ...messages,
-            ]);
+            const finalMessage = [...messages];
+            const params = {};
+            if (finalMessage.length === 1) {
+              const latestMessage = finalMessage.pop()!;
+              Object.assign(params, {
+                content: latestMessage.content,
+                attachments:
+                  'attachments' in latestMessage
+                    ? latestMessage.attachments
+                    : undefined,
+              });
+            }
+            const stream = provider.streamImages(
+              { modelId: prompt.model },
+              [
+                ...prompt.finish(
+                  finalMessage.reduce(
+                    // @ts-expect-error
+                    (acc, m) => Object.assign(acc, m.params),
+                    params
+                  )
+                ),
+                ...finalMessage,
+              ],
+              finalConfig
+            );

             const result = [];
             for await (const attachment of stream) {
@@ -543,12 +543,19 @@ test('should be able to chat with special image model', async t => {
     );
   };

-  await testWithModel('debug:action:fal-sd15', 'some-tag');
+  await testWithModel('Generate image', 'some-tag');
   await testWithModel(
-    'debug:action:fal-upscaler',
-    'best quality, 8K resolution, highres, clarity, some-tag'
+    'Convert to sticker',
+    'convert this image to sticker. you need to identify the subject matter and warp a circle of white stroke around the subject matter and with transparent background. some-tag'
   );
-  await testWithModel('debug:action:fal-remove-bg', 'some-tag');
+  await testWithModel(
+    'Upscale image',
+    'make the image more detailed. some-tag'
+  );
+  await testWithModel(
+    'Remove background',
+    'Keep the subject and remove other non-subject items. Transparent background. some-tag'
+  );

   Sinon.restore();
 });
@@ -84,29 +84,12 @@ export class MockCopilotProvider extends OpenAIProvider {
       ],
     },
     {
-      id: 'lcm-sd15-i2i',
+      id: 'gpt-image-1',
       capabilities: [
         {
-          input: [ModelInputType.Image],
-          output: [ModelOutputType.Image],
-        },
-      ],
-    },
-    {
-      id: 'clarity-upscaler',
-      capabilities: [
-        {
-          input: [ModelInputType.Image],
-          output: [ModelOutputType.Image],
-        },
-      ],
-    },
-    {
-      id: 'imageutils/rembg',
-      capabilities: [
-        {
-          input: [ModelInputType.Image],
+          input: [ModelInputType.Text, ModelInputType.Image],
           output: [ModelOutputType.Image],
+          defaultForOutputType: true,
         },
       ],
     },
@@ -20,12 +20,6 @@ type Prompt = Omit<
 };

 const workflows: Prompt[] = [
-  {
-    name: 'debug:action:fal-teed',
-    action: 'fal-teed',
-    model: 'workflowutils/teed',
-    messages: [{ role: 'user', content: '{{content}}' }],
-  },
   {
     name: 'workflow:presentation',
     action: 'workflow:presentation',
@@ -305,48 +299,7 @@ const workflows: Prompt[] = [
   },
 ];

-const actions: Prompt[] = [
-  {
-    name: 'debug:action:dalle3',
-    action: 'image',
-    model: 'dall-e-3',
-    messages: [],
-  },
-  {
-    name: 'debug:action:gpt-image-1',
-    action: 'image',
-    model: 'gpt-image-1',
-    messages: [],
-  },
-  {
-    name: 'debug:action:fal-sd15',
-    action: 'image',
-    model: 'lcm-sd15-i2i',
-    messages: [],
-  },
-  {
-    name: 'debug:action:fal-upscaler',
-    action: 'Clearer',
-    model: 'clarity-upscaler',
-    messages: [
-      {
-        role: 'user',
-        content: 'best quality, 8K resolution, highres, clarity, {{content}}',
-      },
-    ],
-  },
-  {
-    name: 'debug:action:fal-remove-bg',
-    action: 'Remove background',
-    model: 'imageutils/rembg',
-    messages: [],
-  },
-  {
-    name: 'debug:action:fal-face-to-sticker',
-    action: 'Convert to sticker',
-    model: 'face-to-sticker',
-    messages: [],
-  },
+const textActions: Prompt[] = [
   {
     name: 'Transcript audio',
     action: 'Transcript audio',
@@ -1449,6 +1402,161 @@ When sent new notes, respond ONLY with the contents of the html file.`,
   },
 ];

+const imageActions: Prompt[] = [
+  {
+    name: 'Generate image',
+    action: 'image',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content: '{{content}}',
+      },
+    ],
+  },
+  {
+    name: 'Convert to Clay style',
+    action: 'Convert to Clay style',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content:
+          'Migration style. Migrates the style from the first image to the second. turn to clay/claymation style. {{content}}',
+      },
+    ],
+  },
+  {
+    name: 'Convert to Sketch style',
+    action: 'Convert to Sketch style',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content: 'turn to mono-color sketch style. {{content}}',
+      },
+    ],
+  },
+  {
+    name: 'Convert to Anime style',
+    action: 'Convert to Anime style',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content: 'turn to Suzume style like anime style. {{content}}',
+      },
+    ],
+  },
+  {
+    name: 'Convert to Pixel style',
+    action: 'Convert to Pixel style',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content: 'turn to kairosoft pixel art. {{content}}',
+      },
+    ],
+  },
+  {
+    name: 'Convert to sticker',
+    action: 'Convert to sticker',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content:
+          'convert this image to sticker. you need to identify the subject matter and warp a circle of white stroke around the subject matter and with transparent background. {{content}}',
+      },
+    ],
+  },
+  {
+    name: 'Upscale image',
+    action: 'Upscale image',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content: 'make the image more detailed. {{content}}',
+      },
+    ],
+  },
+  {
+    name: 'Remove background',
+    action: 'Remove background',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content:
+          'Keep the subject and remove other non-subject items. Transparent background. {{content}}',
+      },
+    ],
+  },
+  // TODO(@darkskygit): deprecated, remove it after <0.22 version is outdated
+  {
+    name: 'debug:action:fal-remove-bg',
+    action: 'Remove background',
+    model: 'imageutils/rembg',
+    messages: [],
+  },
+  {
+    name: 'debug:action:fal-face-to-sticker',
+    action: 'Convert to sticker',
+    model: 'face-to-sticker',
+    messages: [],
+  },
+  {
+    name: 'debug:action:fal-teed',
+    action: 'fal-teed',
+    model: 'workflowutils/teed',
+    messages: [{ role: 'user', content: '{{content}}' }],
+  },
+  {
+    name: 'debug:action:dalle3',
+    action: 'image',
+    model: 'dall-e-3',
+    messages: [
+      {
+        role: 'user',
+        content: '{{content}}',
+      },
+    ],
+  },
+  {
+    name: 'debug:action:gpt-image-1',
+    action: 'image',
+    model: 'gpt-image-1',
+    messages: [
+      {
+        role: 'user',
+        content: '{{content}}',
+      },
+    ],
+    config: {
+      requireContent: false,
+    },
+  },
+  {
+    name: 'debug:action:fal-sd15',
+    action: 'image',
+    model: 'lcm-sd15-i2i',
+    messages: [],
+  },
+  {
+    name: 'debug:action:fal-upscaler',
+    action: 'Clearer',
+    model: 'clarity-upscaler',
+    messages: [
+      {
+        role: 'user',
+        content: 'best quality, 8K resolution, highres, clarity, {{content}}',
+      },
+    ],
+  },
+];
+
 const CHAT_PROMPT: Omit<Prompt, 'name'> = {
   model: 'gpt-4.1',
   optionalModels: [
@@ -1622,7 +1730,12 @@ const chat: Prompt[] = [
   },
 ];

-export const prompts: Prompt[] = [...actions, ...chat, ...workflows];
+export const prompts: Prompt[] = [
+  ...textActions,
+  ...imageActions,
+  ...chat,
+  ...workflows,
+];

 export async function refreshPrompts(db: PrismaClient) {
   const needToSkip = await db.aiPrompt
@@ -13,6 +13,7 @@ import {
   streamText,
   ToolSet,
 } from 'ai';
+import { z } from 'zod';

 import {
   CopilotPromptInvalid,
@@ -40,6 +41,20 @@ export type OpenAIConfig = {
   baseUrl?: string;
 };

+const ImageResponseSchema = z.union([
+  z.object({
+    data: z.array(z.object({ b64_json: z.string() })),
+  }),
+  z.object({
+    error: z.object({
+      message: z.string(),
+      type: z.string().nullish(),
+      param: z.any().nullish(),
+      code: z.union([z.string(), z.number()]).nullish(),
+    }),
+  }),
+]);
+
 export class OpenAIProvider extends CopilotProvider<OpenAIConfig> {
   readonly type = CopilotProviderType.OpenAI;

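Since `ImageResponseSchema` is a `z.union`, a single `safeParse` accepts either the success payload or OpenAI's error envelope, and checking for the `error` key narrows the result; that is how the provider code below consumes it. A trimmed-down sketch of the pattern (the sample payload in the comment is made up):

```ts
import { z } from 'zod';

// Trimmed version of the schema from the diff, kept to the fields used here.
const ImageResponseSchema = z.union([
  z.object({ data: z.array(z.object({ b64_json: z.string() })) }),
  z.object({ error: z.object({ message: z.string() }) }),
]);

// Narrowing mirrors the provider: the union member is decided by the 'error' key.
function toDataUrls(json: unknown): string[] {
  const parsed = ImageResponseSchema.safeParse(json);
  if (!parsed.success) throw new Error(parsed.error.message);
  if ('error' in parsed.data) throw new Error(parsed.data.error.message);
  return parsed.data.data.map(img => `data:image/webp;base64,${img.b64_json}`);
}

// Example with a made-up payload:
// toDataUrls({ data: [{ b64_json: 'AAAA' }] }) -> ['data:image/webp;base64,AAAA']
```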
@@ -389,6 +404,63 @@ export class OpenAIProvider extends CopilotProvider<OpenAIConfig> {
     }
   }

+  // ====== text to image ======
+  private async *generateImageWithAttachments(
+    model: string,
+    prompt: string,
+    attachments: NonNullable<PromptMessage['attachments']>
+  ): AsyncGenerator<string> {
+    const form = new FormData();
+    form.set('model', model);
+    form.set('prompt', prompt);
+    form.set('output_format', 'webp');
+
+    for (const [idx, entry] of attachments.entries()) {
+      const url = typeof entry === 'string' ? entry : entry.attachment;
+      const resp = await fetch(url);
+      if (resp.ok) {
+        const type = resp.headers.get('content-type');
+        if (type && type.startsWith('image/')) {
+          const buffer = new Uint8Array(await resp.arrayBuffer());
+          const file = new File([buffer], `${idx}.png`, { type });
+          form.append('image[]', file);
+        }
+      }
+    }
+
+    if (!form.getAll('image[]').length) {
+      throw new CopilotPromptInvalid(
+        'No valid image attachments found. Please attach images.'
+      );
+    }
+
+    const url = `${this.config.baseUrl || 'https://api.openai.com'}/v1/images/edits`;
+    const res = await fetch(url, {
+      method: 'POST',
+      headers: { Authorization: `Bearer ${this.config.apiKey}` },
+      body: form,
+    });
+
+    if (!res.ok) {
+      throw new Error(`OpenAI API error ${res.status}: ${await res.text()}`);
+    }
+
+    const json = await res.json();
+    const imageResponse = ImageResponseSchema.safeParse(json);
+    if (imageResponse.success) {
+      const data = imageResponse.data;
+      if ('error' in data) {
+        throw new Error(data.error.message);
+      } else {
+        for (const image of data.data) {
+          yield `data:image/webp;base64,${image.b64_json}`;
+        }
+      }
+    } else {
+      throw new Error(imageResponse.error.message);
+    }
+  }
+
   override async *streamImages(
     cond: ModelConditions,
     messages: PromptMessage[],
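For readers who want to poke at the edits endpoint in isolation, here is a standalone, simplified re-derivation of the request that `generateImageWithAttachments` assembles above: a multipart form with `model`, `prompt`, `output_format`, and `image[]` parts POSTed to `/v1/images/edits`. It assumes a Node 20+ runtime (global `fetch`, `FormData`, `File`); the API key and image URL are placeholders, and the real method additionally validates content types, supports multiple attachments, honors `baseUrl`, and parses the response through `ImageResponseSchema`.

```ts
// Simplified illustration only, not the provider implementation.
async function editImageOnce(
  apiKey: string,
  prompt: string,
  imageUrl: string // placeholder source image URL
): Promise<string> {
  const form = new FormData();
  form.set('model', 'gpt-image-1');
  form.set('prompt', prompt);
  form.set('output_format', 'webp');

  // Fetch the source image and attach it as an image[] part.
  const img = await fetch(imageUrl);
  const type = img.headers.get('content-type') ?? 'image/png';
  form.append(
    'image[]',
    new File([new Uint8Array(await img.arrayBuffer())], '0.png', { type })
  );

  const res = await fetch('https://api.openai.com/v1/images/edits', {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  });
  if (!res.ok) throw new Error(`OpenAI API error ${res.status}`);

  const json = (await res.json()) as { data: { b64_json: string }[] };
  return `data:image/webp;base64,${json.data[0].b64_json}`;
}
```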
@@ -402,30 +474,33 @@ export class OpenAIProvider extends CopilotProvider<OpenAIConfig> {
       .counter('generate_images_stream_calls')
       .add(1, { model: model.id });

-    const { content: prompt } = [...messages].pop() || {};
+    const { content: prompt, attachments } = [...messages].pop() || {};
     if (!prompt) throw new CopilotPromptInvalid('Prompt is required');

     try {
-      const modelInstance = this.#instance.image(model.id);
-
-      const result = await generateImage({
-        model: modelInstance,
-        prompt,
-        providerOptions: {
-          openai: {
-            quality: options.quality || null,
+      if (attachments && attachments.length > 0) {
+        yield* this.generateImageWithAttachments(model.id, prompt, attachments);
+      } else {
+        const modelInstance = this.#instance.image(model.id);
+        const result = await generateImage({
+          model: modelInstance,
+          prompt,
+          providerOptions: {
+            openai: {
+              quality: options.quality || null,
+            },
           },
-        },
-      });
+        });

-      const imageUrls = result.images.map(
-        image => `data:image/png;base64,${image.base64}`
-      );
+        const imageUrls = result.images.map(
+          image => `data:image/png;base64,${image.base64}`
+        );

-      for (const imageUrl of imageUrls) {
-        yield imageUrl;
-        if (options.signal?.aborted) {
-          break;
+        for (const imageUrl of imageUrls) {
+          yield imageUrl;
+          if (options.signal?.aborted) {
+            break;
+          }
         }
       }
       return;
@@ -39,7 +39,7 @@ const FORMAT_INFER_MAP: Record<string, string> = {
   flv: 'video/flv',
 };

-async function inferMimeType(url: string) {
+export async function inferMimeType(url: string) {
   if (url.startsWith('data:')) {
     return url.split(';')[0].split(':')[1];
   }
@@ -141,16 +141,13 @@ export class ChatSession implements AsyncDisposable {
     return ret;
   }

-  finish(params: PromptParams): PromptMessage[] {
-    const messages = this.takeMessages();
+  private mergeUserContent(params: PromptParams) {
+    const messages = this.stashMessages;
     const firstMessage = messages.at(0);
-    // TODO: refactor this {{content}} keyword agreement
-    // if the message in prompt config contains {{content}},
-    // we should combine it with the user message in the prompt
     if (
-      messages.length === 1 &&
-      firstMessage &&
-      this.state.prompt.paramKeys.includes('content')
+      this.state.prompt.paramKeys.includes('content') &&
+      !messages.some(m => m.role === AiPromptRole.assistant) &&
+      firstMessage
     ) {
       const normalizedParams = {
         ...params,
@@ -178,7 +175,18 @@ export class ChatSession implements AsyncDisposable {

       return finished;
     }
+    return;
+  }

+  finish(params: PromptParams): PromptMessage[] {
+    // if the message in prompt config contains {{content}},
+    // we should combine it with the user message in the prompt
+    const mergedMessage = this.mergeUserContent(params);
+    if (mergedMessage) {
+      return mergedMessage;
+    }
+
+    const messages = this.takeMessages();
     const lastMessage = messages.at(-1);
     return [
       ...this.state.prompt.finish(