Mirror of https://github.com/toeverything/AFFiNE.git (synced 2026-02-13 12:55:00 +00:00)
fix(server): token calculate (#12667)
@@ -19,7 +19,6 @@ import { ChatMessageCache } from './message';
 import { PromptService } from './prompt';
 import { PromptMessage, PromptParams } from './providers';
 import {
-  AvailableModel,
   ChatHistory,
   ChatMessage,
   ChatMessageSchema,

@@ -38,7 +37,7 @@ export class ChatSession implements AsyncDisposable {
     private readonly messageCache: ChatMessageCache,
     private readonly state: ChatSessionState,
     private readonly dispose?: (state: ChatSessionState) => Promise<void>,
-    private readonly maxTokenSize = 3840
+    private readonly maxTokenSize = state.prompt.config?.maxTokens || 128 * 1024
   ) {}

   get model() {

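The session's token budget was previously hard-coded to 3840; it now prefers the prompt's configured maxTokens and falls back to 128 * 1024 (131072). A minimal sketch of that default-parameter pattern, with ChatSessionState reduced to just the fields this diff touches (the real type has more):

// Sketch only: trimmed-down ChatSessionState, enough to show the fallback.
interface ChatSessionState {
  prompt: {
    model: string;
    config?: { maxTokens?: number | null } | null;
  };
}

class ChatSession {
  constructor(
    private readonly state: ChatSessionState,
    // `||` (as in the diff) also falls back when maxTokens is 0,
    // which is fine since a zero budget is not meaningful.
    private readonly maxTokenSize = state.prompt.config?.maxTokens || 128 * 1024
  ) {}

  get budget() {
    return this.maxTokenSize;
  }
}

// No config on the prompt -> the 131072-token default budget.
console.log(new ChatSession({ prompt: { model: 'gpt-4.1' } }).budget);
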
@@ -297,8 +296,8 @@ export class ChatSessionService {
           messageCost: { increment: userMessages.length },
           tokenCost: {
             increment: this.calculateTokenSize(
-              userMessages,
-              state.prompt.model as AvailableModel
+              state.messages,
+              state.prompt.model
             ),
           },
         },

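Note the accounting change: the token increment now counts state.messages (the whole transcript) rather than only the new userMessages, while messageCost still counts user messages. A rough sketch of the difference, using a crude stand-in counter instead of the real encoder:

type PromptMessage = { role: 'system' | 'user' | 'assistant'; content: string };

// Crude stand-in for encoder.count(); the real code tokenizes properly.
const roughTokens = (msgs: PromptMessage[]) =>
  msgs.reduce((sum, m) => sum + Math.ceil(m.content.length / 4), 0);

const messages: PromptMessage[] = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'Summarize this doc.' },
  { role: 'assistant', content: 'Here is a short summary.' },
];
const userMessages = messages.filter(m => m.role === 'user');

// Charging only the user input undercounts what is actually sent upstream.
console.log(roughTokens(userMessages), roughTokens(messages));
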
@@ -402,10 +401,7 @@ export class ChatSessionService {
     });
   }

-  private calculateTokenSize(
-    messages: PromptMessage[],
-    model: AvailableModel
-  ): number {
+  private calculateTokenSize(messages: PromptMessage[], model: string): number {
     const encoder = getTokenEncoder(model);
     return messages
       .map(m => encoder?.count(m.content) ?? 0)

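The hunk is truncated after the .map(...) call; presumably the chain ends by summing the per-message counts. A self-contained sketch under that assumption, with a stand-in Tokenizer and encoder factory in place of the real ones:

interface Tokenizer {
  count(text: string): number;
}

// Stand-in factory; the real getTokenEncoder picks a model-specific
// tokenizer and returns null for models that are never token-billed.
function getTokenEncoder(model?: string | null): Tokenizer | null {
  if (!model) return null;
  return { count: text => Math.ceil(text.length / 4) };
}

type PromptMessage = { content: string };

function calculateTokenSize(messages: PromptMessage[], model: string): number {
  const encoder = getTokenEncoder(model);
  return messages
    .map(m => encoder?.count(m.content) ?? 0)
    // Assumed tail of the truncated chain: sum the per-message counts.
    .reduce((total, count) => total + count, 0);
}

console.log(calculateTokenSize([{ content: 'hello world' }], 'gpt-4.1')); // 3
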
@@ -47,26 +47,24 @@ export enum AvailableModels {
   Gpt41 = 'gpt-4.1',
   Gpt410414 = 'gpt-4.1-2025-04-14',
   Gpt41Mini = 'gpt-4.1-mini',
   Gpt41Nano = 'gpt-4.1-nano',
   // embeddings
   TextEmbedding3Large = 'text-embedding-3-large',
   TextEmbedding3Small = 'text-embedding-3-small',
   TextEmbeddingAda002 = 'text-embedding-ada-002',
   // moderation
   TextModerationLatest = 'text-moderation-latest',
   TextModerationStable = 'text-moderation-stable',
   // text to image
   DallE3 = 'dall-e-3',
   GptImage = 'gpt-image-1',
 }

-export type AvailableModel = keyof typeof AvailableModels;
+const availableModels = Object.values(AvailableModels);

 export function getTokenEncoder(model?: string | null): Tokenizer | null {
   if (!model) return null;
-  const modelStr = AvailableModels[model as AvailableModel];
-  if (!modelStr) return null;
-  if (modelStr.startsWith('gpt')) {
-    return fromModelName(modelStr);
-  } else if (modelStr.startsWith('dall')) {
+  if (!availableModels.includes(model as AvailableModels)) return null;
+  if (model.startsWith('gpt')) {
+    return fromModelName(model);
+  } else if (model.startsWith('dall')) {
     // dalle don't need to calc the token
     return null;
   } else {

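This is the heart of the fix. The old code indexed the enum with model as if it were a key like 'Gpt41', but callers pass the model value like 'gpt-4.1'; string enums have no reverse mapping, so the lookup came back undefined and token counting was silently skipped. The new code validates against the enum values and uses the string directly. A trimmed-down demonstration (two-member enum for brevity):

enum AvailableModels {
  Gpt41 = 'gpt-4.1',
  DallE3 = 'dall-e-3',
}
type AvailableModel = keyof typeof AvailableModels;
const availableModels = Object.values(AvailableModels);

const model: string = 'gpt-4.1'; // what state.prompt.model actually holds

// Old lookup: treats the value as a key; string enums have no reverse
// mapping, so this is undefined and no encoder was ever created.
const modelStr = AvailableModels[model as AvailableModel];
console.log(modelStr); // undefined

// New check: compare against the enum *values* instead.
console.log(availableModels.includes(model as AvailableModels)); // true
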
@@ -262,12 +262,12 @@ enum CopilotModels {
   Gpt4OmniMini0718
   Gpt41
   Gpt41Mini
   Gpt41Nano
   Gpt410414
   GptImage
   TextEmbedding3Large
   TextEmbedding3Small
   TextEmbeddingAda002
   TextModerationLatest
   TextModerationStable
 }

 input CopilotPromptConfigInput {

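The generated GraphQL enum exposes the TypeScript key names (Gpt41, GptImage, ...), not the wire-level ids ('gpt-4.1', ...), which is exactly the key/value split the lookup fix above has to respect. A hedged sketch (resolveModelId is a hypothetical helper, not from this commit) of mapping a GraphQL enum key back to its model id:

enum AvailableModels {
  Gpt41 = 'gpt-4.1',
  GptImage = 'gpt-image-1',
}

// Hypothetical helper: resolve a GraphQL CopilotModels key to a model id.
function resolveModelId(key: string): string | null {
  return key in AvailableModels
    ? AvailableModels[key as keyof typeof AvailableModels]
    : null;
}

console.log(resolveModelId('Gpt41')); // 'gpt-4.1'
console.log(resolveModelId('gpt-4.1')); // null -- values are not keys
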