fix(server): token calculate (#12667)

Author: darkskygit
Date: 2025-06-04 07:09:33 +00:00
Parent: f54bc0c047
Commit: 13fa4f922a
7 changed files with 105 additions and 203 deletions

View File

@@ -19,7 +19,6 @@ import { ChatMessageCache } from './message';
 import { PromptService } from './prompt';
 import { PromptMessage, PromptParams } from './providers';
 import {
-  AvailableModel,
   ChatHistory,
   ChatMessage,
   ChatMessageSchema,
@@ -38,7 +37,7 @@ export class ChatSession implements AsyncDisposable {
     private readonly messageCache: ChatMessageCache,
     private readonly state: ChatSessionState,
     private readonly dispose?: (state: ChatSessionState) => Promise<void>,
-    private readonly maxTokenSize = 3840
+    private readonly maxTokenSize = state.prompt.config?.maxTokens || 128 * 1024
   ) {}
 
   get model() {
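
The default context budget changes from a hard-coded 3840 tokens to the prompt's configured limit, falling back to 128 * 1024 (131072) tokens. A minimal sketch of how the new default resolves; the config shape and values here are hypothetical, for illustration only:

    // Hypothetical config type standing in for the prompt config in the diff.
    type PromptConfig = { maxTokens?: number } | null;

    function resolveMaxTokenSize(config: PromptConfig): number {
      // `||` falls back when maxTokens is undefined, null, or 0;
      // a configured 0 would therefore also yield the 128K default.
      return config?.maxTokens || 128 * 1024;
    }

    resolveMaxTokenSize({ maxTokens: 32768 }); // 32768
    resolveMaxTokenSize(null); // 131072 (128 * 1024)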
@@ -297,8 +296,8 @@ export class ChatSessionService {
         messageCost: { increment: userMessages.length },
         tokenCost: {
           increment: this.calculateTokenSize(
-            userMessages,
-            state.prompt.model as AvailableModel
+            state.messages,
+            state.prompt.model
           ),
         },
       },
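
Two things change in the accounting above: token cost is now computed over the whole session history (state.messages) rather than only the newly appended user messages, and the model is passed through as a plain string instead of being cast to the removed AvailableModel key type. A comment-only sketch of the before/after, with names taken from the diff:

    // before: only the new user turns were counted
    //   tokenCost += calculateTokenSize(userMessages, model);
    // after: the full history is counted, which better matches what a
    //   provider bills for, since each request replays the whole history
    //   tokenCost += calculateTokenSize(state.messages, model);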
@@ -402,10 +401,7 @@
     });
   }
 
-  private calculateTokenSize(
-    messages: PromptMessage[],
-    model: AvailableModel
-  ): number {
+  private calculateTokenSize(messages: PromptMessage[], model: string): number {
     const encoder = getTokenEncoder(model);
     return messages
       .map(m => encoder?.count(m.content) ?? 0)
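
The hunk is truncated after the `.map(...)` call. A self-contained sketch of the summation it implies; the trailing `reduce` step and the minimal Tokenizer interface are assumptions, not shown in the diff:

    interface Tokenizer {
      count(text: string): number;
    }

    function calculateTokenSize(
      messages: { content: string }[],
      encoder: Tokenizer | null
    ): number {
      return (
        messages
          // a missing encoder contributes 0 rather than throwing
          .map(m => encoder?.count(m.content) ?? 0)
          // assumed final step: sum the per-message counts
          .reduce((total, size) => total + size, 0)
      );
    }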

View File

@@ -47,26 +47,24 @@ export enum AvailableModels {
   Gpt41 = 'gpt-4.1',
   Gpt410414 = 'gpt-4.1-2025-04-14',
   Gpt41Mini = 'gpt-4.1-mini',
   Gpt41Nano = 'gpt-4.1-nano',
   // embeddings
   TextEmbedding3Large = 'text-embedding-3-large',
   TextEmbedding3Small = 'text-embedding-3-small',
   TextEmbeddingAda002 = 'text-embedding-ada-002',
   // moderation
   TextModerationLatest = 'text-moderation-latest',
   TextModerationStable = 'text-moderation-stable',
   // text to image
   DallE3 = 'dall-e-3',
   GptImage = 'gpt-image-1',
 }
 
-export type AvailableModel = keyof typeof AvailableModels;
+const availableModels = Object.values(AvailableModels);
 
 export function getTokenEncoder(model?: string | null): Tokenizer | null {
   if (!model) return null;
-  const modelStr = AvailableModels[model as AvailableModel];
-  if (!modelStr) return null;
-  if (modelStr.startsWith('gpt')) {
-    return fromModelName(modelStr);
-  } else if (modelStr.startsWith('dall')) {
+  if (!availableModels.includes(model as AvailableModels)) return null;
+  if (model.startsWith('gpt')) {
+    return fromModelName(model);
+  } else if (model.startsWith('dall')) {
     // dalle don't need to calc the token
     return null;
   } else {
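
The diff suggests the root cause of the mis-counted tokens: prompts store the model as the enum value (e.g. 'gpt-4.1'), but the old code indexed AvailableModels by enum key, so the lookup returned undefined, no encoder was created, and every message counted as 0 tokens. A reduced example of the old versus new lookup, using one real entry from the enum above:

    enum AvailableModels {
      Gpt41 = 'gpt-4.1',
    }

    const model = 'gpt-4.1'; // what the prompt actually holds: a value, not a key

    // old: index by key -- 'gpt-4.1' is not a key, so this is undefined
    const modelStr = AvailableModels[model as keyof typeof AvailableModels];
    console.log(modelStr); // undefined -> getTokenEncoder returned null

    // new: validate the value directly, then use the string as-is
    const availableModels = Object.values(AvailableModels);
    console.log(availableModels.includes(model as AvailableModels)); // true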

View File

@@ -262,12 +262,12 @@ enum CopilotModels {
   Gpt4OmniMini0718
   Gpt41
   Gpt41Mini
   Gpt41Nano
   Gpt410414
   GptImage
   TextEmbedding3Large
   TextEmbedding3Small
   TextEmbeddingAda002
   TextModerationLatest
   TextModerationStable
 }
 
 input CopilotPromptConfigInput {
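
This last file looks like the generated GraphQL schema: the CopilotModels enum mirrors the TypeScript AvailableModels enum, so it is regenerated whenever that enum changes. In a NestJS code-first setup (which the schema's shape suggests), the mapping would come from a registration like the following; the exact registration site is an assumption, not part of this diff:

    import { registerEnumType } from '@nestjs/graphql';

    // expose the TS enum to GraphQL under the name seen in the schema
    registerEnumType(AvailableModels, { name: 'CopilotModels' });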