feat(server): improve transcript (#13253)

fix AF-2758
fix AF-2759
This commit is contained in:
DarkSky
2025-07-17 17:20:14 +08:00
committed by GitHub
parent dc68c2385d
commit bdf1389258
5 changed files with 66 additions and 26 deletions

View File

@@ -303,7 +303,7 @@ const textActions: Prompt[] = [
{
name: 'Transcript audio',
action: 'Transcript audio',
model: 'gemini-2.5-pro',
model: 'gemini-2.5-flash',
optionalModels: ['gemini-2.5-flash', 'gemini-2.5-pro'],
messages: [
{
@@ -334,6 +334,7 @@ Convert a multi-speaker audio recording into a structured JSON format by transcr
config: {
requireContent: false,
requireAttachment: true,
maxRetries: 1,
},
},
{

View File

@@ -129,7 +129,16 @@ export abstract class GeminiProvider<T> extends CopilotProvider<T> {
system,
messages: msgs,
schema,
providerOptions: {
google: {
thinkingConfig: {
thinkingBudget: -1,
includeThoughts: false,
},
},
},
abortSignal: options.signal,
maxRetries: options.maxRetries || 3,
experimental_repairText: async ({ text, error }) => {
if (error instanceof JSONParseError) {
// strange fixed response, temporarily replace it

View File

@@ -15,7 +15,6 @@ import GraphQLUpload from 'graphql-upload/GraphQLUpload.mjs';
import {
CopilotTranscriptionAudioNotProvided,
CopilotTranscriptionJobNotFound,
type FileUpload,
} from '../../../base';
import { CurrentUser } from '../../../core/auth';
@@ -74,7 +73,7 @@ const FinishedStatus: Set<AiJobStatus> = new Set([
export class CopilotTranscriptionResolver {
constructor(
private readonly ac: AccessController,
private readonly service: CopilotTranscriptionService
private readonly transcript: CopilotTranscriptionService
) {}
private handleJobResult(
@@ -122,7 +121,7 @@ export class CopilotTranscriptionResolver {
throw new CopilotTranscriptionAudioNotProvided();
}
const jobResult = await this.service.submitTranscriptionJob(
const jobResult = await this.transcript.submitJob(
user.id,
workspaceId,
blobId,
@@ -144,19 +143,11 @@ export class CopilotTranscriptionResolver {
.allowLocal()
.assert('Workspace.Copilot');
const job = await this.service.queryTranscriptionJob(
const jobResult = await this.transcript.retryJob(
user.id,
workspaceId,
jobId
);
if (!job || !job.infos) {
throw new CopilotTranscriptionJobNotFound();
}
const jobResult = await this.service.executeTranscriptionJob(
job.id,
job.infos
);
return this.handleJobResult(jobResult);
}
@@ -166,7 +157,7 @@ export class CopilotTranscriptionResolver {
@CurrentUser() user: CurrentUser,
@Args('jobId') jobId: string
): Promise<TranscriptionResultType | null> {
const job = await this.service.claimTranscriptionJob(user.id, jobId);
const job = await this.transcript.claimJob(user.id, jobId);
return this.handleJobResult(job);
}
@@ -190,7 +181,7 @@ export class CopilotTranscriptionResolver {
.allowLocal()
.assert('Workspace.Copilot');
const job = await this.service.queryTranscriptionJob(
const job = await this.transcript.queryJob(
user.id,
copilot.workspaceId,
jobId,

View File

@@ -49,7 +49,17 @@ export class CopilotTranscriptionService {
private readonly providerFactory: CopilotProviderFactory
) {}
async submitTranscriptionJob(
/**
 * Pick the model id to use for audio transcription for the given user.
 *
 * Looks up the 'Transcript audio' prompt and checks whether the user has the
 * 'unlimited_copilot' feature, then selects an entry from the prompt's
 * `optionalModels` list by position.
 *
 * @param userId - id of the user the transcription runs for
 * @returns the selected entry of `optionalModels`, or `undefined` when the
 *   prompt is missing or the list has no entry at the selected position
 */
private async getModel(userId: string) {
  const transcriptPrompt = await this.prompt.get('Transcript audio');
  const isUnlimited = await this.models.userFeature.has(
    userId,
    'unlimited_copilot'
  );
  // Second entry for users with the copilot plan, first entry otherwise.
  // NOTE(review): presumably optionalModels is ordered [flash, pro] — confirm
  // against the prompt definition.
  const modelIndex = isUnlimited ? 1 : 0;
  return transcriptPrompt?.optionalModels[modelIndex];
}
async submitJob(
userId: string,
workspaceId: string,
blobId: string,
@@ -78,12 +88,26 @@ export class CopilotTranscriptionService {
infos.push({ url, mimeType: blob.mimetype });
}
return await this.executeTranscriptionJob(jobId, infos);
const model = await this.getModel(userId);
return await this.executeJob(jobId, infos, model);
}
async executeTranscriptionJob(
/**
 * Re-run an existing transcription job.
 *
 * Looks the job up via `queryJob`, resolves the model for the user via
 * `getModel`, and hands the job's id and stored infos to `executeJob`.
 *
 * @param userId - id of the user requesting the retry
 * @param workspaceId - workspace the job is queried in
 * @param jobId - id of the job to retry
 * @returns the result of `executeJob` for the retried job
 * @throws CopilotTranscriptionJobNotFound when no job is found or the job
 *   carries no `infos`
 */
async retryJob(userId: string, workspaceId: string, jobId: string) {
  const existing = await this.queryJob(userId, workspaceId, jobId);
  const retryable = existing && existing.infos;
  if (!retryable) {
    throw new CopilotTranscriptionJobNotFound();
  }

  const modelId = await this.getModel(userId);
  return await this.executeJob(existing.id, retryable, modelId);
}
async executeJob(
jobId: string,
infos: AudioBlobInfos
infos: AudioBlobInfos,
modelId?: string
): Promise<TranscriptionJob> {
const status = AiJobStatus.running;
const success = await this.models.copilotJob.update(jobId, {
@@ -98,12 +122,13 @@ export class CopilotTranscriptionService {
await this.job.add('copilot.transcript.submit', {
jobId,
infos,
modelId,
});
return { id: jobId, status };
}
async claimTranscriptionJob(
async claimJob(
userId: string,
jobId: string
): Promise<TranscriptionJob | null> {
@@ -118,7 +143,7 @@ export class CopilotTranscriptionService {
return null;
}
async queryTranscriptionJob(
async queryJob(
userId: string,
workspaceId: string,
jobId?: string,
@@ -181,14 +206,20 @@ export class CopilotTranscriptionService {
promptName: string,
message: Partial<PromptMessage>,
schema?: ZodType<any>,
prefer?: CopilotProviderType
prefer?: CopilotProviderType,
modelId?: string
): Promise<string> {
const prompt = await this.prompt.get(promptName);
if (!prompt) {
throw new CopilotPromptNotFound({ name: promptName });
}
const cond = { modelId: prompt.model };
const cond = {
modelId:
modelId && prompt.optionalModels.includes(modelId)
? modelId
: prompt.model,
};
const msg = { role: 'user' as const, content: '', ...message };
const config = Object.assign({}, prompt.config);
if (schema) {
@@ -231,13 +262,19 @@ export class CopilotTranscriptionService {
return `${hoursStr}:${minutesStr}:${secondsStr}`;
}
private async callTranscript(url: string, mimeType: string, offset: number) {
private async callTranscript(
url: string,
mimeType: string,
offset: number,
modelId?: string
) {
// NOTE: Vertex provider not support transcription yet, we always use Gemini here
const result = await this.chatWithPrompt(
'Transcript audio',
{ attachments: [url], params: { mimetype: mimeType } },
TranscriptionResponseSchema,
CopilotProviderType.Gemini
CopilotProviderType.Gemini,
modelId
);
const transcription = TranscriptionResponseSchema.parse(
@@ -256,6 +293,7 @@ export class CopilotTranscriptionService {
async transcriptAudio({
jobId,
infos,
modelId,
// @deprecated
url,
mimeType,
@@ -264,7 +302,7 @@ export class CopilotTranscriptionService {
const blobInfos = this.mergeInfos(infos, url, mimeType);
const transcriptions = await Promise.all(
Array.from(blobInfos.entries()).map(([idx, { url, mimeType }]) =>
this.callTranscript(url, mimeType, idx * 10 * 60)
this.callTranscript(url, mimeType, idx * 10 * 60, modelId)
)
);

View File

@@ -56,6 +56,7 @@ declare global {
'copilot.transcript.submit': {
jobId: string;
infos?: AudioBlobInfos;
modelId?: string;
/// @deprecated use `infos` instead
url?: string;
/// @deprecated use `infos` instead