feat(server): support sliced audio (#11562)

fix AF-2479
Author: darkskygit
Date: 2025-04-09 08:21:19 +00:00
Parent: 06497773a7
Commit: 15abb78a6b

15 changed files with 239 additions and 75 deletions


@@ -14,6 +14,7 @@ import { AiJobStatus } from '@prisma/client';
 import GraphQLUpload from 'graphql-upload/GraphQLUpload.mjs';
 import {
+  CopilotTranscriptionAudioNotProvided,
   CopilotTranscriptionJobNotFound,
   type FileUpload,
 } from '../../../base';
@@ -100,20 +101,27 @@ export class CopilotTranscriptionResolver {
     @CurrentUser() user: CurrentUser,
     @Args('workspaceId') workspaceId: string,
     @Args('blobId') blobId: string,
-    @Args({ name: 'blob', type: () => GraphQLUpload })
-    blob: FileUpload
+    @Args({ name: 'blob', type: () => GraphQLUpload, nullable: true })
+    blob: FileUpload | null,
+    @Args({ name: 'blobs', type: () => [GraphQLUpload], nullable: true })
+    blobs: FileUpload[] | null
   ): Promise<TranscriptionResultType | null> {
     await this.ac
       .user(user.id)
       .workspace(workspaceId)
       .allowLocal()
       .assert('Workspace.Copilot');

+    // merge blobs
+    const allBlobs = blob ? [blob, ...(blobs || [])].filter(v => !!v) : blobs;
+    if (!allBlobs || allBlobs.length === 0) {
+      throw new CopilotTranscriptionAudioNotProvided();
+    }
     const jobResult = await this.service.submitTranscriptionJob(
       user.id,
       workspaceId,
       blobId,
-      blob
+      await Promise.all(allBlobs)
     );

     return this.handleJobResult(jobResult);
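
Note: the resolver now accepts either the legacy single `blob` upload or the new `blobs` list and merges them before submitting. A standalone sketch of that merge rule (the `Upload` type and `mergeUploads` name below are illustrative, not part of the codebase):

// Illustrative sketch of the resolver's merge rule: legacy `blob` first,
// then any entries from `blobs`; reject when nothing was uploaded.
type Upload = { filename: string };

function mergeUploads(blob: Upload | null, blobs: Upload[] | null): Upload[] {
  const all = blob ? [blob, ...(blobs ?? [])].filter(v => !!v) : blobs;
  if (!all || all.length === 0) {
    // the real resolver throws CopilotTranscriptionAudioNotProvided here
    throw new Error('no audio blob provided');
  }
  return all;
}

mergeUploads({ filename: 'a.mp3' }, null); // legacy single upload
mergeUploads(null, [{ filename: 'part-0.mp3' }, { filename: 'part-1.mp3' }]); // sliced upload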
@@ -136,14 +144,13 @@ export class CopilotTranscriptionResolver {
       workspaceId,
       jobId
     );
-    if (!job || !job.url || !job.mimeType) {
+    if (!job || !job.infos) {
       throw new CopilotTranscriptionJobNotFound();
     }

     const jobResult = await this.service.executeTranscriptionJob(
       job.id,
-      job.url,
-      job.mimeType
+      job.infos
     );

     return this.handleJobResult(jobResult);


@@ -23,6 +23,7 @@ import {
 } from '../providers';
 import { CopilotStorage } from '../storage';
 import {
+  AudioBlobInfos,
   TranscriptionPayload,
   TranscriptionResponseSchema,
   TranscriptPayloadSchema,
@@ -32,8 +33,7 @@ import { readStream } from './utils';
 export type TranscriptionJob = {
   id: string;
   status: AiJobStatus;
-  url?: string;
-  mimeType?: string;
+  infos?: AudioBlobInfos;
   transcription?: TranscriptionPayload;
 };
@@ -52,7 +52,7 @@ export class CopilotTranscriptionService {
     userId: string,
     workspaceId: string,
     blobId: string,
-    blob: FileUpload
+    blobs: FileUpload[]
   ): Promise<TranscriptionJob> {
     if (await this.models.copilotJob.has(userId, workspaceId, blobId)) {
       throw new CopilotTranscriptionJobExists();
@@ -65,21 +65,24 @@ export class CopilotTranscriptionService {
       type: AiJobType.transcription,
     });

-    const buffer = await readStream(blob.createReadStream());
-    const url = await this.storage.put(userId, workspaceId, blobId, buffer);
+    const infos: AudioBlobInfos = [];
+    for (const blob of blobs) {
+      const buffer = await readStream(blob.createReadStream());
+      const url = await this.storage.put(userId, workspaceId, blobId, buffer);
+      infos.push({ url, mimeType: blob.mimetype });
+    }

-    return await this.executeTranscriptionJob(jobId, url, blob.mimetype);
+    return await this.executeTranscriptionJob(jobId, infos);
   }

   async executeTranscriptionJob(
     jobId: string,
-    url: string,
-    mimeType: string
+    infos: AudioBlobInfos
   ): Promise<TranscriptionJob> {
     const status = AiJobStatus.running;
     const success = await this.models.copilotJob.update(jobId, {
       status,
-      payload: { url, mimeType },
+      payload: { infos },
     });

     if (!success) {
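
Note: `readStream` above comes from './utils' and is outside this diff. As an assumption, a helper like it buffers the upload stream into a single Buffer before it is handed to `storage.put`:

import type { Readable } from 'node:stream';

// Assumed sketch of a readStream-style helper (the real './utils' version
// is not shown in this commit): concatenate the upload stream's chunks.
async function readStream(stream: Readable): Promise<Buffer> {
  const chunks: Buffer[] = [];
  for await (const chunk of stream) {
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
  }
  return Buffer.concat(chunks);
}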
@@ -88,8 +91,7 @@ export class CopilotTranscriptionService {
     await this.job.add('copilot.transcript.submit', {
       jobId,
-      url,
-      mimeType,
+      infos,
     });

     return { id: jobId, status };
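
With this change the queued `copilot.transcript.submit` job carries one entry per uploaded slice. A hypothetical payload for a two-slice recording (the jobId and URLs are placeholders):

// Hypothetical payload for a two-slice recording; the id and URLs are
// placeholders, not values produced by the server.
const submitPayload = {
  jobId: 'job-123',
  infos: [
    { url: 'https://example.com/ws/audio-part-0', mimeType: 'audio/mpeg' },
    { url: 'https://example.com/ws/audio-part-1', mimeType: 'audio/mpeg' },
  ],
  // `url` / `mimeType` stay unset; they remain only so jobs queued by an
  // older server version can still be processed.
};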
@@ -132,8 +134,13 @@ export class CopilotTranscriptionService {
     const payload = TranscriptPayloadSchema.safeParse(job.payload);
     if (payload.success) {
-      ret.url = payload.data.url || undefined;
-      ret.mimeType = payload.data.mimeType || undefined;
+      let { url, mimeType, infos } = payload.data;
+      infos = infos || [];
+      if (url && mimeType) {
+        infos.push({ url, mimeType });
+      }
+      ret.infos = this.mergeInfos(infos, url, mimeType);
+
       if (job.status === AiJobStatus.claimed) {
         ret.transcription = payload.data;
       }
@@ -173,7 +180,24 @@ export class CopilotTranscriptionService {
     );
   }

-  private convertTime(time: number) {
+  // TODO(@darkskygit): remove after old server down
+  private mergeInfos(
+    infos?: AudioBlobInfos | null,
+    url?: string | null,
+    mimeType?: string | null
+  ) {
+    if (url && mimeType) {
+      if (infos) {
+        infos.push({ url, mimeType });
+      } else {
+        infos = [{ url, mimeType }];
+      }
+    }
+    return infos || [];
+  }
+
+  private convertTime(time: number, offset = 0) {
+    time = time + offset;
     const minutes = Math.floor(time / 60);
     const seconds = Math.floor(time % 60);
     const hours = Math.floor(minutes / 60);
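
`convertTime`'s formatting and return statement fall outside the visible hunk; a sketch of the offset arithmetic, assuming an HH:MM:SS output format, shows how a slice offset shifts timestamps:

// Sketch of the offset arithmetic; the HH:MM:SS formatting is an assumption,
// since the method's return statement is not part of this hunk.
function convertTime(time: number, offset = 0): string {
  time = time + offset;
  const minutes = Math.floor(time / 60);
  const seconds = Math.floor(time % 60);
  const hours = Math.floor(minutes / 60);
  const pad = (n: number) => String(n).padStart(2, '0');
  return `${pad(hours)}:${pad(minutes % 60)}:${pad(seconds)}`;
}

// 75s into the second slice: offset = 1 * 10 * 60 = 600s, so the overall
// timestamp is 675s, i.e. '00:11:15'.
convertTime(75, 600); // => '00:11:15'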
@@ -186,29 +210,38 @@ export class CopilotTranscriptionService {
   @OnJob('copilot.transcript.submit')
   async transcriptAudio({
     jobId,
+    infos,
+    // @deprecated
     url,
     mimeType,
   }: Jobs['copilot.transcript.submit']) {
     try {
-      const result = await this.chatWithPrompt(
-        'Transcript audio',
-        {
-          attachments: [url],
-          params: { mimetype: mimeType },
-        },
-        TranscriptionResponseSchema
-      );
+      const blobInfos = this.mergeInfos(infos, url, mimeType);
+      const transcriptions = [];
+      for (const [idx, { url, mimeType }] of blobInfos.entries()) {
+        const result = await this.chatWithPrompt(
+          'Transcript audio',
+          {
+            attachments: [url],
+            params: { mimetype: mimeType },
+          },
+          TranscriptionResponseSchema
+        );
+        const offset = idx * 10 * 60;
+        const transcription = TranscriptionResponseSchema.parse(
+          JSON.parse(result)
+        ).map(t => ({
+          speaker: t.a,
+          start: this.convertTime(t.s, offset),
+          end: this.convertTime(t.e, offset),
+          transcription: t.t,
+        }));
+        transcriptions.push(transcription);
+      }

-      const transcription = TranscriptionResponseSchema.parse(
-        JSON.parse(result)
-      ).map(t => ({
-        speaker: t.a,
-        start: this.convertTime(t.s),
-        end: this.convertTime(t.e),
-        transcription: t.t,
-      }));
       await this.models.copilotJob.update(jobId, {
-        payload: { transcription },
+        payload: { transcription: transcriptions.flat() },
       });

       await this.job.add('copilot.transcript.summary.submit', {

@@ -20,9 +20,17 @@ const TranscriptionItemSchema = z.object({
 export const TranscriptionSchema = z.array(TranscriptionItemSchema);

+export const AudioBlobInfosSchema = z
+  .object({
+    url: z.string(),
+    mimeType: z.string(),
+  })
+  .array();
+
 export const TranscriptPayloadSchema = z.object({
   url: z.string().nullable().optional(),
   mimeType: z.string().nullable().optional(),
+  infos: AudioBlobInfosSchema.nullable().optional(),
   title: z.string().nullable().optional(),
   summary: z.string().nullable().optional(),
   transcription: TranscriptionSchema.nullable().optional(),
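
Both the legacy flat `url`/`mimeType` fields and the new `infos` array validate against this schema, since every field is optional. A quick check (the import path and values are illustrative):

import { TranscriptPayloadSchema } from './types'; // path is illustrative

// Payload written by an older server: flat url/mimeType fields.
TranscriptPayloadSchema.parse({
  url: 'https://example.com/ws/audio',
  mimeType: 'audio/mpeg',
});

// Payload written after this change: one entry per audio slice.
TranscriptPayloadSchema.parse({
  infos: [
    { url: 'https://example.com/ws/audio-part-0', mimeType: 'audio/mpeg' },
    { url: 'https://example.com/ws/audio-part-1', mimeType: 'audio/mpeg' },
  ],
});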
@@ -32,6 +40,8 @@ export type TranscriptionItem = z.infer<typeof TranscriptionItemSchema>;
 export type Transcription = z.infer<typeof TranscriptionSchema>;
 export type TranscriptionPayload = z.infer<typeof TranscriptPayloadSchema>;
+export type AudioBlobInfos = z.infer<typeof AudioBlobInfosSchema>;

 declare global {
   interface Events {
     'workspace.file.transcript.finished': {
@@ -44,8 +54,11 @@ declare global {
   interface Jobs {
     'copilot.transcript.submit': {
       jobId: string;
-      url: string;
-      mimeType: string;
+      infos?: AudioBlobInfos;
+      /// @deprecated use `infos` instead
+      url?: string;
+      /// @deprecated use `infos` instead
+      mimeType?: string;
     };
     'copilot.transcript.summary.submit': {
       jobId: string;