feat(core): call real endpoint for audio transcription (#11139)

fix AF-2359
pengx17
2025-03-28 07:59:35 +00:00
parent c4032e1bc0
commit dccd7c20aa
17 changed files with 766 additions and 152 deletions

View File

@@ -22,4 +22,17 @@ export const notesButtonIcon = style({
fontSize: 24,
width: '1em',
height: '1em',
display: 'inline-flex',
alignItems: 'center',
justifyContent: 'center',
});
export const error = style({
color: cssVarV2('aI/errorText'),
});
export const publicUserLabel = style({
fontSize: cssVar('fontXs'),
fontWeight: 500,
userSelect: 'none',
});

View File

@@ -1,13 +1,17 @@
import { Button, Tooltip } from '@affine/component';
import { Button, Tooltip, useConfirmModal } from '@affine/component';
import { AudioPlayer } from '@affine/core/components/audio-player';
import { AnimatedTranscribeIcon } from '@affine/core/components/audio-player/lottie/animated-transcribe-icon';
import { useSeekTime } from '@affine/core/components/audio-player/use-seek-time';
import { useEnableAI } from '@affine/core/components/hooks/affine/use-enable-ai';
import { useAsyncCallback } from '@affine/core/components/hooks/affine-async-hooks';
import { CurrentServerScopeProvider } from '@affine/core/components/providers/current-server-scope';
import { PublicUserLabel } from '@affine/core/modules/cloud/views/public-user';
import { GlobalDialogService } from '@affine/core/modules/dialogs';
import type { AudioAttachmentBlock } from '@affine/core/modules/media/entities/audio-attachment-block';
import { useAttachmentMediaBlock } from '@affine/core/modules/media/views/use-attachment-media';
import { useI18n } from '@affine/i18n';
import { useLiveData } from '@toeverything/infra';
import { useCallback, useMemo } from 'react';
import { Trans, useI18n } from '@affine/i18n';
import { useLiveData, useService } from '@toeverything/infra';
import { useCallback, useMemo, useState } from 'react';
import type { AttachmentViewerProps } from '../types';
import * as styles from './audio-block.css';
@@ -19,12 +23,15 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => {
const stats = useLiveData(audioMedia.stats$);
const loading = useLiveData(audioMedia.loading$);
const expanded = useLiveData(block.expanded$);
const transcribing = useLiveData(block.transcribing$);
const transcribed = useLiveData(block.transcribed$);
const [preflightChecking, setPreflightChecking] = useState(false);
const transcribing =
useLiveData(block.transcriptionJob.transcribing$) || preflightChecking;
const error = useLiveData(block.transcriptionJob.error$);
const transcribed = useLiveData(block.hasTranscription$);
const handleClick = useCallback((e: React.MouseEvent<HTMLDivElement>) => {
e.stopPropagation();
}, []);
const confirmModal = useConfirmModal();
const seekTime = useSeekTime(playbackState, stats.duration);
const handlePlay = useCallback(() => {
@@ -50,6 +57,66 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => {
const enableAi = useEnableAI();
const globalDialogService = useService(GlobalDialogService);
const handleNotesClick = useAsyncCallback(async () => {
if (!enableAi || transcribing) {
return;
}
if (transcribed) {
block.expanded$.setValue(!expanded);
return;
}
if (!block.transcriptionJob.currentUserId) {
confirmModal.openConfirmModal({
title: t['com.affine.ai.login-required.dialog-title'](),
description: t['com.affine.ai.login-required.dialog-content'](),
confirmText: t['com.affine.ai.login-required.dialog-confirm'](),
confirmButtonOptions: {
variant: 'primary',
},
cancelText: t['com.affine.ai.login-required.dialog-cancel'](),
onConfirm: () => {
globalDialogService.open('sign-in', {});
},
});
return;
}
setPreflightChecking(true);
const result = await block.transcriptionJob.preflightCheck();
setPreflightChecking(false);
if (result?.error === 'created-by-others') {
confirmModal.openConfirmModal({
title: t['com.affine.audio.transcribe.non-owner.confirm.title'](),
description: (
<Trans i18nKey="com.affine.audio.transcribe.non-owner.confirm.message">
Please contact <PublicUserLabel id={result.userId} /> to upgrade AI
rights or resend the attachment.
</Trans>
),
onCancel: false,
confirmText: t['Confirm'](),
confirmButtonOptions: {
variant: 'primary',
},
});
} else {
await block.transcribe();
}
}, [
enableAi,
transcribing,
transcribed,
block,
expanded,
confirmModal,
t,
globalDialogService,
]);
const notesEntry = useMemo(() => {
if (!enableAi) {
return null;
@@ -62,37 +129,37 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => {
state={transcribing ? 'transcribing' : 'idle'}
/>
}
disabled={transcribing}
size="large"
prefixClassName={styles.notesButtonIcon}
className={styles.notesButton}
onClick={() => {
if (transcribed) {
block.expanded$.setValue(!expanded);
} else {
block.transcribe();
}
}}
onClick={handleNotesClick}
>
{t['com.affine.attachmentViewer.audio.notes']()}
{transcribing
? t['com.affine.audio.transcribing']()
: t['com.affine.audio.notes']()}
</Button>
);
if (transcribing) {
return (
<Tooltip
content={t['com.affine.attachmentViewer.audio.transcribing']()}
>
<Tooltip content={t['com.affine.audio.transcribing']()}>
{inner}
</Tooltip>
);
}
return inner;
}, [enableAi, transcribing, t, transcribed, block, expanded]);
}, [enableAi, transcribing, handleNotesClick, t]);
const sizeEntry = useMemo(() => {
if (error) {
return <div className={styles.error}>{error.message}</div>;
}
return block.props.props.size;
}, [error, block.props.props.size]);
return (
<AudioPlayer
name={block.props.props.name}
size={block.props.props.size}
size={sizeEntry}
loading={loading}
playbackState={playbackState?.state || 'idle'}
waveform={stats.waveform}
@@ -103,7 +170,9 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => {
onPause={handlePause}
onStop={handleStop}
onSeek={handleSeek}
notesEntry={notesEntry}
notesEntry={
<CurrentServerScopeProvider>{notesEntry}</CurrentServerScopeProvider>
}
/>
);
};
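
For reference, a condensed restatement of the branch order in handleNotesClick above; toggle, openSignInDialog, and askCreator are hypothetical helpers standing in for the inline logic:

const job = block.transcriptionJob;
if (!enableAi || transcribing) return;             // 1. ignore the click
if (transcribed) return toggle(block.expanded$);   // 2. open/close notes
if (!job.currentUserId) return openSignInDialog(); // 3. must be signed in
const res = await job.preflightCheck();            // 4. ownership check
if (res?.error === 'created-by-others')
  return askCreator(res.userId);                   //    contact the creator
await block.transcribe();                          // 5. start the job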

View File

@@ -23,7 +23,7 @@ const formatTime = (seconds: number): string => {
export interface AudioPlayerProps {
// Audio metadata
name: string;
size: number;
size: number | ReactNode; // the size slot may also be used to render an error message
waveform: number[] | null;
// Playback state
playbackState: 'idle' | 'playing' | 'paused' | 'stopped';
@@ -97,7 +97,9 @@ export const AudioPlayer = ({
<div className={styles.nameLabel}>{name}</div>
</div>
<div className={styles.upperRow}>
<div className={styles.sizeInfo}>{bytes(size)}</div>
<div className={styles.sizeInfo}>
{typeof size === 'number' ? bytes(size) : size}
</div>
</div>
</div>
<div className={styles.upperRight}>
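
A short usage sketch of the widened prop (uploadError and attachment are hypothetical); numbers keep the bytes() formatting, while any ReactNode renders verbatim:

const sizeEntry = uploadError ? (
  <span className={styles.error}>{uploadError.message}</span>
) : (
  attachment.size // number, formatted via bytes() inside AudioPlayer
);

return <AudioPlayer name={attachment.name} size={sizeEntry} /* ...remaining props */ />;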

View File

@@ -1,6 +1,8 @@
import { cssVarV2 } from '@toeverything/theme/v2';
import { globalStyle, style } from '@vanilla-extract/css';
export const root = style({});
export const root = style({
display: 'inline-flex',
});
// replace primary colors with cssVarV2('icon/primary')
const iconPrimaryColors = [

View File

@@ -26,7 +26,7 @@ export const useSeekTime = (
playbackState.state === 'playing'
? (Date.now() - playbackState.updateTime) / 1000
: 0;
// if timeElapsed + playbackState.seekOffset is closed to duration,
// if timeElapsed + playbackState.seekOffset is close to duration,
// set seekTime to duration
// this avoids the seek time landing on a value slightly short of the
// duration right at the end of the audio
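
A plausible sketch of the snapping this comment describes, with an assumed tolerance (the actual threshold lives outside this hunk):

const raw = playbackState.seekOffset + timeElapsed;
// Assumed tolerance, in seconds: snap to the exact duration near the end
// so the progress indicator finishes at 100% rather than e.g. 99.97%.
const EPSILON = 0.1;
const seekTime = duration - raw < EPSILON ? duration : raw;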

View File

@@ -0,0 +1,47 @@
import { ServersService } from '@affine/core/modules/cloud';
import { GlobalContextService } from '@affine/core/modules/global-context';
import { FrameworkScope, useLiveData, useService } from '@toeverything/infra';
import { useMemo } from 'react';
export const CurrentServerScopeProvider = ({
children,
}: {
children: React.ReactNode;
}) => {
const globalContext = useService(GlobalContextService).globalContext;
const serversService = useService(ServersService);
const currentServerId = useLiveData(globalContext.serverId.$);
const serverService = useLiveData(
useMemo(() => {
if (!currentServerId) {
return null;
}
return serversService.server$(currentServerId);
}, [currentServerId, serversService])
);
if (!serverService) {
// todo(@pengx17): render a loading/error component here if not found?
return null;
}
return (
<FrameworkScope scope={serverService.scope}>{children}</FrameworkScope>
);
};
export const useCurrentServerService = () => {
const globalContext = useService(GlobalContextService).globalContext;
const serversService = useService(ServersService);
const currentServerId = useLiveData(globalContext.serverId.$);
const serverService = useLiveData(
useMemo(() => {
if (!currentServerId) {
return null;
}
return serversService.server$(currentServerId);
}, [currentServerId, serversService])
);
return serverService ?? undefined;
};
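
Usage sketch: wrap any UI that resolves server-scoped services, e.g. the PublicUserLabel rendered from the audio block earlier in this diff:

<CurrentServerScopeProvider>
  {/* resolves PublicUserService from the current server's scope */}
  <PublicUserLabel id={userId} />
</CurrentServerScopeProvider>

Note that the provider and useCurrentServerService duplicate the same serverId lookup; the hook returns undefined where the provider renders null, so callers can choose between conditional rendering and optional chaining.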

View File

@@ -0,0 +1,20 @@
import { style } from '@vanilla-extract/css';
export const publicUserLabel = style({
fontSize: 'inherit',
});
export const publicUserLabelLoading = style([
publicUserLabel,
{
opacity: 0.5,
},
]);
export const publicUserLabelRemoved = style([
publicUserLabel,
{
opacity: 0.5,
textDecoration: 'line-through',
},
]);

View File

@@ -0,0 +1,38 @@
import { useCurrentServerService } from '@affine/core/components/providers/current-server-scope';
import { useI18n } from '@affine/i18n';
import { useLiveData } from '@toeverything/infra';
import { useLayoutEffect, useMemo } from 'react';
import { PublicUserService } from '../services/public-user';
import * as styles from './public-user.css';
export const PublicUserLabel = ({ id }: { id: string }) => {
const serverService = useCurrentServerService();
const publicUser = useMemo(() => {
return serverService?.scope.get(PublicUserService);
}, [serverService]);
useLayoutEffect(() => {
if (publicUser) {
publicUser.revalidate(id);
}
}, [id, publicUser]);
const user = useLiveData(publicUser?.publicUser$(id));
const isLoading = useLiveData(publicUser?.isLoading$(id));
const t = useI18n();
if (isLoading && !user) {
return <span className={styles.publicUserLabelLoading}>...</span>;
}
if (user?.removed) {
return (
<span className={styles.publicUserLabelRemoved}>
{t['Unknown User']()}
</span>
);
}
return <span className={styles.publicUserLabel}>{user?.name}</span>;
};

View File

@@ -1,3 +1,5 @@
import { DebugLogger } from '@affine/debug';
import { AiJobStatus } from '@affine/graphql';
import {
type AttachmentBlockModel,
TranscriptionBlockFlavour,
@@ -6,31 +8,16 @@ import {
import type { AffineTextAttributes } from '@blocksuite/affine/shared/types';
import { type DeltaInsert, Text } from '@blocksuite/affine/store';
import { computed } from '@preact/signals-core';
import {
catchErrorInto,
effect,
Entity,
fromPromise,
LiveData,
onComplete,
onStart,
} from '@toeverything/infra';
import { Entity, LiveData } from '@toeverything/infra';
import { cssVarV2 } from '@toeverything/theme/v2';
import { EMPTY, mergeMap, switchMap } from 'rxjs';
import type { WorkspaceService } from '../../workspace';
import type { AudioMediaManagerService } from '../services/audio-media-manager';
import type { AudioMedia } from './audio-media';
import { AudioTranscriptionJob } from './audio-transcription-job';
import type { TranscriptionResult } from './types';
export interface TranscriptionResult {
title: string;
summary: string;
segments: {
speaker: string;
start_time: string;
end_time: string;
transcription: string;
}[];
}
const logger = new DebugLogger('audio-attachment-block');
// BlockSuiteError: yText must not contain "\r" because it will break the range synchronization
function sanitizeText(text: string) {
@@ -41,42 +28,22 @@ export class AudioAttachmentBlock extends Entity<AttachmentBlockModel> {
private readonly refCount$ = new LiveData<number>(0);
readonly audioMedia: AudioMedia;
constructor(
public readonly audioMediaManagerService: AudioMediaManagerService
readonly audioMediaManagerService: AudioMediaManagerService,
readonly workspaceService: WorkspaceService
) {
super();
const mediaRef = audioMediaManagerService.ensureMediaEntity(this.props);
this.audioMedia = mediaRef.media;
this.disposables.push(() => mediaRef.release());
this.disposables.push(() => {
this.transcriptionJob.dispose();
});
}
// rendering means the attachment is visible in the editor
// it is used to determine if we should show the audio player in the sidebar
rendering$ = this.refCount$.map(refCount => refCount > 0);
expanded$ = new LiveData<boolean>(true);
transcribing$ = new LiveData<boolean>(false);
transcriptionError$ = new LiveData<Error | null>(null);
transcribed$ = LiveData.computed(get => {
const transcriptionBlock = get(this.transcriptionBlock$);
if (!transcriptionBlock) {
return null;
}
const childMap = get(LiveData.fromSignal(transcriptionBlock.childMap));
return childMap.size > 0;
});
transcribe = effect(
switchMap(() =>
fromPromise(this.doTranscribe()).pipe(
mergeMap(result => {
// attach transcription result to the block
this.fillTranscriptionResult(result);
return EMPTY;
}),
catchErrorInto(this.transcriptionError$),
onStart(() => this.transcribing$.setValue(true)),
onComplete(() => this.transcribing$.setValue(false))
)
)
);
readonly transcriptionBlock$ = LiveData.fromSignal(
computed(() => {
@@ -91,59 +58,108 @@ export class AudioAttachmentBlock extends Entity<AttachmentBlockModel> {
})
);
// TODO: use real implementation
private readonly doTranscribe = async (): Promise<TranscriptionResult> => {
try {
const buffer = await this.audioMedia.getBuffer();
if (!buffer) {
throw new Error('No audio buffer available');
}
hasTranscription$ = LiveData.computed(get => {
const transcriptionBlock = get(this.transcriptionBlock$);
if (!transcriptionBlock) {
return null;
}
const childMap = get(LiveData.fromSignal(transcriptionBlock.childMap));
return childMap.size > 0;
});
// Send binary audio data directly
const blob = new Blob([buffer], { type: 'audio/wav' }); // adjust mime type if needed
const formData = new FormData();
formData.append('audio', blob);
transcriptionJob: AudioTranscriptionJob = this.createTranscriptionJob();
const response = await fetch('http://localhost:6544/transcribe', {
method: 'POST',
body: formData,
mount() {
if (
this.transcriptionJob.isCreator() &&
this.transcriptionJob.status$.value.status === 'waiting-for-job' &&
!this.hasTranscription$.value
) {
this.transcribe().catch(error => {
logger.error('Error transcribing audio:', error);
});
}
if (!response.ok) {
throw new Error(`Transcription failed: ${response.statusText}`);
this.refCount$.setValue(this.refCount$.value + 1);
}
unmount() {
this.refCount$.setValue(this.refCount$.value - 1);
}
private createTranscriptionJob() {
if (!this.props.props.sourceId) {
throw new Error('No source id');
}
let transcriptionBlockProps = this.transcriptionBlock$.value?.props;
if (!transcriptionBlockProps) {
// transcription block is not created yet, we need to create it
this.props.doc.addBlock(
'affine:transcription',
{
transcription: {},
},
this.props.id
);
transcriptionBlockProps = this.transcriptionBlock$.value?.props;
}
if (!transcriptionBlockProps) {
throw new Error('No transcription block props');
}
const job = this.framework.createEntity(AudioTranscriptionJob, {
blobId: this.props.props.sourceId,
blockProps: transcriptionBlockProps,
getAudioFile: async () => {
const buffer = await this.audioMedia.getBuffer();
if (!buffer) {
throw new Error('No audio buffer available');
}
const blob = new Blob([buffer], { type: this.props.props.type });
const file = new File([blob], this.props.props.name, {
type: this.props.props.type,
});
return file;
},
});
return job;
}
readonly transcribe = async () => {
try {
// if job is already running, we should not start it again
if (this.transcriptionJob.status$.value.status !== 'waiting-for-job') {
return;
}
const status = await this.transcriptionJob.start();
if (status.status === AiJobStatus.claimed) {
this.fillTranscriptionResult(status.result);
}
const result = await response.json();
return result.transcription;
} catch (error) {
console.error('Error transcribing audio:', error);
logger.error('Error transcribing audio:', error);
throw error;
}
};
private readonly fillTranscriptionResult = (result: TranscriptionResult) => {
this.props.props.caption = result.title;
// todo: add transcription block schema etc.
const transcriptionBlockId = this.props.doc.addBlock(
'affine:transcription',
{
transcription: result,
},
this.props.id
);
this.props.props.caption = result.title ?? '';
const calloutId = this.props.doc.addBlock(
'affine:callout',
{
emoji: '💬',
},
transcriptionBlockId
this.transcriptionBlock$.value?.id
);
// todo: refactor
const spearkerToColors = new Map<string, string>();
const speakerToColors = new Map<string, string>();
for (const segment of result.segments) {
let color = spearkerToColors.get(segment.speaker);
let color = speakerToColors.get(segment.speaker);
const colorOptions = [
cssVarV2.text.highlight.fg.red,
cssVarV2.text.highlight.fg.green,
@@ -156,12 +172,12 @@ export class AudioAttachmentBlock extends Entity<AttachmentBlockModel> {
cssVarV2.text.highlight.fg.magenta,
];
if (!color) {
color = colorOptions[spearkerToColors.size % colorOptions.length];
spearkerToColors.set(segment.speaker, color);
color = colorOptions[speakerToColors.size % colorOptions.length];
speakerToColors.set(segment.speaker, color);
}
const deltaInserts: DeltaInsert<AffineTextAttributes>[] = [
{
insert: sanitizeText(segment.start_time + ' ' + segment.speaker),
insert: sanitizeText(segment.start + ' ' + segment.speaker),
attributes: {
color,
bold: true,
@@ -180,12 +196,4 @@ export class AudioAttachmentBlock extends Entity<AttachmentBlockModel> {
);
}
};
mount() {
this.refCount$.setValue(this.refCount$.value + 1);
}
unmount() {
this.refCount$.setValue(this.refCount$.value - 1);
}
}
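
End to end, the entity now drives itself; a minimal consumer sketch, assuming block is a mounted AudioAttachmentBlock (this mirrors mount() above, it is not new API):

// Only the creator auto-starts, and only while the job is still waiting
// and nothing has been transcribed yet -- other users go through the
// preflight dialog path in the audio-block component instead.
if (
  block.transcriptionJob.isCreator() &&
  block.transcriptionJob.status$.value.status === 'waiting-for-job' &&
  !block.hasTranscription$.value
) {
  // Resolves after the job is claimed (result written into the document
  // by fillTranscriptionResult) or fails.
  await block.transcribe();
}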

View File

@@ -162,7 +162,7 @@ export class AudioMedia extends Entity<AudioSource> {
const startTime = performance.now();
// calculating audio stats is expensive. Maybe persist the result in cache?
const stats = await this.calcuateStatsFromBuffer(blob);
const stats = await this.calculateStatsFromBuffer(blob);
logger.debug(
`Calculate audio stats time: ${performance.now() - startTime}ms`
);
@@ -177,9 +177,8 @@ export class AudioMedia extends Entity<AudioSource> {
return fromPromise(async () => {
return this.loadAudioBuffer();
}).pipe(
mergeMap(({ blob, duration, waveform }) => {
mergeMap(({ blob, waveform }) => {
const url = URL.createObjectURL(blob);
this.duration$.setValue(duration);
// Set the audio element source
this.audioElement.src = url;
this.waveform$.setValue(waveform);
@@ -187,6 +186,9 @@ export class AudioMedia extends Entity<AudioSource> {
if (this.playbackState$.getValue().state === 'playing') {
this.play(true);
}
this.audioElement.onloadedmetadata = () => {
this.duration$.setValue(this.audioElement.duration);
};
return EMPTY;
}),
onStart(() => this.loading$.setValue(true)),
@@ -397,13 +399,13 @@ export class AudioMedia extends Entity<AudioSource> {
return this.playbackState$.getValue();
}
private async calcuateStatsFromBuffer(buffer: Blob) {
private async calculateStatsFromBuffer(buffer: Blob) {
const audioContext = new AudioContext();
const audioBuffer = await audioContext.decodeAudioData(
await buffer.arrayBuffer()
);
const waveform = await this.calculateWaveform(audioBuffer);
return { waveform, duration: audioBuffer.duration };
return { waveform };
}
/**

View File

@@ -0,0 +1,96 @@
import {
claimAudioTranscriptionMutation,
getAudioTranscriptionQuery,
submitAudioTranscriptionMutation,
} from '@affine/graphql';
import { Entity } from '@toeverything/infra';
import type { DefaultServerService, WorkspaceServerService } from '../../cloud';
import { GraphQLService } from '../../cloud/services/graphql';
import type { WorkspaceService } from '../../workspace';
export class AudioTranscriptionJobStore extends Entity<{
readonly blobId: string;
readonly getAudioFile: () => Promise<File>;
}> {
constructor(
private readonly workspaceService: WorkspaceService,
private readonly workspaceServerService: WorkspaceServerService,
private readonly defaultServerService: DefaultServerService
) {
super();
}
private get serverService() {
return (
this.workspaceServerService.server || this.defaultServerService.server
);
}
private get graphqlService() {
return this.serverService?.scope.get(GraphQLService);
}
private get currentWorkspaceId() {
return this.workspaceService.workspace.id;
}
submitAudioTranscription = async () => {
const graphqlService = this.graphqlService;
if (!graphqlService) {
throw new Error('No graphql service available');
}
const file = await this.props.getAudioFile();
const response = await graphqlService.gql({
query: submitAudioTranscriptionMutation,
variables: {
workspaceId: this.currentWorkspaceId,
blobId: this.props.blobId,
blob: file,
},
});
if (!response.submitAudioTranscription?.id) {
throw new Error('Failed to submit audio transcription');
}
return response.submitAudioTranscription;
};
getAudioTranscription = async (blobId: string, jobId?: string) => {
const graphqlService = this.graphqlService;
if (!graphqlService) {
throw new Error('No graphql service available');
}
const currentWorkspaceId = this.currentWorkspaceId;
if (!currentWorkspaceId) {
throw new Error('No current workspace id');
}
const response = await graphqlService.gql({
query: getAudioTranscriptionQuery,
variables: {
workspaceId: currentWorkspaceId,
jobId,
blobId,
},
});
if (!response.currentUser?.copilot?.audioTranscription) {
return null;
}
return response.currentUser.copilot.audioTranscription;
};
claimAudioTranscription = async (jobId: string) => {
const graphqlService = this.graphqlService;
if (!graphqlService) {
throw new Error('No graphql service available');
}
const response = await graphqlService.gql({
query: claimAudioTranscriptionMutation,
variables: {
jobId,
},
});
if (!response.claimAudioTranscription) {
throw new Error('Failed to claim transcription result');
}
return response.claimAudioTranscription;
};
}
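
The three calls are designed to run in sequence; a sketch of the intended order (matching start() in the job entity below), assuming a resolved store instance:

// 1. Reuse an existing server-side job for this blob/job id, if any.
let job = await store.getAudioTranscription(blobId, jobId);
// 2. Otherwise upload the audio file and create one.
job ??= await store.submitAudioTranscription();
// 3. After polling reports `finished`, claim the result. Claiming is the
//    step that consumes the creator's AI credits.
const { title, summary, transcription } = await store.claimAudioTranscription(job.id);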

View File

@@ -0,0 +1,281 @@
import { shallowEqual } from '@affine/component';
import { DebugLogger } from '@affine/debug';
import { UserFriendlyError } from '@affine/error';
import { AiJobStatus } from '@affine/graphql';
import type { TranscriptionBlockProps } from '@blocksuite/affine/model';
import { Entity, LiveData } from '@toeverything/infra';
import type { DefaultServerService, WorkspaceServerService } from '../../cloud';
import { AuthService } from '../../cloud/services/auth';
import { AudioTranscriptionJobStore } from './audio-transcription-job-store';
import type { TranscriptionResult } from './types';
// The UI status of the transcription job
export type TranscriptionStatus =
| {
status: 'waiting-for-job';
}
| {
status: 'started';
}
| {
status: AiJobStatus.pending;
}
| {
status: AiJobStatus.running;
}
| {
status: AiJobStatus.failed;
error: UserFriendlyError; // note: not yet visible in the UI
}
| {
status: AiJobStatus.finished; // ready to be claimed, but may be rejected because of insufficient credits
}
| {
status: AiJobStatus.claimed;
result: TranscriptionResult;
};
const logger = new DebugLogger('audio-transcription-job');
// Facts about transcription job ownership:
// 1. (jobId, blobId) is unique for a given user
// 2. only the creator can claim the job
// 3. any user can query a claimed job's result
// 4. claiming a job consumes AI credits
export class AudioTranscriptionJob extends Entity<{
readonly blockProps: TranscriptionBlockProps;
readonly blobId: string;
readonly getAudioFile: () => Promise<File>;
}> {
constructor(
private readonly workspaceServerService: WorkspaceServerService,
private readonly defaultServerService: DefaultServerService
) {
super();
this.disposables.push(() => {
this.disposed = true;
});
}
disposed = false;
private readonly _status$ = new LiveData<TranscriptionStatus>({
status: 'waiting-for-job',
});
private readonly store = this.framework.createEntity(
AudioTranscriptionJobStore,
{
blobId: this.props.blobId,
getAudioFile: this.props.getAudioFile,
}
);
status$ = this._status$.distinctUntilChanged(shallowEqual);
transcribing$ = this.status$.map(status => {
return (
status.status === 'started' ||
status.status === AiJobStatus.pending ||
status.status === AiJobStatus.running ||
status.status === AiJobStatus.finished
);
});
error$ = this.status$.map(status => {
if (status.status === AiJobStatus.failed) {
return status.error;
}
return null;
});
// check whether we can kick-start the transcription job
readonly preflightCheck = async () => {
// if the job id is given, check if the job exists
if (this.props.blockProps.jobId) {
const existingJob = await this.store.getAudioTranscription(
this.props.blobId,
this.props.blockProps.jobId
);
if (existingJob?.status === AiJobStatus.claimed) {
// the job has already been claimed; anyone can read its result
return;
}
if (
!existingJob &&
this.props.blockProps.createdBy &&
this.props.blockProps.createdBy !== this.currentUserId
) {
return {
error: 'created-by-others',
userId: this.props.blockProps.createdBy,
};
}
}
// if no job id, anyone can start a new job
return;
};
async start() {
if (this.disposed) {
logger.debug('Job already disposed, cannot start');
throw new Error('Job already disposed');
}
this._status$.value = {
status: 'started',
};
try {
// first, check whether a job already exists
logger.debug('Checking for existing transcription job', {
blobId: this.props.blobId,
jobId: this.props.blockProps.jobId,
});
let job: {
id: string;
status: AiJobStatus;
} | null = await this.store.getAudioTranscription(
this.props.blobId,
this.props.blockProps.jobId
);
if (!job) {
logger.debug('No existing job found, submitting new transcription job');
job = await this.store.submitAudioTranscription();
} else {
logger.debug('Found existing job', {
jobId: job.id,
status: job.status,
});
}
this.props.blockProps.jobId = job.id;
this.props.blockProps.createdBy = this.currentUserId;
if (job.status !== AiJobStatus.failed) {
this._status$.value = {
status: AiJobStatus.pending,
};
} else {
logger.debug('Job submission failed');
throw UserFriendlyError.fromAny('failed to submit transcription');
}
await this.untilJobFinishedOrClaimed();
await this.claim();
} catch (err) {
logger.debug('Error during job submission', { error: err });
this._status$.value = {
status: AiJobStatus.failed,
error: UserFriendlyError.fromAny(err),
};
}
return this.status$.value;
}
private async untilJobFinishedOrClaimed() {
while (
!this.disposed &&
this.props.blockProps.jobId &&
this.props.blockProps.createdBy === this.currentUserId
) {
logger.debug('Polling job status', {
jobId: this.props.blockProps.jobId,
});
const job = await this.store.getAudioTranscription(
this.props.blobId,
this.props.blockProps.jobId
);
if (!job || job?.status === 'failed') {
logger.debug('Job failed during polling', {
jobId: this.props.blockProps.jobId,
});
throw UserFriendlyError.fromAny('Transcription job failed');
}
if (job?.status === 'finished' || job?.status === 'claimed') {
logger.debug('Job finished, ready to claim', {
jobId: this.props.blockProps.jobId,
});
this._status$.value = {
status: AiJobStatus.finished,
};
return;
}
// Add delay between polling attempts
await new Promise(resolve => setTimeout(resolve, 3000));
}
}
async claim() {
if (this.disposed) {
logger.debug('Job already disposed, cannot claim');
throw new Error('Job already disposed');
}
logger.debug('Attempting to claim job', {
jobId: this.props.blockProps.jobId,
});
if (!this.props.blockProps.jobId) {
logger.debug('No job id found, cannot claim');
throw new Error('No job id found');
}
const claimedJob = await this.store.claimAudioTranscription(
this.props.blockProps.jobId
);
if (claimedJob) {
logger.debug('Successfully claimed job', {
jobId: this.props.blockProps.jobId,
});
const result: TranscriptionResult = {
summary: claimedJob.summary ?? '',
title: claimedJob.title ?? '',
segments:
claimedJob.transcription?.map(segment => ({
speaker: segment.speaker,
start: segment.start,
end: segment.end,
transcription: segment.transcription,
})) ?? [],
};
this._status$.value = {
status: AiJobStatus.claimed,
result,
};
} else {
throw new Error('Failed to claim transcription result');
}
}
isCreator() {
return (
this.props.blockProps.jobId &&
this.props.blockProps.createdBy &&
this.props.blockProps.createdBy === this.currentUserId
);
}
private get serverService() {
return (
this.workspaceServerService.server || this.defaultServerService.server
);
}
get currentUserId() {
const authService = this.serverService?.scope.getOptional(AuthService);
if (!authService) {
return;
}
return authService.session.account$.value?.id;
}
}
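
A sketch of consuming the job from UI code, as the audio-block component does; transcribing$ and error$ are the derived LiveData streams defined above, read via useLiveData as elsewhere in this diff:

const transcribing = useLiveData(block.transcriptionJob.transcribing$);
const error = useLiveData(block.transcriptionJob.error$);
// transcribing stays true from 'started' through pending/running/finished
// and drops once the job is claimed or has failed; error is a
// UserFriendlyError only in the failed state, otherwise null.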

View File

@@ -0,0 +1,10 @@
export interface TranscriptionResult {
title: string;
summary: string;
segments: {
speaker: string;
start: string;
end: string;
transcription: string;
}[];
}
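
For reference, a fabricated value of this shape (all values illustrative; the schema only fixes start/end as strings, not a particular timestamp format):

const example: TranscriptionResult = {
  title: 'Weekly sync',
  summary: 'Discussed the release timeline and open bugs.',
  segments: [
    { speaker: 'Speaker 1', start: '00:00', end: '00:07', transcription: 'Let us get started.' },
    { speaker: 'Speaker 2', start: '00:07', end: '00:15', transcription: 'The build is green again.' },
  ],
};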

View File

@@ -1,11 +1,14 @@
import type { Framework } from '@toeverything/infra';
import { DefaultServerService, WorkspaceServerService } from '../cloud';
import { DesktopApiService } from '../desktop-api';
import { GlobalState } from '../storage';
import { WorkbenchService } from '../workbench';
import { WorkspaceScope, WorkspaceService } from '../workspace';
import { AudioAttachmentBlock } from './entities/audio-attachment-block';
import { AudioMedia } from './entities/audio-media';
import { AudioTranscriptionJob } from './entities/audio-transcription-job';
import { AudioTranscriptionJobStore } from './entities/audio-transcription-job-store';
import {
ElectronGlobalMediaStateProvider,
GlobalMediaStateProvider,
@@ -15,31 +18,40 @@ import { AudioAttachmentService } from './services/audio-attachment';
import { AudioMediaManagerService } from './services/audio-media-manager';
export function configureMediaModule(framework: Framework) {
framework
.scope(WorkspaceScope)
.entity(AudioMedia, [WorkspaceService])
.entity(AudioAttachmentBlock, [AudioMediaManagerService, WorkspaceService])
.entity(AudioTranscriptionJob, [
WorkspaceServerService,
DefaultServerService,
])
.entity(AudioTranscriptionJobStore, [
WorkspaceService,
WorkspaceServerService,
DefaultServerService,
])
.service(AudioAttachmentService);
if (BUILD_CONFIG.isElectron) {
framework
.impl(GlobalMediaStateProvider, ElectronGlobalMediaStateProvider, [
GlobalState,
])
.scope(WorkspaceScope)
.entity(AudioMedia, [WorkspaceService])
.entity(AudioAttachmentBlock, [AudioMediaManagerService])
.service(AudioMediaManagerService, [
GlobalMediaStateProvider,
WorkbenchService,
DesktopApiService,
])
.service(AudioAttachmentService);
]);
} else {
framework
.impl(GlobalMediaStateProvider, WebGlobalMediaStateProvider)
.scope(WorkspaceScope)
.entity(AudioMedia, [WorkspaceService])
.entity(AudioAttachmentBlock, [AudioMediaManagerService])
.service(AudioMediaManagerService, [
GlobalMediaStateProvider,
WorkbenchService,
])
.service(AudioAttachmentService);
]);
}
}