diff --git a/blocksuite/affine/model/src/blocks/transcription/transcription-model.ts b/blocksuite/affine/model/src/blocks/transcription/transcription-model.ts index 5821e8d125..1e19d8971f 100644 --- a/blocksuite/affine/model/src/blocks/transcription/transcription-model.ts +++ b/blocksuite/affine/model/src/blocks/transcription/transcription-model.ts @@ -6,12 +6,15 @@ import { export const TranscriptionBlockFlavour = 'affine:transcription'; +const defaultProps: TranscriptionBlockProps = { + transcription: {}, + jobId: undefined, + createdBy: undefined, // the user id of the creator +}; + export const TranscriptionBlockSchema = defineBlockSchema({ flavour: TranscriptionBlockFlavour, - props: () => ({ - transcription: {}, - jobId: '', - }), + props: () => defaultProps, metadata: { version: 1, role: 'attachment-viewer', @@ -23,7 +26,8 @@ export const TranscriptionBlockSchema = defineBlockSchema({ export type TranscriptionBlockProps = { transcription: Record; - jobId: string; + jobId?: string; + createdBy?: string; }; export class TranscriptionBlockModel extends BlockModel {} diff --git a/packages/frontend/apps/electron-renderer/src/app/effects/recording.ts b/packages/frontend/apps/electron-renderer/src/app/effects/recording.ts index 827485f64c..91ac241cd2 100644 --- a/packages/frontend/apps/electron-renderer/src/app/effects/recording.ts +++ b/packages/frontend/apps/electron-renderer/src/app/effects/recording.ts @@ -97,7 +97,9 @@ export function setupRecordingEvents(frameworkProvider: FrameworkProvider) { using audioAttachment = workspace.scope .get(AudioAttachmentService) .get(model); - audioAttachment?.obj.transcribe(); + audioAttachment?.obj.transcribe().catch(err => { + logger.error('Failed to transcribe recording', err); + }); } })().catch(console.error); }, diff --git a/packages/frontend/component/src/ui/modal/confirm-modal.tsx b/packages/frontend/component/src/ui/modal/confirm-modal.tsx index be315c3ce4..e50d8649cf 100644 --- 
a/packages/frontend/component/src/ui/modal/confirm-modal.tsx +++ b/packages/frontend/component/src/ui/modal/confirm-modal.tsx @@ -16,7 +16,7 @@ export interface ConfirmModalProps extends ModalProps { confirmButtonOptions?: Omit; childrenContentClassName?: string; onConfirm?: (() => void) | (() => Promise); - onCancel?: () => void; + onCancel?: (() => void) | false; confirmText?: React.ReactNode; cancelText?: React.ReactNode; cancelButtonOptions?: Omit; @@ -52,6 +52,12 @@ export const ConfirmModal = ({ console.error(err); }); }, [onConfirm]); + const handleCancel = useCallback(() => { + if (onCancel === false) { + return; + } + onCancel?.(); + }, [onCancel]); return ( { e.stopPropagation(); - onCancel?.(); + handleCancel(); }, }} width={width} closeButtonOptions={{ - onClick: onCancel, + onClick: handleCancel, }} headerClassName={clsx(styles.header, headerClassName)} descriptionClassName={clsx(styles.description, descriptionClassName)} @@ -81,21 +87,23 @@ export const ConfirmModal = ({ reverse: reverseFooter, })} > - - - + {onCancel !== false ? ( + + + + ) : null} {CustomConfirmButton ? 
( ) : ( diff --git a/packages/frontend/core/src/blocksuite/attachment-viewer/audio/audio-block.css.ts b/packages/frontend/core/src/blocksuite/attachment-viewer/audio/audio-block.css.ts index 3e2e9d4c23..8182ffac72 100644 --- a/packages/frontend/core/src/blocksuite/attachment-viewer/audio/audio-block.css.ts +++ b/packages/frontend/core/src/blocksuite/attachment-viewer/audio/audio-block.css.ts @@ -22,4 +22,17 @@ export const notesButtonIcon = style({ fontSize: 24, width: '1em', height: '1em', + display: 'inline-flex', + alignItems: 'center', + justifyContent: 'center', +}); + +export const error = style({ + color: cssVarV2('aI/errorText'), +}); + +export const publicUserLabel = style({ + fontSize: cssVar('fontXs'), + fontWeight: 500, + userSelect: 'none', }); diff --git a/packages/frontend/core/src/blocksuite/attachment-viewer/audio/audio-block.tsx b/packages/frontend/core/src/blocksuite/attachment-viewer/audio/audio-block.tsx index 895bd6f87f..ab54c6ab9d 100644 --- a/packages/frontend/core/src/blocksuite/attachment-viewer/audio/audio-block.tsx +++ b/packages/frontend/core/src/blocksuite/attachment-viewer/audio/audio-block.tsx @@ -1,13 +1,17 @@ -import { Button, Tooltip } from '@affine/component'; +import { Button, Tooltip, useConfirmModal } from '@affine/component'; import { AudioPlayer } from '@affine/core/components/audio-player'; import { AnimatedTranscribeIcon } from '@affine/core/components/audio-player/lottie/animated-transcribe-icon'; import { useSeekTime } from '@affine/core/components/audio-player/use-seek-time'; import { useEnableAI } from '@affine/core/components/hooks/affine/use-enable-ai'; +import { useAsyncCallback } from '@affine/core/components/hooks/affine-async-hooks'; +import { CurrentServerScopeProvider } from '@affine/core/components/providers/current-server-scope'; +import { PublicUserLabel } from '@affine/core/modules/cloud/views/public-user'; +import { GlobalDialogService } from '@affine/core/modules/dialogs'; import type { 
AudioAttachmentBlock } from '@affine/core/modules/media/entities/audio-attachment-block'; import { useAttachmentMediaBlock } from '@affine/core/modules/media/views/use-attachment-media'; -import { useI18n } from '@affine/i18n'; -import { useLiveData } from '@toeverything/infra'; -import { useCallback, useMemo } from 'react'; +import { Trans, useI18n } from '@affine/i18n'; +import { useLiveData, useService } from '@toeverything/infra'; +import { useCallback, useMemo, useState } from 'react'; import type { AttachmentViewerProps } from '../types'; import * as styles from './audio-block.css'; @@ -19,12 +23,15 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => { const stats = useLiveData(audioMedia.stats$); const loading = useLiveData(audioMedia.loading$); const expanded = useLiveData(block.expanded$); - const transcribing = useLiveData(block.transcribing$); - const transcribed = useLiveData(block.transcribed$); + const [preflightChecking, setPreflightChecking] = useState(false); + const transcribing = + useLiveData(block.transcriptionJob.transcribing$) || preflightChecking; + const error = useLiveData(block.transcriptionJob.error$); + const transcribed = useLiveData(block.hasTranscription$); const handleClick = useCallback((e: React.MouseEvent) => { e.stopPropagation(); }, []); - + const confirmModal = useConfirmModal(); const seekTime = useSeekTime(playbackState, stats.duration); const handlePlay = useCallback(() => { @@ -50,6 +57,66 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => { const enableAi = useEnableAI(); + const globalDialogService = useService(GlobalDialogService); + + const handleNotesClick = useAsyncCallback(async () => { + if (!enableAi || transcribing) { + return; + } + + if (transcribed) { + block.expanded$.setValue(!expanded); + return; + } + + if (!block.transcriptionJob.currentUserId) { + confirmModal.openConfirmModal({ + title: t['com.affine.ai.login-required.dialog-title'](), + 
description: t['com.affine.ai.login-required.dialog-content'](), + confirmText: t['com.affine.ai.login-required.dialog-confirm'](), + confirmButtonOptions: { + variant: 'primary', + }, + cancelText: t['com.affine.ai.login-required.dialog-cancel'](), + onConfirm: () => { + globalDialogService.open('sign-in', {}); + }, + }); + return; + } + + setPreflightChecking(true); + const result = await block.transcriptionJob.preflightCheck(); + setPreflightChecking(false); + if (result?.error === 'created-by-others') { + confirmModal.openConfirmModal({ + title: t['com.affine.audio.transcribe.non-owner.confirm.title'](), + description: ( + + Please contact to upgrade AI + rights or resend the attachment. + + ), + onCancel: false, + confirmText: t['Confirm'](), + confirmButtonOptions: { + variant: 'primary', + }, + }); + } else { + await block.transcribe(); + } + }, [ + enableAi, + transcribing, + transcribed, + block, + expanded, + confirmModal, + t, + globalDialogService, + ]); + const notesEntry = useMemo(() => { if (!enableAi) { return null; @@ -62,37 +129,37 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => { state={transcribing ? 'transcribing' : 'idle'} /> } - disabled={transcribing} size="large" prefixClassName={styles.notesButtonIcon} className={styles.notesButton} - onClick={() => { - if (transcribed) { - block.expanded$.setValue(!expanded); - } else { - block.transcribe(); - } - }} + onClick={handleNotesClick} > - {t['com.affine.attachmentViewer.audio.notes']()} + {transcribing + ? t['com.affine.audio.transcribing']() + : t['com.affine.audio.notes']()} ); if (transcribing) { return ( - + {inner} ); } return inner; - }, [enableAi, transcribing, t, transcribed, block, expanded]); + }, [enableAi, transcribing, handleNotesClick, t]); + + const sizeEntry = useMemo(() => { + if (error) { + return
{error.message}
; + } + return block.props.props.size; + }, [error, block.props.props.size]); return ( { onPause={handlePause} onStop={handleStop} onSeek={handleSeek} - notesEntry={notesEntry} + notesEntry={ + {notesEntry} + } /> ); }; diff --git a/packages/frontend/core/src/components/audio-player/audio-player.tsx b/packages/frontend/core/src/components/audio-player/audio-player.tsx index 193053df5b..058167ef66 100644 --- a/packages/frontend/core/src/components/audio-player/audio-player.tsx +++ b/packages/frontend/core/src/components/audio-player/audio-player.tsx @@ -23,7 +23,7 @@ const formatTime = (seconds: number): string => { export interface AudioPlayerProps { // Audio metadata name: string; - size: number; + size: number | ReactNode; // the size entry may be used for drawing error message waveform: number[] | null; // Playback state playbackState: 'idle' | 'playing' | 'paused' | 'stopped'; @@ -97,7 +97,9 @@ export const AudioPlayer = ({
{name}
-
{bytes(size)}
+
+ {typeof size === 'number' ? bytes(size) : size} +
diff --git a/packages/frontend/core/src/components/audio-player/lottie/styles.css.ts b/packages/frontend/core/src/components/audio-player/lottie/styles.css.ts index ccaf638058..cf8e30183d 100644 --- a/packages/frontend/core/src/components/audio-player/lottie/styles.css.ts +++ b/packages/frontend/core/src/components/audio-player/lottie/styles.css.ts @@ -1,6 +1,8 @@ import { cssVarV2 } from '@toeverything/theme/v2'; import { globalStyle, style } from '@vanilla-extract/css'; -export const root = style({}); +export const root = style({ + display: 'inline-flex', +}); // replace primary colors to cssVarV2('icon/primary') const iconPrimaryColors = [ diff --git a/packages/frontend/core/src/components/audio-player/use-seek-time.ts b/packages/frontend/core/src/components/audio-player/use-seek-time.ts index e80b4d77a2..7d01ddf74d 100644 --- a/packages/frontend/core/src/components/audio-player/use-seek-time.ts +++ b/packages/frontend/core/src/components/audio-player/use-seek-time.ts @@ -26,7 +26,7 @@ export const useSeekTime = ( playbackState.state === 'playing' ? 
(Date.now() - playbackState.updateTime) / 1000 : 0; - // if timeElapsed + playbackState.seekOffset is closed to duration, + // if timeElapsed + playbackState.seekOffset is close to duration, // set seekTime to duration // this is to avoid the seek time being set to a value that is not exactly the same as the duration // at the end of the audio diff --git a/packages/frontend/core/src/components/providers/current-server-scope.tsx b/packages/frontend/core/src/components/providers/current-server-scope.tsx new file mode 100644 index 0000000000..bf28a6715d --- /dev/null +++ b/packages/frontend/core/src/components/providers/current-server-scope.tsx @@ -0,0 +1,47 @@ +import { ServersService } from '@affine/core/modules/cloud'; +import { GlobalContextService } from '@affine/core/modules/global-context'; +import { FrameworkScope, useLiveData, useService } from '@toeverything/infra'; +import { useMemo } from 'react'; + +export const CurrentServerScopeProvider = ({ + children, +}: { + children: React.ReactNode; +}) => { + const globalContext = useService(GlobalContextService).globalContext; + const serversService = useService(ServersService); + const currentServerId = useLiveData(globalContext.serverId.$); + const serverService = useLiveData( + useMemo(() => { + if (!currentServerId) { + return null; + } + return serversService.server$(currentServerId); + }, [currentServerId, serversService]) + ); + + if (!serverService) { + // todo(@pengx17): render a loading/error component here if not found? 
+ return null; + } + + return ( + {children} + ); +}; + +export const useCurrentServerService = () => { + const globalContext = useService(GlobalContextService).globalContext; + const serversService = useService(ServersService); + const currentServerId = useLiveData(globalContext.serverId.$); + const serverService = useLiveData( + useMemo(() => { + if (!currentServerId) { + return null; + } + return serversService.server$(currentServerId); + }, [currentServerId, serversService]) + ); + + return serverService ?? undefined; +}; diff --git a/packages/frontend/core/src/modules/cloud/views/public-user.css.ts b/packages/frontend/core/src/modules/cloud/views/public-user.css.ts new file mode 100644 index 0000000000..eba825fd8e --- /dev/null +++ b/packages/frontend/core/src/modules/cloud/views/public-user.css.ts @@ -0,0 +1,20 @@ +import { style } from '@vanilla-extract/css'; + +export const publicUserLabel = style({ + fontSize: 'inherit', +}); + +export const publicUserLabelLoading = style([ + publicUserLabel, + { + opacity: 0.5, + }, +]); + +export const publicUserLabelRemoved = style([ + publicUserLabel, + { + opacity: 0.5, + textDecoration: 'line-through', + }, +]); diff --git a/packages/frontend/core/src/modules/cloud/views/public-user.tsx b/packages/frontend/core/src/modules/cloud/views/public-user.tsx new file mode 100644 index 0000000000..be2d48ac5e --- /dev/null +++ b/packages/frontend/core/src/modules/cloud/views/public-user.tsx @@ -0,0 +1,38 @@ +import { useCurrentServerService } from '@affine/core/components/providers/current-server-scope'; +import { useI18n } from '@affine/i18n'; +import { useLiveData } from '@toeverything/infra'; +import { useLayoutEffect, useMemo } from 'react'; + +import { PublicUserService } from '../services/public-user'; +import * as styles from './public-user.css'; + +export const PublicUserLabel = ({ id }: { id: string }) => { + const serverService = useCurrentServerService(); + const publicUser = useMemo(() => { + return 
serverService?.scope.get(PublicUserService); + }, [serverService]); + + useLayoutEffect(() => { + if (publicUser) { + publicUser.revalidate(id); + } + }, [id, publicUser]); + + const user = useLiveData(publicUser?.publicUser$(id)); + const isLoading = useLiveData(publicUser?.isLoading$(id)); + const t = useI18n(); + + if (isLoading && !user) { + return ...; + } + + if (user?.removed) { + return ( + + {t['Unknown User']()} + + ); + } + + return {user?.name}; +}; diff --git a/packages/frontend/core/src/modules/media/entities/audio-attachment-block.ts b/packages/frontend/core/src/modules/media/entities/audio-attachment-block.ts index 3696114767..879fe44687 100644 --- a/packages/frontend/core/src/modules/media/entities/audio-attachment-block.ts +++ b/packages/frontend/core/src/modules/media/entities/audio-attachment-block.ts @@ -1,3 +1,5 @@ +import { DebugLogger } from '@affine/debug'; +import { AiJobStatus } from '@affine/graphql'; import { type AttachmentBlockModel, TranscriptionBlockFlavour, @@ -6,31 +8,16 @@ import { import type { AffineTextAttributes } from '@blocksuite/affine/shared/types'; import { type DeltaInsert, Text } from '@blocksuite/affine/store'; import { computed } from '@preact/signals-core'; -import { - catchErrorInto, - effect, - Entity, - fromPromise, - LiveData, - onComplete, - onStart, -} from '@toeverything/infra'; +import { Entity, LiveData } from '@toeverything/infra'; import { cssVarV2 } from '@toeverything/theme/v2'; -import { EMPTY, mergeMap, switchMap } from 'rxjs'; +import type { WorkspaceService } from '../../workspace'; import type { AudioMediaManagerService } from '../services/audio-media-manager'; import type { AudioMedia } from './audio-media'; +import { AudioTranscriptionJob } from './audio-transcription-job'; +import type { TranscriptionResult } from './types'; -export interface TranscriptionResult { - title: string; - summary: string; - segments: { - speaker: string; - start_time: string; - end_time: string; - transcription: 
string; - }[]; -} +const logger = new DebugLogger('audio-attachment-block'); // BlockSuiteError: yText must not contain "\r" because it will break the range synchronization function sanitizeText(text: string) { @@ -41,42 +28,22 @@ export class AudioAttachmentBlock extends Entity { private readonly refCount$ = new LiveData(0); readonly audioMedia: AudioMedia; constructor( - public readonly audioMediaManagerService: AudioMediaManagerService + readonly audioMediaManagerService: AudioMediaManagerService, + readonly workspaceService: WorkspaceService ) { super(); const mediaRef = audioMediaManagerService.ensureMediaEntity(this.props); this.audioMedia = mediaRef.media; this.disposables.push(() => mediaRef.release()); + this.disposables.push(() => { + this.transcriptionJob.dispose(); + }); } // rendering means the attachment is visible in the editor + // it is used to determine if we should show show the audio player on the sidebar rendering$ = this.refCount$.map(refCount => refCount > 0); expanded$ = new LiveData(true); - transcribing$ = new LiveData(false); - transcriptionError$ = new LiveData(null); - transcribed$ = LiveData.computed(get => { - const transcriptionBlock = get(this.transcriptionBlock$); - if (!transcriptionBlock) { - return null; - } - const childMap = get(LiveData.fromSignal(transcriptionBlock.childMap)); - return childMap.size > 0; - }); - - transcribe = effect( - switchMap(() => - fromPromise(this.doTranscribe()).pipe( - mergeMap(result => { - // attach transcription result to the block - this.fillTranscriptionResult(result); - return EMPTY; - }), - catchErrorInto(this.transcriptionError$), - onStart(() => this.transcribing$.setValue(true)), - onComplete(() => this.transcribing$.setValue(false)) - ) - ) - ); readonly transcriptionBlock$ = LiveData.fromSignal( computed(() => { @@ -91,59 +58,108 @@ export class AudioAttachmentBlock extends Entity { }) ); - // TODO: use real implementation - private readonly doTranscribe = async (): Promise => { - try { 
- const buffer = await this.audioMedia.getBuffer(); - if (!buffer) { - throw new Error('No audio buffer available'); - } + hasTranscription$ = LiveData.computed(get => { + const transcriptionBlock = get(this.transcriptionBlock$); + if (!transcriptionBlock) { + return null; + } + const childMap = get(LiveData.fromSignal(transcriptionBlock.childMap)); + return childMap.size > 0; + }); - // Send binary audio data directly - const blob = new Blob([buffer], { type: 'audio/wav' }); // adjust mime type if needed - const formData = new FormData(); - formData.append('audio', blob); + transcriptionJob: AudioTranscriptionJob = this.createTranscriptionJob(); - const response = await fetch('http://localhost:6544/transcribe', { - method: 'POST', - body: formData, + mount() { + if ( + this.transcriptionJob.isCreator() && + this.transcriptionJob.status$.value.status === 'waiting-for-job' && + !this.hasTranscription$.value + ) { + this.transcribe().catch(error => { + logger.error('Error transcribing audio:', error); }); + } - if (!response.ok) { - throw new Error(`Transcription failed: ${response.statusText}`); + this.refCount$.setValue(this.refCount$.value + 1); + } + + unmount() { + this.refCount$.setValue(this.refCount$.value - 1); + } + + private createTranscriptionJob() { + if (!this.props.props.sourceId) { + throw new Error('No source id'); + } + + let transcriptionBlockProps = this.transcriptionBlock$.value?.props; + + if (!transcriptionBlockProps) { + // transcription block is not created yet, we need to create it + this.props.doc.addBlock( + 'affine:transcription', + { + transcription: {}, + }, + this.props.id + ); + transcriptionBlockProps = this.transcriptionBlock$.value?.props; + } + + if (!transcriptionBlockProps) { + throw new Error('No transcription block props'); + } + + const job = this.framework.createEntity(AudioTranscriptionJob, { + blobId: this.props.props.sourceId, + blockProps: transcriptionBlockProps, + getAudioFile: async () => { + const buffer = await 
this.audioMedia.getBuffer(); + if (!buffer) { + throw new Error('No audio buffer available'); + } + const blob = new Blob([buffer], { type: this.props.props.type }); + const file = new File([blob], this.props.props.name, { + type: this.props.props.type, + }); + return file; + }, + }); + + return job; + } + + readonly transcribe = async () => { + try { + // if job is already running, we should not start it again + if (this.transcriptionJob.status$.value.status !== 'waiting-for-job') { + return; + } + const status = await this.transcriptionJob.start(); + if (status.status === AiJobStatus.claimed) { + this.fillTranscriptionResult(status.result); } - - const result = await response.json(); - return result.transcription; } catch (error) { - console.error('Error transcribing audio:', error); + logger.error('Error transcribing audio:', error); throw error; } }; private readonly fillTranscriptionResult = (result: TranscriptionResult) => { - this.props.props.caption = result.title; - // todo: add transcription block schema etc. - const transcriptionBlockId = this.props.doc.addBlock( - 'affine:transcription', - { - transcription: result, - }, - this.props.id - ); + this.props.props.caption = result.title ?? 
''; const calloutId = this.props.doc.addBlock( 'affine:callout', { emoji: '💬', }, - transcriptionBlockId + this.transcriptionBlock$.value?.id ); // todo: refactor - const spearkerToColors = new Map(); + const speakerToColors = new Map(); for (const segment of result.segments) { - let color = spearkerToColors.get(segment.speaker); + let color = speakerToColors.get(segment.speaker); const colorOptions = [ cssVarV2.text.highlight.fg.red, cssVarV2.text.highlight.fg.green, @@ -156,12 +172,12 @@ export class AudioAttachmentBlock extends Entity { cssVarV2.text.highlight.fg.magenta, ]; if (!color) { - color = colorOptions[spearkerToColors.size % colorOptions.length]; - spearkerToColors.set(segment.speaker, color); + color = colorOptions[speakerToColors.size % colorOptions.length]; + speakerToColors.set(segment.speaker, color); } const deltaInserts: DeltaInsert[] = [ { - insert: sanitizeText(segment.start_time + ' ' + segment.speaker), + insert: sanitizeText(segment.start + ' ' + segment.speaker), attributes: { color, bold: true, @@ -180,12 +196,4 @@ export class AudioAttachmentBlock extends Entity { ); } }; - - mount() { - this.refCount$.setValue(this.refCount$.value + 1); - } - - unmount() { - this.refCount$.setValue(this.refCount$.value - 1); - } } diff --git a/packages/frontend/core/src/modules/media/entities/audio-media.ts b/packages/frontend/core/src/modules/media/entities/audio-media.ts index 964f3eecc5..cff3570290 100644 --- a/packages/frontend/core/src/modules/media/entities/audio-media.ts +++ b/packages/frontend/core/src/modules/media/entities/audio-media.ts @@ -162,7 +162,7 @@ export class AudioMedia extends Entity { const startTime = performance.now(); // calculating audio stats is expensive. Maybe persist the result in cache? 
- const stats = await this.calcuateStatsFromBuffer(blob); + const stats = await this.calculateStatsFromBuffer(blob); logger.debug( `Calculate audio stats time: ${performance.now() - startTime}ms` ); @@ -177,9 +177,8 @@ export class AudioMedia extends Entity { return fromPromise(async () => { return this.loadAudioBuffer(); }).pipe( - mergeMap(({ blob, duration, waveform }) => { + mergeMap(({ blob, waveform }) => { const url = URL.createObjectURL(blob); - this.duration$.setValue(duration); // Set the audio element source this.audioElement.src = url; this.waveform$.setValue(waveform); @@ -187,6 +186,9 @@ export class AudioMedia extends Entity { if (this.playbackState$.getValue().state === 'playing') { this.play(true); } + this.audioElement.onloadedmetadata = () => { + this.duration$.setValue(this.audioElement.duration); + }; return EMPTY; }), onStart(() => this.loading$.setValue(true)), @@ -397,13 +399,13 @@ export class AudioMedia extends Entity { return this.playbackState$.getValue(); } - private async calcuateStatsFromBuffer(buffer: Blob) { + private async calculateStatsFromBuffer(buffer: Blob) { const audioContext = new AudioContext(); const audioBuffer = await audioContext.decodeAudioData( await buffer.arrayBuffer() ); const waveform = await this.calculateWaveform(audioBuffer); - return { waveform, duration: audioBuffer.duration }; + return { waveform }; } /** diff --git a/packages/frontend/core/src/modules/media/entities/audio-transcription-job-store.ts b/packages/frontend/core/src/modules/media/entities/audio-transcription-job-store.ts new file mode 100644 index 0000000000..82ad498f3b --- /dev/null +++ b/packages/frontend/core/src/modules/media/entities/audio-transcription-job-store.ts @@ -0,0 +1,96 @@ +import { + claimAudioTranscriptionMutation, + getAudioTranscriptionQuery, + submitAudioTranscriptionMutation, +} from '@affine/graphql'; +import { Entity } from '@toeverything/infra'; + +import type { DefaultServerService, WorkspaceServerService } from 
'../../cloud'; +import { GraphQLService } from '../../cloud/services/graphql'; +import type { WorkspaceService } from '../../workspace'; + +export class AudioTranscriptionJobStore extends Entity<{ + readonly blobId: string; + readonly getAudioFile: () => Promise; +}> { + constructor( + private readonly workspaceService: WorkspaceService, + private readonly workspaceServerService: WorkspaceServerService, + private readonly defaultServerService: DefaultServerService + ) { + super(); + } + + private get serverService() { + return ( + this.workspaceServerService.server || this.defaultServerService.server + ); + } + + private get graphqlService() { + return this.serverService?.scope.get(GraphQLService); + } + + private get currentWorkspaceId() { + return this.workspaceService.workspace.id; + } + + submitAudioTranscription = async () => { + const graphqlService = this.graphqlService; + if (!graphqlService) { + throw new Error('No graphql service available'); + } + const file = await this.props.getAudioFile(); + const response = await graphqlService.gql({ + query: submitAudioTranscriptionMutation, + variables: { + workspaceId: this.currentWorkspaceId, + blobId: this.props.blobId, + blob: file, + }, + }); + if (!response.submitAudioTranscription?.id) { + throw new Error('Failed to submit audio transcription'); + } + return response.submitAudioTranscription; + }; + + getAudioTranscription = async (blobId: string, jobId?: string) => { + const graphqlService = this.graphqlService; + if (!graphqlService) { + throw new Error('No graphql service available'); + } + const currentWorkspaceId = this.currentWorkspaceId; + if (!currentWorkspaceId) { + throw new Error('No current workspace id'); + } + const response = await graphqlService.gql({ + query: getAudioTranscriptionQuery, + variables: { + workspaceId: currentWorkspaceId, + jobId, + blobId, + }, + }); + if (!response.currentUser?.copilot?.audioTranscription) { + return null; + } + return 
response.currentUser.copilot.audioTranscription; + }; + claimAudioTranscription = async (jobId: string) => { + const graphqlService = this.graphqlService; + if (!graphqlService) { + throw new Error('No graphql service available'); + } + const response = await graphqlService.gql({ + query: claimAudioTranscriptionMutation, + variables: { + jobId, + }, + }); + if (!response.claimAudioTranscription) { + throw new Error('Failed to claim transcription result'); + } + return response.claimAudioTranscription; + }; +} diff --git a/packages/frontend/core/src/modules/media/entities/audio-transcription-job.ts b/packages/frontend/core/src/modules/media/entities/audio-transcription-job.ts new file mode 100644 index 0000000000..2a5d1a0eea --- /dev/null +++ b/packages/frontend/core/src/modules/media/entities/audio-transcription-job.ts @@ -0,0 +1,281 @@ +import { shallowEqual } from '@affine/component'; +import { DebugLogger } from '@affine/debug'; +import { UserFriendlyError } from '@affine/error'; +import { AiJobStatus } from '@affine/graphql'; +import type { TranscriptionBlockProps } from '@blocksuite/affine/model'; +import { Entity, LiveData } from '@toeverything/infra'; + +import type { DefaultServerService, WorkspaceServerService } from '../../cloud'; +import { AuthService } from '../../cloud/services/auth'; +import { AudioTranscriptionJobStore } from './audio-transcription-job-store'; +import type { TranscriptionResult } from './types'; + +// The UI status of the transcription job +export type TranscriptionStatus = + | { + status: 'waiting-for-job'; + } + | { + status: 'started'; + } + | { + status: AiJobStatus.pending; + } + | { + status: AiJobStatus.running; + } + | { + status: AiJobStatus.failed; + error: UserFriendlyError; // <<- this is not visible on UI yet + } + | { + status: AiJobStatus.finished; // ready to be claimed, but may be rejected because of insufficient credits + } + | { + status: AiJobStatus.claimed; + result: TranscriptionResult; + }; + +const logger = 
new DebugLogger('audio-transcription-job'); // tail of a statement that starts before this chunk (presumably the `logger` declaration used below)

// Facts on transcription job ownership:
// 1. jobid + blobid is unique for a given user
// 2. only the creator can claim the job
// 3. all users can query the claimed job result
// 4. claim a job requires AI credits

/**
 * Runs one audio transcription job for a single blob.
 *
 * The job looks up (or submits) a transcription through
 * `AudioTranscriptionJobStore`, polls it until it is finished or claimed,
 * claims the result and publishes every step through `status$`.
 *
 * Props:
 * - `blockProps`: the transcription block props; `jobId` and `createdBy` are
 *   written back to it by `start()`.
 * - `blobId`: id of the audio blob being transcribed.
 * - `getAudioFile`: async accessor for the audio, forwarded to the store.
 *
 * NOTE(review): generic type arguments (e.g. on `Promise` and `LiveData`)
 * look like they were stripped from this copy of the file — confirm against
 * the original before merging.
 */
export class AudioTranscriptionJob extends Entity<{
  readonly blockProps: TranscriptionBlockProps;
  readonly blobId: string;
  readonly getAudioFile: () => Promise;
}> {
  constructor(
    private readonly workspaceServerService: WorkspaceServerService,
    private readonly defaultServerService: DefaultServerService
  ) {
    super();
    // Flip the flag on disposal so `start`, `claim` and the poll loop can bail out.
    this.disposables.push(() => {
      this.disposed = true;
    });
  }

  /** Set to `true` once the entity's disposables have run. */
  disposed = false;

  /** Writable backing store for the job status. */
  private readonly _status$ = new LiveData({
    status: 'waiting-for-job',
  });

  /** Store entity that performs the actual get/submit/claim requests. */
  private readonly store = this.framework.createEntity(
    AudioTranscriptionJobStore,
    {
      blobId: this.props.blobId,
      getAudioFile: this.props.getAudioFile,
    }
  );

  /** Public status stream; consecutive shallow-equal values are collapsed. */
  status$ = this._status$.distinctUntilChanged(shallowEqual);

  /**
   * `true` while the job is in flight. `finished` is included on purpose:
   * a finished job has not been claimed yet, so no result is available.
   */
  transcribing$ = this.status$.map(status => {
    return (
      status.status === 'started' ||
      status.status === AiJobStatus.pending ||
      status.status === AiJobStatus.running ||
      status.status === AiJobStatus.finished
    );
  });

  /** The error carried by a `failed` status, otherwise `null`. */
  error$ = this.status$.map(status => {
    if (status.status === AiJobStatus.failed) {
      return status.error;
    }
    return null;
  });

  /**
   * Checks whether the transcription job may be kicked off.
   *
   * @returns `undefined` when nothing blocks the start, or
   * `{ error: 'created-by-others', userId }` when the block references a job
   * id that the store cannot find and the block was created by another user.
   */
  readonly preflightCheck = async () => {
    // if the job id is given, check if the job exists
    if (this.props.blockProps.jobId) {
      const existingJob = await this.store.getAudioTranscription(
        this.props.blobId,
        this.props.blockProps.jobId
      );

      if (existingJob?.status === AiJobStatus.claimed) {
        // if job exists, anyone can query it
        return;
      }

      if (
        !existingJob &&
        this.props.blockProps.createdBy &&
        this.props.blockProps.createdBy !== this.currentUserId
      ) {
        return {
          error: 'created-by-others',
          userId: this.props.blockProps.createdBy,
        };
      }
    }

    // if no job id, anyone can start a new job
    return;
  };

  /**
   * Starts (or resumes) the transcription: reuses the existing job if the
   * store returns one, otherwise submits a new job, then polls and claims.
   *
   * Failures after the initial disposed check are not rethrown; they are
   * stored as a `failed` status carrying a `UserFriendlyError`.
   *
   * @returns the status value after the run.
   * @throws Error when the entity is already disposed.
   */
  async start() {
    if (this.disposed) {
      logger.debug('Job already disposed, cannot start');
      throw new Error('Job already disposed');
    }

    this._status$.value = {
      status: 'started',
    };

    try {
      // firstly check if there is a job already
      logger.debug('Checking for existing transcription job', {
        blobId: this.props.blobId,
        jobId: this.props.blockProps.jobId,
      });
      let job: {
        id: string;
        status: AiJobStatus;
      } | null = await this.store.getAudioTranscription(
        this.props.blobId,
        this.props.blockProps.jobId
      );

      if (!job) {
        logger.debug('No existing job found, submitting new transcription job');
        job = await this.store.submitAudioTranscription();
      } else {
        logger.debug('Found existing job', {
          jobId: job.id,
          status: job.status,
        });
      }

      // Record the job and its owner on the block so later runs can resume it.
      this.props.blockProps.jobId = job.id;
      this.props.blockProps.createdBy = this.currentUserId;

      if (job.status !== AiJobStatus.failed) {
        this._status$.value = {
          status: AiJobStatus.pending,
        };
      } else {
        logger.debug('Job submission failed');
        throw UserFriendlyError.fromAny('failed to submit transcription');
      }

      await this.untilJobFinishedOrClaimed();
      await this.claim();
    } catch (err) {
      logger.debug('Error during job submission', { error: err });
      this._status$.value = {
        status: AiJobStatus.failed,
        error: UserFriendlyError.fromAny(err),
      };
    }
    return this.status$.value;
  }

  /**
   * Polls the store until the job reports `finished` or `claimed`, then sets
   * the status to `finished`. The loop also ends (without a status change)
   * when the entity is disposed, the job id is cleared, or the block creator
   * is no longer the current user.
   *
   * @throws UserFriendlyError when the job is missing or reports `failed`.
   */
  private async untilJobFinishedOrClaimed() {
    const pollIntervalMs = 3000;

    while (
      !this.disposed &&
      this.props.blockProps.jobId &&
      this.props.blockProps.createdBy === this.currentUserId
    ) {
      logger.debug('Polling job status', {
        jobId: this.props.blockProps.jobId,
      });
      const job = await this.store.getAudioTranscription(
        this.props.blobId,
        this.props.blockProps.jobId
      );

      if (!job || job.status === 'failed') {
        logger.debug('Job failed during polling', {
          jobId: this.props.blockProps.jobId,
        });
        throw UserFriendlyError.fromAny('Transcription job failed');
      }

      if (job.status === 'finished' || job.status === 'claimed') {
        logger.debug('Job finished, ready to claim', {
          jobId: this.props.blockProps.jobId,
        });
        this._status$.value = {
          status: AiJobStatus.finished,
        };
        return;
      }

      // Add delay between polling attempts
      await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
    }
  }

  /**
   * Claims the job referenced by `blockProps.jobId` and publishes the result
   * as a `claimed` status. Missing summary/title/segments default to empty
   * values.
   *
   * @throws Error when the entity is disposed, when no job id is set, or when
   * the store returns no claimed job.
   */
  async claim() {
    if (this.disposed) {
      logger.debug('Job already disposed, cannot claim');
      throw new Error('Job already disposed');
    }

    logger.debug('Attempting to claim job', {
      jobId: this.props.blockProps.jobId,
    });

    if (!this.props.blockProps.jobId) {
      logger.debug('No job id found, cannot claim');
      throw new Error('No job id found');
    }

    const claimedJob = await this.store.claimAudioTranscription(
      this.props.blockProps.jobId
    );

    if (claimedJob) {
      logger.debug('Successfully claimed job', {
        jobId: this.props.blockProps.jobId,
      });
      const result: TranscriptionResult = {
        summary: claimedJob.summary ?? '',
        title: claimedJob.title ?? '',
        segments:
          claimedJob.transcription?.map(segment => ({
            speaker: segment.speaker,
            start: segment.start,
            end: segment.end,
            transcription: segment.transcription,
          })) ?? [],
      };

      this._status$.value = {
        status: AiJobStatus.claimed,
        result,
      };
    } else {
      throw new Error('Failed to claim transcription result');
    }
  }

  /**
   * Whether the current user created the job recorded on the block.
   *
   * @returns `true` only when the block has both a job id and a creator id
   * and that creator id equals `currentUserId`. Always a real boolean (the
   * bare `&&` chain would otherwise leak a string or `undefined`).
   */
  isCreator(): boolean {
    const { jobId, createdBy } = this.props.blockProps;
    return (
      Boolean(jobId) && Boolean(createdBy) && createdBy === this.currentUserId
    );
  }

  /** The workspace's server when it has one, otherwise the default server. */
  private get serverService() {
    return (
      this.workspaceServerService.server || this.defaultServerService.server
    );
  }

  /**
   * Id of the account in the selected server's auth session, or `undefined`
   * when no `AuthService` is available or no account is present.
   */
  get currentUserId() {
    const authService = this.serverService?.scope.getOptional(AuthService);
    if (!authService) {
      return;
    }
    return authService.session.account$.value?.id;
  }
}

// From packages/frontend/core/src/modules/media/entities/types.ts
/**
 * Result of a claimed transcription job, as assembled by
 * `AudioTranscriptionJob.claim()`.
 */
export interface TranscriptionResult {
  /** Title of the transcription; empty string when the job provides none. */
  title: string;
  /** Summary of the transcription; empty string when the job provides none. */
  summary: string;
  /** Transcribed segments; empty when the job provides none. */
  segments: {
    speaker: string;
    start: string;
    end: string;
    transcription: string;
  }[];
}
'./entities/audio-transcription-job-store'; // completes the import statement that begins on the preceding line

/**
 * Registers the media module on the given framework.
 *
 * The workspace-scoped entities (`AudioMedia`, `AudioAttachmentBlock`,
 * `AudioTranscriptionJob`, `AudioTranscriptionJobStore`) and
 * `AudioAttachmentService` are registered the same way on every build.
 * Only the `GlobalMediaStateProvider` implementation and the dependency list
 * of `AudioMediaManagerService` depend on `BUILD_CONFIG.isElectron`.
 *
 * @param framework the framework to register the module on.
 */
export function configureMediaModule(framework: Framework) {
  // Registrations shared by both builds.
  framework
    .scope(WorkspaceScope)
    .entity(AudioMedia, [WorkspaceService])
    .entity(AudioAttachmentBlock, [AudioMediaManagerService, WorkspaceService])
    .entity(AudioTranscriptionJob, [
      WorkspaceServerService,
      DefaultServerService,
    ])
    .entity(AudioTranscriptionJobStore, [
      WorkspaceService,
      WorkspaceServerService,
      DefaultServerService,
    ])
    .service(AudioAttachmentService);

  // Non-electron build: web provider, no desktop API dependency.
  if (!BUILD_CONFIG.isElectron) {
    framework
      .impl(GlobalMediaStateProvider, WebGlobalMediaStateProvider)
      .scope(WorkspaceScope)
      .service(AudioMediaManagerService, [
        GlobalMediaStateProvider,
        WorkbenchService,
      ]);
    return;
  }

  // Electron build: provider backed by GlobalState, manager also gets the desktop API.
  framework
    .impl(GlobalMediaStateProvider, ElectronGlobalMediaStateProvider, [
      GlobalState,
    ])
    .scope(WorkspaceScope)
    .service(AudioMediaManagerService, [
      GlobalMediaStateProvider,
      WorkbenchService,
      DesktopApiService,
    ]);
}