feat(core): call real endpoint for audio transcription (#11139)
fix AF-2359
@@ -22,4 +22,17 @@ export const notesButtonIcon = style({
   fontSize: 24,
   width: '1em',
   height: '1em',
+  display: 'inline-flex',
+  alignItems: 'center',
+  justifyContent: 'center',
 });
+
+export const error = style({
+  color: cssVarV2('aI/errorText'),
+});
+
+export const publicUserLabel = style({
+  fontSize: cssVar('fontXs'),
+  fontWeight: 500,
+  userSelect: 'none',
+});
@@ -1,13 +1,17 @@
-import { Button, Tooltip } from '@affine/component';
+import { Button, Tooltip, useConfirmModal } from '@affine/component';
 import { AudioPlayer } from '@affine/core/components/audio-player';
 import { AnimatedTranscribeIcon } from '@affine/core/components/audio-player/lottie/animated-transcribe-icon';
 import { useSeekTime } from '@affine/core/components/audio-player/use-seek-time';
 import { useEnableAI } from '@affine/core/components/hooks/affine/use-enable-ai';
+import { useAsyncCallback } from '@affine/core/components/hooks/affine-async-hooks';
+import { CurrentServerScopeProvider } from '@affine/core/components/providers/current-server-scope';
+import { PublicUserLabel } from '@affine/core/modules/cloud/views/public-user';
+import { GlobalDialogService } from '@affine/core/modules/dialogs';
 import type { AudioAttachmentBlock } from '@affine/core/modules/media/entities/audio-attachment-block';
 import { useAttachmentMediaBlock } from '@affine/core/modules/media/views/use-attachment-media';
-import { useI18n } from '@affine/i18n';
-import { useLiveData } from '@toeverything/infra';
-import { useCallback, useMemo } from 'react';
+import { Trans, useI18n } from '@affine/i18n';
+import { useLiveData, useService } from '@toeverything/infra';
+import { useCallback, useMemo, useState } from 'react';

 import type { AttachmentViewerProps } from '../types';
 import * as styles from './audio-block.css';
@@ -19,12 +23,15 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => {
   const stats = useLiveData(audioMedia.stats$);
   const loading = useLiveData(audioMedia.loading$);
   const expanded = useLiveData(block.expanded$);
-  const transcribing = useLiveData(block.transcribing$);
-  const transcribed = useLiveData(block.transcribed$);
+  const [preflightChecking, setPreflightChecking] = useState(false);
+  const transcribing =
+    useLiveData(block.transcriptionJob.transcribing$) || preflightChecking;
+  const error = useLiveData(block.transcriptionJob.error$);
+  const transcribed = useLiveData(block.hasTranscription$);
   const handleClick = useCallback((e: React.MouseEvent<HTMLDivElement>) => {
     e.stopPropagation();
   }, []);

+  const confirmModal = useConfirmModal();
   const seekTime = useSeekTime(playbackState, stats.duration);

   const handlePlay = useCallback(() => {
@@ -50,6 +57,66 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => {

   const enableAi = useEnableAI();

+  const globalDialogService = useService(GlobalDialogService);
+
+  const handleNotesClick = useAsyncCallback(async () => {
+    if (!enableAi || transcribing) {
+      return;
+    }
+
+    if (transcribed) {
+      block.expanded$.setValue(!expanded);
+      return;
+    }
+
+    if (!block.transcriptionJob.currentUserId) {
+      confirmModal.openConfirmModal({
+        title: t['com.affine.ai.login-required.dialog-title'](),
+        description: t['com.affine.ai.login-required.dialog-content'](),
+        confirmText: t['com.affine.ai.login-required.dialog-confirm'](),
+        confirmButtonOptions: {
+          variant: 'primary',
+        },
+        cancelText: t['com.affine.ai.login-required.dialog-cancel'](),
+        onConfirm: () => {
+          globalDialogService.open('sign-in', {});
+        },
+      });
+      return;
+    }
+
+    setPreflightChecking(true);
+    const result = await block.transcriptionJob.preflightCheck();
+    setPreflightChecking(false);
+    if (result?.error === 'created-by-others') {
+      confirmModal.openConfirmModal({
+        title: t['com.affine.audio.transcribe.non-owner.confirm.title'](),
+        description: (
+          <Trans i18nKey="com.affine.audio.transcribe.non-owner.confirm.message">
+            Please contact <PublicUserLabel id={result.userId} /> to upgrade AI
+            rights or resend the attachment.
+          </Trans>
+        ),
+        onCancel: false,
+        confirmText: t['Confirm'](),
+        confirmButtonOptions: {
+          variant: 'primary',
+        },
+      });
+    } else {
+      await block.transcribe();
+    }
+  }, [
+    enableAi,
+    transcribing,
+    transcribed,
+    block,
+    expanded,
+    confirmModal,
+    t,
+    globalDialogService,
+  ]);

   const notesEntry = useMemo(() => {
     if (!enableAi) {
       return null;
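The click handler above branches on the value returned by preflightCheck. A hedged sketch of that contract, inferred from the code in this hunk (the exact exported type is an assumption, not shown in the diff):

// undefined means "safe to start, or the job result is already claimable";
// the only surfaced error case is a job created by another user.
type PreflightResult =
  | undefined
  | { error: 'created-by-others'; userId: string };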
@@ -62,37 +129,37 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => {
             state={transcribing ? 'transcribing' : 'idle'}
           />
         }
+        disabled={transcribing}
         size="large"
         prefixClassName={styles.notesButtonIcon}
         className={styles.notesButton}
-        onClick={() => {
-          if (transcribed) {
-            block.expanded$.setValue(!expanded);
-          } else {
-            block.transcribe();
-          }
-        }}
+        onClick={handleNotesClick}
       >
-        {t['com.affine.attachmentViewer.audio.notes']()}
+        {transcribing
+          ? t['com.affine.audio.transcribing']()
+          : t['com.affine.audio.notes']()}
       </Button>
     );
     if (transcribing) {
       return (
-        <Tooltip
-          content={t['com.affine.attachmentViewer.audio.transcribing']()}
-        >
+        <Tooltip content={t['com.affine.audio.transcribing']()}>
          {inner}
        </Tooltip>
      );
    }
    return inner;
-  }, [enableAi, transcribing, t, transcribed, block, expanded]);
+  }, [enableAi, transcribing, handleNotesClick, t]);

+  const sizeEntry = useMemo(() => {
+    if (error) {
+      return <div className={styles.error}>{error.message}</div>;
+    }
+    return block.props.props.size;
+  }, [error, block.props.props.size]);
+
   return (
     <AudioPlayer
       name={block.props.props.name}
-      size={block.props.props.size}
+      size={sizeEntry}
       loading={loading}
       playbackState={playbackState?.state || 'idle'}
       waveform={stats.waveform}
@@ -103,7 +170,9 @@ const AttachmentAudioPlayer = ({ block }: { block: AudioAttachmentBlock }) => {
       onPause={handlePause}
       onStop={handleStop}
       onSeek={handleSeek}
-      notesEntry={notesEntry}
+      notesEntry={
+        <CurrentServerScopeProvider>{notesEntry}</CurrentServerScopeProvider>
+      }
     />
   );
 };
@@ -23,7 +23,7 @@ const formatTime = (seconds: number): string => {
 export interface AudioPlayerProps {
   // Audio metadata
   name: string;
-  size: number;
+  size: number | ReactNode; // the size entry may be used for drawing error message
   waveform: number[] | null;
   // Playback state
   playbackState: 'idle' | 'playing' | 'paused' | 'stopped';
@@ -97,7 +97,9 @@ export const AudioPlayer = ({
           <div className={styles.nameLabel}>{name}</div>
         </div>
         <div className={styles.upperRow}>
-          <div className={styles.sizeInfo}>{bytes(size)}</div>
+          <div className={styles.sizeInfo}>
+            {typeof size === 'number' ? bytes(size) : size}
+          </div>
         </div>
       </div>
       <div className={styles.upperRight}>
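With size widened to number | ReactNode, the same slot renders either a formatted byte count or an arbitrary node. A hedged sketch of the two call shapes (other required props omitted; illustrative, not a compilable call site):

// numeric: formatted through bytes(), e.g. "20 KB"
<AudioPlayer name="voice.wav" size={20480} />
// node: rendered verbatim; used above to surface a transcription error
<AudioPlayer name="voice.wav" size={<span>{error.message}</span>} />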
@@ -1,6 +1,8 @@
 import { cssVarV2 } from '@toeverything/theme/v2';
 import { globalStyle, style } from '@vanilla-extract/css';
-export const root = style({});
+export const root = style({
+  display: 'inline-flex',
+});

 // replace primary colors to cssVarV2('icon/primary')
 const iconPrimaryColors = [
@@ -26,7 +26,7 @@ export const useSeekTime = (
     playbackState.state === 'playing'
       ? (Date.now() - playbackState.updateTime) / 1000
       : 0;
-  // if timeElapsed + playbackState.seekOffset is closed to duration,
+  // if timeElapsed + playbackState.seekOffset is close to duration,
   // set seekTime to duration
   // this is to avoid the seek time being set to a value that is not exactly the same as the duration
   // at the end of the audio
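A minimal sketch of the clamping the corrected comment describes, assuming a small epsilon; the hook's real threshold and surrounding code are not shown in this hunk:

const elapsed =
  playbackState.state === 'playing'
    ? (Date.now() - playbackState.updateTime) / 1000
    : 0;
const raw = elapsed + playbackState.seekOffset;
// snap to the exact duration near the end so the UI never shows 0:59.97 / 1:00
const seekTime = duration - raw < 0.05 ? duration : raw;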
@@ -0,0 +1,47 @@
+import { ServersService } from '@affine/core/modules/cloud';
+import { GlobalContextService } from '@affine/core/modules/global-context';
+import { FrameworkScope, useLiveData, useService } from '@toeverything/infra';
+import { useMemo } from 'react';
+
+export const CurrentServerScopeProvider = ({
+  children,
+}: {
+  children: React.ReactNode;
+}) => {
+  const globalContext = useService(GlobalContextService).globalContext;
+  const serversService = useService(ServersService);
+  const currentServerId = useLiveData(globalContext.serverId.$);
+  const serverService = useLiveData(
+    useMemo(() => {
+      if (!currentServerId) {
+        return null;
+      }
+      return serversService.server$(currentServerId);
+    }, [currentServerId, serversService])
+  );
+
+  if (!serverService) {
+    // todo(@pengx17): render a loading/error component here if not found?
+    return null;
+  }
+
+  return (
+    <FrameworkScope scope={serverService.scope}>{children}</FrameworkScope>
+  );
+};
+
+export const useCurrentServerService = () => {
+  const globalContext = useService(GlobalContextService).globalContext;
+  const serversService = useService(ServersService);
+  const currentServerId = useLiveData(globalContext.serverId.$);
+  const serverService = useLiveData(
+    useMemo(() => {
+      if (!currentServerId) {
+        return null;
+      }
+      return serversService.server$(currentServerId);
+    }, [currentServerId, serversService])
+  );
+
+  return serverService ?? undefined;
+};
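CurrentServerScopeProvider re-scopes its children to the framework scope of whichever server is active in the global context. A hypothetical usage, mirroring how notesEntry is wrapped in audio-block.tsx above (someUserId is a placeholder):

<CurrentServerScopeProvider>
  {/* children here can resolve server-scoped services, e.g. PublicUserService */}
  <PublicUserLabel id={someUserId} />
</CurrentServerScopeProvider>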
@@ -0,0 +1,20 @@
+import { style } from '@vanilla-extract/css';
+
+export const publicUserLabel = style({
+  fontSize: 'inherit',
+});
+
+export const publicUserLabelLoading = style([
+  publicUserLabel,
+  {
+    opacity: 0.5,
+  },
+]);
+
+export const publicUserLabelRemoved = style([
+  publicUserLabel,
+  {
+    opacity: 0.5,
+    textDecoration: 'line-through',
+  },
+]);
@@ -0,0 +1,38 @@
+import { useCurrentServerService } from '@affine/core/components/providers/current-server-scope';
+import { useI18n } from '@affine/i18n';
+import { useLiveData } from '@toeverything/infra';
+import { useLayoutEffect, useMemo } from 'react';
+
+import { PublicUserService } from '../services/public-user';
+import * as styles from './public-user.css';
+
+export const PublicUserLabel = ({ id }: { id: string }) => {
+  const serverService = useCurrentServerService();
+  const publicUser = useMemo(() => {
+    return serverService?.scope.get(PublicUserService);
+  }, [serverService]);
+
+  useLayoutEffect(() => {
+    if (publicUser) {
+      publicUser.revalidate(id);
+    }
+  }, [id, publicUser]);
+
+  const user = useLiveData(publicUser?.publicUser$(id));
+  const isLoading = useLiveData(publicUser?.isLoading$(id));
+  const t = useI18n();
+
+  if (isLoading && !user) {
+    return <span className={styles.publicUserLabelLoading}>...</span>;
+  }
+
+  if (user?.removed) {
+    return (
+      <span className={styles.publicUserLabelRemoved}>
+        {t['Unknown User']()}
+      </span>
+    );
+  }
+
+  return <span className={styles.publicUserLabel}>{user?.name}</span>;
+};
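PublicUserLabel resolves PublicUserService lazily from the current server scope and revalidates on mount. A hedged sketch of the service surface the component relies on; the actual class lives in ../services/public-user and is not part of this diff:

interface PublicUserServiceShape {
  revalidate(id: string): void; // refresh the cached public-user record
  publicUser$(id: string): LiveData<{ name: string; removed?: boolean } | null>;
  isLoading$(id: string): LiveData<boolean>;
}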
@@ -1,3 +1,5 @@
+import { DebugLogger } from '@affine/debug';
+import { AiJobStatus } from '@affine/graphql';
 import {
   type AttachmentBlockModel,
   TranscriptionBlockFlavour,
@@ -6,31 +8,16 @@ import {
 import type { AffineTextAttributes } from '@blocksuite/affine/shared/types';
 import { type DeltaInsert, Text } from '@blocksuite/affine/store';
 import { computed } from '@preact/signals-core';
-import {
-  catchErrorInto,
-  effect,
-  Entity,
-  fromPromise,
-  LiveData,
-  onComplete,
-  onStart,
-} from '@toeverything/infra';
+import { Entity, LiveData } from '@toeverything/infra';
 import { cssVarV2 } from '@toeverything/theme/v2';
-import { EMPTY, mergeMap, switchMap } from 'rxjs';

+import type { WorkspaceService } from '../../workspace';
 import type { AudioMediaManagerService } from '../services/audio-media-manager';
 import type { AudioMedia } from './audio-media';
+import { AudioTranscriptionJob } from './audio-transcription-job';
+import type { TranscriptionResult } from './types';

-export interface TranscriptionResult {
-  title: string;
-  summary: string;
-  segments: {
-    speaker: string;
-    start_time: string;
-    end_time: string;
-    transcription: string;
-  }[];
-}
+const logger = new DebugLogger('audio-attachment-block');

 // BlockSuiteError: yText must not contain "\r" because it will break the range synchronization
 function sanitizeText(text: string) {
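The comment explains why sanitizeText exists: yText must never contain "\r". The function body is cut off by the hunk; a plausible one-liner, offered as an assumption rather than the repository's actual code:

function sanitizeText(text: string) {
  // normalize CRLF and stray CR to LF so yText never sees "\r"
  return text.replaceAll('\r\n', '\n').replaceAll('\r', '\n');
}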
@@ -41,42 +28,22 @@ export class AudioAttachmentBlock extends Entity<AttachmentBlockModel> {
   private readonly refCount$ = new LiveData<number>(0);
   readonly audioMedia: AudioMedia;
   constructor(
-    public readonly audioMediaManagerService: AudioMediaManagerService
+    readonly audioMediaManagerService: AudioMediaManagerService,
+    readonly workspaceService: WorkspaceService
   ) {
     super();
     const mediaRef = audioMediaManagerService.ensureMediaEntity(this.props);
     this.audioMedia = mediaRef.media;
     this.disposables.push(() => mediaRef.release());
+    this.disposables.push(() => {
+      this.transcriptionJob.dispose();
+    });
   }

   // rendering means the attachment is visible in the editor
   // it is used to determine if we should show the audio player on the sidebar
   rendering$ = this.refCount$.map(refCount => refCount > 0);
   expanded$ = new LiveData<boolean>(true);
-  transcribing$ = new LiveData<boolean>(false);
-  transcriptionError$ = new LiveData<Error | null>(null);
-  transcribed$ = LiveData.computed(get => {
-    const transcriptionBlock = get(this.transcriptionBlock$);
-    if (!transcriptionBlock) {
-      return null;
-    }
-    const childMap = get(LiveData.fromSignal(transcriptionBlock.childMap));
-    return childMap.size > 0;
-  });
-
-  transcribe = effect(
-    switchMap(() =>
-      fromPromise(this.doTranscribe()).pipe(
-        mergeMap(result => {
-          // attach transcription result to the block
-          this.fillTranscriptionResult(result);
-          return EMPTY;
-        }),
-        catchErrorInto(this.transcriptionError$),
-        onStart(() => this.transcribing$.setValue(true)),
-        onComplete(() => this.transcribing$.setValue(false))
-      )
-    )
-  );

   readonly transcriptionBlock$ = LiveData.fromSignal(
     computed(() => {
@@ -91,59 +58,108 @@ export class AudioAttachmentBlock extends Entity<AttachmentBlockModel> {
     })
   );

-  // TODO: use real implementation
-  private readonly doTranscribe = async (): Promise<TranscriptionResult> => {
-    try {
-      const buffer = await this.audioMedia.getBuffer();
-      if (!buffer) {
-        throw new Error('No audio buffer available');
-      }
+  hasTranscription$ = LiveData.computed(get => {
+    const transcriptionBlock = get(this.transcriptionBlock$);
+    if (!transcriptionBlock) {
+      return null;
+    }
+    const childMap = get(LiveData.fromSignal(transcriptionBlock.childMap));
+    return childMap.size > 0;
+  });

-      // Send binary audio data directly
-      const blob = new Blob([buffer], { type: 'audio/wav' }); // adjust mime type if needed
-      const formData = new FormData();
-      formData.append('audio', blob);
+  transcriptionJob: AudioTranscriptionJob = this.createTranscriptionJob();

-      const response = await fetch('http://localhost:6544/transcribe', {
-        method: 'POST',
-        body: formData,
-      });
+  mount() {
+    if (
+      this.transcriptionJob.isCreator() &&
+      this.transcriptionJob.status$.value.status === 'waiting-for-job' &&
+      !this.hasTranscription$.value
+    ) {
+      this.transcribe().catch(error => {
+        logger.error('Error transcribing audio:', error);
+      });
+    }

-      if (!response.ok) {
-        throw new Error(`Transcription failed: ${response.statusText}`);
-      }
+    this.refCount$.setValue(this.refCount$.value + 1);
+  }
+
+  unmount() {
+    this.refCount$.setValue(this.refCount$.value - 1);
+  }
+
+  private createTranscriptionJob() {
+    if (!this.props.props.sourceId) {
+      throw new Error('No source id');
+    }
+
+    let transcriptionBlockProps = this.transcriptionBlock$.value?.props;
+
+    if (!transcriptionBlockProps) {
+      // transcription block is not created yet, we need to create it
+      this.props.doc.addBlock(
+        'affine:transcription',
+        {
+          transcription: {},
+        },
+        this.props.id
+      );
+      transcriptionBlockProps = this.transcriptionBlock$.value?.props;
+    }
+
+    if (!transcriptionBlockProps) {
+      throw new Error('No transcription block props');
+    }
+
+    const job = this.framework.createEntity(AudioTranscriptionJob, {
+      blobId: this.props.props.sourceId,
+      blockProps: transcriptionBlockProps,
+      getAudioFile: async () => {
+        const buffer = await this.audioMedia.getBuffer();
+        if (!buffer) {
+          throw new Error('No audio buffer available');
+        }
+        const blob = new Blob([buffer], { type: this.props.props.type });
+        const file = new File([blob], this.props.props.name, {
+          type: this.props.props.type,
+        });
+        return file;
+      },
+    });
+
+    return job;
+  }
+
+  readonly transcribe = async () => {
+    try {
+      // if job is already running, we should not start it again
+      if (this.transcriptionJob.status$.value.status !== 'waiting-for-job') {
+        return;
+      }
+      const status = await this.transcriptionJob.start();
+      if (status.status === AiJobStatus.claimed) {
+        this.fillTranscriptionResult(status.result);
+      }
-      const result = await response.json();
-      return result.transcription;
     } catch (error) {
-      console.error('Error transcribing audio:', error);
+      logger.error('Error transcribing audio:', error);
       throw error;
     }
   };

   private readonly fillTranscriptionResult = (result: TranscriptionResult) => {
-    this.props.props.caption = result.title;
-    // todo: add transcription block schema etc.
-    const transcriptionBlockId = this.props.doc.addBlock(
-      'affine:transcription',
-      {
-        transcription: result,
-      },
-      this.props.id
-    );
+    this.props.props.caption = result.title ?? '';

     const calloutId = this.props.doc.addBlock(
       'affine:callout',
       {
         emoji: '💬',
       },
-      transcriptionBlockId
+      this.transcriptionBlock$.value?.id
     );

     // todo: refactor
-    const spearkerToColors = new Map<string, string>();
+    const speakerToColors = new Map<string, string>();
     for (const segment of result.segments) {
-      let color = spearkerToColors.get(segment.speaker);
+      let color = speakerToColors.get(segment.speaker);
       const colorOptions = [
         cssVarV2.text.highlight.fg.red,
         cssVarV2.text.highlight.fg.green,
@@ -156,12 +172,12 @@ export class AudioAttachmentBlock extends Entity<AttachmentBlockModel> {
         cssVarV2.text.highlight.fg.magenta,
       ];
       if (!color) {
-        color = colorOptions[spearkerToColors.size % colorOptions.length];
-        spearkerToColors.set(segment.speaker, color);
+        color = colorOptions[speakerToColors.size % colorOptions.length];
+        speakerToColors.set(segment.speaker, color);
       }
       const deltaInserts: DeltaInsert<AffineTextAttributes>[] = [
         {
-          insert: sanitizeText(segment.start_time + ' ' + segment.speaker),
+          insert: sanitizeText(segment.start + ' ' + segment.speaker),
           attributes: {
             color,
             bold: true,
@@ -180,12 +196,4 @@ export class AudioAttachmentBlock extends Entity<AttachmentBlockModel> {
       );
     }
   };
-
-  mount() {
-    this.refCount$.setValue(this.refCount$.value + 1);
-  }
-
-  unmount() {
-    this.refCount$.setValue(this.refCount$.value - 1);
-  }
 }
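fillTranscriptionResult turns each segment into rich-text runs under a callout block. An illustrative delta for one segment, under the assumption that the plain transcription text follows the bold colored header run (the hunk above truncates after the first insert):

const deltaInserts: DeltaInsert<AffineTextAttributes>[] = [
  {
    // e.g. "00:05 Speaker 1", colored per speaker and bolded
    insert: sanitizeText(segment.start + ' ' + segment.speaker),
    attributes: { color, bold: true },
  },
  // assumed: the segment body follows as an unstyled run
  { insert: sanitizeText(' ' + segment.transcription) },
];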
@@ -162,7 +162,7 @@ export class AudioMedia extends Entity<AudioSource> {

     const startTime = performance.now();
     // calculating audio stats is expensive. Maybe persist the result in cache?
-    const stats = await this.calcuateStatsFromBuffer(blob);
+    const stats = await this.calculateStatsFromBuffer(blob);
     logger.debug(
       `Calculate audio stats time: ${performance.now() - startTime}ms`
     );
@@ -177,9 +177,8 @@ export class AudioMedia extends Entity<AudioSource> {
     return fromPromise(async () => {
       return this.loadAudioBuffer();
     }).pipe(
-      mergeMap(({ blob, duration, waveform }) => {
+      mergeMap(({ blob, waveform }) => {
         const url = URL.createObjectURL(blob);
-        this.duration$.setValue(duration);
         // Set the audio element source
         this.audioElement.src = url;
         this.waveform$.setValue(waveform);
@@ -187,6 +186,9 @@ export class AudioMedia extends Entity<AudioSource> {
         if (this.playbackState$.getValue().state === 'playing') {
           this.play(true);
         }
+        this.audioElement.onloadedmetadata = () => {
+          this.duration$.setValue(this.audioElement.duration);
+        };
         return EMPTY;
       }),
       onStart(() => this.loading$.setValue(true)),
@@ -397,13 +399,13 @@ export class AudioMedia extends Entity<AudioSource> {
     return this.playbackState$.getValue();
   }

-  private async calcuateStatsFromBuffer(buffer: Blob) {
+  private async calculateStatsFromBuffer(buffer: Blob) {
     const audioContext = new AudioContext();
     const audioBuffer = await audioContext.decodeAudioData(
       await buffer.arrayBuffer()
     );
     const waveform = await this.calculateWaveform(audioBuffer);
-    return { waveform, duration: audioBuffer.duration };
+    return { waveform };
   }

   /**
@@ -0,0 +1,96 @@
+import {
+  claimAudioTranscriptionMutation,
+  getAudioTranscriptionQuery,
+  submitAudioTranscriptionMutation,
+} from '@affine/graphql';
+import { Entity } from '@toeverything/infra';
+
+import type { DefaultServerService, WorkspaceServerService } from '../../cloud';
+import { GraphQLService } from '../../cloud/services/graphql';
+import type { WorkspaceService } from '../../workspace';
+
+export class AudioTranscriptionJobStore extends Entity<{
+  readonly blobId: string;
+  readonly getAudioFile: () => Promise<File>;
+}> {
+  constructor(
+    private readonly workspaceService: WorkspaceService,
+    private readonly workspaceServerService: WorkspaceServerService,
+    private readonly defaultServerService: DefaultServerService
+  ) {
+    super();
+  }
+
+  private get serverService() {
+    return (
+      this.workspaceServerService.server || this.defaultServerService.server
+    );
+  }
+
+  private get graphqlService() {
+    return this.serverService?.scope.get(GraphQLService);
+  }
+
+  private get currentWorkspaceId() {
+    return this.workspaceService.workspace.id;
+  }
+
+  submitAudioTranscription = async () => {
+    const graphqlService = this.graphqlService;
+    if (!graphqlService) {
+      throw new Error('No graphql service available');
+    }
+    const file = await this.props.getAudioFile();
+    const response = await graphqlService.gql({
+      query: submitAudioTranscriptionMutation,
+      variables: {
+        workspaceId: this.currentWorkspaceId,
+        blobId: this.props.blobId,
+        blob: file,
+      },
+    });
+    if (!response.submitAudioTranscription?.id) {
+      throw new Error('Failed to submit audio transcription');
+    }
+    return response.submitAudioTranscription;
+  };
+
+  getAudioTranscription = async (blobId: string, jobId?: string) => {
+    const graphqlService = this.graphqlService;
+    if (!graphqlService) {
+      throw new Error('No graphql service available');
+    }
+    const currentWorkspaceId = this.currentWorkspaceId;
+    if (!currentWorkspaceId) {
+      throw new Error('No current workspace id');
+    }
+    const response = await graphqlService.gql({
+      query: getAudioTranscriptionQuery,
+      variables: {
+        workspaceId: currentWorkspaceId,
+        jobId,
+        blobId,
+      },
+    });
+    if (!response.currentUser?.copilot?.audioTranscription) {
+      return null;
+    }
+    return response.currentUser.copilot.audioTranscription;
+  };
+
+  claimAudioTranscription = async (jobId: string) => {
+    const graphqlService = this.graphqlService;
+    if (!graphqlService) {
+      throw new Error('No graphql service available');
+    }
+    const response = await graphqlService.gql({
+      query: claimAudioTranscriptionMutation,
+      variables: {
+        jobId,
+      },
+    });
+    if (!response.claimAudioTranscription) {
+      throw new Error('Failed to claim transcription result');
+    }
+    return response.claimAudioTranscription;
+  };
+}
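The store above wraps the three GraphQL operations of the real endpoint. A hypothetical end-to-end call sequence, using only the store's own methods (the control flow mirrors AudioTranscriptionJob below):

// submit uploads the audio blob and enqueues a server-side job
const job = await store.submitAudioTranscription();
// poll until the server reports a terminal status
const polled = await store.getAudioTranscription(blobId, job.id);
// claiming spends AI credits and returns the transcription payload
if (polled?.status === AiJobStatus.finished) {
  const claimed = await store.claimAudioTranscription(job.id);
}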
@@ -0,0 +1,281 @@
+import { shallowEqual } from '@affine/component';
+import { DebugLogger } from '@affine/debug';
+import { UserFriendlyError } from '@affine/error';
+import { AiJobStatus } from '@affine/graphql';
+import type { TranscriptionBlockProps } from '@blocksuite/affine/model';
+import { Entity, LiveData } from '@toeverything/infra';
+
+import type { DefaultServerService, WorkspaceServerService } from '../../cloud';
+import { AuthService } from '../../cloud/services/auth';
+import { AudioTranscriptionJobStore } from './audio-transcription-job-store';
+import type { TranscriptionResult } from './types';
+
+// The UI status of the transcription job
+export type TranscriptionStatus =
+  | {
+      status: 'waiting-for-job';
+    }
+  | {
+      status: 'started';
+    }
+  | {
+      status: AiJobStatus.pending;
+    }
+  | {
+      status: AiJobStatus.running;
+    }
+  | {
+      status: AiJobStatus.failed;
+      error: UserFriendlyError; // <<- this is not visible on UI yet
+    }
+  | {
+      status: AiJobStatus.finished; // ready to be claimed, but may be rejected because of insufficient credits
+    }
+  | {
+      status: AiJobStatus.claimed;
+      result: TranscriptionResult;
+    };
+
+const logger = new DebugLogger('audio-transcription-job');
+
+// facts on transcription job ownership
+// 1. jobId + blobId is unique for a given user
+// 2. only the creator can claim the job
+// 3. all users can query the claimed job result
+// 4. claiming a job requires AI credits
+export class AudioTranscriptionJob extends Entity<{
+  readonly blockProps: TranscriptionBlockProps;
+  readonly blobId: string;
+  readonly getAudioFile: () => Promise<File>;
+}> {
+  constructor(
+    private readonly workspaceServerService: WorkspaceServerService,
+    private readonly defaultServerService: DefaultServerService
+  ) {
+    super();
+    this.disposables.push(() => {
+      this.disposed = true;
+    });
+  }
+
+  disposed = false;
+
+  private readonly _status$ = new LiveData<TranscriptionStatus>({
+    status: 'waiting-for-job',
+  });
+
+  private readonly store = this.framework.createEntity(
+    AudioTranscriptionJobStore,
+    {
+      blobId: this.props.blobId,
+      getAudioFile: this.props.getAudioFile,
+    }
+  );
+
+  status$ = this._status$.distinctUntilChanged(shallowEqual);
+  transcribing$ = this.status$.map(status => {
+    return (
+      status.status === 'started' ||
+      status.status === AiJobStatus.pending ||
+      status.status === AiJobStatus.running ||
+      status.status === AiJobStatus.finished
+    );
+  });
+
+  error$ = this.status$.map(status => {
+    if (status.status === AiJobStatus.failed) {
+      return status.error;
+    }
+    return null;
+  });
+
+  // check if we can kick-start the transcription job
+  readonly preflightCheck = async () => {
+    // if the job id is given, check if the job exists
+    if (this.props.blockProps.jobId) {
+      const existingJob = await this.store.getAudioTranscription(
+        this.props.blobId,
+        this.props.blockProps.jobId
+      );
+
+      if (existingJob?.status === AiJobStatus.claimed) {
+        // if the job exists, anyone can query it
+        return;
+      }
+
+      if (
+        !existingJob &&
+        this.props.blockProps.createdBy &&
+        this.props.blockProps.createdBy !== this.currentUserId
+      ) {
+        return {
+          error: 'created-by-others',
+          userId: this.props.blockProps.createdBy,
+        };
+      }
+    }
+
+    // if no job id, anyone can start a new job
+    return;
+  };
+
+  async start() {
+    if (this.disposed) {
+      logger.debug('Job already disposed, cannot start');
+      throw new Error('Job already disposed');
+    }
+
+    this._status$.value = {
+      status: 'started',
+    };
+
+    try {
+      // first, check if there is already a job
+      logger.debug('Checking for existing transcription job', {
+        blobId: this.props.blobId,
+        jobId: this.props.blockProps.jobId,
+      });
+      let job: {
+        id: string;
+        status: AiJobStatus;
+      } | null = await this.store.getAudioTranscription(
+        this.props.blobId,
+        this.props.blockProps.jobId
+      );
+
+      if (!job) {
+        logger.debug('No existing job found, submitting new transcription job');
+        job = await this.store.submitAudioTranscription();
+      } else {
+        logger.debug('Found existing job', {
+          jobId: job.id,
+          status: job.status,
+        });
+      }
+
+      this.props.blockProps.jobId = job.id;
+      this.props.blockProps.createdBy = this.currentUserId;
+
+      if (job.status !== AiJobStatus.failed) {
+        this._status$.value = {
+          status: AiJobStatus.pending,
+        };
+      } else {
+        logger.debug('Job submission failed');
+        throw UserFriendlyError.fromAny('failed to submit transcription');
+      }
+
+      await this.untilJobFinishedOrClaimed();
+      await this.claim();
+    } catch (err) {
+      logger.debug('Error during job submission', { error: err });
+      this._status$.value = {
+        status: AiJobStatus.failed,
+        error: UserFriendlyError.fromAny(err),
+      };
+    }
+    return this.status$.value;
+  }
+
+  private async untilJobFinishedOrClaimed() {
+    while (
+      !this.disposed &&
+      this.props.blockProps.jobId &&
+      this.props.blockProps.createdBy === this.currentUserId
+    ) {
+      logger.debug('Polling job status', {
+        jobId: this.props.blockProps.jobId,
+      });
+      const job = await this.store.getAudioTranscription(
+        this.props.blobId,
+        this.props.blockProps.jobId
+      );
+
+      if (!job || job?.status === 'failed') {
+        logger.debug('Job failed during polling', {
+          jobId: this.props.blockProps.jobId,
+        });
+        throw UserFriendlyError.fromAny('Transcription job failed');
+      }
+
+      if (job?.status === 'finished' || job?.status === 'claimed') {
+        logger.debug('Job finished, ready to claim', {
+          jobId: this.props.blockProps.jobId,
+        });
+        this._status$.value = {
+          status: AiJobStatus.finished,
+        };
+        return;
+      }
+
+      // Add a delay between polling attempts
+      await new Promise(resolve => setTimeout(resolve, 3000));
+    }
+  }
+
+  async claim() {
+    if (this.disposed) {
+      logger.debug('Job already disposed, cannot claim');
+      throw new Error('Job already disposed');
+    }
+
+    logger.debug('Attempting to claim job', {
+      jobId: this.props.blockProps.jobId,
+    });
+
+    if (!this.props.blockProps.jobId) {
+      logger.debug('No job id found, cannot claim');
+      throw new Error('No job id found');
+    }
+
+    const claimedJob = await this.store.claimAudioTranscription(
+      this.props.blockProps.jobId
+    );
+
+    if (claimedJob) {
+      logger.debug('Successfully claimed job', {
+        jobId: this.props.blockProps.jobId,
+      });
+      const result: TranscriptionResult = {
+        summary: claimedJob.summary ?? '',
+        title: claimedJob.title ?? '',
+        segments:
+          claimedJob.transcription?.map(segment => ({
+            speaker: segment.speaker,
+            start: segment.start,
+            end: segment.end,
+            transcription: segment.transcription,
+          })) ?? [],
+      };
+
+      this._status$.value = {
+        status: AiJobStatus.claimed,
+        result,
+      };
+    } else {
+      throw new Error('Failed to claim transcription result');
+    }
+  }
+
+  isCreator() {
+    return (
+      this.props.blockProps.jobId &&
+      this.props.blockProps.createdBy &&
+      this.props.blockProps.createdBy === this.currentUserId
+    );
+  }
+
+  private get serverService() {
+    return (
+      this.workspaceServerService.server || this.defaultServerService.server
+    );
+  }
+
+  get currentUserId() {
+    const authService = this.serverService?.scope.getOptional(AuthService);
+    if (!authService) {
+      return;
+    }
+    return authService.session.account$.value?.id;
+  }
+}
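The TranscriptionStatus union above behaves as a small state machine: waiting-for-job, then started, then pending/running, then finished, then claimed, with failed reachable from any step. A hypothetical driver, matching how audio-attachment-block.ts consumes the job:

// start() resolves with the final status once the job is claimed or failed;
// the 3s polling and the claim happen inside start().
const status = await job.start();
if (status.status === AiJobStatus.claimed) {
  fillTranscriptionResult(status.result); // title, summary, segments
} else if (status.status === AiJobStatus.failed) {
  console.error(status.error.message);
}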
packages/frontend/core/src/modules/media/entities/types.ts (new file, +10 lines)
@@ -0,0 +1,10 @@
+export interface TranscriptionResult {
+  title: string;
+  summary: string;
+  segments: {
+    speaker: string;
+    start: string;
+    end: string;
+    transcription: string;
+  }[];
+}
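For reference, an example value matching the interface (illustrative data only):

const example: TranscriptionResult = {
  title: 'Weekly sync',
  summary: 'Discussed the release plan.',
  segments: [
    {
      speaker: 'Speaker 1',
      start: '00:00',
      end: '00:12',
      transcription: 'Hello everyone.',
    },
  ],
};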
@@ -1,11 +1,14 @@
 import type { Framework } from '@toeverything/infra';

+import { DefaultServerService, WorkspaceServerService } from '../cloud';
 import { DesktopApiService } from '../desktop-api';
 import { GlobalState } from '../storage';
 import { WorkbenchService } from '../workbench';
 import { WorkspaceScope, WorkspaceService } from '../workspace';
 import { AudioAttachmentBlock } from './entities/audio-attachment-block';
 import { AudioMedia } from './entities/audio-media';
+import { AudioTranscriptionJob } from './entities/audio-transcription-job';
+import { AudioTranscriptionJobStore } from './entities/audio-transcription-job-store';
 import {
   ElectronGlobalMediaStateProvider,
   GlobalMediaStateProvider,
@@ -15,31 +18,40 @@ import { AudioAttachmentService } from './services/audio-attachment';
 import { AudioMediaManagerService } from './services/audio-media-manager';

 export function configureMediaModule(framework: Framework) {
+  framework
+    .scope(WorkspaceScope)
+    .entity(AudioMedia, [WorkspaceService])
+    .entity(AudioAttachmentBlock, [AudioMediaManagerService, WorkspaceService])
+    .entity(AudioTranscriptionJob, [
+      WorkspaceServerService,
+      DefaultServerService,
+    ])
+    .entity(AudioTranscriptionJobStore, [
+      WorkspaceService,
+      WorkspaceServerService,
+      DefaultServerService,
+    ])
+    .service(AudioAttachmentService);
+
   if (BUILD_CONFIG.isElectron) {
     framework
       .impl(GlobalMediaStateProvider, ElectronGlobalMediaStateProvider, [
         GlobalState,
       ])
-      .scope(WorkspaceScope)
-      .entity(AudioMedia, [WorkspaceService])
-      .entity(AudioAttachmentBlock, [AudioMediaManagerService])
       .service(AudioMediaManagerService, [
        GlobalMediaStateProvider,
        WorkbenchService,
        DesktopApiService,
-      ])
-      .service(AudioAttachmentService);
+      ]);
   } else {
     framework
       .impl(GlobalMediaStateProvider, WebGlobalMediaStateProvider)
-      .scope(WorkspaceScope)
-      .entity(AudioMedia, [WorkspaceService])
-      .entity(AudioAttachmentBlock, [AudioMediaManagerService])
       .service(AudioMediaManagerService, [
        GlobalMediaStateProvider,
        WorkbenchService,
-      ])
-      .service(AudioAttachmentService);
+      ]);
   }
 }