refactor(electron): use mp4 container instead of webm for encoded audio (#12247)

fix AF-2609
The WebM container with Opus audio has limited support in Safari, so encoded audio is now muxed into an MP4 container instead.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - Audio recordings are now saved in MP4 format instead of WebM, improving compatibility with a wider range of devices and applications.

- **Chores**
  - Updated dependencies to use MP4 muxing tools in place of WebM.
  - Internal audio encoding utilities and references updated to support MP4 container format.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
pengx17
2025-05-13 09:01:39 +00:00
parent 843f1e34c6
commit 0b6d1eacc8
6 changed files with 34 additions and 23 deletions

View File

@@ -20,7 +20,7 @@ async function saveRecordingBlob(blobEngine: BlobEngine, filepath: string) {
res.arrayBuffer() res.arrayBuffer()
); );
const blob = new Blob([opusBuffer], { const blob = new Blob([opusBuffer], {
type: 'audio/webm', type: 'audio/mp4',
}); });
const blobId = await blobEngine.set(blob); const blobId = await blobEngine.set(blob);
logger.debug('Recording saved', blobId); logger.debug('Recording saved', blobId);

View File

@@ -5,7 +5,7 @@ import {
createStreamEncoder, createStreamEncoder,
encodeRawBufferToOpus, encodeRawBufferToOpus,
type OpusStreamEncoder, type OpusStreamEncoder,
} from '@affine/core/utils/webm-encoding'; } from '@affine/core/utils/opus-encoding';
import { apis, events } from '@affine/electron-api'; import { apis, events } from '@affine/electron-api';
import { useI18n } from '@affine/i18n'; import { useI18n } from '@affine/i18n';
import track from '@affine/track'; import track from '@affine/track';

View File

@@ -66,6 +66,7 @@
"lit": "^3.2.1", "lit": "^3.2.1",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"lottie-react": "^2.4.0", "lottie-react": "^2.4.0",
"mp4-muxer": "^5.2.1",
"nanoid": "^5.0.9", "nanoid": "^5.0.9",
"next-themes": "^0.4.4", "next-themes": "^0.4.4",
"query-string": "^9.1.1", "query-string": "^9.1.1",
@@ -80,7 +81,6 @@
"socket.io-client": "^4.8.1", "socket.io-client": "^4.8.1",
"swr": "2.3.3", "swr": "2.3.3",
"tinykeys": "patch:tinykeys@npm%3A2.1.0#~/.yarn/patches/tinykeys-npm-2.1.0-819feeaed0.patch", "tinykeys": "patch:tinykeys@npm%3A2.1.0#~/.yarn/patches/tinykeys-npm-2.1.0-819feeaed0.patch",
"webm-muxer": "^5.1.0",
"y-protocols": "^1.0.6", "y-protocols": "^1.0.6",
"yjs": "^13.6.21", "yjs": "^13.6.21",
"zod": "^3.24.1" "zod": "^3.24.1"

View File

@@ -3,7 +3,7 @@ import {
type TranscriptionBlockModel, type TranscriptionBlockModel,
} from '@affine/core/blocksuite/ai/blocks/transcription-block/model'; } from '@affine/core/blocksuite/ai/blocks/transcription-block/model';
import { insertFromMarkdown } from '@affine/core/blocksuite/utils'; import { insertFromMarkdown } from '@affine/core/blocksuite/utils';
import { encodeAudioBlobToOpusSlices } from '@affine/core/utils/webm-encoding'; import { encodeAudioBlobToOpusSlices } from '@affine/core/utils/opus-encoding';
import { DebugLogger } from '@affine/debug'; import { DebugLogger } from '@affine/debug';
import { AiJobStatus } from '@affine/graphql'; import { AiJobStatus } from '@affine/graphql';
import track from '@affine/track'; import track from '@affine/track';

View File

@@ -1,6 +1,6 @@
import { DebugLogger } from '@affine/debug'; import { DebugLogger } from '@affine/debug';
import { apis } from '@affine/electron-api'; import { apis } from '@affine/electron-api';
import { ArrayBufferTarget, Muxer } from 'webm-muxer'; import { ArrayBufferTarget, Muxer } from 'mp4-muxer';
interface AudioEncodingConfig { interface AudioEncodingConfig {
sampleRate: number; sampleRate: number;
@@ -13,7 +13,7 @@ interface AudioEncodingResult {
config: AudioEncodingConfig; config: AudioEncodingConfig;
} }
const logger = new DebugLogger('webm-encoding'); const logger = new DebugLogger('opus-encoding');
// Constants // Constants
const DEFAULT_BITRATE = 64000; const DEFAULT_BITRATE = 64000;
@@ -134,9 +134,9 @@ async function encodeAudioFrames({
} }
/** /**
* Creates a WebM container with the encoded audio chunks * Creates a mp4 container with the encoded audio chunks
*/ */
export function muxToWebM( export function muxToMp4(
encodedChunks: EncodedAudioChunk[], encodedChunks: EncodedAudioChunk[],
config: AudioEncodingConfig config: AudioEncodingConfig
): Uint8Array { ): Uint8Array {
@@ -144,10 +144,11 @@ export function muxToWebM(
const muxer = new Muxer({ const muxer = new Muxer({
target, target,
audio: { audio: {
codec: 'A_OPUS', codec: 'opus',
sampleRate: config.sampleRate, sampleRate: config.sampleRate,
numberOfChannels: config.numberOfChannels, numberOfChannels: config.numberOfChannels,
}, },
fastStart: 'in-memory',
}); });
for (const chunk of encodedChunks) { for (const chunk of encodedChunks) {
@@ -185,7 +186,7 @@ async function encodeAudioBufferToOpus(
} }
/** /**
* Encodes raw audio data to Opus in WebM container. * Encodes raw audio data to Opus in MP4 container.
*/ */
export async function encodeRawBufferToOpus({ export async function encodeRawBufferToOpus({
filepath, filepath,
@@ -237,16 +238,16 @@ export async function encodeRawBufferToOpus({
encoder, encoder,
}); });
const webm = muxToWebM(encodedChunks, { sampleRate, numberOfChannels }); const mp4 = muxToMp4(encodedChunks, { sampleRate, numberOfChannels });
logger.debug('Encoded raw buffer to Opus'); logger.debug('Encoded raw buffer to Opus');
return webm; return mp4;
} }
/** /**
* Encodes an audio file Blob to Opus in WebM container with specified bitrate. * Encodes an audio file Blob to Opus in MP4 container with specified bitrate.
* @param blob Input audio file blob (supports any browser-decodable format) * @param blob Input audio file blob (supports any browser-decodable format)
* @param targetBitrate Target bitrate in bits per second (bps) * @param targetBitrate Target bitrate in bits per second (bps)
* @returns Promise resolving to encoded WebM data as Uint8Array * @returns Promise resolving to encoded MP4 data as Uint8Array
*/ */
export async function encodeAudioBlobToOpus( export async function encodeAudioBlobToOpus(
blob: Blob | ArrayBuffer | Uint8Array, blob: Blob | ArrayBuffer | Uint8Array,
@@ -263,9 +264,9 @@ export async function encodeAudioBlobToOpus(
targetBitrate targetBitrate
); );
const webm = muxToWebM(encodedChunks, config); const mp4 = muxToMp4(encodedChunks, config);
logger.debug('Encoded audio blob to Opus'); logger.debug('Encoded audio blob to Opus');
return webm; return mp4;
} finally { } finally {
await audioContext.close(); await audioContext.close();
} }
@@ -371,14 +372,14 @@ export async function encodeAudioBlobToOpusSlices(
encoder, encoder,
}); });
// Mux to WebM and add to slices // Mux to MP4 and add to slices
const webm = muxToWebM(encodedChunks, { const mp4 = muxToMp4(encodedChunks, {
sampleRate, sampleRate,
numberOfChannels, numberOfChannels,
bitrate: targetBitrate, bitrate: targetBitrate,
}); });
slices.push(webm); slices.push(mp4);
// Move to next slice // Move to next slice
startSample = endSample; startSample = endSample;
@@ -471,7 +472,7 @@ export const createStreamEncoder = (
logger.debug('Finishing encoding'); logger.debug('Finishing encoding');
await next(); await next();
close(); close();
const buffer = muxToWebM(encodedChunks, { const buffer = muxToMp4(encodedChunks, {
sampleRate: codecs.sampleRate, sampleRate: codecs.sampleRate,
numberOfChannels: codecs.numberOfChannels, numberOfChannels: codecs.numberOfChannels,
bitrate: codecs.targetBitrate, bitrate: codecs.targetBitrate,

View File

@@ -459,6 +459,7 @@ __metadata:
lit: "npm:^3.2.1" lit: "npm:^3.2.1"
lodash-es: "npm:^4.17.21" lodash-es: "npm:^4.17.21"
lottie-react: "npm:^2.4.0" lottie-react: "npm:^2.4.0"
mp4-muxer: "npm:^5.2.1"
nanoid: "npm:^5.0.9" nanoid: "npm:^5.0.9"
next-themes: "npm:^0.4.4" next-themes: "npm:^0.4.4"
query-string: "npm:^9.1.1" query-string: "npm:^9.1.1"
@@ -474,7 +475,6 @@ __metadata:
swr: "npm:2.3.3" swr: "npm:2.3.3"
tinykeys: "patch:tinykeys@npm%3A2.1.0#~/.yarn/patches/tinykeys-npm-2.1.0-819feeaed0.patch" tinykeys: "patch:tinykeys@npm%3A2.1.0#~/.yarn/patches/tinykeys-npm-2.1.0-819feeaed0.patch"
vitest: "npm:3.1.3" vitest: "npm:3.1.3"
webm-muxer: "npm:^5.1.0"
y-protocols: "npm:^1.0.6" y-protocols: "npm:^1.0.6"
yjs: "npm:^13.6.21" yjs: "npm:^13.6.21"
zod: "npm:^3.24.1" zod: "npm:^3.24.1"
@@ -14916,7 +14916,7 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"@types/dom-webcodecs@npm:^0.1.4": "@types/dom-webcodecs@npm:^0.1.4, @types/dom-webcodecs@npm:^0.1.6":
version: 0.1.15 version: 0.1.15
resolution: "@types/dom-webcodecs@npm:0.1.15" resolution: "@types/dom-webcodecs@npm:0.1.15"
checksum: 10/0d1ce12007803b92594968c657e3bcdb24c5f4b7b89a3b094670bcc39f5c3b395adba1ab64a930007bd947ae6be1689ddd64ccc48127d1acf3ef7be63f3cdc98 checksum: 10/0d1ce12007803b92594968c657e3bcdb24c5f4b7b89a3b094670bcc39f5c3b395adba1ab64a930007bd947ae6be1689ddd64ccc48127d1acf3ef7be63f3cdc98
@@ -26932,6 +26932,16 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"mp4-muxer@npm:^5.2.1":
version: 5.2.1
resolution: "mp4-muxer@npm:5.2.1"
dependencies:
"@types/dom-webcodecs": "npm:^0.1.6"
"@types/wicg-file-system-access": "npm:^2020.9.5"
checksum: 10/7169fd43e4a3a604c4590970276ff58bee479444530af8455ba66b70207e3317e74ae662b488df58f4619dd29973950228347585ec579539aa4356ead9d203be
languageName: node
linkType: hard
"mri@npm:^1.2.0": "mri@npm:^1.2.0":
version: 1.2.0 version: 1.2.0
resolution: "mri@npm:1.2.0" resolution: "mri@npm:1.2.0"
@@ -34136,7 +34146,7 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"webm-muxer@npm:^5.0.3, webm-muxer@npm:^5.1.0": "webm-muxer@npm:^5.0.3":
version: 5.1.2 version: 5.1.2
resolution: "webm-muxer@npm:5.1.2" resolution: "webm-muxer@npm:5.1.2"
dependencies: dependencies: