Compare commits

...

2 Commits

Author    SHA1         Message                                                Date
DarkSky   2ac9158f87   Merge branch 'canary' into darksky/native-sync-state   2026-01-04 00:27:17 +08:00
DarkSky   7ef550a736   feat: native record encoding                           2025-12-31 12:47:34 +08:00
18 changed files with 970 additions and 516 deletions

Cargo.lock (generated, 64 lines changed)
View File

@@ -98,6 +98,9 @@ dependencies = [
"napi-derive",
"objc2",
"objc2-foundation",
"ogg",
"opus-codec",
"rand 0.9.1",
"rubato",
"screencapturekit",
"symphonia",
@@ -566,6 +569,26 @@ dependencies = [
"syn 2.0.111",
]
[[package]]
name = "bindgen"
version = "0.72.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
dependencies = [
"bitflags 2.9.1",
"cexpr",
"clang-sys",
"itertools 0.13.0",
"log",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash 2.1.1",
"shlex",
"syn 2.0.111",
]
[[package]]
name = "bit-set"
version = "0.5.3"
@@ -925,6 +948,15 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "cmake"
version = "0.1.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0"
dependencies = [
"cc",
]
[[package]]
name = "colorchoice"
version = "1.0.3"
@@ -1104,7 +1136,7 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ce857aa0b77d77287acc1ac3e37a05a8c95a2af3647d23b15f263bdaeb7562b"
dependencies = [
"bindgen",
"bindgen 0.70.1",
]
[[package]]
@@ -3046,6 +3078,15 @@ dependencies = [
"cc",
]
[[package]]
name = "ogg"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdab8dcd8d4052eaacaf8fb07a3ccd9a6e26efadb42878a413c68fc4af1dee2b"
dependencies = [
"byteorder",
]
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -3064,6 +3105,17 @@ version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
[[package]]
name = "opus-codec"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37755dfadaa9c70fd26a4c1ea13d9bd035993cd0a19eb5b76449301609228280"
dependencies = [
"bindgen 0.72.1",
"cmake",
"pkg-config",
]
[[package]]
name = "ordered-float"
version = "5.0.0"
@@ -3399,6 +3451,16 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "prettyplease"
version = "0.2.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
"syn 2.0.111",
]
[[package]]
name = "primal-check"
version = "0.3.4"

View File

@@ -64,6 +64,7 @@ resolver = "3"
notify = { version = "8", features = ["serde"] }
objc2 = "0.6"
objc2-foundation = "0.3"
ogg = "0.9"
once_cell = "1"
ordered-float = "5"
parking_lot = "0.12"

View File

@@ -1,11 +1,6 @@
import { Button } from '@affine/component';
import { useAsyncCallback } from '@affine/core/components/hooks/affine-async-hooks';
import { appIconMap } from '@affine/core/utils';
import {
createStreamEncoder,
encodeRawBufferToOpus,
type OpusStreamEncoder,
} from '@affine/core/utils/opus-encoding';
import { apis, events } from '@affine/electron-api';
import { useI18n } from '@affine/i18n';
import track from '@affine/track';
@@ -105,61 +100,8 @@ export function Recording() {
await apis?.recording?.stopRecording(status.id);
}, [status]);
const handleProcessStoppedRecording = useAsyncCallback(
async (currentStreamEncoder?: OpusStreamEncoder) => {
let id: number | undefined;
try {
const result = await apis?.recording?.getCurrentRecording();
if (!result) {
return;
}
id = result.id;
const { filepath, sampleRate, numberOfChannels } = result;
if (!filepath || !sampleRate || !numberOfChannels) {
return;
}
const [buffer] = await Promise.all([
currentStreamEncoder
? currentStreamEncoder.finish()
: encodeRawBufferToOpus({
filepath,
sampleRate,
numberOfChannels,
}),
new Promise<void>(resolve => {
setTimeout(() => {
resolve();
}, 500); // wait at least 500ms for better user experience
}),
]);
await apis?.recording.readyRecording(result.id, buffer);
} catch (error) {
console.error('Failed to stop recording', error);
await apis?.popup?.dismissCurrentRecording();
if (id) {
await apis?.recording.removeRecording(id);
}
}
},
[]
);
useEffect(() => {
let removed = false;
let currentStreamEncoder: OpusStreamEncoder | undefined;
apis?.recording
.getCurrentRecording()
.then(status => {
if (status) {
return handleRecordingStatusChanged(status);
}
return;
})
.catch(console.error);
const handleRecordingStatusChanged = async (status: Status) => {
if (removed) {
@@ -171,27 +113,18 @@ export function Recording() {
appName: status.appName || 'System Audio',
});
}
if (
status?.status === 'recording' &&
status.sampleRate &&
status.numberOfChannels &&
(!currentStreamEncoder || currentStreamEncoder.id !== status.id)
) {
currentStreamEncoder?.close();
currentStreamEncoder = createStreamEncoder(status.id, {
sampleRate: status.sampleRate,
numberOfChannels: status.numberOfChannels,
});
currentStreamEncoder.poll().catch(console.error);
}
if (status?.status === 'stopped') {
handleProcessStoppedRecording(currentStreamEncoder);
currentStreamEncoder = undefined;
}
};
apis?.recording
.getCurrentRecording()
.then(status => {
if (status) {
return handleRecordingStatusChanged(status);
}
return;
})
.catch(console.error);
// allow processing stopped event in tray menu as well:
const unsubscribe = events?.recording.onRecordingStatusChanged(status => {
if (status) {
@@ -202,9 +135,8 @@ export function Recording() {
return () => {
removed = true;
unsubscribe?.();
currentStreamEncoder?.close();
};
}, [handleProcessStoppedRecording]);
}, []);
const handleStartRecording = useAsyncCallback(async () => {
if (!status) {

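Taken together, the popup diff removes all renderer-side Opus encoding: the component no longer creates or polls an OpusStreamEncoder and only reacts to status changes. A minimal TypeScript sketch of the resulting effect, reconstructed from the surviving lines above (the exact body may differ):

```ts
// Sketch only: the encoder plumbing is gone; the effect just forwards
// status changes and triggers post-processing once a recording stops.
useEffect(() => {
  let removed = false;
  const handleRecordingStatusChanged = async (status: Status) => {
    if (removed) return;
    // ...update UI from status...
    if (status?.status === 'stopped') {
      handleProcessStoppedRecording();
    }
  };
  apis?.recording
    .getCurrentRecording()
    .then(status => (status ? handleRecordingStatusChanged(status) : undefined))
    .catch(console.error);
  // allow processing the stopped event in the tray menu as well
  const unsubscribe = events?.recording.onRecordingStatusChanged(status => {
    if (status) handleRecordingStatusChanged(status).catch(console.error);
  });
  return () => {
    removed = true;
    unsubscribe?.();
  };
}, []);
```
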
View File

@@ -1,6 +1,5 @@
/* oxlint-disable no-var-requires */
import { execSync } from 'node:child_process';
import { createHash } from 'node:crypto';
import fsp from 'node:fs/promises';
import path from 'node:path';
@@ -32,12 +31,7 @@ import { getMainWindow } from '../windows-manager';
import { popupManager } from '../windows-manager/popup';
import { isAppNameAllowed } from './allow-list';
import { recordingStateMachine } from './state-machine';
import type {
AppGroupInfo,
Recording,
RecordingStatus,
TappableAppInfo,
} from './types';
import type { AppGroupInfo, RecordingStatus, TappableAppInfo } from './types';
export const MeetingsSettingsState = {
$: globalStateStorage.watch<MeetingSettingsSchema>(MeetingSettingsKey).pipe(
@@ -56,7 +50,12 @@ export const MeetingsSettingsState = {
},
};
type Subscriber = {
unsubscribe: () => void;
};
const subscribers: Subscriber[] = [];
let appStateSubscribers: Subscriber[] = [];
// recordings are saved in the app data directory
// may need a way to clean up old recordings
@@ -67,8 +66,22 @@ export const SAVED_RECORDINGS_DIR = path.join(
let shareableContent: ShareableContentType | null = null;
type NativeModule = typeof import('@affine/native');
function getNativeModule(): NativeModule {
return require('@affine/native') as NativeModule;
}
function cleanup() {
shareableContent = null;
appStateSubscribers.forEach(subscriber => {
try {
subscriber.unsubscribe();
} catch {
// ignore unsubscribe error
}
});
appStateSubscribers = [];
subscribers.forEach(subscriber => {
try {
subscriber.unsubscribe();
@@ -76,6 +89,9 @@ function cleanup() {
// ignore unsubscribe error
}
});
subscribers.length = 0;
applications$.next([]);
appGroups$.next([]);
}
beforeAppQuit(() => {
@@ -87,18 +103,12 @@ export const appGroups$ = new BehaviorSubject<AppGroupInfo[]>([]);
export const updateApplicationsPing$ = new Subject<number>();
// recording id -> recording
// recordings are kept in memory until they are consumed and turned into an audio block in the user's doc
const recordings = new Map<number, Recording>();
// there should be only one active recording at a time
// We'll now use recordingStateMachine.status$ instead of our own BehaviorSubject
// There should be only one active recording at a time; state is managed by the state machine
export const recordingStatus$ = recordingStateMachine.status$;
function createAppGroup(processGroupId: number): AppGroupInfo | undefined {
// MUST require dynamically to avoid loading @affine/native for unsupported platforms
const SC: typeof ShareableContentType =
require('@affine/native').ShareableContent;
const SC: typeof ShareableContentType = getNativeModule().ShareableContent;
const groupProcess = SC?.applicationWithProcessId(processGroupId);
if (!groupProcess) {
return;
@@ -176,7 +186,9 @@ function setupNewRunningAppGroup() {
const debounceStartRecording = debounce((appGroup: AppGroupInfo) => {
// check if the app is running again
if (appGroup.isRunning) {
startRecording(appGroup);
startRecording(appGroup).catch(err => {
logger.error('failed to start recording', err);
});
}
}, 1000);
@@ -242,91 +254,20 @@ function setupNewRunningAppGroup() {
);
}
function getSanitizedAppId(bundleIdentifier?: string) {
if (!bundleIdentifier) {
return 'unknown';
}
return isWindows()
? createHash('sha256')
.update(bundleIdentifier)
.digest('hex')
.substring(0, 8)
: bundleIdentifier;
}
export function createRecording(status: RecordingStatus) {
let recording = recordings.get(status.id);
if (recording) {
return recording;
}
const appId = getSanitizedAppId(status.appGroup?.bundleIdentifier);
const bufferedFilePath = path.join(
SAVED_RECORDINGS_DIR,
`${appId}-${status.id}-${status.startTime}.raw`
);
fs.ensureDirSync(SAVED_RECORDINGS_DIR);
const file = fs.createWriteStream(bufferedFilePath);
function tapAudioSamples(err: Error | null, samples: Float32Array) {
const recordingStatus = recordingStatus$.getValue();
if (
!recordingStatus ||
recordingStatus.id !== status.id ||
recordingStatus.status === 'paused'
) {
return;
}
if (err) {
logger.error('failed to get audio samples', err);
} else {
// Writing raw Float32Array samples directly to file
// For stereo audio, samples are interleaved [L,R,L,R,...]
file.write(Buffer.from(samples.buffer));
}
}
// MUST require dynamically to avoid loading @affine/native for unsupported platforms
const SC: typeof ShareableContentType =
require('@affine/native').ShareableContent;
const stream = status.app
? SC.tapAudio(status.app.processId, tapAudioSamples)
: SC.tapGlobalAudio(null, tapAudioSamples);
recording = {
id: status.id,
startTime: status.startTime,
app: status.app,
appGroup: status.appGroup,
file,
session: stream,
};
recordings.set(status.id, recording);
return recording;
}
export async function getRecording(id: number) {
const recording = recordings.get(id);
if (!recording) {
const recording = recordingStateMachine.status;
if (!recording || recording.id !== id) {
logger.error(`Recording ${id} not found`);
return;
}
const rawFilePath = String(recording.file.path);
return {
id,
appGroup: recording.appGroup,
app: recording.app,
startTime: recording.startTime,
filepath: rawFilePath,
sampleRate: recording.session.sampleRate,
numberOfChannels: recording.session.channels,
filepath: recording.filepath,
sampleRate: recording.sampleRate,
numberOfChannels: recording.numberOfChannels,
};
}
@@ -350,18 +291,7 @@ function setupRecordingListeners() {
});
}
if (status?.status === 'recording') {
let recording = recordings.get(status.id);
// create a recording if not exists
if (!recording) {
recording = createRecording(status);
}
} else if (status?.status === 'stopped') {
const recording = recordings.get(status.id);
if (recording) {
recording.session.stop();
}
} else if (
if (
status?.status === 'create-block-success' ||
status?.status === 'create-block-failed'
) {
@@ -400,9 +330,7 @@ function getAllApps(): TappableAppInfo[] {
}
// MUST require dynamically to avoid loading @affine/native for unsupported platforms
const { ShareableContent } = require('@affine/native') as {
ShareableContent: typeof ShareableContentType;
};
const { ShareableContent } = getNativeModule();
const apps = ShareableContent.applications().map(app => {
try {
@@ -433,12 +361,8 @@ function getAllApps(): TappableAppInfo[] {
return filteredApps;
}
type Subscriber = {
unsubscribe: () => void;
};
function setupMediaListeners() {
const ShareableContent = require('@affine/native').ShareableContent;
const ShareableContent = getNativeModule().ShareableContent;
applications$.next(getAllApps());
subscribers.push(
interval(3000).subscribe(() => {
@@ -454,8 +378,6 @@ function setupMediaListeners() {
})
);
let appStateSubscribers: Subscriber[] = [];
subscribers.push(
applications$.subscribe(apps => {
appStateSubscribers.forEach(subscriber => {
@@ -484,15 +406,6 @@ function setupMediaListeners() {
});
appStateSubscribers = _appStateSubscribers;
return () => {
_appStateSubscribers.forEach(subscriber => {
try {
subscriber.unsubscribe();
} catch {
// ignore unsubscribe error
}
});
};
})
);
}
@@ -502,7 +415,7 @@ function askForScreenRecordingPermission() {
return false;
}
try {
const ShareableContent = require('@affine/native').ShareableContent;
const ShareableContent = getNativeModule().ShareableContent;
// this will trigger the permission prompt
new ShareableContent();
return true;
@@ -519,7 +432,7 @@ export function setupRecordingFeature() {
}
try {
const ShareableContent = require('@affine/native').ShareableContent;
const ShareableContent = getNativeModule().ShareableContent;
if (!shareableContent) {
shareableContent = new ShareableContent();
setupMediaListeners();
@@ -558,24 +471,48 @@ export function newRecording(
});
}
export function startRecording(
export async function startRecording(
appGroup?: AppGroupInfo | number
): RecordingStatus | null {
const state = recordingStateMachine.dispatch(
{
type: 'START_RECORDING',
appGroup: normalizeAppGroupInfo(appGroup),
},
false
);
): Promise<RecordingStatus | null> {
const state = recordingStateMachine.dispatch({
type: 'START_RECORDING',
appGroup: normalizeAppGroupInfo(appGroup),
});
if (state?.status === 'recording') {
createRecording(state);
if (!state || state.status !== 'recording') {
return state;
}
recordingStateMachine.status$.next(state);
try {
fs.ensureDirSync(SAVED_RECORDINGS_DIR);
return state;
const meta = getNativeModule().startRecording({
appProcessId: state.app?.processId,
outputDir: SAVED_RECORDINGS_DIR,
format: 'opus',
id: String(state.id),
});
const filepath = assertRecordingFilepath(meta.filepath);
const nextState = recordingStateMachine.dispatch({
type: 'ATTACH_NATIVE_RECORDING',
id: state.id,
nativeId: meta.id,
startTime: meta.startedAt ?? state.startTime,
filepath,
sampleRate: meta.sampleRate,
numberOfChannels: meta.channels,
});
return nextState;
} catch (error) {
logger.error('failed to start recording', error);
return recordingStateMachine.dispatch({
type: 'CREATE_BLOCK_FAILED',
id: state.id,
error: error instanceof Error ? error : undefined,
});
}
}
export function pauseRecording(id: number) {
@@ -587,61 +524,49 @@ export function resumeRecording(id: number) {
}
export async function stopRecording(id: number) {
const recording = recordings.get(id);
if (!recording) {
const recording = recordingStateMachine.status;
if (!recording || recording.id !== id) {
logger.error(`stopRecording: Recording ${id} not found`);
return;
}
if (!recording.file.path) {
logger.error(`Recording ${id} has no file path`);
if (!recording.nativeId) {
logger.error(`stopRecording: Recording ${id} missing native id`);
return;
}
const { file, session: stream } = recording;
// First stop the audio stream to prevent more data coming in
try {
stream.stop();
} catch (err) {
logger.error('Failed to stop audio stream', err);
}
// End the file with a timeout
file.end();
recordingStateMachine.dispatch({
type: 'STOP_RECORDING',
id,
});
try {
await Promise.race([
new Promise<void>((resolve, reject) => {
file.on('finish', () => {
// check if the file is empty
const stats = fs.statSync(file.path);
if (stats.size === 0) {
reject(new Error('Recording is empty'));
return;
}
resolve();
});
file.on('error', err => {
reject(err);
});
}),
new Promise<never>((_, reject) =>
setTimeout(() => reject(new Error('File writing timeout')), 10000)
),
]);
const recordingStatus = recordingStateMachine.dispatch({
type: 'STOP_RECORDING',
const artifact = getNativeModule().stopRecording(recording.nativeId);
const filepath = assertRecordingFilepath(artifact.filepath);
const readyStatus = recordingStateMachine.dispatch({
type: 'SAVE_RECORDING',
id,
filepath,
sampleRate: artifact.sampleRate,
numberOfChannels: artifact.channels,
});
if (!recordingStatus) {
logger.error('No recording status to stop');
if (!readyStatus) {
logger.error('No recording status to save');
return;
}
return serializeRecordingStatus(recordingStatus);
getMainWindow()
.then(mainWindow => {
if (mainWindow) {
mainWindow.show();
}
})
.catch(err => {
logger.error('failed to bring up the window', err);
});
return serializeRecordingStatus(readyStatus);
} catch (error: unknown) {
logger.error('Failed to stop recording', error);
const recordingStatus = recordingStateMachine.dispatch({
@@ -654,38 +579,9 @@ export async function stopRecording(id: number) {
return;
}
return serializeRecordingStatus(recordingStatus);
} finally {
// Clean up the file stream if it's still open
if (!file.closed) {
file.destroy();
}
}
}
export async function getRawAudioBuffers(
id: number,
cursor?: number
): Promise<{
buffer: Buffer;
nextCursor: number;
}> {
const recording = recordings.get(id);
if (!recording) {
throw new Error(`getRawAudioBuffers: Recording ${id} not found`);
}
const start = cursor ?? 0;
const file = await fsp.open(recording.file.path, 'r');
const stats = await file.stat();
const buffer = Buffer.alloc(stats.size - start);
const result = await file.read(buffer, 0, buffer.length, start);
await file.close();
return {
buffer,
nextCursor: start + result.bytesRead,
};
}
function assertRecordingFilepath(filepath: string) {
const normalizedPath = path.normalize(filepath);
const normalizedBase = path.normalize(SAVED_RECORDINGS_DIR + path.sep);
@@ -702,55 +598,6 @@ export async function readRecordingFile(filepath: string) {
return fsp.readFile(normalizedPath);
}
export async function readyRecording(id: number, buffer: Buffer) {
logger.info('readyRecording', id);
const recordingStatus = recordingStatus$.value;
const recording = recordings.get(id);
if (!recordingStatus || recordingStatus.id !== id || !recording) {
logger.error(`readyRecording: Recording ${id} not found`);
return;
}
const rawFilePath = String(recording.file.path);
const filepath = rawFilePath.replace('.raw', '.opus');
if (!filepath) {
logger.error(`readyRecording: Recording ${id} has no filepath`);
return;
}
await fs.writeFile(filepath, buffer);
// can safely remove the raw file now
logger.info('remove raw file', rawFilePath);
if (rawFilePath) {
try {
await fs.unlink(rawFilePath);
} catch (err) {
logger.error('failed to remove raw file', err);
}
}
// Update the status through the state machine
recordingStateMachine.dispatch({
type: 'SAVE_RECORDING',
id,
filepath,
});
// bring up the window
getMainWindow()
.then(mainWindow => {
if (mainWindow) {
mainWindow.show();
}
})
.catch(err => {
logger.error('failed to bring up the window', err);
});
}
export async function handleBlockCreationSuccess(id: number) {
recordingStateMachine.dispatch({
type: 'CREATE_BLOCK_SUCCESS',
@@ -767,7 +614,6 @@ export async function handleBlockCreationFailed(id: number, error?: Error) {
}
export function removeRecording(id: number) {
recordings.delete(id);
recordingStateMachine.dispatch({ type: 'REMOVE_RECORDING', id });
}
@@ -787,7 +633,6 @@ export interface SerializedRecordingStatus {
export function serializeRecordingStatus(
status: RecordingStatus
): SerializedRecordingStatus | null {
const recording = recordings.get(status.id);
return {
id: status.id,
status: status.status,
@@ -795,10 +640,9 @@ export function serializeRecordingStatus(
appGroupId: status.appGroup?.processGroupId,
icon: status.appGroup?.icon,
startTime: status.startTime,
filepath:
status.filepath ?? (recording ? String(recording.file.path) : undefined),
sampleRate: recording?.session.sampleRate,
numberOfChannels: recording?.session.channels,
filepath: status.filepath,
sampleRate: status.sampleRate,
numberOfChannels: status.numberOfChannels,
};
}
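
The net effect of this file's changes: the in-memory recordings map, raw-file writer, and renderer round-trip are gone; capture and Opus encoding now live in the native module, with the state machine as the single source of truth. A condensed TypeScript sketch of the new lifecycle, using only names visible in this diff (error paths elided):

```ts
// Illustrative only; mirrors startRecording()/stopRecording() above.
async function recordOnce(appGroup?: AppGroupInfo) {
  // Dispatch START_RECORDING, then start the native session; on success the
  // native id, filepath, sampleRate and numberOfChannels are attached to the
  // status via ATTACH_NATIVE_RECORDING.
  const status = await startRecording(appGroup);
  if (!status || status.status !== 'recording') return;

  // ...capture and encoding run entirely in the native layer...

  // stopRecording() finalizes the .opus file natively, dispatches
  // SAVE_RECORDING, and brings the main window forward.
  const serialized = await stopRecording(status.id);
  return serialized?.filepath; // already an encoded Ogg Opus file on disk
}
```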

View File

@@ -14,13 +14,11 @@ import {
checkMeetingPermissions,
checkRecordingAvailable,
disableRecordingFeature,
getRawAudioBuffers,
getRecording,
handleBlockCreationFailed,
handleBlockCreationSuccess,
pauseRecording,
readRecordingFile,
readyRecording,
recordingStatus$,
removeRecording,
SAVED_RECORDINGS_DIR,
@@ -51,16 +49,9 @@ export const recordingHandlers = {
stopRecording: async (_, id: number) => {
return stopRecording(id);
},
getRawAudioBuffers: async (_, id: number, cursor?: number) => {
return getRawAudioBuffers(id, cursor);
},
readRecordingFile: async (_, filepath: string) => {
return readRecordingFile(filepath);
},
// save the encoded recording buffer to the file system
readyRecording: async (_, id: number, buffer: Uint8Array) => {
return readyRecording(id, Buffer.from(buffer));
},
handleBlockCreationSuccess: async (_, id: number) => {
return handleBlockCreationSuccess(id);
},

View File

@@ -13,6 +13,15 @@ export type RecordingEvent =
type: 'START_RECORDING';
appGroup?: AppGroupInfo;
}
| {
type: 'ATTACH_NATIVE_RECORDING';
id: number;
nativeId: string;
startTime: number;
filepath: string;
sampleRate: number;
numberOfChannels: number;
}
| { type: 'PAUSE_RECORDING'; id: number }
| { type: 'RESUME_RECORDING'; id: number }
| {
@@ -23,6 +32,8 @@ export type RecordingEvent =
type: 'SAVE_RECORDING';
id: number;
filepath: string;
sampleRate?: number;
numberOfChannels?: number;
}
| {
type: 'CREATE_BLOCK_FAILED';
@@ -74,6 +85,9 @@ export class RecordingStateMachine {
case 'START_RECORDING':
newStatus = this.handleStartRecording(event.appGroup);
break;
case 'ATTACH_NATIVE_RECORDING':
newStatus = this.handleAttachNativeRecording(event);
break;
case 'PAUSE_RECORDING':
newStatus = this.handlePauseRecording();
break;
@@ -84,7 +98,12 @@ export class RecordingStateMachine {
newStatus = this.handleStopRecording(event.id);
break;
case 'SAVE_RECORDING':
newStatus = this.handleSaveRecording(event.id, event.filepath);
newStatus = this.handleSaveRecording(
event.id,
event.filepath,
event.sampleRate,
event.numberOfChannels
);
break;
case 'CREATE_BLOCK_SUCCESS':
newStatus = this.handleCreateBlockSuccess(event.id);
@@ -159,6 +178,35 @@ export class RecordingStateMachine {
}
}
/**
* Attach native recording metadata to the current recording
*/
private handleAttachNativeRecording(
event: Extract<RecordingEvent, { type: 'ATTACH_NATIVE_RECORDING' }>
): RecordingStatus | null {
const currentStatus = this.recordingStatus$.value;
if (!currentStatus || currentStatus.id !== event.id) {
logger.error(`Recording ${event.id} not found for native attachment`);
return currentStatus;
}
if (currentStatus.status !== 'recording') {
logger.error(
`Cannot attach native metadata when recording is in ${currentStatus.status} state`
);
return currentStatus;
}
return {
...currentStatus,
nativeId: event.nativeId,
startTime: event.startTime,
filepath: event.filepath,
sampleRate: event.sampleRate,
numberOfChannels: event.numberOfChannels,
};
}
/**
* Handle the PAUSE_RECORDING event
*/
@@ -233,7 +281,9 @@ export class RecordingStateMachine {
*/
private handleSaveRecording(
id: number,
filepath: string
filepath: string,
sampleRate?: number,
numberOfChannels?: number
): RecordingStatus | null {
const currentStatus = this.recordingStatus$.value;
@@ -246,6 +296,8 @@ export class RecordingStateMachine {
...currentStatus,
status: 'ready',
filepath,
sampleRate,
numberOfChannels,
};
}
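
To make the new transitions concrete, a hedged sketch of the two dispatches added here, assuming the shared recordingStateMachine instance from './state-machine' (all values illustrative):

```ts
// 1. While status is 'recording': attach native session metadata.
recordingStateMachine.dispatch({
  type: 'ATTACH_NATIVE_RECORDING',
  id: 1,
  nativeId: 'rec-1', // hypothetical native session id
  startTime: Date.now(),
  filepath: '/recordings/rec-1.opus', // hypothetical path
  sampleRate: 48000,
  numberOfChannels: 2,
});

// 2. After the native side finalizes the file: move to 'ready', now carrying
// sampleRate/numberOfChannels alongside the filepath.
recordingStateMachine.dispatch({
  type: 'SAVE_RECORDING',
  id: 1,
  filepath: '/recordings/rec-1.opus',
  sampleRate: 48000,
  numberOfChannels: 2,
});
```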

View File

@@ -1,6 +1,4 @@
import type { WriteStream } from 'node:fs';
import type { ApplicationInfo, AudioCaptureSession } from '@affine/native';
import type { ApplicationInfo } from '@affine/native';
export interface TappableAppInfo {
info: ApplicationInfo;
@@ -20,18 +18,6 @@ export interface AppGroupInfo {
isRunning: boolean;
}
export interface Recording {
id: number;
// the app may not be available if the user chooses to record system audio
app?: TappableAppInfo;
appGroup?: AppGroupInfo;
// the buffered file that is being recorded streamed to
file: WriteStream;
session: AudioCaptureSession;
startTime: number;
filepath?: string; // the filepath of the recording (only available when status is ready)
}
export interface RecordingStatus {
id: number; // corresponds to the recording id
// the status of the recording in a linear state machine
@@ -54,4 +40,7 @@ export interface RecordingStatus {
appGroup?: AppGroupInfo;
startTime: number; // 0 means not started yet
filepath?: string; // encoded file path
nativeId?: string;
sampleRate?: number;
numberOfChannels?: number;
}
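
For reference, an illustrative 'ready' RecordingStatus with the new optional fields populated (values made up; fields not shown in this excerpt are omitted):

```ts
const ready: RecordingStatus = {
  id: 1,
  status: 'ready',
  startTime: 1735620454000,
  filepath: '/recordings/rec-1.opus', // encoded file path
  nativeId: 'rec-1',                  // native session handle
  sampleRate: 48000,
  numberOfChannels: 2,
};
```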

View File

@@ -423,98 +423,3 @@ export async function encodeAudioBlobToOpusSlices(
await audioContext.close();
}
}
export const createStreamEncoder = (
recordingId: number,
codecs: {
sampleRate: number;
numberOfChannels: number;
targetBitrate?: number;
}
) => {
const { encoder, encodedChunks } = createOpusEncoder({
sampleRate: codecs.sampleRate,
numberOfChannels: codecs.numberOfChannels,
bitrate: codecs.targetBitrate,
});
const toAudioData = (buffer: Uint8Array) => {
// Each sample in f32 format is 4 bytes
const BYTES_PER_SAMPLE = 4;
return new AudioData({
format: 'f32',
sampleRate: codecs.sampleRate,
numberOfChannels: codecs.numberOfChannels,
numberOfFrames:
buffer.length / BYTES_PER_SAMPLE / codecs.numberOfChannels,
timestamp: 0,
data: buffer,
});
};
let cursor = 0;
let isClosed = false;
const next = async () => {
if (!apis) {
throw new Error('Electron API is not available');
}
if (isClosed) {
return;
}
const { buffer, nextCursor } = await apis.recording.getRawAudioBuffers(
recordingId,
cursor
);
if (isClosed || cursor === nextCursor) {
return;
}
cursor = nextCursor;
logger.debug('Encoding next chunk', cursor, nextCursor);
encoder.encode(toAudioData(buffer));
};
const poll = async () => {
if (isClosed) {
return;
}
logger.debug('Polling next chunk');
await next();
await new Promise(resolve => setTimeout(resolve, 1000));
await poll();
};
const close = () => {
if (isClosed) {
return;
}
isClosed = true;
return encoder.close();
};
return {
id: recordingId,
next,
poll,
flush: () => {
return encoder.flush();
},
close,
finish: async () => {
logger.debug('Finishing encoding');
await next();
close();
const buffer = muxToMp4(encodedChunks, {
sampleRate: codecs.sampleRate,
numberOfChannels: codecs.numberOfChannels,
bitrate: codecs.targetBitrate,
});
return buffer;
},
[Symbol.dispose]: () => {
close();
},
};
};
export type OpusStreamEncoder = ReturnType<typeof createStreamEncoder>;

View File

@@ -40,6 +40,37 @@ export declare function decodeAudio(buf: Uint8Array, destSampleRate?: number | u
/** Decode audio file into a Float32Array */
export declare function decodeAudioSync(buf: Uint8Array, destSampleRate?: number | undefined | null, filename?: string | undefined | null): Float32Array
export interface RecordingArtifact {
id: string
filepath: string
sampleRate: number
channels: number
durationMs: number
size: number
}
export interface RecordingSessionMeta {
id: string
filepath: string
sampleRate: number
channels: number
startedAt: number
}
export interface RecordingStartOptions {
appProcessId?: number
excludeProcessIds?: Array<number>
outputDir: string
format?: string
sampleRate?: number
channels?: number
id?: string
}
export declare function startRecording(opts: RecordingStartOptions): RecordingSessionMeta
export declare function stopRecording(id: string): RecordingArtifact
export declare function mintChallengeResponse(resource: string, bits?: number | undefined | null): Promise<string>
export declare function verifyChallengeResponse(response: string, bits: number, resource: string): Promise<boolean>
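
A minimal sketch of driving the new native recorder directly through these declarations, assuming '@affine/native' loads on the current platform (the Rust side only supports macOS and Windows) and that the paths shown are placeholders:

```ts
import { startRecording, stopRecording } from '@affine/native';

// outputDir must be an absolute path; it is created if missing.
// Only the 'opus' format is accepted, and id is sanitized to [A-Za-z0-9_-].
const meta = startRecording({
  outputDir: '/absolute/path/to/recordings',
  format: 'opus',
  id: 'demo-1',
});
console.log(meta.filepath, meta.sampleRate, meta.channels); // 48000 Hz Opus

// Stopping tears down the capture session, drains the encoder thread, and
// returns the finished Ogg Opus artifact.
const artifact = stopRecording(meta.id);
console.log(artifact.durationMs, artifact.size);
```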

View File

@@ -579,6 +579,8 @@ module.exports.AudioCaptureSession = nativeBinding.AudioCaptureSession
module.exports.ShareableContent = nativeBinding.ShareableContent
module.exports.decodeAudio = nativeBinding.decodeAudio
module.exports.decodeAudioSync = nativeBinding.decodeAudioSync
module.exports.startRecording = nativeBinding.startRecording
module.exports.stopRecording = nativeBinding.stopRecording
module.exports.mintChallengeResponse = nativeBinding.mintChallengeResponse
module.exports.verifyChallengeResponse = nativeBinding.verifyChallengeResponse
module.exports.DocStorage = nativeBinding.DocStorage

View File

@@ -11,11 +11,15 @@ harness = false
name = "mix_audio_samples"
[dependencies]
napi = { workspace = true, features = ["napi4"] }
napi-derive = { workspace = true, features = ["type-def"] }
rubato = { workspace = true }
symphonia = { workspace = true, features = ["all", "opt-simd"] }
thiserror = { workspace = true }
crossbeam-channel = { workspace = true }
napi = { workspace = true, features = ["napi4"] }
napi-derive = { workspace = true, features = ["type-def"] }
ogg = { workspace = true }
opus-codec = "0.1.2"
rand = { workspace = true }
rubato = { workspace = true }
symphonia = { workspace = true, features = ["all", "opt-simd"] }
thiserror = { workspace = true }
[target.'cfg(target_os = "macos")'.dependencies]
block2 = { workspace = true }
@@ -29,10 +33,9 @@ screencapturekit = { workspace = true }
uuid = { workspace = true, features = ["v4"] }
[target.'cfg(target_os = "windows")'.dependencies]
cpal = { workspace = true }
crossbeam-channel = { workspace = true }
windows = { workspace = true }
windows-core = { workspace = true }
cpal = { workspace = true }
windows = { workspace = true }
windows-core = { workspace = true }
[dev-dependencies]
criterion2 = { workspace = true }

View File

@@ -0,0 +1,29 @@
use crossbeam_channel::Sender;
use napi::{
bindgen_prelude::Float32Array,
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
};
use std::sync::Arc;
/// Internal callback abstraction so audio taps can target JS or native pipelines.
#[derive(Clone)]
pub enum AudioCallback {
Js(Arc<ThreadsafeFunction<Float32Array, ()>>),
Channel(Sender<Vec<f32>>),
}
impl AudioCallback {
pub fn call(&self, samples: Vec<f32>) {
match self {
Self::Js(func) => {
// Non-blocking call into JS; errors are ignored to avoid blocking the
// audio thread.
let _ = func.call(Ok(samples.into()), ThreadsafeFunctionCallMode::NonBlocking);
}
Self::Channel(sender) => {
// Drop the chunk if the channel is full to avoid blocking capture.
let _ = sender.try_send(samples);
}
}
}
}

View File

@@ -8,4 +8,6 @@ pub mod windows;
#[cfg(target_os = "windows")]
pub use windows::*;
pub mod audio_callback;
pub mod audio_decoder;
pub mod recording;

View File

@@ -36,6 +36,7 @@ use screencapturekit::shareable_content::SCShareableContent;
use uuid::Uuid;
use crate::{
audio_callback::AudioCallback,
error::CoreAudioError,
pid::{audio_process_list, get_process_property},
tap_audio::{AggregateDeviceManager, AudioCaptureSession},
@@ -677,10 +678,9 @@ impl ShareableContent {
Ok(false)
}
#[napi]
pub fn tap_audio(
pub(crate) fn tap_audio_with_callback(
process_id: u32,
audio_stream_callback: ThreadsafeFunction<napi::bindgen_prelude::Float32Array, ()>,
audio_stream_callback: AudioCallback,
) -> Result<AudioCaptureSession> {
let app = ShareableContent::applications()?
.into_iter()
@@ -694,13 +694,10 @@ impl ShareableContent {
));
}
// Convert ThreadsafeFunction to Arc<ThreadsafeFunction>
let callback_arc = Arc::new(audio_stream_callback);
// Use AggregateDeviceManager instead of AggregateDevice directly
// This provides automatic default device change detection
let mut device_manager = AggregateDeviceManager::new(&app)?;
device_manager.start_capture(callback_arc)?;
device_manager.start_capture(audio_stream_callback)?;
let boxed_manager = Box::new(device_manager);
Ok(AudioCaptureSession::new(boxed_manager))
} else {
@@ -712,9 +709,19 @@ impl ShareableContent {
}
#[napi]
pub fn tap_global_audio(
excluded_processes: Option<Vec<&ApplicationInfo>>,
pub fn tap_audio(
process_id: u32,
audio_stream_callback: ThreadsafeFunction<napi::bindgen_prelude::Float32Array, ()>,
) -> Result<AudioCaptureSession> {
ShareableContent::tap_audio_with_callback(
process_id,
AudioCallback::Js(Arc::new(audio_stream_callback)),
)
}
pub(crate) fn tap_global_audio_with_callback(
excluded_processes: Option<Vec<&ApplicationInfo>>,
audio_stream_callback: AudioCallback,
) -> Result<AudioCaptureSession> {
let excluded_object_ids = excluded_processes
.unwrap_or_default()
@@ -722,13 +729,21 @@ impl ShareableContent {
.map(|app| app.object_id)
.collect::<Vec<_>>();
// Convert ThreadsafeFunction to Arc<ThreadsafeFunction>
let callback_arc = Arc::new(audio_stream_callback);
// Use the new AggregateDeviceManager for automatic device adaptation
let mut device_manager = AggregateDeviceManager::new_global(&excluded_object_ids)?;
device_manager.start_capture(callback_arc)?;
device_manager.start_capture(audio_stream_callback)?;
let boxed_manager = Box::new(device_manager);
Ok(AudioCaptureSession::new(boxed_manager))
}
#[napi]
pub fn tap_global_audio(
excluded_processes: Option<Vec<&ApplicationInfo>>,
audio_stream_callback: ThreadsafeFunction<napi::bindgen_prelude::Float32Array, ()>,
) -> Result<AudioCaptureSession> {
ShareableContent::tap_global_audio_with_callback(
excluded_processes,
AudioCallback::Js(Arc::new(audio_stream_callback)),
)
}
}

View File

@@ -23,15 +23,13 @@ use coreaudio::sys::{
AudioObjectGetPropertyDataSize, AudioObjectID, AudioObjectPropertyAddress,
AudioObjectRemovePropertyListenerBlock, AudioTimeStamp, OSStatus,
};
use napi::{
bindgen_prelude::{Float32Array, Result, Status},
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
};
use napi::bindgen_prelude::Result;
use napi_derive::napi;
use objc2::runtime::AnyObject;
use crate::{
audio_buffer::InputAndOutputAudioBufferList,
audio_callback::AudioCallback,
ca_tap_description::CATapDescription,
cf_types::CFDictionaryBuilder,
device::get_device_uid,
@@ -241,7 +239,7 @@ impl AggregateDevice {
/// Implementation for the AggregateDevice to start processing audio
pub fn start(
&mut self,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, Status, true>>,
audio_stream_callback: AudioCallback,
// Add original_audio_stats to ensure consistent target rate
original_audio_stats: AudioStats,
) -> Result<AudioTapStream> {
@@ -300,11 +298,8 @@ impl AggregateDevice {
return kAudioHardwareBadStreamError as i32;
};
// Send the processed audio data to JavaScript
audio_stream_callback.call(
Ok(mixed_samples.into()),
ThreadsafeFunctionCallMode::NonBlocking,
);
// Send the processed audio data to the configured sink
audio_stream_callback.call(mixed_samples);
kAudioHardwareNoError as i32
},
@@ -576,7 +571,7 @@ pub struct AggregateDeviceManager {
app_id: Option<AudioObjectID>,
excluded_processes: Vec<AudioObjectID>,
active_stream: Option<Arc<std::sync::Mutex<Option<AudioTapStream>>>>,
audio_callback: Option<Arc<ThreadsafeFunction<Float32Array, (), Float32Array, Status, true>>>,
audio_callback: Option<AudioCallback>,
original_audio_stats: Option<AudioStats>,
}
@@ -614,10 +609,7 @@ impl AggregateDeviceManager {
}
/// This sets up the initial stream and listeners.
pub fn start_capture(
&mut self,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, Status, true>>,
) -> Result<()> {
pub fn start_capture(&mut self, audio_stream_callback: AudioCallback) -> Result<()> {
// Store the callback for potential device switch later
self.audio_callback = Some(audio_stream_callback.clone());

View File

@@ -0,0 +1,581 @@
use std::{
collections::HashMap,
fs,
io::{BufWriter, Write},
path::PathBuf,
sync::{LazyLock, Mutex},
thread::{self, JoinHandle},
time::{SystemTime, UNIX_EPOCH},
};
use crossbeam_channel::{bounded, Receiver, Sender};
use napi::{bindgen_prelude::Result, Error, Status};
use napi_derive::napi;
use ogg::writing::{PacketWriteEndInfo, PacketWriter};
use opus_codec::{Application, Channels, Encoder, FrameSize, SampleRate as OpusSampleRate};
use rubato::Resampler;
use crate::audio_callback::AudioCallback;
#[cfg(target_os = "macos")]
use crate::macos::screen_capture_kit::{ApplicationInfo, ShareableContent};
#[cfg(target_os = "windows")]
use crate::windows::screen_capture_kit::ShareableContent;
const ENCODE_SAMPLE_RATE: OpusSampleRate = OpusSampleRate::Hz48000;
const MAX_PACKET_SIZE: usize = 4096;
const RESAMPLER_INPUT_CHUNK: usize = 1024;
type RecordingResult<T> = std::result::Result<T, RecordingError>;
#[napi(object)]
pub struct RecordingStartOptions {
pub app_process_id: Option<u32>,
pub exclude_process_ids: Option<Vec<u32>>,
pub output_dir: String,
pub format: Option<String>,
pub sample_rate: Option<u32>,
pub channels: Option<u32>,
pub id: Option<String>,
}
#[napi(object)]
pub struct RecordingSessionMeta {
pub id: String,
pub filepath: String,
pub sample_rate: u32,
pub channels: u32,
pub started_at: i64,
}
#[napi(object)]
pub struct RecordingArtifact {
pub id: String,
pub filepath: String,
pub sample_rate: u32,
pub channels: u32,
pub duration_ms: i64,
pub size: i64,
}
#[derive(Debug, thiserror::Error)]
enum RecordingError {
#[error("unsupported platform")]
UnsupportedPlatform,
#[error("invalid output directory")]
InvalidOutputDir,
#[error("invalid format {0}")]
InvalidFormat(String),
#[error("io error: {0}")]
Io(#[from] std::io::Error),
#[error("encoding error: {0}")]
Encoding(String),
#[error("recording not found")]
NotFound,
#[error("empty recording")]
Empty,
#[error("start failure: {0}")]
Start(String),
#[error("join failure")]
Join,
}
impl RecordingError {
fn code(&self) -> &'static str {
match self {
RecordingError::UnsupportedPlatform => "unsupported-platform",
RecordingError::InvalidOutputDir => "invalid-output-dir",
RecordingError::InvalidFormat(_) => "invalid-format",
RecordingError::Io(_) => "io-error",
RecordingError::Encoding(_) => "encoding-error",
RecordingError::NotFound => "not-found",
RecordingError::Empty => "empty-recording",
RecordingError::Start(_) => "start-failure",
RecordingError::Join => "join-failure",
}
}
}
impl From<RecordingError> for Error {
fn from(err: RecordingError) -> Self {
Error::new(Status::GenericFailure, format!("{}: {}", err.code(), err))
}
}
struct InterleavedResampler {
resampler: rubato::FastFixedIn<f32>,
channels: usize,
fifo: Vec<Vec<f32>>,
warmed: bool,
}
impl InterleavedResampler {
fn new(from_sr: u32, to_sr: u32, channels: usize) -> RecordingResult<Self> {
let ratio = to_sr as f64 / from_sr as f64;
let resampler = rubato::FastFixedIn::<f32>::new(
ratio,
1.0,
rubato::PolynomialDegree::Linear,
RESAMPLER_INPUT_CHUNK,
channels,
)
.map_err(|e| RecordingError::Encoding(format!("resampler init failed: {e}")))?;
Ok(Self {
resampler,
channels,
fifo: vec![Vec::<f32>::new(); channels],
warmed: false,
})
}
fn feed(&mut self, interleaved: &[f32]) -> Vec<f32> {
for frame in interleaved.chunks(self.channels) {
for (idx, sample) in frame.iter().enumerate() {
if let Some(channel_fifo) = self.fifo.get_mut(idx) {
channel_fifo.push(*sample);
}
}
}
let mut out = Vec::new();
while self.fifo.first().map(|q| q.len()).unwrap_or(0) >= RESAMPLER_INPUT_CHUNK {
let mut chunk: Vec<Vec<f32>> = Vec::with_capacity(self.channels);
for channel in &mut self.fifo {
let take: Vec<f32> = channel.drain(..RESAMPLER_INPUT_CHUNK).collect();
chunk.push(take);
}
if let Ok(blocks) = self.resampler.process(&chunk, None) {
if blocks.is_empty() || blocks.len() != self.channels {
continue;
}
if !self.warmed {
self.warmed = true;
continue;
}
let out_len = blocks[0].len();
for i in 0..out_len {
for ch in 0..self.channels {
out.push(blocks[ch][i]);
}
}
}
}
out
}
}
struct OggOpusWriter {
writer: PacketWriter<'static, BufWriter<fs::File>>,
encoder: Encoder,
frame_samples: usize,
pending: Vec<f32>,
granule_position: u64,
samples_written: u64,
channels: Channels,
sample_rate: OpusSampleRate,
resampler: Option<InterleavedResampler>,
filepath: PathBuf,
stream_serial: u32,
}
impl OggOpusWriter {
fn new(filepath: PathBuf, source_sample_rate: u32, channels: u32) -> RecordingResult<Self> {
let channels = if channels > 1 {
Channels::Stereo
} else {
Channels::Mono
};
let sample_rate = ENCODE_SAMPLE_RATE;
let resampler = if source_sample_rate != sample_rate.as_i32() as u32 {
Some(InterleavedResampler::new(
source_sample_rate,
sample_rate.as_i32() as u32,
channels.as_usize(),
)?)
} else {
None
};
if let Some(parent) = filepath.parent() {
fs::create_dir_all(parent)?;
}
let file = fs::File::create(&filepath)?;
let mut writer = PacketWriter::new(BufWriter::new(file));
let stream_serial: u32 = rand::random();
write_opus_headers(&mut writer, stream_serial, channels, sample_rate)?;
let frame_samples = FrameSize::Ms20.samples(sample_rate);
let encoder = Encoder::new(sample_rate, channels, Application::Audio)
.map_err(|e| RecordingError::Encoding(e.to_string()))?;
Ok(Self {
writer,
encoder,
frame_samples,
pending: Vec::new(),
granule_position: 0,
samples_written: 0,
channels,
sample_rate,
resampler,
filepath,
stream_serial,
})
}
fn push_samples(&mut self, samples: &[f32]) -> RecordingResult<()> {
let mut processed = if let Some(resampler) = &mut self.resampler {
resampler.feed(samples)
} else {
samples.to_vec()
};
if processed.is_empty() {
return Ok(());
}
self.pending.append(&mut processed);
let frame_len = self.frame_samples * self.channels.as_usize();
while self.pending.len() >= frame_len {
let frame: Vec<f32> = self.pending.drain(..frame_len).collect();
self.encode_frame(frame, self.frame_samples, PacketWriteEndInfo::NormalPacket)?;
}
Ok(())
}
fn encode_frame(
&mut self,
frame: Vec<f32>,
samples_in_frame: usize,
end: PacketWriteEndInfo,
) -> RecordingResult<()> {
let mut out = vec![0u8; MAX_PACKET_SIZE];
let encoded = self
.encoder
.encode_float(&frame, &mut out)
.map_err(|e| RecordingError::Encoding(e.to_string()))?;
self.granule_position += samples_in_frame as u64;
self.samples_written += samples_in_frame as u64;
let packet = out[..encoded].to_vec();
self
.writer
.write_packet(packet, self.stream_serial, end, self.granule_position)
.map_err(|e| RecordingError::Encoding(format!("failed to write packet: {e}")))?;
Ok(())
}
fn finish(mut self) -> RecordingResult<RecordingArtifact> {
let frame_len = self.frame_samples * self.channels.as_usize();
if !self.pending.is_empty() {
let mut frame = self.pending.clone();
let samples_in_frame = frame.len() / self.channels.as_usize();
frame.resize(frame_len, 0.0);
self.encode_frame(frame, samples_in_frame, PacketWriteEndInfo::NormalPacket)?;
self.pending.clear();
}
// If nothing was written, delete the file and report an empty recording;
// otherwise append an empty packet flagged end-of-stream to close the file.
if self.samples_written == 0 {
fs::remove_file(&self.filepath).ok();
return Err(RecordingError::Empty);
}
// Flush a final end-of-stream marker.
self
.writer
.write_packet(
Vec::<u8>::new(),
self.stream_serial,
PacketWriteEndInfo::EndStream,
self.granule_position,
)
.map_err(|e| RecordingError::Encoding(format!("failed to finish stream: {e}")))?;
let _ = self.writer.inner_mut().flush();
let size = fs::metadata(&self.filepath)?.len() as i64;
let duration_ms = (self.samples_written * 1000) as i64 / self.sample_rate.as_i32() as i64;
Ok(RecordingArtifact {
id: String::new(),
filepath: self.filepath.to_string_lossy().to_string(),
sample_rate: self.sample_rate.as_i32() as u32,
channels: self.channels.as_usize() as u32,
duration_ms,
size,
})
}
}
fn write_opus_headers(
writer: &mut PacketWriter<'static, BufWriter<fs::File>>,
stream_serial: u32,
channels: Channels,
sample_rate: OpusSampleRate,
) -> RecordingResult<()> {
let mut opus_head = Vec::with_capacity(19);
opus_head.extend_from_slice(b"OpusHead");
opus_head.push(1); // version
opus_head.push(channels.as_usize() as u8);
opus_head.extend_from_slice(&0u16.to_le_bytes()); // pre-skip
opus_head.extend_from_slice(&(sample_rate.as_i32() as u32).to_le_bytes());
opus_head.extend_from_slice(&0i16.to_le_bytes()); // output gain
opus_head.push(0); // channel mapping
writer
.write_packet(opus_head, stream_serial, PacketWriteEndInfo::EndPage, 0)
.map_err(|e| RecordingError::Encoding(format!("failed to write OpusHead: {e}")))?;
let vendor = b"AFFiNE Native";
let mut opus_tags = Vec::new();
opus_tags.extend_from_slice(b"OpusTags");
opus_tags.extend_from_slice(&(vendor.len() as u32).to_le_bytes());
opus_tags.extend_from_slice(vendor);
opus_tags.extend_from_slice(&0u32.to_le_bytes()); // user comment list length
writer
.write_packet(opus_tags, stream_serial, PacketWriteEndInfo::EndPage, 0)
.map_err(|e| RecordingError::Encoding(format!("failed to write OpusTags: {e}")))?;
Ok(())
}
enum PlatformCapture {
#[cfg(target_os = "macos")]
Mac(crate::macos::tap_audio::AudioCaptureSession),
#[cfg(target_os = "windows")]
Windows(crate::windows::audio_capture::AudioCaptureSession),
}
unsafe impl Send for PlatformCapture {}
impl PlatformCapture {
fn stop(&mut self) -> Result<()> {
match self {
#[cfg(target_os = "macos")]
PlatformCapture::Mac(session) => session.stop(),
#[cfg(target_os = "windows")]
PlatformCapture::Windows(session) => session.stop(),
#[allow(unreachable_patterns)]
_ => Err(RecordingError::UnsupportedPlatform.into()),
}
}
}
struct ActiveRecording {
sender: Option<Sender<Vec<f32>>>,
capture: PlatformCapture,
worker: Option<JoinHandle<std::result::Result<RecordingArtifact, RecordingError>>>,
}
static ACTIVE_RECORDINGS: LazyLock<Mutex<HashMap<String, ActiveRecording>>> =
LazyLock::new(|| Mutex::new(HashMap::new()));
fn now_millis() -> i64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_millis() as i64)
.unwrap_or(0)
}
fn sanitize_id(id: Option<String>) -> String {
let raw = id.unwrap_or_else(|| format!("{}", now_millis()));
let filtered: String = raw
.chars()
.filter(|c| c.is_ascii_alphanumeric() || *c == '-' || *c == '_')
.collect();
if filtered.is_empty() {
format!("{}", now_millis())
} else {
filtered
}
}
fn validate_output_dir(path: &str) -> Result<PathBuf> {
let dir = PathBuf::from(path);
if !dir.is_absolute() {
return Err(RecordingError::InvalidOutputDir.into());
}
fs::create_dir_all(&dir)?;
let normalized = dir
.canonicalize()
.map_err(|_| RecordingError::InvalidOutputDir)?;
Ok(normalized)
}
#[cfg(target_os = "macos")]
fn build_excluded_refs(ids: &[u32]) -> Result<Vec<ApplicationInfo>> {
if ids.is_empty() {
return Ok(Vec::new());
}
let apps = ShareableContent::applications()?;
let mut excluded = Vec::new();
for app in apps {
if ids.contains(&(app.process_id as u32)) {
excluded.push(app);
}
}
Ok(excluded)
}
fn start_capture(
opts: &RecordingStartOptions,
tx: Sender<Vec<f32>>,
) -> Result<(PlatformCapture, u32, u32)> {
#[cfg(target_os = "macos")]
{
let callback = AudioCallback::Channel(tx);
let session = if let Some(app_id) = opts.app_process_id {
ShareableContent::tap_audio_with_callback(app_id, callback)?
} else {
let excluded_apps = build_excluded_refs(
opts
.exclude_process_ids
.as_ref()
.map(|v| v.as_slice())
.unwrap_or(&[]),
)?;
let excluded_refs: Vec<&ApplicationInfo> = excluded_apps.iter().collect();
ShareableContent::tap_global_audio_with_callback(Some(excluded_refs), callback)?
};
let sample_rate = session.get_sample_rate()?.round().clamp(1.0, f64::MAX) as u32;
let channels = session.get_channels()?;
return Ok((PlatformCapture::Mac(session), sample_rate, channels));
}
#[cfg(target_os = "windows")]
{
let callback = AudioCallback::Channel(tx);
let session = ShareableContent::tap_audio_with_callback(
opts.app_process_id.unwrap_or(0),
callback,
opts.sample_rate,
)?;
let sample_rate = session.get_sample_rate().round() as u32;
let channels = session.get_channels();
return Ok((PlatformCapture::Windows(session), sample_rate, channels));
}
#[cfg(not(any(target_os = "macos", target_os = "windows")))]
{
let _ = opts;
let _ = tx;
Err(RecordingError::UnsupportedPlatform.into())
}
}
fn spawn_worker(
id: String,
filepath: PathBuf,
rx: Receiver<Vec<f32>>,
source_sample_rate: u32,
channels: u32,
) -> JoinHandle<std::result::Result<RecordingArtifact, RecordingError>> {
thread::spawn(move || {
let mut writer = OggOpusWriter::new(filepath.clone(), source_sample_rate, channels)?;
for chunk in rx {
writer.push_samples(&chunk)?;
}
let mut artifact = writer.finish()?;
artifact.id = id;
Ok(artifact)
})
}
#[napi]
pub fn start_recording(opts: RecordingStartOptions) -> Result<RecordingSessionMeta> {
if let Some(fmt) = opts.format.as_deref() {
if fmt.to_ascii_lowercase() != "opus" {
return Err(RecordingError::InvalidFormat(fmt.to_string()).into());
}
}
let output_dir = validate_output_dir(&opts.output_dir)?;
let id = sanitize_id(opts.id.clone());
let filepath = output_dir.join(format!("{id}.opus"));
if filepath.exists() {
fs::remove_file(&filepath)?;
}
let (tx, rx) = bounded::<Vec<f32>>(32);
let (capture, capture_rate, capture_channels) =
start_capture(&opts, tx.clone()).map_err(|e| RecordingError::Start(e.to_string()))?;
let encoding_channels = match opts.channels {
Some(1) => 1,
Some(2) => 2,
_ => capture_channels,
};
let worker = spawn_worker(
id.clone(),
filepath.clone(),
rx,
capture_rate,
encoding_channels,
);
let meta = RecordingSessionMeta {
id: id.clone(),
filepath: filepath.to_string_lossy().to_string(),
sample_rate: ENCODE_SAMPLE_RATE.as_i32() as u32,
channels: encoding_channels,
started_at: now_millis(),
};
let mut recordings = ACTIVE_RECORDINGS
.lock()
.map_err(|_| RecordingError::Start("lock poisoned".into()))?;
if recordings.contains_key(&id) {
return Err(RecordingError::Start("duplicate recording id".into()).into());
}
recordings.insert(
id,
ActiveRecording {
sender: Some(tx),
capture,
worker: Some(worker),
},
);
Ok(meta)
}
#[napi]
pub fn stop_recording(id: String) -> Result<RecordingArtifact> {
let mut recordings = ACTIVE_RECORDINGS
.lock()
.map_err(|_| RecordingError::Start("lock poisoned".into()))?;
let mut entry = recordings.remove(&id).ok_or(RecordingError::NotFound)?;
entry
.capture
.stop()
.map_err(|e| RecordingError::Start(e.to_string()))?;
drop(entry.sender.take());
let handle = entry.worker.take().ok_or(RecordingError::Join)?;
let artifact = handle
.join()
.map_err(|_| RecordingError::Join)?
.map_err(|e| e)?;
Ok(artifact)
}
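
The writer above hand-rolls the RFC 7845 OpusHead packet. Rebuilt in TypeScript to make the 19-byte layout explicit (fields mirror write_opus_headers; this is a reference sketch, not part of the change):

```ts
function buildOpusHead(channels: number, inputSampleRate: number): Buffer {
  const head = Buffer.alloc(19);
  head.write('OpusHead', 0, 'ascii');      // 8-byte magic signature
  head.writeUInt8(1, 8);                   // version
  head.writeUInt8(channels, 9);            // channel count
  head.writeUInt16LE(0, 10);               // pre-skip (the writer uses 0)
  head.writeUInt32LE(inputSampleRate, 12); // original input sample rate
  head.writeInt16LE(0, 16);                // output gain, Q7.8 dB
  head.writeUInt8(0, 18);                  // channel mapping family 0
  return head;
}
```

Granule positions then advance by 20 ms frames (960 samples at 48 kHz), so finish() can compute the duration as samples_written * 1000 / 48000, exactly as the code above does.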

View File

@@ -8,16 +8,13 @@ use std::{
thread::JoinHandle,
};
use crate::audio_callback::AudioCallback;
use cpal::{
traits::{DeviceTrait, HostTrait, StreamTrait},
SampleRate,
};
use crossbeam_channel::unbounded;
use napi::{
bindgen_prelude::{Float32Array, Result},
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
Error, Status,
};
use napi::{bindgen_prelude::Result, Error, Status};
use napi_derive::napi;
use rubato::{FastFixedIn, PolynomialDegree, Resampler};
@@ -221,7 +218,8 @@ impl Drop for AudioCaptureSession {
}
pub fn start_recording(
audio_buffer_callback: ThreadsafeFunction<Float32Array, ()>,
audio_buffer_callback: AudioCallback,
target_sample_rate: Option<SampleRate>,
) -> Result<AudioCaptureSession> {
let available_hosts = cpal::available_hosts();
let host_id = available_hosts
@@ -247,7 +245,7 @@ pub fn start_recording(
let mic_sample_rate = mic_config.sample_rate();
let lb_sample_rate = lb_config.sample_rate();
let target_rate = SampleRate(mic_sample_rate.min(lb_sample_rate).0);
let target_rate = target_sample_rate.unwrap_or(SampleRate(mic_sample_rate.min(lb_sample_rate).0));
let mic_channels = mic_config.channels();
let lb_channels = lb_config.channels();
@@ -347,10 +345,7 @@ pub fn start_recording(
let lb_chunk: Vec<f32> = post_lb.drain(..TARGET_FRAME_SIZE).collect();
let mixed = mix(&mic_chunk, &lb_chunk);
if !mixed.is_empty() {
let _ = audio_buffer_callback.call(
Ok(mixed.clone().into()),
ThreadsafeFunctionCallMode::NonBlocking,
);
audio_buffer_callback.call(mixed);
}
}

View File

@@ -10,6 +10,7 @@ use std::{
time::Duration,
};
use cpal::SampleRate;
use napi::{
bindgen_prelude::{Buffer, Error, Result, Status},
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
@@ -27,6 +28,7 @@ use windows::Win32::System::{
};
// Import the function from microphone_listener
use crate::audio_callback::AudioCallback;
use crate::windows::microphone_listener::is_process_actively_using_microphone;
// Type alias to match macOS API
@@ -230,6 +232,15 @@ impl ShareableContent {
}
}
pub(crate) fn tap_audio_with_callback(
_process_id: u32,
audio_stream_callback: AudioCallback,
target_sample_rate: Option<u32>,
) -> Result<AudioCaptureSession> {
let target = target_sample_rate.map(SampleRate);
crate::windows::audio_capture::start_recording(audio_stream_callback, target)
}
#[napi]
pub fn tap_audio(
_process_id: u32, // Currently unused - Windows captures global audio
@@ -237,7 +248,22 @@ impl ShareableContent {
) -> Result<AudioCaptureSession> {
// On Windows with CPAL, we capture global audio (mic + loopback)
// since per-application audio tapping isn't supported the same way as macOS
crate::windows::audio_capture::start_recording(audio_stream_callback)
ShareableContent::tap_audio_with_callback(
_process_id,
AudioCallback::Js(Arc::new(audio_stream_callback)),
None,
)
}
pub(crate) fn tap_global_audio_with_callback(
_excluded_processes: Option<Vec<&ApplicationInfo>>,
audio_stream_callback: AudioCallback,
target_sample_rate: Option<u32>,
) -> Result<AudioCaptureSession> {
let target = target_sample_rate.map(SampleRate);
// Delegate to audio_capture::start_recording which handles mixing mic +
// loopback
crate::windows::audio_capture::start_recording(audio_stream_callback, target)
}
#[napi]
@@ -245,9 +271,11 @@ impl ShareableContent {
_excluded_processes: Option<Vec<&ApplicationInfo>>,
audio_stream_callback: ThreadsafeFunction<napi::bindgen_prelude::Float32Array, ()>,
) -> Result<AudioCaptureSession> {
// Delegate to audio_capture::start_recording which handles mixing mic +
// loopback
crate::windows::audio_capture::start_recording(audio_stream_callback)
ShareableContent::tap_global_audio_with_callback(
_excluded_processes,
AudioCallback::Js(Arc::new(audio_stream_callback)),
None,
)
}
#[napi]