From 0442c0e2b6753abab86cf71c482551c7e42ecb39 Mon Sep 17 00:00:00 2001
From: pengx17 <pengxiao@outlook.com>
Date: Mon, 21 Apr 2025 08:35:32 +0000
Subject: [PATCH] feat(electron): adapts to default audio input/output device
 changes (#11815)

Enable automatic adoption of new default audio devices upon detection.

1. add `AggregateDeviceManager` to watch for audio device changes, creating & maintaining `AggregateDevice` instances and cleaning up
2. use a workaround of `FastFixedIn` to deal with resampling delay issue (this is low quality and have some artifacts in the resampled audio)

fix AF-2536
---
 .../electron/src/main/recording/feature.ts    |  14 +-
 .../apps/electron/src/main/recording/types.ts |   4 +-
 .../media-capture-playground/server/main.ts   | 269 ++++---
 packages/frontend/native/index.d.ts           |   8 +-
 packages/frontend/native/index.js             |   2 +-
 .../media_capture/src/macos/audio_buffer.rs   |  62 +-
 .../src/macos/screen_capture_kit.rs           |  68 +-
 .../media_capture/src/macos/tap_audio.rs      | 716 ++++++++++++++++--
 .../native/media_capture/src/macos/utils.rs   |  36 +-
 9 files changed, 890 insertions(+), 289 deletions(-)

diff --git a/packages/frontend/apps/electron/src/main/recording/feature.ts b/packages/frontend/apps/electron/src/main/recording/feature.ts
index d967c92936..670a391a73 100644
--- a/packages/frontend/apps/electron/src/main/recording/feature.ts
+++ b/packages/frontend/apps/electron/src/main/recording/feature.ts
@@ -287,7 +287,7 @@ export function createRecording(status: RecordingStatus) {
     app: status.app,
     appGroup: status.appGroup,
     file,
-    stream,
+    session: stream,
   };
 
   recordings.set(status.id, recording);
@@ -308,8 +308,8 @@ export async function getRecording(id: number) {
     app: recording.app,
     startTime: recording.startTime,
     filepath: rawFilePath,
-    sampleRate: recording.stream.sampleRate,
-    numberOfChannels: recording.stream.channels,
+    sampleRate: recording.session.sampleRate,
+    numberOfChannels: recording.session.channels,
   };
 }
 
@@ -342,7 +342,7 @@ function setupRecordingListeners() {
         } else if (status?.status === 'stopped') {
           const recording = recordings.get(status.id);
           if (recording) {
-            recording.stream.stop();
+            recording.session.stop();
           }
         } else if (
           status?.status === 'create-block-success' ||
@@ -564,7 +564,7 @@ export async function stopRecording(id: number) {
     return;
   }
 
-  const { file, stream } = recording;
+  const { file, session: stream } = recording;
 
   // First stop the audio stream to prevent more data coming in
   try {
@@ -742,8 +742,8 @@ export function serializeRecordingStatus(
     startTime: status.startTime,
     filepath:
       status.filepath ?? (recording ? String(recording.file.path) : undefined),
-    sampleRate: recording?.stream.sampleRate,
-    numberOfChannels: recording?.stream.channels,
+    sampleRate: recording?.session.sampleRate,
+    numberOfChannels: recording?.session.channels,
   };
 }
 
diff --git a/packages/frontend/apps/electron/src/main/recording/types.ts b/packages/frontend/apps/electron/src/main/recording/types.ts
index d5826cf2e5..b1d504acc9 100644
--- a/packages/frontend/apps/electron/src/main/recording/types.ts
+++ b/packages/frontend/apps/electron/src/main/recording/types.ts
@@ -1,6 +1,6 @@
 import type { WriteStream } from 'node:fs';
 
-import type { AudioTapStream, TappableApplication } from '@affine/native';
+import type { AudioCaptureSession, TappableApplication } from '@affine/native';
 
 export interface TappableAppInfo {
   rawInstance: TappableApplication;
@@ -27,7 +27,7 @@ export interface Recording {
   appGroup?: AppGroupInfo;
   // the buffered file that is being recorded streamed to
   file: WriteStream;
-  stream: AudioTapStream;
+  session: AudioCaptureSession;
   startTime: number;
   filepath?: string; // the filepath of the recording (only available when status is ready)
 }
diff --git a/packages/frontend/media-capture-playground/server/main.ts b/packages/frontend/media-capture-playground/server/main.ts
index d8b450aff2..0e9a87d1c1 100644
--- a/packages/frontend/media-capture-playground/server/main.ts
+++ b/packages/frontend/media-capture-playground/server/main.ts
@@ -4,13 +4,18 @@ import path from 'node:path';
 
 import {
   type Application,
-  type AudioTapStream,
+  type AudioCaptureSession,
   ShareableContent,
   type TappableApplication,
 } from '@affine/native';
 import type { FSWatcher } from 'chokidar';
 import chokidar from 'chokidar';
-import express from 'express';
+import express, {
+  type NextFunction,
+  type Request,
+  type RequestHandler,
+  type Response,
+} from 'express';
 import rateLimit from 'express-rate-limit';
 import fs from 'fs-extra';
 import { debounce } from 'lodash-es';
@@ -33,7 +38,7 @@ interface Recording {
   app: TappableApplication | null;
   appGroup: Application | null;
   buffers: Float32Array[];
-  stream: AudioTapStream;
+  session: AudioCaptureSession;
   startTime: number;
   isWriting: boolean;
   isGlobal?: boolean;
@@ -90,15 +95,19 @@ const io = new Server(httpServer, {
 });
 
 // Add CORS headers middleware
-app.use((req, res, next) => {
-  res.header('Access-Control-Allow-Origin', '*');
-  res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
-  res.header(
+app.use((req: Request, res: Response, next: NextFunction): void => {
+  res.setHeader('Access-Control-Allow-Origin', '*');
+  res.setHeader(
+    'Access-Control-Allow-Methods',
+    'GET, POST, PUT, DELETE, OPTIONS'
+  );
+  res.setHeader(
     'Access-Control-Allow-Headers',
     'Origin, X-Requested-With, Content-Type, Accept'
   );
   if (req.method === 'OPTIONS') {
-    return res.sendStatus(200);
+    res.status(200).end();
+    return;
   }
   next();
 });
@@ -108,16 +117,18 @@ app.use(express.json());
 // Update the static file serving to handle the new folder structure
 app.use(
   '/recordings',
-  (req, res, next) => {
+  (req: Request, res: Response, next: NextFunction): void => {
     // Extract the folder name from the path
     const parts = req.path.split('/');
     if (parts.length < 2) {
-      return res.status(400).json({ error: 'Invalid request path' });
+      res.status(400).json({ error: 'Invalid request path' });
+      return;
     }
 
     const folderName = parts[1];
     if (!validateAndSanitizeFolderName(folderName)) {
-      return res.status(400).json({ error: 'Invalid folder name format' });
+      res.status(400).json({ error: 'Invalid folder name format' });
+      return;
     }
 
     if (req.path.endsWith('.mp3')) {
@@ -141,7 +152,11 @@ const upload = multer({
 });
 
 // Recording management
-async function saveRecording(recording: Recording): Promise<string | null> {
+async function saveRecording(
+  recording: Recording,
+  sampleRate: number,
+  channels: number
+): Promise<string | null> {
   try {
     recording.isWriting = true;
     const app = recording.isGlobal ? null : recording.appGroup || recording.app;
@@ -154,9 +169,8 @@ async function saveRecording(recording: Recording): Promise<string | null> {
     const recordingEndTime = Date.now();
     const recordingDuration = (recordingEndTime - recording.startTime) / 1000;
 
-    // Get the actual sample rate from the stream's audio stats
-    const actualSampleRate = recording.stream.sampleRate;
-    const channelCount = recording.stream.channels;
+    const actualSampleRate = sampleRate;
+    const channelCount = channels;
     const expectedSamples = recordingDuration * actualSampleRate;
 
     if (recording.isGlobal) {
@@ -303,7 +317,7 @@ async function startRecording(app: TappableApplication) {
     );
 
     const buffers: Float32Array[] = [];
-    const stream = app.tapAudio((err, samples) => {
+    const session = app.tapAudio((err, samples) => {
       if (err) {
         console.error(`❌ Audio stream error for ${rootApp.name}:`, err);
         return;
@@ -318,7 +332,7 @@ async function startRecording(app: TappableApplication) {
       app,
       appGroup: rootApp,
       buffers,
-      stream,
+      session,
       startTime: Date.now(),
       isWriting: false,
     });
@@ -347,8 +361,22 @@ async function stopRecording(processId: number) {
     `⏱️ Recording duration: ${((Date.now() - recording.startTime) / 1000).toFixed(2)}s`
   );
 
-  recording.stream.stop();
-  const filename = await saveRecording(recording);
+  let sampleRate = 0;
+  let channels = 0;
+  try {
+    // Get properties BEFORE stopping the session
+    sampleRate = recording.session.sampleRate;
+    channels = recording.session.channels;
+  } catch (e) {
+    console.error('❌ Failed to get session properties before stopping:', e);
+    // Handle error appropriately, maybe use default values or skip saving?
+    // For now, log and continue, saveRecording might fail later if values are 0.
+  }
+
+  recording.session.stop(); // Stop the session
+
+  // Pass the retrieved values to saveRecording
+  const filename = await saveRecording(recording, sampleRate, channels);
   recordingMap.delete(processId);
 
   if (filename) {
@@ -654,11 +682,6 @@ const rateLimiter = rateLimit({
   message: { error: 'Too many requests, please try again later.' },
 });
 
-app.get('/permissions', (req, res) => {
-  const permission = shareableContent.checkRecordingPermissions();
-  res.json({ permission });
-});
-
 app.get('/apps', async (_req, res) => {
   const apps = await getAllApps();
   listenToAppStateChanges(apps);
@@ -691,11 +714,15 @@ function validateAndSanitizeFolderName(folderName: string): string | null {
   return sanitized;
 }
 
-app.delete('/recordings/:foldername', rateLimiter, async (req, res) => {
+app.delete('/recordings/:foldername', rateLimiter, (async (
+  req: Request,
+  res: Response
+): Promise<void> => {
   const foldername = validateAndSanitizeFolderName(req.params.foldername);
   if (!foldername) {
     console.error('❌ Invalid folder name format:', req.params.foldername);
-    return res.status(400).json({ error: 'Invalid folder name format' });
+    res.status(400).json({ error: 'Invalid folder name format' });
+    return;
   }
 
   // Construct the path safely using path.join to avoid path traversal
@@ -712,7 +739,8 @@ app.delete('/recordings/:foldername', rateLimiter, async (req, res) => {
         recordingDirPath,
         requestedFile: foldername,
       });
-      return res.status(403).json({ error: 'Access denied' });
+      res.status(403).json({ error: 'Access denied' });
+      return;
     }
 
     console.log(`🗑️ Deleting recording folder: ${foldername}`);
@@ -736,7 +764,7 @@ app.delete('/recordings/:foldername', rateLimiter, async (req, res) => {
       });
     }
   }
-});
+}) as RequestHandler);
 
 app.get('/apps/:process_id/icon', (req, res) => {
   const processId = parseInt(req.params.process_id);
@@ -778,100 +806,101 @@ app.post('/apps/:process_id/stop', async (req, res) => {
 });
 
 // Update transcription endpoint to use folder validation
-app.post(
-  '/recordings/:foldername/transcribe',
-  rateLimiter,
-  async (req, res) => {
-    const foldername = validateAndSanitizeFolderName(req.params.foldername);
-    if (!foldername) {
-      console.error('❌ Invalid folder name format:', req.params.foldername);
-      return res.status(400).json({ error: 'Invalid folder name format' });
-    }
-
-    const recordingDir = `${RECORDING_DIR}/${foldername}`;
-
-    try {
-      // Check if directory exists
-      await fs.access(recordingDir);
-
-      const transcriptionWavPath = `${recordingDir}/transcription.wav`;
-      const transcriptionMetadataPath = `${recordingDir}/transcription.json`;
-
-      // Check if transcription file exists
-      await fs.access(transcriptionWavPath);
-
-      // Create initial transcription metadata
-      const initialMetadata: TranscriptionMetadata = {
-        transcriptionStartTime: Date.now(),
-        transcriptionEndTime: 0,
-        transcriptionStatus: 'pending',
-      };
-      await fs.writeJson(transcriptionMetadataPath, initialMetadata);
-
-      // Notify clients that transcription has started
-      io.emit('apps:recording-transcription-start', { filename: foldername });
-
-      const transcription = await gemini(transcriptionWavPath, {
-        mode: 'transcript',
-      });
-
-      // Update transcription metadata with results
-      const metadata: TranscriptionMetadata = {
-        transcriptionStartTime: initialMetadata.transcriptionStartTime,
-        transcriptionEndTime: Date.now(),
-        transcriptionStatus: 'completed',
-        transcription: transcription ?? undefined,
-      };
-
-      await fs.writeJson(transcriptionMetadataPath, metadata);
-
-      // Notify clients that transcription is complete
-      io.emit('apps:recording-transcription-end', {
-        filename: foldername,
-        success: true,
-        transcription,
-      });
-
-      res.json({ success: true });
-    } catch (error) {
-      console.error('❌ Error during transcription:', error);
-
-      // Update transcription metadata with error
-      const metadata: TranscriptionMetadata = {
-        transcriptionStartTime: Date.now(),
-        transcriptionEndTime: Date.now(),
-        transcriptionStatus: 'error',
-        error: error instanceof Error ? error.message : 'Unknown error',
-      };
-
-      await fs
-        .writeJson(`${recordingDir}/transcription.json`, metadata)
-        .catch(err => {
-          console.error('❌ Error saving transcription metadata:', err);
-        });
-
-      // Notify clients of transcription error
-      io.emit('apps:recording-transcription-end', {
-        filename: foldername,
-        success: false,
-        error: error instanceof Error ? error.message : 'Unknown error',
-      });
-
-      res.status(500).json({
-        error: error instanceof Error ? error.message : 'Unknown error',
-      });
-    }
+app.post('/recordings/:foldername/transcribe', rateLimiter, (async (
+  req: Request,
+  res: Response
+): Promise<void> => {
+  const foldername = validateAndSanitizeFolderName(req.params.foldername);
+  if (!foldername) {
+    console.error('❌ Invalid folder name format:', req.params.foldername);
+    res.status(400).json({ error: 'Invalid folder name format' });
+    return;
   }
-);
+
+  const recordingDir = `${RECORDING_DIR}/${foldername}`;
+
+  try {
+    // Check if directory exists
+    await fs.access(recordingDir);
+
+    const transcriptionWavPath = `${recordingDir}/transcription.wav`;
+    const transcriptionMetadataPath = `${recordingDir}/transcription.json`;
+
+    // Check if transcription file exists
+    await fs.access(transcriptionWavPath);
+
+    // Create initial transcription metadata
+    const initialMetadata: TranscriptionMetadata = {
+      transcriptionStartTime: Date.now(),
+      transcriptionEndTime: 0,
+      transcriptionStatus: 'pending',
+    };
+    await fs.writeJson(transcriptionMetadataPath, initialMetadata);
+
+    // Notify clients that transcription has started
+    io.emit('apps:recording-transcription-start', { filename: foldername });
+
+    const transcription = await gemini(transcriptionWavPath, {
+      mode: 'transcript',
+    });
+
+    // Update transcription metadata with results
+    const metadata: TranscriptionMetadata = {
+      transcriptionStartTime: initialMetadata.transcriptionStartTime,
+      transcriptionEndTime: Date.now(),
+      transcriptionStatus: 'completed',
+      transcription: transcription ?? undefined,
+    };
+
+    await fs.writeJson(transcriptionMetadataPath, metadata);
+
+    // Notify clients that transcription is complete
+    io.emit('apps:recording-transcription-end', {
+      filename: foldername,
+      success: true,
+      transcription,
+    });
+
+    res.json({ success: true });
+  } catch (error) {
+    console.error('❌ Error during transcription:', error);
+
+    // Update transcription metadata with error
+    const metadata: TranscriptionMetadata = {
+      transcriptionStartTime: Date.now(),
+      transcriptionEndTime: Date.now(),
+      transcriptionStatus: 'error',
+      error: error instanceof Error ? error.message : 'Unknown error',
+    };
+
+    await fs
+      .writeJson(`${recordingDir}/transcription.json`, metadata)
+      .catch(err => {
+        console.error('❌ Error saving transcription metadata:', err);
+      });
+
+    // Notify clients of transcription error
+    io.emit('apps:recording-transcription-end', {
+      filename: foldername,
+      success: false,
+      error: error instanceof Error ? error.message : 'Unknown error',
+    });
+
+    res.status(500).json({
+      error: error instanceof Error ? error.message : 'Unknown error',
+    });
+  }
+}) as RequestHandler);
 
 app.post(
   '/transcribe',
   rateLimiter,
-  upload.single('audio') as any,
-  async (req, res) => {
+  upload.single('audio') as unknown as RequestHandler,
+  (async (req: Request, res: Response): Promise<void> => {
     try {
       if (!req.file) {
-        return res.status(400).json({ error: 'No audio file provided' });
+        res.status(400).json({ error: 'No audio file provided' });
+        return;
       }
 
       // Notify clients that transcription has started
@@ -896,7 +925,7 @@ app.post(
         error: error instanceof Error ? error.message : 'Unknown error',
       });
     }
-  }
+  }) as RequestHandler
 );
 
 async function startGlobalRecording() {
@@ -910,7 +939,7 @@ async function startGlobalRecording() {
     console.log('🎙️ Starting global recording');
 
     const buffers: Float32Array[] = [];
-    const stream = ShareableContent.tapGlobalAudio(
+    const session = ShareableContent.tapGlobalAudio(
       null,
       (err: Error | null, samples: Float32Array) => {
         if (err) {
@@ -928,7 +957,7 @@ async function startGlobalRecording() {
       app: null,
       appGroup: null,
       buffers,
-      stream,
+      session,
       startTime: Date.now(),
       isWriting: false,
       isGlobal: true,
@@ -963,9 +992,7 @@ httpServer.listen(PORT, () => {
   console.log(`
 🎙️  Media Capture Server started successfully:
 - Port: ${PORT}
-- Recordings directory: ${RECORDING_DIR}
-- Sample rate: 44.1kHz
-- Channels: Mono
+- Recordings directory: ${path.join(process.cwd(), RECORDING_DIR)}
 `);
 });
 
diff --git a/packages/frontend/native/index.d.ts b/packages/frontend/native/index.d.ts
index 7d096e7f18..31c0b3f2d6 100644
--- a/packages/frontend/native/index.d.ts
+++ b/packages/frontend/native/index.d.ts
@@ -17,10 +17,11 @@ export declare class ApplicationStateChangedSubscriber {
   unsubscribe(): void
 }
 
-export declare class AudioTapStream {
+export declare class AudioCaptureSession {
   stop(): void
   get sampleRate(): number
   get channels(): number
+  get actualSampleRate(): number
 }
 
 export declare class DocStorage {
@@ -75,8 +76,7 @@ export declare class ShareableContent {
   applications(): Array<TappableApplication>
   applicationWithProcessId(processId: number): Application | null
   tappableApplicationWithProcessId(processId: number): TappableApplication | null
-  checkRecordingPermissions(): RecordingPermissions
-  static tapGlobalAudio(excludedProcesses: Array<TappableApplication> | undefined | null, audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioTapStream
+  static tapGlobalAudio(excludedProcesses: Array<TappableApplication> | undefined | null, audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioCaptureSession
 }
 
 export declare class SqliteConnection {
@@ -127,7 +127,7 @@ export declare class TappableApplication {
   get objectId(): number
   get icon(): Buffer
   get isRunning(): boolean
-  tapAudio(audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioTapStream
+  tapAudio(audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioCaptureSession
 }
 
 export interface Blob {
diff --git a/packages/frontend/native/index.js b/packages/frontend/native/index.js
index d722c9592b..ee8de356f3 100644
--- a/packages/frontend/native/index.js
+++ b/packages/frontend/native/index.js
@@ -377,7 +377,7 @@ if (!nativeBinding) {
 module.exports.Application = nativeBinding.Application
 module.exports.ApplicationListChangedSubscriber = nativeBinding.ApplicationListChangedSubscriber
 module.exports.ApplicationStateChangedSubscriber = nativeBinding.ApplicationStateChangedSubscriber
-module.exports.AudioTapStream = nativeBinding.AudioTapStream
+module.exports.AudioCaptureSession = nativeBinding.AudioCaptureSession
 module.exports.DocStorage = nativeBinding.DocStorage
 module.exports.DocStoragePool = nativeBinding.DocStoragePool
 module.exports.RecordingPermissions = nativeBinding.RecordingPermissions
diff --git a/packages/frontend/native/media_capture/src/macos/audio_buffer.rs b/packages/frontend/native/media_capture/src/macos/audio_buffer.rs
index 93e95692cb..38cc74bb8d 100644
--- a/packages/frontend/native/media_capture/src/macos/audio_buffer.rs
+++ b/packages/frontend/native/media_capture/src/macos/audio_buffer.rs
@@ -202,37 +202,49 @@ impl InputAndOutputAudioBufferList {
     input_sample_rate: f64,
     output_sample_rate: f64,
   ) -> Result<Vec<f32>, CoreAudioError> {
-    let [AudioBuffer {
-      mData: m_data_input,
-      mNumberChannels: m_number_channels_input,
-      mDataByteSize: m_data_byte_size_input,
-    }, AudioBuffer {
-      mData: m_data_output,
-      mNumberChannels: m_number_channels_output,
-      mDataByteSize: m_data_byte_size_output,
-    }] = self.0.mBuffers;
-    let Some(processed_samples_input) = process_audio_frame(
-      m_data_input,
-      m_data_byte_size_input,
-      m_number_channels_input,
+    let mut mixed_samples = Vec::new();
+
+    // Directly access buffers from the list
+    let [input_buffer, output_buffer] = self.0.mBuffers;
+
+    if let Some(processed_input) = process_audio_frame(
+      input_buffer.mData,
+      input_buffer.mDataByteSize,
+      input_buffer.mNumberChannels,
       input_sample_rate,
       target_sample_rate,
-    ) else {
-      return Err(CoreAudioError::ProcessAudioFrameFailed("input"));
-    };
+    ) {
+      mixed_samples = processed_input;
+    }
 
-    let Some(processed_samples_output) = process_audio_frame(
-      m_data_output,
-      m_data_byte_size_output,
-      m_number_channels_output,
+    if let Some(processed_output) = process_audio_frame(
+      output_buffer.mData,
+      output_buffer.mDataByteSize,
+      output_buffer.mNumberChannels,
       output_sample_rate,
       target_sample_rate,
-    ) else {
-      return Err(CoreAudioError::ProcessAudioFrameFailed("output"));
-    };
+    ) {
+      if mixed_samples.is_empty() {
+        mixed_samples = processed_output;
+      } else {
+        let len1 = mixed_samples.len();
+        let len2 = processed_output.len();
+        if len1 < len2 {
+          mixed_samples.resize(len2, 0.0);
+        } else if len2 < len1 {
+          let mut padded_output = processed_output;
+          padded_output.resize(len1, 0.0);
+          for (sample1, sample2) in mixed_samples.iter_mut().zip(padded_output.iter()) {
+            *sample1 = (*sample1 + *sample2) / 2.0;
+          }
+          return Ok(mixed_samples);
+        }
 
-    // Use the extracted mixing function with the const weights
-    let mixed_samples = mix_audio_samples(&processed_samples_input, &processed_samples_output);
+        for (sample1, sample2) in mixed_samples.iter_mut().zip(processed_output.iter()) {
+          *sample1 = (*sample1 + *sample2) / 2.0;
+        }
+      }
+    }
 
     Ok(mixed_samples)
   }
diff --git a/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs b/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs
index c98de4ab9a..6f59ab4e59 100644
--- a/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs
+++ b/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs
@@ -38,7 +38,7 @@ use uuid::Uuid;
 use crate::{
   error::CoreAudioError,
   pid::{audio_process_list, get_process_property},
-  tap_audio::{AggregateDevice, AudioTapStream},
+  tap_audio::{AggregateDeviceManager, AudioCaptureSession},
 };
 
 #[repr(C)]
@@ -89,12 +89,6 @@ static APPLICATION_STATE_CHANGED_LISTENER_BLOCKS: LazyLock<
 static NSRUNNING_APPLICATION_CLASS: LazyLock<Option<&'static AnyClass>> =
   LazyLock::new(|| AnyClass::get(c"NSRunningApplication"));
 
-static AVCAPTUREDEVICE_CLASS: LazyLock<Option<&'static AnyClass>> =
-  LazyLock::new(|| AnyClass::get(c"AVCaptureDevice"));
-
-static SCSTREAM_CLASS: LazyLock<Option<&'static AnyClass>> =
-  LazyLock::new(|| AnyClass::get(c"SCStream"));
-
 #[napi]
 pub struct Application {
   pub(crate) process_id: i32,
@@ -445,10 +439,13 @@ impl TappableApplication {
   pub fn tap_audio(
     &self,
     audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
-  ) -> Result<AudioTapStream> {
-    // Use the new method that takes a TappableApplication directly
-    let mut device = AggregateDevice::new(self)?;
-    device.start(audio_stream_callback)
+  ) -> Result<AudioCaptureSession> {
+    // Use AggregateDeviceManager instead of AggregateDevice directly
+    // This provides automatic default device change detection
+    let mut device_manager = AggregateDeviceManager::new(self)?;
+    device_manager.start_capture(audio_stream_callback)?;
+    let boxed_manager = Box::new(device_manager);
+    Ok(AudioCaptureSession::new(boxed_manager))
   }
 }
 
@@ -742,46 +739,21 @@ impl ShareableContent {
     }
   }
 
-  #[napi]
-  pub fn check_recording_permissions(&self) -> Result<RecordingPermissions> {
-    let av_capture_class = AVCAPTUREDEVICE_CLASS
-      .as_ref()
-      .ok_or_else(|| Error::new(Status::GenericFailure, "AVCaptureDevice class not found"))?;
-
-    let sc_stream_class = SCSTREAM_CLASS
-      .as_ref()
-      .ok_or_else(|| Error::new(Status::GenericFailure, "SCStream class not found"))?;
-
-    let media_type = NSString::from_str("com.apple.avfoundation.avcapturedevice.built-in_audio");
-
-    let audio_status: i32 = unsafe {
-      msg_send![
-        *av_capture_class,
-        authorizationStatusForMediaType: &*media_type
-      ]
-    };
-
-    let screen_status: bool = unsafe { msg_send![*sc_stream_class, isScreenCaptureAuthorized] };
-
-    Ok(RecordingPermissions {
-      // AVAuthorizationStatusAuthorized = 3
-      audio: audio_status == 3,
-      screen: screen_status,
-    })
-  }
-
   #[napi]
   pub fn tap_global_audio(
     excluded_processes: Option<Vec<&TappableApplication>>,
     audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
-  ) -> Result<AudioTapStream> {
-    let mut device = AggregateDevice::create_global_tap_but_exclude_processes(
-      &excluded_processes
-        .unwrap_or_default()
-        .iter()
-        .map(|app| app.object_id)
-        .collect::<Vec<_>>(),
-    )?;
-    device.start(audio_stream_callback)
+  ) -> Result<AudioCaptureSession> {
+    let excluded_object_ids = excluded_processes
+      .unwrap_or_default()
+      .iter()
+      .map(|app| app.object_id)
+      .collect::<Vec<_>>();
+
+    // Use the new AggregateDeviceManager for automatic device adaptation
+    let mut device_manager = AggregateDeviceManager::new_global(&excluded_object_ids)?;
+    device_manager.start_capture(audio_stream_callback)?;
+    let boxed_manager = Box::new(device_manager);
+    Ok(AudioCaptureSession::new(boxed_manager))
   }
 }
diff --git a/packages/frontend/native/media_capture/src/macos/tap_audio.rs b/packages/frontend/native/media_capture/src/macos/tap_audio.rs
index be0b9ed0e5..95aa10538f 100644
--- a/packages/frontend/native/media_capture/src/macos/tap_audio.rs
+++ b/packages/frontend/native/media_capture/src/macos/tap_audio.rs
@@ -12,12 +12,16 @@ use coreaudio::sys::{
   kAudioAggregateDeviceIsStackedKey, kAudioAggregateDeviceMainSubDeviceKey,
   kAudioAggregateDeviceNameKey, kAudioAggregateDeviceSubDeviceListKey,
   kAudioAggregateDeviceTapAutoStartKey, kAudioAggregateDeviceTapListKey,
-  kAudioAggregateDeviceUIDKey, kAudioDevicePropertyNominalSampleRate, kAudioHardwareBadDeviceError,
+  kAudioAggregateDeviceUIDKey, kAudioDevicePropertyDeviceIsAlive,
+  kAudioDevicePropertyNominalSampleRate, kAudioHardwareBadDeviceError,
   kAudioHardwareBadStreamError, kAudioHardwareNoError, kAudioHardwarePropertyDefaultInputDevice,
-  kAudioHardwarePropertyDefaultOutputDevice, kAudioSubDeviceUIDKey, kAudioSubTapUIDKey,
-  AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID, AudioDeviceIOProcID,
-  AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice,
-  AudioHardwareDestroyAggregateDevice, AudioObjectID, AudioTimeStamp, OSStatus,
+  kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyElementMain,
+  kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, kAudioSubDeviceUIDKey,
+  kAudioSubTapUIDKey, AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID,
+  AudioDeviceIOProcID, AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice,
+  AudioHardwareDestroyAggregateDevice, AudioObjectAddPropertyListenerBlock,
+  AudioObjectGetPropertyDataSize, AudioObjectID, AudioObjectPropertyAddress,
+  AudioObjectRemovePropertyListenerBlock, AudioTimeStamp, OSStatus,
 };
 use napi::{
   bindgen_prelude::Float32Array,
@@ -164,6 +168,7 @@ impl AggregateDevice {
       output_proc_id: None,
     };
 
+    // Restore the activation logic as it seems necessary for audio flow
     // Configure the aggregate device to ensure proper handling of both input and
     // output
     device.get_aggregate_device_stats()?;
@@ -234,65 +239,78 @@ impl AggregateDevice {
     Ok(dummy_proc_id)
   }
 
+  /// Implementation for the AggregateDevice to start processing audio
   pub fn start(
     &mut self,
     audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
+    // Add original_audio_stats to ensure consistent target rate
+    original_audio_stats: AudioStats,
   ) -> Result<AudioTapStream> {
-    let mut audio_stats = self.get_aggregate_device_stats()?;
+    let mut current_audio_stats = self.get_aggregate_device_stats()?;
 
     let queue = create_audio_tap_queue();
     let mut in_proc_id: AudioDeviceIOProcID = None;
+
+    // Get the current input and output sample rates
     let mut input_device_sample_rate: f64 = 0.0;
     get_global_main_property(
       self.input_device_id,
       kAudioDevicePropertyNominalSampleRate,
       &mut input_device_sample_rate,
     )?;
-    let output_sample_rate = audio_stats.sample_rate;
-    let target_sample_rate = input_device_sample_rate.max(output_sample_rate);
 
-    audio_stats.sample_rate = target_sample_rate;
+    let output_sample_rate = current_audio_stats.sample_rate;
+    // Use the consistent original sample rate as the target for the IO block
+    let target_sample_rate = original_audio_stats.sample_rate;
 
-    let audio_stats_clone = audio_stats;
-    self.audio_stats = Some(audio_stats);
+    // Update the device's reported stats to the consistent one
+    current_audio_stats.sample_rate = target_sample_rate;
+    current_audio_stats.channels = original_audio_stats.channels;
+    self.audio_stats = Some(current_audio_stats);
+
+    // Use the consistent stats for the stream object returned
+    let audio_stats_for_stream = current_audio_stats;
 
     let in_io_block: RcBlock<
       dyn Fn(*mut c_void, *mut c_void, *mut c_void, *mut c_void, *mut c_void) -> i32,
-    > = RcBlock::new(
-      move |_in_now: *mut c_void,
-            in_input_data: *mut c_void,
-            in_input_time: *mut c_void,
-            _in_output_data: *mut c_void,
-            _in_output_time: *mut c_void| {
-        let AudioTimeStamp { mSampleTime, .. } = unsafe { &*in_input_time.cast() };
+    >;
+    {
+      in_io_block = RcBlock::new(
+        move |_in_now: *mut c_void,
+              in_input_data: *mut c_void,
+              in_input_time: *mut c_void,
+              _in_output_data: *mut c_void,
+              _in_output_time: *mut c_void| {
+          let AudioTimeStamp { mSampleTime, .. } = unsafe { &*in_input_time.cast() };
 
-        // ignore pre-roll
-        if *mSampleTime < 0.0 {
-          return kAudioHardwareNoError as i32;
-        }
-        let Ok(dua_audio_buffer_list) =
-          (unsafe { InputAndOutputAudioBufferList::from_raw(in_input_data) })
-        else {
-          return kAudioHardwareBadDeviceError as i32;
-        };
+          // ignore pre-roll
+          if *mSampleTime < 0.0 {
+            return kAudioHardwareNoError as i32;
+          }
+          let Ok(dua_audio_buffer_list) =
+            (unsafe { InputAndOutputAudioBufferList::from_raw(in_input_data) })
+          else {
+            return kAudioHardwareBadDeviceError as i32;
+          };
 
-        let Ok(mixed_samples) = dua_audio_buffer_list.mix_input_and_output(
-          target_sample_rate,
-          input_device_sample_rate,
-          output_sample_rate,
-        ) else {
-          return kAudioHardwareBadStreamError as i32;
-        };
+          let Ok(mixed_samples) = dua_audio_buffer_list.mix_input_and_output(
+            target_sample_rate,
+            input_device_sample_rate,
+            output_sample_rate,
+          ) else {
+            return kAudioHardwareBadStreamError as i32;
+          };
 
-        // Send the processed audio data to JavaScript
-        audio_stream_callback.call(
-          Ok(mixed_samples.into()),
-          ThreadsafeFunctionCallMode::NonBlocking,
-        );
+          // Send the processed audio data to JavaScript
+          audio_stream_callback.call(
+            Ok(mixed_samples.into()),
+            ThreadsafeFunctionCallMode::NonBlocking,
+          );
 
-        kAudioHardwareNoError as i32
-      },
-    );
+          kAudioHardwareNoError as i32
+        },
+      );
+    }
 
     let status = unsafe {
       AudioDeviceCreateIOProcIDWithBlock(
@@ -310,8 +328,11 @@ impl AggregateDevice {
     if status != 0 {
       return Err(CoreAudioError::CreateIOProcIDWithBlockFailed(status).into());
     }
+
     let status = unsafe { AudioDeviceStart(self.id, in_proc_id) };
     if status != 0 {
+      // Attempt to clean up the IO proc if start failed
+      let _cleanup_status = unsafe { AudioDeviceDestroyIOProcID(self.id, in_proc_id) };
       return Err(CoreAudioError::AudioDeviceStartFailed(status).into());
     }
 
@@ -319,7 +340,7 @@ impl AggregateDevice {
       device_id: self.id,
       in_proc_id,
       stop_called: false,
-      audio_stats: audio_stats_clone, // Use the updated audio_stats with the actual sample rate
+      audio_stats: audio_stats_for_stream,
       input_device_id: self.input_device_id,
       output_device_id: self.output_device_id,
       input_proc_id: self.input_proc_id,
@@ -386,7 +407,6 @@ impl AggregateDevice {
   }
 }
 
-#[napi]
 pub struct AudioTapStream {
   device_id: AudioObjectID,
   in_proc_id: AudioDeviceIOProcID,
@@ -398,61 +418,621 @@ pub struct AudioTapStream {
   output_proc_id: Option<AudioDeviceIOProcID>,
 }
 
-#[napi]
 impl AudioTapStream {
-  #[napi]
   pub fn stop(&mut self) -> Result<()> {
     if self.stop_called {
       return Ok(());
     }
+
     self.stop_called = true;
 
-    // Stop the main aggregate device
-    let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) };
-    if status != 0 {
-      return Err(CoreAudioError::AudioDeviceStopFailed(status).into());
+    // Check if device exists before attempting to stop it
+    let mut device_exists = false;
+    let mut dummy_size: u32 = 0;
+    let device_check_status = unsafe {
+      AudioObjectGetPropertyDataSize(
+        self.device_id,
+        &AudioObjectPropertyAddress {
+          mSelector: kAudioDevicePropertyDeviceIsAlive,
+          mScope: kAudioObjectPropertyScopeGlobal,
+          mElement: kAudioObjectPropertyElementMain,
+        },
+        0,
+        ptr::null(),
+        &mut dummy_size,
+      )
+    };
+
+    if device_check_status == 0 {
+      device_exists = true;
+    }
+
+    // Stop the main aggregate device - ignore errors as device might already be
+    // stopped or disconnected
+    if device_exists {
+      let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) };
+      // Don't fail the whole stop process if this fails, just log the error and
+      // continue cleanup
+      if status != 0 {
+        // kAudioHardwareBadDeviceError (560227702 / 0x2166616E in hex) indicates the
+        // device is gone, which is expected in some scenarios (like device
+        // unplug). Treat this as non-existent.
+        if status == kAudioHardwareBadDeviceError as i32 {
+          device_exists = false; // Treat as non-existent for subsequent steps
+        }
+      }
     }
 
     // Stop the input device if it was activated
     if let Some(proc_id) = self.input_proc_id {
-      let _ = unsafe { AudioDeviceStop(self.input_device_id, proc_id) };
-      let _ = unsafe { AudioDeviceDestroyIOProcID(self.input_device_id, proc_id) };
+      // Ignore errors as device might be disconnected
+      let status = unsafe { AudioDeviceStop(self.input_device_id, proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Input device stop failed with status: {}",
+          status
+        );
+      }
+
+      let status = unsafe { AudioDeviceDestroyIOProcID(self.input_device_id, proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Input device destroy IO proc failed with status: {}",
+          status
+        );
+      }
     }
 
     // Stop the output device if it was activated
     if let Some(proc_id) = self.output_proc_id {
-      let _ = unsafe { AudioDeviceStop(self.output_device_id, proc_id) };
-      let _ = unsafe { AudioDeviceDestroyIOProcID(self.output_device_id, proc_id) };
+      // Ignore errors as device might be disconnected
+      let status = unsafe { AudioDeviceStop(self.output_device_id, proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Output device stop failed with status: {}",
+          status
+        );
+      }
+
+      let status = unsafe { AudioDeviceDestroyIOProcID(self.output_device_id, proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Output device destroy IO proc failed with status: {}",
+          status
+        );
+      }
     }
 
-    // Destroy the main IO proc
-    let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) };
-    if status != 0 {
-      return Err(CoreAudioError::AudioDeviceDestroyIOProcIDFailed(status).into());
+    // Destroy the main IO proc if device still exists
+    if device_exists {
+      let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Destroy IO proc failed with status: {}",
+          status
+        );
+      }
     }
-
-    // Destroy the aggregate device
     let status = unsafe { AudioHardwareDestroyAggregateDevice(self.device_id) };
     if status != 0 {
-      return Err(CoreAudioError::AudioHardwareDestroyAggregateDeviceFailed(status).into());
+      println!(
+        "DEBUG: WARNING: AudioHardwareDestroyAggregateDevice failed with status: {}",
+        status
+      );
     }
 
-    // Destroy the process tap
+    // Destroy the process tap - don't fail if this fails
     let status = unsafe { AudioHardwareDestroyProcessTap(self.device_id) };
     if status != 0 {
-      return Err(CoreAudioError::AudioHardwareDestroyProcessTapFailed(status).into());
+      println!(
+        "DEBUG: WARNING: AudioHardwareDestroyProcessTap failed with status: {}",
+        status
+      );
+    }
+
+    // Always return success to prevent errors from bubbling up to JavaScript
+    // since we've made a best effort to clean up
+    Ok(())
+  }
+
+  pub fn get_sample_rate(&self) -> f64 {
+    self.audio_stats.sample_rate
+  }
+
+  /// Gets the actual sample rate of the current device
+  ///
+  /// This can be different from the original sample rate if the default device
+  /// has changed. The original sample rate is maintained for consistency in
+  /// audio processing, but applications might need to know the actual device
+  /// sample rate for certain operations.
+  pub fn get_actual_sample_rate(&self) -> Result<f64> {
+    let device_id = self.output_device_id;
+    let mut actual_sample_rate: f64 = 0.0;
+    let status = unsafe {
+      let address = AudioObjectPropertyAddress {
+        mSelector: kAudioDevicePropertyNominalSampleRate,
+        mScope: kAudioObjectPropertyScopeGlobal,
+        mElement: kAudioObjectPropertyElementMain,
+      };
+
+      let mut size = std::mem::size_of::<f64>() as u32;
+      coreaudio::sys::AudioObjectGetPropertyData(
+        device_id,
+        &address,
+        0,
+        ptr::null(),
+        &mut size,
+        &mut actual_sample_rate as *mut f64 as *mut c_void,
+      )
+    };
+
+    if status != 0 {
+      return Err(CoreAudioError::GetPropertyDataFailed(status).into());
+    }
+
+    Ok(actual_sample_rate)
+  }
+
+  pub fn get_channels(&self) -> u32 {
+    self.audio_stats.channels
+  }
+
+  /// Mark the stream as stopped without performing actual cleanup
+  /// This is used when the device is known to be in an invalid state
+  pub fn mark_stopped_without_cleanup(&mut self) {
+    self.stop_called = true;
+  }
+}
+
+/// A manager for audio device handling that automatically adapts to device
+/// changes
+pub struct AggregateDeviceManager {
+  device: AggregateDevice,
+  default_devices_listener: Option<*mut c_void>,
+  is_app_specific: bool,
+  app_id: Option<AudioObjectID>,
+  excluded_processes: Vec<AudioObjectID>,
+  active_stream: Option<Arc<std::sync::Mutex<Option<AudioTapStream>>>>,
+  audio_callback: Option<Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>>,
+  original_audio_stats: Option<AudioStats>,
+}
+
+impl AggregateDeviceManager {
+  /// Creates a new AggregateDeviceManager for a specific application
+  pub fn new(app: &TappableApplication) -> Result<Self> {
+    let device = AggregateDevice::new(app)?;
+
+    Ok(Self {
+      device,
+      default_devices_listener: None,
+      is_app_specific: true,
+      app_id: Some(app.object_id),
+      excluded_processes: Vec::new(),
+      active_stream: None,
+      audio_callback: None,
+      original_audio_stats: None,
+    })
+  }
+
+  /// Creates a new AggregateDeviceManager for global audio with option to
+  /// exclude processes
+  pub fn new_global(excluded_processes: &[AudioObjectID]) -> Result<Self> {
+    let device = AggregateDevice::create_global_tap_but_exclude_processes(excluded_processes)?;
+    Ok(Self {
+      device,
+      default_devices_listener: None,
+      is_app_specific: false,
+      app_id: None,
+      excluded_processes: excluded_processes.to_vec(),
+      active_stream: None,
+      audio_callback: None,
+      original_audio_stats: None,
+    })
+  }
+
+  /// This sets up the initial stream and listeners.
+  pub fn start_capture(
+    &mut self,
+    audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
+  ) -> Result<()> {
+    // Store the callback for potential device switch later
+    self.audio_callback = Some(audio_stream_callback.clone());
+
+    // Create a shared reference for the active stream
+    let stream_mutex = Arc::new(std::sync::Mutex::new(None));
+    self.active_stream = Some(stream_mutex.clone());
+
+    // Start the initial stream
+    // Pass the initially determined consistent audio stats
+    let original_audio_stats = self
+      .device
+      .get_aggregate_device_stats()
+      .unwrap_or(AudioStats {
+        sample_rate: 48000.0, // Match fallback in setup_device_change_listeners
+        channels: 1,
+      });
+    self.original_audio_stats = Some(original_audio_stats); // Store for listener use
+
+    let initial_audio_tap_stream = self
+      .device
+      .start(audio_stream_callback.clone(), original_audio_stats)?; // Pass clone of callback
+
+    // Setup device change listeners AFTER getting initial stats and stream
+    self.setup_device_change_listeners()?;
+
+    // Store a reference to the stream
+    if let Ok(mut stream_guard) = stream_mutex.lock() {
+      *stream_guard = Some(initial_audio_tap_stream);
+    } else {
+      println!("DEBUG: Failed to lock stream_mutex to store AudioTapStream reference");
+      // If we can't store the initial stream, something is wrong.
+      // Attempt to stop the stream we just created? Or just return error?
+      // For now, return an error.
+      return Err(napi::Error::from_reason(
+        "Failed to lock internal stream mutex during startup",
+      ));
     }
 
     Ok(())
   }
 
-  #[napi(getter)]
-  pub fn get_sample_rate(&self) -> f64 {
-    self.audio_stats.sample_rate
+  /// Sets up listeners for default device changes
+  fn setup_device_change_listeners(&mut self) -> Result<()> {
+    // We need to clean up any existing listeners first
+    self.cleanup_device_listeners();
+
+    // Create a weak reference to self to avoid circular references
+    let stream_arc = self.active_stream.clone();
+    let callback_arc = self.audio_callback.clone();
+    let is_app_specific = self.is_app_specific;
+    let app_id = self.app_id;
+    let excluded_processes = self.excluded_processes.clone();
+
+    // Retrieve the stored original audio stats
+    let Some(original_audio_stats) = self.original_audio_stats else {
+      return Err(napi::Error::from_reason(
+        "Internal error: Original audio stats not available for listener.",
+      ));
+    };
+
+    // Create a block that will handle device changes
+    let device_changed_block = RcBlock::new(
+      move |_in_number_addresses: u32, _in_addresses: *mut c_void| {
+        // Skip if we don't have all required information
+        let Some(stream_mutex) = stream_arc.as_ref() else {
+          return;
+        };
+        let Some(callback) = callback_arc.as_ref() else {
+          return;
+        };
+
+        // Try to lock the stream mutex
+        let Ok(mut stream_guard) = stream_mutex.lock() else {
+          return;
+        };
+
+        // Create a new device with updated default devices
+        let result: Result<AggregateDevice> = (|| {
+          if is_app_specific {
+            if let Some(id) = app_id {
+              let app = TappableApplication::new(id)?;
+              AggregateDevice::new(&app)
+            } else {
+              Err(CoreAudioError::CreateProcessTapFailed(0).into())
+            }
+          } else {
+            AggregateDevice::create_global_tap_but_exclude_processes(&excluded_processes)
+          }
+        })();
+
+        // If we successfully created a new device, stop the old stream and start a new
+        // one
+        match result {
+          Ok(mut new_device) => {
+            // Stop and drop the old stream if it exists
+            if let Some(mut old_stream) = stream_guard.take() {
+              // Explicitly drop the old stream's Box before creating the new device.
+              // The drop implementation handles cleanup.
+              // We call stop() directly.
+              let stop_result = old_stream.stop();
+              match stop_result {
+                Ok(_) => {}
+                Err(e) => println!(
+                  "DEBUG: Error stopping old stream (proceeding anyway): {}",
+                  e
+                ),
+              };
+              drop(old_stream); // Ensure it's dropped now
+            }
+
+            match new_device.start(callback.clone(), original_audio_stats) {
+              Ok(new_stream) => {
+                // Use the existing stream_guard which already holds the lock
+                *stream_guard = Some(new_stream);
+              }
+              Err(e) => {
+                println!("DEBUG: Failed to start new stream: {}", e);
+              }
+            }
+          }
+          Err(e) => {
+            println!("DEBUG: Failed to create new device: {}", e);
+          }
+        }
+      },
+    );
+
+    // Create pointers to the device_changed_block that can be used in C functions
+    let block_ptr = &*device_changed_block as *const Block<dyn Fn(u32, *mut c_void)>;
+    let block_ptr_cast = block_ptr.cast_mut().cast();
+
+    // Register listeners for both input and output device changes
+    unsafe {
+      let status = AudioObjectAddPropertyListenerBlock(
+        kAudioObjectSystemObject,
+        &AudioObjectPropertyAddress {
+          mSelector: kAudioHardwarePropertyDefaultInputDevice,
+          mScope: kAudioObjectPropertyScopeGlobal,
+          mElement: kAudioObjectPropertyElementMain,
+        },
+        ptr::null_mut(),
+        block_ptr_cast,
+      );
+
+      if status != 0 {
+        println!(
+          "DEBUG: Failed to register input device listener, status: {}",
+          status
+        );
+        return Err(CoreAudioError::AddPropertyListenerBlockFailed(status).into());
+      }
+
+      let status = AudioObjectAddPropertyListenerBlock(
+        kAudioObjectSystemObject,
+        &AudioObjectPropertyAddress {
+          mSelector: kAudioHardwarePropertyDefaultOutputDevice,
+          mScope: kAudioObjectPropertyScopeGlobal,
+          mElement: kAudioObjectPropertyElementMain,
+        },
+        ptr::null_mut(),
+        block_ptr_cast,
+      );
+
+      if status != 0 {
+        println!(
+          "DEBUG: Failed to register output device listener, status: {}",
+          status
+        );
+        // Clean up the first listener if the second one fails
+        AudioObjectRemovePropertyListenerBlock(
+          kAudioObjectSystemObject,
+          &AudioObjectPropertyAddress {
+            mSelector: kAudioHardwarePropertyDefaultInputDevice,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain,
+          },
+          ptr::null_mut(),
+          block_ptr_cast,
+        );
+        return Err(CoreAudioError::AddPropertyListenerBlockFailed(status).into());
+      }
+    }
+
+    // Store the listener pointer for cleanup
+    self.default_devices_listener = Some(block_ptr_cast);
+
+    Ok(())
+  }
+
+  /// Cleans up device change listeners
+  fn cleanup_device_listeners(&mut self) {
+    if let Some(listener) = self.default_devices_listener.take() {
+      unsafe {
+        // Remove input device change listener
+        let status = AudioObjectRemovePropertyListenerBlock(
+          kAudioObjectSystemObject,
+          &AudioObjectPropertyAddress {
+            mSelector: kAudioHardwarePropertyDefaultInputDevice,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain,
+          },
+          ptr::null_mut(),
+          listener,
+        );
+        if status != 0 {
+          println!(
+            "DEBUG: Failed to remove input device listener, status: {}",
+            status
+          );
+        }
+
+        let status = AudioObjectRemovePropertyListenerBlock(
+          kAudioObjectSystemObject,
+          &AudioObjectPropertyAddress {
+            mSelector: kAudioHardwarePropertyDefaultOutputDevice,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain,
+          },
+          ptr::null_mut(),
+          listener,
+        );
+        if status != 0 {
+          println!(
+            "DEBUG: Failed to remove output device listener, status: {}",
+            status
+          );
+        }
+      }
+    }
+  }
+
+  /// Stops the active stream and cleans up listeners.
+  pub fn stop_capture(&mut self) -> Result<()> {
+    self.cleanup_device_listeners();
+
+    let stream_to_stop = if let Some(stream_mutex) = &self.active_stream {
+      if let Ok(mut stream_guard) = stream_mutex.lock() {
+        stream_guard.take() // Take ownership from the Option<Arc<Mutex<...>>>
+      } else {
+        println!("DEBUG: Failed to lock stream mutex during stop");
+        None
+      }
+    } else {
+      None
+    };
+
+    if let Some(mut stream) = stream_to_stop {
+      match stream.stop() {
+        Ok(_) => {}
+        Err(e) => println!(
+          "DEBUG: Error stopping stream in stop_capture (ignored): {}",
+          e
+        ),
+      }
+      // Explicitly drop here after stopping
+      drop(stream);
+    }
+
+    // Clear related fields
+    self.active_stream = None;
+    self.audio_callback = None;
+    self.original_audio_stats = None;
+
+    Ok(())
+  }
+
+  /// Gets the stats of the currently active stream, if any.
+  pub fn get_current_stats(&self) -> Option<AudioStats> {
+    if let Some(stream_mutex) = &self.active_stream {
+      if let Ok(stream_guard) = stream_mutex.lock() {
+        // Borrow the stream Option, then map to get stats
+        stream_guard.as_ref().map(|stream| stream.audio_stats)
+      } else {
+        println!("DEBUG: Failed to lock stream mutex for get_current_stats");
+        None
+      }
+    } else {
+      None
+    }
+  }
+
+  /// Gets the actual sample rate of the currently active stream's output
+  /// device.
+  pub fn get_current_actual_sample_rate(&self) -> Result<Option<f64>> {
+    let maybe_stream_ref = if let Some(stream_mutex) = &self.active_stream {
+      match stream_mutex.lock() {
+        Ok(guard) => guard,
+        Err(_) => {
+          println!("DEBUG: Failed to lock stream mutex for get_current_actual_sample_rate");
+          // Return Ok(None) or an error? Let's return None.
+          return Ok(None);
+        }
+      }
+    } else {
+      return Ok(None); // No active stream manager
+    };
+
+    if let Some(stream) = maybe_stream_ref.as_ref() {
+      // Call the existing non-napi method on AudioTapStream
+      match stream.get_actual_sample_rate() {
+        Ok(rate) => Ok(Some(rate)),
+        Err(e) => {
+          println!("DEBUG: Error getting actual sample rate from stream: {}", e);
+          // Propagate the error
+          Err(e)
+        }
+      }
+    } else {
+      Ok(None) // No active stream
+    }
+  }
+}
+
+impl Drop for AggregateDeviceManager {
+  fn drop(&mut self) {
+    // Call stop_capture which handles listener cleanup and stream stopping
+    match self.stop_capture() {
+      Ok(_) => {}
+      Err(e) => println!("DEBUG: Error during stop_capture in Drop (ignored): {}", e),
+    }
+  }
+}
+
+// NEW NAPI Struct: AudioCaptureSession
+#[napi]
+pub struct AudioCaptureSession {
+  // Use Option<Box<...>> to allow taking ownership in stop()
+  manager: Option<Box<AggregateDeviceManager>>,
+}
+
+#[napi]
+impl AudioCaptureSession {
+  // Constructor called internally, not directly via NAPI
+  pub(crate) fn new(manager: Box<AggregateDeviceManager>) -> Self {
+    Self {
+      manager: Some(manager),
+    }
+  }
+
+  #[napi]
+  pub fn stop(&mut self) -> Result<()> {
+    if let Some(manager) = self.manager.take() {
+      // manager.stop_capture() will be called by its Drop impl
+      // We just need to drop the manager here.
+      drop(manager);
+      Ok(())
+    } else {
+      println!("DEBUG: AudioCaptureSession.stop() called, but manager was already taken");
+      // Return Ok even if called multiple times, idempotent behavior
+      Ok(())
+    }
   }
 
   #[napi(getter)]
-  pub fn get_channels(&self) -> u32 {
-    self.audio_stats.channels
+  pub fn get_sample_rate(&self) -> Result<f64> {
+    if let Some(manager) = &self.manager {
+      manager
+        .get_current_stats()
+        .map(|stats| stats.sample_rate)
+        .ok_or_else(|| napi::Error::from_reason("No active audio stream to get sample rate from"))
+    } else {
+      Err(napi::Error::from_reason("Audio session is stopped"))
+    }
+  }
+
+  #[napi(getter)]
+  pub fn get_channels(&self) -> Result<u32> {
+    if let Some(manager) = &self.manager {
+      manager
+        .get_current_stats()
+        .map(|stats| stats.channels)
+        .ok_or_else(|| napi::Error::from_reason("No active audio stream to get channels from"))
+    } else {
+      Err(napi::Error::from_reason("Audio session is stopped"))
+    }
+  }
+
+  #[napi(getter)]
+  pub fn get_actual_sample_rate(&self) -> Result<f64> {
+    if let Some(manager) = &self.manager {
+      manager
+        .get_current_actual_sample_rate()? // Propagate CoreAudioError
+        .ok_or_else(|| {
+          napi::Error::from_reason("No active audio stream to get actual sample rate from")
+        })
+    } else {
+      Err(napi::Error::from_reason("Audio session is stopped"))
+    }
+  }
+}
+
+// Ensure the manager is dropped if the session object is dropped without
+// calling stop()
+impl Drop for AudioCaptureSession {
+  fn drop(&mut self) {
+    // Automatically calls drop on self.manager if it's Some
+    if let Some(manager) = self.manager.take() {
+      drop(manager);
+    }
   }
 }
diff --git a/packages/frontend/native/media_capture/src/macos/utils.rs b/packages/frontend/native/media_capture/src/macos/utils.rs
index b17e11b323..83858a4bdc 100644
--- a/packages/frontend/native/media_capture/src/macos/utils.rs
+++ b/packages/frontend/native/media_capture/src/macos/utils.rs
@@ -5,7 +5,7 @@ use coreaudio::sys::{
   kAudioObjectPropertyElementMain, kAudioObjectPropertyScopeGlobal, AudioObjectGetPropertyData,
   AudioObjectID, AudioObjectPropertyAddress,
 };
-use rubato::{Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType};
+use rubato::{FastFixedIn, PolynomialDegree, Resampler};
 
 use crate::error::CoreAudioError;
 
@@ -81,22 +81,32 @@ pub fn process_audio_frame(
   };
 
   if current_sample_rate != target_sample_rate {
-    let params = SincInterpolationParameters {
-      sinc_len: 256,
-      f_cutoff: 0.95,
-      interpolation: SincInterpolationType::Linear,
-      oversampling_factor: 256,
-      window: rubato::WindowFunction::BlackmanHarris2,
-    };
-    let mut resampler = SincFixedIn::<f32>::new(
+    // TODO: may use SincFixedOut to improve the sample quality
+    // however, it's not working as expected if we only process samples in chunks
+    // e.g., even with ratio 1.0, resampling 512 samples will result in 382 samples,
+    // which will produce very bad quality. The reason is that the resampler is
+    // meant to be used for dealing with larger input size. The reduced number
+    // of samples is a "delay" of the resampler for better quality.
+    let mut resampler = match FastFixedIn::<f32>::new(
       target_sample_rate / current_sample_rate,
       2.0,
-      params,
+      PolynomialDegree::Cubic,
       processed_samples.len(),
       1,
-    )
-    .ok()?;
-    let mut waves_out = resampler.process(&[processed_samples], None).ok()?;
+    ) {
+      Ok(r) => r,
+      Err(e) => {
+        eprintln!("Error creating resampler: {:?}", e);
+        return None;
+      }
+    };
+    let mut waves_out = match resampler.process(&[processed_samples], None) {
+      Ok(w) => w,
+      Err(e) => {
+        eprintln!("Error processing audio with resampler: {:?}", e);
+        return None;
+      }
+    };
     waves_out.pop()
   } else {
     Some(processed_samples)