feat(electron): adapts to default audio input/output device changes (#11815)

Enable automatic adoption of new default audio devices upon detection. 1. add `AggregateDeviceManager` to watch for audio device changes, creating & maintaining `AggregateDevice` instances and cleaning up 2. use a workaround of `FastFixedIn` to deal with resampling delay issue (this is low quality and have some artifacts in the resampled audio) fix AF-2536
2026-02-25 02:13:00 +08:00 · 2025-04-21 08:35:32 +00:00
parent f65c8f8fa6
commit 0442c0e2b6
9 changed files with 890 additions and 289 deletions
--- a/packages/frontend/native/media_capture/src/macos/audio_buffer.rs
+++ b/packages/frontend/native/media_capture/src/macos/audio_buffer.rs
@@ -202,37 +202,49 @@ impl InputAndOutputAudioBufferList {
    input_sample_rate: f64,
    output_sample_rate: f64,
  ) -> Result<Vec<f32>, CoreAudioError> {
-    let [AudioBuffer {
-      mData: m_data_input,
-      mNumberChannels: m_number_channels_input,
-      mDataByteSize: m_data_byte_size_input,
-    }, AudioBuffer {
-      mData: m_data_output,
-      mNumberChannels: m_number_channels_output,
-      mDataByteSize: m_data_byte_size_output,
-    }] = self.0.mBuffers;
-    let Some(processed_samples_input) = process_audio_frame(
-      m_data_input,
-      m_data_byte_size_input,
-      m_number_channels_input,
+    let mut mixed_samples = Vec::new();
+
+    // Directly access buffers from the list
+    let [input_buffer, output_buffer] = self.0.mBuffers;
+
+    if let Some(processed_input) = process_audio_frame(
+      input_buffer.mData,
+      input_buffer.mDataByteSize,
+      input_buffer.mNumberChannels,
      input_sample_rate,
      target_sample_rate,
-    ) else {
-      return Err(CoreAudioError::ProcessAudioFrameFailed("input"));
-    };
+    ) {
+      mixed_samples = processed_input;
+    }

-    let Some(processed_samples_output) = process_audio_frame(
-      m_data_output,
-      m_data_byte_size_output,
-      m_number_channels_output,
+    if let Some(processed_output) = process_audio_frame(
+      output_buffer.mData,
+      output_buffer.mDataByteSize,
+      output_buffer.mNumberChannels,
      output_sample_rate,
      target_sample_rate,
-    ) else {
-      return Err(CoreAudioError::ProcessAudioFrameFailed("output"));
-    };
+    ) {
+      if mixed_samples.is_empty() {
+        mixed_samples = processed_output;
+      } else {
+        let len1 = mixed_samples.len();
+        let len2 = processed_output.len();
+        if len1 < len2 {
+          mixed_samples.resize(len2, 0.0);
+        } else if len2 < len1 {
+          let mut padded_output = processed_output;
+          padded_output.resize(len1, 0.0);
+          for (sample1, sample2) in mixed_samples.iter_mut().zip(padded_output.iter()) {
+            *sample1 = (*sample1 + *sample2) / 2.0;
+          }
+          return Ok(mixed_samples);
+        }

-    // Use the extracted mixing function with the const weights
-    let mixed_samples = mix_audio_samples(&processed_samples_input, &processed_samples_output);
+        for (sample1, sample2) in mixed_samples.iter_mut().zip(processed_output.iter()) {
+          *sample1 = (*sample1 + *sample2) / 2.0;
+        }
+      }
+    }

    Ok(mixed_samples)
  }
--- a/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs
+++ b/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs
@@ -38,7 +38,7 @@ use uuid::Uuid;
 use crate::{
  error::CoreAudioError,
  pid::{audio_process_list, get_process_property},
-  tap_audio::{AggregateDevice, AudioTapStream},
+  tap_audio::{AggregateDeviceManager, AudioCaptureSession},
 };

 #[repr(C)]
@@ -89,12 +89,6 @@ static APPLICATION_STATE_CHANGED_LISTENER_BLOCKS: LazyLock<
 static NSRUNNING_APPLICATION_CLASS: LazyLock<Option<&'static AnyClass>> =
  LazyLock::new(|| AnyClass::get(c"NSRunningApplication"));

-static AVCAPTUREDEVICE_CLASS: LazyLock<Option<&'static AnyClass>> =
-  LazyLock::new(|| AnyClass::get(c"AVCaptureDevice"));
-
-static SCSTREAM_CLASS: LazyLock<Option<&'static AnyClass>> =
-  LazyLock::new(|| AnyClass::get(c"SCStream"));
-
 #[napi]
 pub struct Application {
  pub(crate) process_id: i32,
@@ -445,10 +439,13 @@ impl TappableApplication {
  pub fn tap_audio(
    &self,
    audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
-  ) -> Result<AudioTapStream> {
-    // Use the new method that takes a TappableApplication directly
-    let mut device = AggregateDevice::new(self)?;
-    device.start(audio_stream_callback)
+  ) -> Result<AudioCaptureSession> {
+    // Use AggregateDeviceManager instead of AggregateDevice directly
+    // This provides automatic default device change detection
+    let mut device_manager = AggregateDeviceManager::new(self)?;
+    device_manager.start_capture(audio_stream_callback)?;
+    let boxed_manager = Box::new(device_manager);
+    Ok(AudioCaptureSession::new(boxed_manager))
  }
 }

@@ -742,46 +739,21 @@ impl ShareableContent {
    }
  }

-  #[napi]
-  pub fn check_recording_permissions(&self) -> Result<RecordingPermissions> {
-    let av_capture_class = AVCAPTUREDEVICE_CLASS
-      .as_ref()
-      .ok_or_else(|| Error::new(Status::GenericFailure, "AVCaptureDevice class not found"))?;
-
-    let sc_stream_class = SCSTREAM_CLASS
-      .as_ref()
-      .ok_or_else(|| Error::new(Status::GenericFailure, "SCStream class not found"))?;
-
-    let media_type = NSString::from_str("com.apple.avfoundation.avcapturedevice.built-in_audio");
-
-    let audio_status: i32 = unsafe {
-      msg_send![
-        *av_capture_class,
-        authorizationStatusForMediaType: &*media_type
-      ]
-    };
-
-    let screen_status: bool = unsafe { msg_send![*sc_stream_class, isScreenCaptureAuthorized] };
-
-    Ok(RecordingPermissions {
-      // AVAuthorizationStatusAuthorized = 3
-      audio: audio_status == 3,
-      screen: screen_status,
-    })
-  }
-
  #[napi]
  pub fn tap_global_audio(
    excluded_processes: Option<Vec<&TappableApplication>>,
    audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
-  ) -> Result<AudioTapStream> {
-    let mut device = AggregateDevice::create_global_tap_but_exclude_processes(
-      &excluded_processes
-        .unwrap_or_default()
-        .iter()
-        .map(|app| app.object_id)
-        .collect::<Vec<_>>(),
-    )?;
-    device.start(audio_stream_callback)
+  ) -> Result<AudioCaptureSession> {
+    let excluded_object_ids = excluded_processes
+      .unwrap_or_default()
+      .iter()
+      .map(|app| app.object_id)
+      .collect::<Vec<_>>();
+
+    // Use the new AggregateDeviceManager for automatic device adaptation
+    let mut device_manager = AggregateDeviceManager::new_global(&excluded_object_ids)?;
+    device_manager.start_capture(audio_stream_callback)?;
+    let boxed_manager = Box::new(device_manager);
+    Ok(AudioCaptureSession::new(boxed_manager))
  }
 }
--- a/packages/frontend/native/media_capture/src/macos/tap_audio.rs
+++ b/packages/frontend/native/media_capture/src/macos/tap_audio.rs
@@ -12,12 +12,16 @@ use coreaudio::sys::{
  kAudioAggregateDeviceIsStackedKey, kAudioAggregateDeviceMainSubDeviceKey,
  kAudioAggregateDeviceNameKey, kAudioAggregateDeviceSubDeviceListKey,
  kAudioAggregateDeviceTapAutoStartKey, kAudioAggregateDeviceTapListKey,
-  kAudioAggregateDeviceUIDKey, kAudioDevicePropertyNominalSampleRate, kAudioHardwareBadDeviceError,
+  kAudioAggregateDeviceUIDKey, kAudioDevicePropertyDeviceIsAlive,
+  kAudioDevicePropertyNominalSampleRate, kAudioHardwareBadDeviceError,
  kAudioHardwareBadStreamError, kAudioHardwareNoError, kAudioHardwarePropertyDefaultInputDevice,
-  kAudioHardwarePropertyDefaultOutputDevice, kAudioSubDeviceUIDKey, kAudioSubTapUIDKey,
-  AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID, AudioDeviceIOProcID,
-  AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice,
-  AudioHardwareDestroyAggregateDevice, AudioObjectID, AudioTimeStamp, OSStatus,
+  kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyElementMain,
+  kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, kAudioSubDeviceUIDKey,
+  kAudioSubTapUIDKey, AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID,
+  AudioDeviceIOProcID, AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice,
+  AudioHardwareDestroyAggregateDevice, AudioObjectAddPropertyListenerBlock,
+  AudioObjectGetPropertyDataSize, AudioObjectID, AudioObjectPropertyAddress,
+  AudioObjectRemovePropertyListenerBlock, AudioTimeStamp, OSStatus,
 };
 use napi::{
  bindgen_prelude::Float32Array,
@@ -164,6 +168,7 @@ impl AggregateDevice {
      output_proc_id: None,
    };

+    // Restore the activation logic as it seems necessary for audio flow
    // Configure the aggregate device to ensure proper handling of both input and
    // output
    device.get_aggregate_device_stats()?;
@@ -234,65 +239,78 @@ impl AggregateDevice {
    Ok(dummy_proc_id)
  }

+  /// Implementation for the AggregateDevice to start processing audio
  pub fn start(
    &mut self,
    audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
+    // Add original_audio_stats to ensure consistent target rate
+    original_audio_stats: AudioStats,
  ) -> Result<AudioTapStream> {
-    let mut audio_stats = self.get_aggregate_device_stats()?;
+    let mut current_audio_stats = self.get_aggregate_device_stats()?;

    let queue = create_audio_tap_queue();
    let mut in_proc_id: AudioDeviceIOProcID = None;
+
+    // Get the current input and output sample rates
    let mut input_device_sample_rate: f64 = 0.0;
    get_global_main_property(
      self.input_device_id,
      kAudioDevicePropertyNominalSampleRate,
      &mut input_device_sample_rate,
    )?;
-    let output_sample_rate = audio_stats.sample_rate;
-    let target_sample_rate = input_device_sample_rate.max(output_sample_rate);

-    audio_stats.sample_rate = target_sample_rate;
+    let output_sample_rate = current_audio_stats.sample_rate;
+    // Use the consistent original sample rate as the target for the IO block
+    let target_sample_rate = original_audio_stats.sample_rate;

-    let audio_stats_clone = audio_stats;
-    self.audio_stats = Some(audio_stats);
+    // Update the device's reported stats to the consistent one
+    current_audio_stats.sample_rate = target_sample_rate;
+    current_audio_stats.channels = original_audio_stats.channels;
+    self.audio_stats = Some(current_audio_stats);
+
+    // Use the consistent stats for the stream object returned
+    let audio_stats_for_stream = current_audio_stats;

    let in_io_block: RcBlock<
      dyn Fn(*mut c_void, *mut c_void, *mut c_void, *mut c_void, *mut c_void) -> i32,
-    > = RcBlock::new(
-      move |_in_now: *mut c_void,
-            in_input_data: *mut c_void,
-            in_input_time: *mut c_void,
-            _in_output_data: *mut c_void,
-            _in_output_time: *mut c_void| {
-        let AudioTimeStamp { mSampleTime, .. } = unsafe { &*in_input_time.cast() };
+    >;
+    {
+      in_io_block = RcBlock::new(
+        move |_in_now: *mut c_void,
+              in_input_data: *mut c_void,
+              in_input_time: *mut c_void,
+              _in_output_data: *mut c_void,
+              _in_output_time: *mut c_void| {
+          let AudioTimeStamp { mSampleTime, .. } = unsafe { &*in_input_time.cast() };

-        // ignore pre-roll
-        if *mSampleTime < 0.0 {
-          return kAudioHardwareNoError as i32;
-        }
-        let Ok(dua_audio_buffer_list) =
-          (unsafe { InputAndOutputAudioBufferList::from_raw(in_input_data) })
-        else {
-          return kAudioHardwareBadDeviceError as i32;
-        };
+          // ignore pre-roll
+          if *mSampleTime < 0.0 {
+            return kAudioHardwareNoError as i32;
+          }
+          let Ok(dua_audio_buffer_list) =
+            (unsafe { InputAndOutputAudioBufferList::from_raw(in_input_data) })
+          else {
+            return kAudioHardwareBadDeviceError as i32;
+          };

-        let Ok(mixed_samples) = dua_audio_buffer_list.mix_input_and_output(
-          target_sample_rate,
-          input_device_sample_rate,
-          output_sample_rate,
-        ) else {
-          return kAudioHardwareBadStreamError as i32;
-        };
+          let Ok(mixed_samples) = dua_audio_buffer_list.mix_input_and_output(
+            target_sample_rate,
+            input_device_sample_rate,
+            output_sample_rate,
+          ) else {
+            return kAudioHardwareBadStreamError as i32;
+          };

-        // Send the processed audio data to JavaScript
-        audio_stream_callback.call(
-          Ok(mixed_samples.into()),
-          ThreadsafeFunctionCallMode::NonBlocking,
-        );
+          // Send the processed audio data to JavaScript
+          audio_stream_callback.call(
+            Ok(mixed_samples.into()),
+            ThreadsafeFunctionCallMode::NonBlocking,
+          );

-        kAudioHardwareNoError as i32
-      },
-    );
+          kAudioHardwareNoError as i32
+        },
+      );
+    }

    let status = unsafe {
      AudioDeviceCreateIOProcIDWithBlock(
@@ -310,8 +328,11 @@ impl AggregateDevice {
    if status != 0 {
      return Err(CoreAudioError::CreateIOProcIDWithBlockFailed(status).into());
    }
+
    let status = unsafe { AudioDeviceStart(self.id, in_proc_id) };
    if status != 0 {
+      // Attempt to clean up the IO proc if start failed
+      let _cleanup_status = unsafe { AudioDeviceDestroyIOProcID(self.id, in_proc_id) };
      return Err(CoreAudioError::AudioDeviceStartFailed(status).into());
    }

@@ -319,7 +340,7 @@ impl AggregateDevice {
      device_id: self.id,
      in_proc_id,
      stop_called: false,
-      audio_stats: audio_stats_clone, // Use the updated audio_stats with the actual sample rate
+      audio_stats: audio_stats_for_stream,
      input_device_id: self.input_device_id,
      output_device_id: self.output_device_id,
      input_proc_id: self.input_proc_id,
@@ -386,7 +407,6 @@ impl AggregateDevice {
  }
 }

-#[napi]
 pub struct AudioTapStream {
  device_id: AudioObjectID,
  in_proc_id: AudioDeviceIOProcID,
@@ -398,61 +418,621 @@ pub struct AudioTapStream {
  output_proc_id: Option<AudioDeviceIOProcID>,
 }

-#[napi]
 impl AudioTapStream {
-  #[napi]
  pub fn stop(&mut self) -> Result<()> {
    if self.stop_called {
      return Ok(());
    }
+
    self.stop_called = true;

-    // Stop the main aggregate device
-    let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) };
-    if status != 0 {
-      return Err(CoreAudioError::AudioDeviceStopFailed(status).into());
+    // Check if device exists before attempting to stop it
+    let mut device_exists = false;
+    let mut dummy_size: u32 = 0;
+    let device_check_status = unsafe {
+      AudioObjectGetPropertyDataSize(
+        self.device_id,
+        &AudioObjectPropertyAddress {
+          mSelector: kAudioDevicePropertyDeviceIsAlive,
+          mScope: kAudioObjectPropertyScopeGlobal,
+          mElement: kAudioObjectPropertyElementMain,
+        },
+        0,
+        ptr::null(),
+        &mut dummy_size,
+      )
+    };
+
+    if device_check_status == 0 {
+      device_exists = true;
+    }
+
+    // Stop the main aggregate device - ignore errors as device might already be
+    // stopped or disconnected
+    if device_exists {
+      let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) };
+      // Don't fail the whole stop process if this fails, just log the error and
+      // continue cleanup
+      if status != 0 {
+        // kAudioHardwareBadDeviceError (560227702 / 0x2166616E in hex) indicates the
+        // device is gone, which is expected in some scenarios (like device
+        // unplug). Treat this as non-existent.
+        if status == kAudioHardwareBadDeviceError as i32 {
+          device_exists = false; // Treat as non-existent for subsequent steps
+        }
+      }
    }

    // Stop the input device if it was activated
    if let Some(proc_id) = self.input_proc_id {
-      let _ = unsafe { AudioDeviceStop(self.input_device_id, proc_id) };
-      let _ = unsafe { AudioDeviceDestroyIOProcID(self.input_device_id, proc_id) };
+      // Ignore errors as device might be disconnected
+      let status = unsafe { AudioDeviceStop(self.input_device_id, proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Input device stop failed with status: {}",
+          status
+        );
+      }
+
+      let status = unsafe { AudioDeviceDestroyIOProcID(self.input_device_id, proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Input device destroy IO proc failed with status: {}",
+          status
+        );
+      }
    }

    // Stop the output device if it was activated
    if let Some(proc_id) = self.output_proc_id {
-      let _ = unsafe { AudioDeviceStop(self.output_device_id, proc_id) };
-      let _ = unsafe { AudioDeviceDestroyIOProcID(self.output_device_id, proc_id) };
+      // Ignore errors as device might be disconnected
+      let status = unsafe { AudioDeviceStop(self.output_device_id, proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Output device stop failed with status: {}",
+          status
+        );
+      }
+
+      let status = unsafe { AudioDeviceDestroyIOProcID(self.output_device_id, proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Output device destroy IO proc failed with status: {}",
+          status
+        );
+      }
    }

-    // Destroy the main IO proc
-    let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) };
-    if status != 0 {
-      return Err(CoreAudioError::AudioDeviceDestroyIOProcIDFailed(status).into());
+    // Destroy the main IO proc if device still exists
+    if device_exists {
+      let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) };
+      if status != 0 {
+        println!(
+          "DEBUG: WARNING: Destroy IO proc failed with status: {}",
+          status
+        );
+      }
    }
-
-    // Destroy the aggregate device
    let status = unsafe { AudioHardwareDestroyAggregateDevice(self.device_id) };
    if status != 0 {
-      return Err(CoreAudioError::AudioHardwareDestroyAggregateDeviceFailed(status).into());
+      println!(
+        "DEBUG: WARNING: AudioHardwareDestroyAggregateDevice failed with status: {}",
+        status
+      );
    }

-    // Destroy the process tap
+    // Destroy the process tap - don't fail if this fails
    let status = unsafe { AudioHardwareDestroyProcessTap(self.device_id) };
    if status != 0 {
-      return Err(CoreAudioError::AudioHardwareDestroyProcessTapFailed(status).into());
+      println!(
+        "DEBUG: WARNING: AudioHardwareDestroyProcessTap failed with status: {}",
+        status
+      );
+    }
+
+    // Always return success to prevent errors from bubbling up to JavaScript
+    // since we've made a best effort to clean up
+    Ok(())
+  }
+
+  pub fn get_sample_rate(&self) -> f64 {
+    self.audio_stats.sample_rate
+  }
+
+  /// Gets the actual sample rate of the current device
+  ///
+  /// This can be different from the original sample rate if the default device
+  /// has changed. The original sample rate is maintained for consistency in
+  /// audio processing, but applications might need to know the actual device
+  /// sample rate for certain operations.
+  pub fn get_actual_sample_rate(&self) -> Result<f64> {
+    let device_id = self.output_device_id;
+    let mut actual_sample_rate: f64 = 0.0;
+    let status = unsafe {
+      let address = AudioObjectPropertyAddress {
+        mSelector: kAudioDevicePropertyNominalSampleRate,
+        mScope: kAudioObjectPropertyScopeGlobal,
+        mElement: kAudioObjectPropertyElementMain,
+      };
+
+      let mut size = std::mem::size_of::<f64>() as u32;
+      coreaudio::sys::AudioObjectGetPropertyData(
+        device_id,
+        &address,
+        0,
+        ptr::null(),
+        &mut size,
+        &mut actual_sample_rate as *mut f64 as *mut c_void,
+      )
+    };
+
+    if status != 0 {
+      return Err(CoreAudioError::GetPropertyDataFailed(status).into());
+    }
+
+    Ok(actual_sample_rate)
+  }
+
+  pub fn get_channels(&self) -> u32 {
+    self.audio_stats.channels
+  }
+
+  /// Mark the stream as stopped without performing actual cleanup
+  /// This is used when the device is known to be in an invalid state
+  pub fn mark_stopped_without_cleanup(&mut self) {
+    self.stop_called = true;
+  }
+}
+
+/// A manager for audio device handling that automatically adapts to device
+/// changes
+pub struct AggregateDeviceManager {
+  device: AggregateDevice,
+  default_devices_listener: Option<*mut c_void>,
+  is_app_specific: bool,
+  app_id: Option<AudioObjectID>,
+  excluded_processes: Vec<AudioObjectID>,
+  active_stream: Option<Arc<std::sync::Mutex<Option<AudioTapStream>>>>,
+  audio_callback: Option<Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>>,
+  original_audio_stats: Option<AudioStats>,
+}
+
+impl AggregateDeviceManager {
+  /// Creates a new AggregateDeviceManager for a specific application
+  pub fn new(app: &TappableApplication) -> Result<Self> {
+    let device = AggregateDevice::new(app)?;
+
+    Ok(Self {
+      device,
+      default_devices_listener: None,
+      is_app_specific: true,
+      app_id: Some(app.object_id),
+      excluded_processes: Vec::new(),
+      active_stream: None,
+      audio_callback: None,
+      original_audio_stats: None,
+    })
+  }
+
+  /// Creates a new AggregateDeviceManager for global audio with option to
+  /// exclude processes
+  pub fn new_global(excluded_processes: &[AudioObjectID]) -> Result<Self> {
+    let device = AggregateDevice::create_global_tap_but_exclude_processes(excluded_processes)?;
+    Ok(Self {
+      device,
+      default_devices_listener: None,
+      is_app_specific: false,
+      app_id: None,
+      excluded_processes: excluded_processes.to_vec(),
+      active_stream: None,
+      audio_callback: None,
+      original_audio_stats: None,
+    })
+  }
+
+  /// This sets up the initial stream and listeners.
+  pub fn start_capture(
+    &mut self,
+    audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
+  ) -> Result<()> {
+    // Store the callback for potential device switch later
+    self.audio_callback = Some(audio_stream_callback.clone());
+
+    // Create a shared reference for the active stream
+    let stream_mutex = Arc::new(std::sync::Mutex::new(None));
+    self.active_stream = Some(stream_mutex.clone());
+
+    // Start the initial stream
+    // Pass the initially determined consistent audio stats
+    let original_audio_stats = self
+      .device
+      .get_aggregate_device_stats()
+      .unwrap_or(AudioStats {
+        sample_rate: 48000.0, // Match fallback in setup_device_change_listeners
+        channels: 1,
+      });
+    self.original_audio_stats = Some(original_audio_stats); // Store for listener use
+
+    let initial_audio_tap_stream = self
+      .device
+      .start(audio_stream_callback.clone(), original_audio_stats)?; // Pass clone of callback
+
+    // Setup device change listeners AFTER getting initial stats and stream
+    self.setup_device_change_listeners()?;
+
+    // Store a reference to the stream
+    if let Ok(mut stream_guard) = stream_mutex.lock() {
+      *stream_guard = Some(initial_audio_tap_stream);
+    } else {
+      println!("DEBUG: Failed to lock stream_mutex to store AudioTapStream reference");
+      // If we can't store the initial stream, something is wrong.
+      // Attempt to stop the stream we just created? Or just return error?
+      // For now, return an error.
+      return Err(napi::Error::from_reason(
+        "Failed to lock internal stream mutex during startup",
+      ));
    }

    Ok(())
  }

-  #[napi(getter)]
-  pub fn get_sample_rate(&self) -> f64 {
-    self.audio_stats.sample_rate
+  /// Sets up listeners for default device changes
+  fn setup_device_change_listeners(&mut self) -> Result<()> {
+    // We need to clean up any existing listeners first
+    self.cleanup_device_listeners();
+
+    // Create a weak reference to self to avoid circular references
+    let stream_arc = self.active_stream.clone();
+    let callback_arc = self.audio_callback.clone();
+    let is_app_specific = self.is_app_specific;
+    let app_id = self.app_id;
+    let excluded_processes = self.excluded_processes.clone();
+
+    // Retrieve the stored original audio stats
+    let Some(original_audio_stats) = self.original_audio_stats else {
+      return Err(napi::Error::from_reason(
+        "Internal error: Original audio stats not available for listener.",
+      ));
+    };
+
+    // Create a block that will handle device changes
+    let device_changed_block = RcBlock::new(
+      move |_in_number_addresses: u32, _in_addresses: *mut c_void| {
+        // Skip if we don't have all required information
+        let Some(stream_mutex) = stream_arc.as_ref() else {
+          return;
+        };
+        let Some(callback) = callback_arc.as_ref() else {
+          return;
+        };
+
+        // Try to lock the stream mutex
+        let Ok(mut stream_guard) = stream_mutex.lock() else {
+          return;
+        };
+
+        // Create a new device with updated default devices
+        let result: Result<AggregateDevice> = (|| {
+          if is_app_specific {
+            if let Some(id) = app_id {
+              let app = TappableApplication::new(id)?;
+              AggregateDevice::new(&app)
+            } else {
+              Err(CoreAudioError::CreateProcessTapFailed(0).into())
+            }
+          } else {
+            AggregateDevice::create_global_tap_but_exclude_processes(&excluded_processes)
+          }
+        })();
+
+        // If we successfully created a new device, stop the old stream and start a new
+        // one
+        match result {
+          Ok(mut new_device) => {
+            // Stop and drop the old stream if it exists
+            if let Some(mut old_stream) = stream_guard.take() {
+              // Explicitly drop the old stream's Box before creating the new device.
+              // The drop implementation handles cleanup.
+              // We call stop() directly.
+              let stop_result = old_stream.stop();
+              match stop_result {
+                Ok(_) => {}
+                Err(e) => println!(
+                  "DEBUG: Error stopping old stream (proceeding anyway): {}",
+                  e
+                ),
+              };
+              drop(old_stream); // Ensure it's dropped now
+            }
+
+            match new_device.start(callback.clone(), original_audio_stats) {
+              Ok(new_stream) => {
+                // Use the existing stream_guard which already holds the lock
+                *stream_guard = Some(new_stream);
+              }
+              Err(e) => {
+                println!("DEBUG: Failed to start new stream: {}", e);
+              }
+            }
+          }
+          Err(e) => {
+            println!("DEBUG: Failed to create new device: {}", e);
+          }
+        }
+      },
+    );
+
+    // Create pointers to the device_changed_block that can be used in C functions
+    let block_ptr = &*device_changed_block as *const Block<dyn Fn(u32, *mut c_void)>;
+    let block_ptr_cast = block_ptr.cast_mut().cast();
+
+    // Register listeners for both input and output device changes
+    unsafe {
+      let status = AudioObjectAddPropertyListenerBlock(
+        kAudioObjectSystemObject,
+        &AudioObjectPropertyAddress {
+          mSelector: kAudioHardwarePropertyDefaultInputDevice,
+          mScope: kAudioObjectPropertyScopeGlobal,
+          mElement: kAudioObjectPropertyElementMain,
+        },
+        ptr::null_mut(),
+        block_ptr_cast,
+      );
+
+      if status != 0 {
+        println!(
+          "DEBUG: Failed to register input device listener, status: {}",
+          status
+        );
+        return Err(CoreAudioError::AddPropertyListenerBlockFailed(status).into());
+      }
+
+      let status = AudioObjectAddPropertyListenerBlock(
+        kAudioObjectSystemObject,
+        &AudioObjectPropertyAddress {
+          mSelector: kAudioHardwarePropertyDefaultOutputDevice,
+          mScope: kAudioObjectPropertyScopeGlobal,
+          mElement: kAudioObjectPropertyElementMain,
+        },
+        ptr::null_mut(),
+        block_ptr_cast,
+      );
+
+      if status != 0 {
+        println!(
+          "DEBUG: Failed to register output device listener, status: {}",
+          status
+        );
+        // Clean up the first listener if the second one fails
+        AudioObjectRemovePropertyListenerBlock(
+          kAudioObjectSystemObject,
+          &AudioObjectPropertyAddress {
+            mSelector: kAudioHardwarePropertyDefaultInputDevice,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain,
+          },
+          ptr::null_mut(),
+          block_ptr_cast,
+        );
+        return Err(CoreAudioError::AddPropertyListenerBlockFailed(status).into());
+      }
+    }
+
+    // Store the listener pointer for cleanup
+    self.default_devices_listener = Some(block_ptr_cast);
+
+    Ok(())
+  }
+
+  /// Cleans up device change listeners
+  fn cleanup_device_listeners(&mut self) {
+    if let Some(listener) = self.default_devices_listener.take() {
+      unsafe {
+        // Remove input device change listener
+        let status = AudioObjectRemovePropertyListenerBlock(
+          kAudioObjectSystemObject,
+          &AudioObjectPropertyAddress {
+            mSelector: kAudioHardwarePropertyDefaultInputDevice,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain,
+          },
+          ptr::null_mut(),
+          listener,
+        );
+        if status != 0 {
+          println!(
+            "DEBUG: Failed to remove input device listener, status: {}",
+            status
+          );
+        }
+
+        let status = AudioObjectRemovePropertyListenerBlock(
+          kAudioObjectSystemObject,
+          &AudioObjectPropertyAddress {
+            mSelector: kAudioHardwarePropertyDefaultOutputDevice,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain,
+          },
+          ptr::null_mut(),
+          listener,
+        );
+        if status != 0 {
+          println!(
+            "DEBUG: Failed to remove output device listener, status: {}",
+            status
+          );
+        }
+      }
+    }
+  }
+
+  /// Stops the active stream and cleans up listeners.
+  pub fn stop_capture(&mut self) -> Result<()> {
+    self.cleanup_device_listeners();
+
+    let stream_to_stop = if let Some(stream_mutex) = &self.active_stream {
+      if let Ok(mut stream_guard) = stream_mutex.lock() {
+        stream_guard.take() // Take ownership from the Option<Arc<Mutex<...>>>
+      } else {
+        println!("DEBUG: Failed to lock stream mutex during stop");
+        None
+      }
+    } else {
+      None
+    };
+
+    if let Some(mut stream) = stream_to_stop {
+      match stream.stop() {
+        Ok(_) => {}
+        Err(e) => println!(
+          "DEBUG: Error stopping stream in stop_capture (ignored): {}",
+          e
+        ),
+      }
+      // Explicitly drop here after stopping
+      drop(stream);
+    }
+
+    // Clear related fields
+    self.active_stream = None;
+    self.audio_callback = None;
+    self.original_audio_stats = None;
+
+    Ok(())
+  }
+
+  /// Gets the stats of the currently active stream, if any.
+  pub fn get_current_stats(&self) -> Option<AudioStats> {
+    if let Some(stream_mutex) = &self.active_stream {
+      if let Ok(stream_guard) = stream_mutex.lock() {
+        // Borrow the stream Option, then map to get stats
+        stream_guard.as_ref().map(|stream| stream.audio_stats)
+      } else {
+        println!("DEBUG: Failed to lock stream mutex for get_current_stats");
+        None
+      }
+    } else {
+      None
+    }
+  }
+
+  /// Gets the actual sample rate of the currently active stream's output
+  /// device.
+  pub fn get_current_actual_sample_rate(&self) -> Result<Option<f64>> {
+    let maybe_stream_ref = if let Some(stream_mutex) = &self.active_stream {
+      match stream_mutex.lock() {
+        Ok(guard) => guard,
+        Err(_) => {
+          println!("DEBUG: Failed to lock stream mutex for get_current_actual_sample_rate");
+          // Return Ok(None) or an error? Let's return None.
+          return Ok(None);
+        }
+      }
+    } else {
+      return Ok(None); // No active stream manager
+    };
+
+    if let Some(stream) = maybe_stream_ref.as_ref() {
+      // Call the existing non-napi method on AudioTapStream
+      match stream.get_actual_sample_rate() {
+        Ok(rate) => Ok(Some(rate)),
+        Err(e) => {
+          println!("DEBUG: Error getting actual sample rate from stream: {}", e);
+          // Propagate the error
+          Err(e)
+        }
+      }
+    } else {
+      Ok(None) // No active stream
+    }
+  }
+}
+
+impl Drop for AggregateDeviceManager {
+  fn drop(&mut self) {
+    // Call stop_capture which handles listener cleanup and stream stopping
+    match self.stop_capture() {
+      Ok(_) => {}
+      Err(e) => println!("DEBUG: Error during stop_capture in Drop (ignored): {}", e),
+    }
+  }
+}
+
+// NEW NAPI Struct: AudioCaptureSession
+#[napi]
+pub struct AudioCaptureSession {
+  // Use Option<Box<...>> to allow taking ownership in stop()
+  manager: Option<Box<AggregateDeviceManager>>,
+}
+
+#[napi]
+impl AudioCaptureSession {
+  // Constructor called internally, not directly via NAPI
+  pub(crate) fn new(manager: Box<AggregateDeviceManager>) -> Self {
+    Self {
+      manager: Some(manager),
+    }
+  }
+
+  #[napi]
+  pub fn stop(&mut self) -> Result<()> {
+    if let Some(manager) = self.manager.take() {
+      // manager.stop_capture() will be called by its Drop impl
+      // We just need to drop the manager here.
+      drop(manager);
+      Ok(())
+    } else {
+      println!("DEBUG: AudioCaptureSession.stop() called, but manager was already taken");
+      // Return Ok even if called multiple times, idempotent behavior
+      Ok(())
+    }
  }

  #[napi(getter)]
-  pub fn get_channels(&self) -> u32 {
-    self.audio_stats.channels
+  pub fn get_sample_rate(&self) -> Result<f64> {
+    if let Some(manager) = &self.manager {
+      manager
+        .get_current_stats()
+        .map(|stats| stats.sample_rate)
+        .ok_or_else(|| napi::Error::from_reason("No active audio stream to get sample rate from"))
+    } else {
+      Err(napi::Error::from_reason("Audio session is stopped"))
+    }
+  }
+
+  #[napi(getter)]
+  pub fn get_channels(&self) -> Result<u32> {
+    if let Some(manager) = &self.manager {
+      manager
+        .get_current_stats()
+        .map(|stats| stats.channels)
+        .ok_or_else(|| napi::Error::from_reason("No active audio stream to get channels from"))
+    } else {
+      Err(napi::Error::from_reason("Audio session is stopped"))
+    }
+  }
+
+  #[napi(getter)]
+  pub fn get_actual_sample_rate(&self) -> Result<f64> {
+    if let Some(manager) = &self.manager {
+      manager
+        .get_current_actual_sample_rate()? // Propagate CoreAudioError
+        .ok_or_else(|| {
+          napi::Error::from_reason("No active audio stream to get actual sample rate from")
+        })
+    } else {
+      Err(napi::Error::from_reason("Audio session is stopped"))
+    }
+  }
+}
+
+// Ensure the manager is dropped if the session object is dropped without
+// calling stop()
+impl Drop for AudioCaptureSession {
+  fn drop(&mut self) {
+    // Automatically calls drop on self.manager if it's Some
+    if let Some(manager) = self.manager.take() {
+      drop(manager);
+    }
  }
 }
--- a/packages/frontend/native/media_capture/src/macos/utils.rs
+++ b/packages/frontend/native/media_capture/src/macos/utils.rs
@@ -5,7 +5,7 @@ use coreaudio::sys::{
  kAudioObjectPropertyElementMain, kAudioObjectPropertyScopeGlobal, AudioObjectGetPropertyData,
  AudioObjectID, AudioObjectPropertyAddress,
 };
-use rubato::{Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType};
+use rubato::{FastFixedIn, PolynomialDegree, Resampler};

 use crate::error::CoreAudioError;

@@ -81,22 +81,32 @@ pub fn process_audio_frame(
  };

  if current_sample_rate != target_sample_rate {
-    let params = SincInterpolationParameters {
-      sinc_len: 256,
-      f_cutoff: 0.95,
-      interpolation: SincInterpolationType::Linear,
-      oversampling_factor: 256,
-      window: rubato::WindowFunction::BlackmanHarris2,
-    };
-    let mut resampler = SincFixedIn::<f32>::new(
+    // TODO: may use SincFixedOut to improve the sample quality
+    // however, it's not working as expected if we only process samples in chunks
+    // e.g., even with ratio 1.0, resampling 512 samples will result in 382 samples,
+    // which will produce very bad quality. The reason is that the resampler is
+    // meant to be used for dealing with larger input size. The reduced number
+    // of samples is a "delay" of the resampler for better quality.
+    let mut resampler = match FastFixedIn::<f32>::new(
      target_sample_rate / current_sample_rate,
      2.0,
-      params,
+      PolynomialDegree::Cubic,
      processed_samples.len(),
      1,
-    )
-    .ok()?;
-    let mut waves_out = resampler.process(&[processed_samples], None).ok()?;
+    ) {
+      Ok(r) => r,
+      Err(e) => {
+        eprintln!("Error creating resampler: {:?}", e);
+        return None;
+      }
+    };
+    let mut waves_out = match resampler.process(&[processed_samples], None) {
+      Ok(w) => w,
+      Err(e) => {
+        eprintln!("Error processing audio with resampler: {:?}", e);
+        return None;
+      }
+    };
    waves_out.pop()
  } else {
    Some(processed_samples)