feat(electron): create recording through tray (#10526)

- added tray menu for controlling recording status
- added recording watcher for monitoring system audio input events
pengx17
2025-03-18 04:12:30 +00:00
parent 05329e96c7
commit a016630a82
29 changed files with 1186 additions and 258 deletions


@@ -1,4 +1,4 @@
use std::{ffi::c_void, sync::Arc};
use std::{ffi::c_void, ptr, sync::Arc};
use block2::{Block, RcBlock};
use core_foundation::{
@@ -13,12 +13,15 @@ use coreaudio::sys::{
kAudioAggregateDeviceIsPrivateKey, kAudioAggregateDeviceIsStackedKey,
kAudioAggregateDeviceMainSubDeviceKey, kAudioAggregateDeviceNameKey,
kAudioAggregateDeviceSubDeviceListKey, kAudioAggregateDeviceTapAutoStartKey,
kAudioAggregateDeviceTapListKey, kAudioAggregateDeviceUIDKey, kAudioHardwareNoError,
kAudioHardwarePropertyDefaultInputDevice, kAudioHardwarePropertyDefaultSystemOutputDevice,
kAudioSubDeviceUIDKey, kAudioSubTapDriftCompensationKey, kAudioSubTapUIDKey,
AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID, AudioDeviceIOProcID,
AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice,
AudioHardwareDestroyAggregateDevice, AudioObjectID, AudioTimeStamp, OSStatus,
kAudioAggregateDeviceTapListKey, kAudioAggregateDeviceUIDKey,
kAudioDevicePropertyAvailableNominalSampleRates, kAudioDevicePropertyNominalSampleRate,
kAudioHardwareNoError, kAudioHardwarePropertyDefaultInputDevice,
kAudioHardwarePropertyDefaultSystemOutputDevice, kAudioObjectPropertyElementMain,
kAudioObjectPropertyScopeGlobal, kAudioSubDeviceUIDKey, kAudioSubTapDriftCompensationKey,
kAudioSubTapUIDKey, AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID,
AudioDeviceIOProcID, AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice,
AudioHardwareDestroyAggregateDevice, AudioObjectGetPropertyData, AudioObjectGetPropertyDataSize,
AudioObjectID, AudioObjectPropertyAddress, AudioObjectSetPropertyData, AudioTimeStamp, OSStatus,
};
use napi::{
bindgen_prelude::Float32Array,
@@ -30,8 +33,11 @@ use objc2::{runtime::AnyObject, Encode, Encoding, RefEncode};
use crate::{
audio_stream_basic_desc::read_audio_stream_basic_description,
ca_tap_description::CATapDescription, device::get_device_uid, error::CoreAudioError,
queue::create_audio_tap_queue, screen_capture_kit::TappableApplication,
ca_tap_description::CATapDescription,
device::{get_device_audio_id, get_device_uid},
error::CoreAudioError,
queue::create_audio_tap_queue,
screen_capture_kit::TappableApplication,
};
extern "C" {
@@ -53,6 +59,14 @@ pub struct AudioBuffer {
pub mData: *mut c_void,
}
// Local mirror of CoreAudio's AudioValueRange (a pair of f64 bounds), used to read the available sample rate ranges
#[repr(C)]
#[allow(non_snake_case)]
struct AudioValueRange {
mMinimum: f64,
mMaximum: f64,
}
unsafe impl Encode for AudioBuffer {
const ENCODING: Encoding = Encoding::Struct(
"AudioBuffer",
@@ -94,6 +108,10 @@ pub struct AggregateDevice {
pub tap_id: AudioObjectID,
pub id: AudioObjectID,
pub audio_stats: Option<AudioStats>,
pub input_device_id: Option<AudioObjectID>,
pub output_device_id: Option<AudioObjectID>,
pub input_proc_id: Option<AudioDeviceIOProcID>,
pub output_proc_id: Option<AudioDeviceIOProcID>,
}
impl AggregateDevice {
@@ -128,6 +146,10 @@ impl AggregateDevice {
tap_id,
id: aggregate_device_id,
audio_stats: None,
input_device_id: None,
output_device_id: None,
input_proc_id: None,
output_proc_id: None,
})
}
@@ -160,6 +182,10 @@ impl AggregateDevice {
tap_id,
id: aggregate_device_id,
audio_stats: None,
input_device_id: None,
output_device_id: None,
input_proc_id: None,
output_proc_id: None,
})
}
@@ -173,6 +199,12 @@ impl AggregateDevice {
return Err(CoreAudioError::CreateProcessTapFailed(status).into());
}
// Get the default input device (microphone) ID
let input_device_id = get_device_audio_id(kAudioHardwarePropertyDefaultInputDevice)?;
// Get the default output device ID
let output_device_id = get_device_audio_id(kAudioHardwarePropertyDefaultSystemOutputDevice)?;
let description_dict = Self::create_aggregate_description(tap_id, tap_description.get_uuid()?)?;
let mut aggregate_device_id: AudioObjectID = 0;
@@ -189,30 +221,246 @@ impl AggregateDevice {
return Err(CoreAudioError::CreateAggregateDeviceFailed(status).into());
}
Ok(Self {
// Create a device with stored device IDs
let mut device = Self {
tap_id,
id: aggregate_device_id,
audio_stats: None,
})
input_device_id: Some(input_device_id),
output_device_id: Some(output_device_id),
input_proc_id: None,
output_proc_id: None,
};
// Configure the aggregate device to ensure proper handling of both input and
// output
device.configure_aggregate_device()?;
// Activate both the input and output devices and store their proc IDs
let input_proc_id = device.activate_audio_device(input_device_id)?;
let output_proc_id = device.activate_audio_device(output_device_id)?;
device.input_proc_id = Some(input_proc_id);
device.output_proc_id = Some(output_proc_id);
Ok(device)
}
// Configures the aggregate device to ensure proper handling of both input and
// output streams
fn configure_aggregate_device(&self) -> Result<AudioStats> {
// Read the current audio format to ensure it's properly configured
let audio_format = read_audio_stream_basic_description(self.tap_id)?;
// Create initial audio stats with the actual sample rate but always use mono
let initial_sample_rate = audio_format.0.mSampleRate;
let mut audio_stats = AudioStats {
sample_rate: initial_sample_rate,
channels: 1, // Always set to 1 channel (mono)
};
// Set the preferred sample rate on the device
// This is similar to how Screen Capture Kit allows setting the sample rate
let preferred_sample_rate = initial_sample_rate; // Use the device's current sample rate
// First, check if the preferred sample rate is available
let mut is_sample_rate_available = false;
let mut best_available_rate = preferred_sample_rate; // Default to preferred rate
unsafe {
// Get the available sample rates
let address = AudioObjectPropertyAddress {
mSelector: kAudioDevicePropertyAvailableNominalSampleRates,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
// Get the size of the property data
let mut data_size: u32 = 0;
let status = AudioObjectGetPropertyDataSize(
self.id,
&address as *const AudioObjectPropertyAddress,
0,
std::ptr::null(),
&mut data_size as *mut u32,
);
if status == 0 && data_size > 0 {
// Calculate how many ranges we have
let range_count = data_size as usize / std::mem::size_of::<AudioValueRange>();
// Allocate memory for the ranges
let mut ranges: Vec<AudioValueRange> = Vec::with_capacity(range_count);
ranges.set_len(range_count);
// Get the available sample rates
let status = AudioObjectGetPropertyData(
self.id,
&address as *const AudioObjectPropertyAddress,
0,
std::ptr::null(),
&mut data_size as *mut u32,
ranges.as_mut_ptr() as *mut std::ffi::c_void,
);
if status == 0 {
// Check if our preferred sample rate is within any of the available ranges
for range in &ranges {
if preferred_sample_rate >= range.mMinimum && preferred_sample_rate <= range.mMaximum {
is_sample_rate_available = true;
break;
}
}
// If not available, find the best available rate
if !is_sample_rate_available && !ranges.is_empty() {
// Common preferred sample rates in order of preference
let common_rates = [48000.0, 44100.0, 96000.0, 88200.0, 24000.0, 22050.0];
let mut found_common_rate = false;
// First try to find a common rate that's available
for &rate in &common_rates {
for range in &ranges {
if rate >= range.mMinimum && rate <= range.mMaximum {
best_available_rate = rate;
found_common_rate = true;
break;
}
}
if found_common_rate {
break;
}
}
// If no common rate is available, use the highest available rate
if !found_common_rate {
// Find the highest available rate
for range in &ranges {
// Use the maximum of the range as our best available rate
if range.mMaximum > best_available_rate {
best_available_rate = range.mMaximum;
}
}
}
}
}
}
}
// Set the sample rate to either the preferred rate or the best available rate
let sample_rate_to_set = if is_sample_rate_available {
preferred_sample_rate
} else {
best_available_rate
};
let status = unsafe {
// Note on scope usage:
// We use kAudioObjectPropertyScopeGlobal here because it works reliably for
// setting the nominal sample rate on the device. While
// kAudioObjectPropertyScopeInput or kAudioObjectPropertyScopeOutput might
// also work in some cases, kAudioObjectPropertyScopeGlobal is the most
// consistent approach.
//
// The CoreAudio documentation doesn't explicitly specify which scope to use
// with kAudioDevicePropertyNominalSampleRate, but in practice,
// kAudioObjectPropertyScopeGlobal ensures the sample rate is set for the
// entire device, affecting both input and output.
let address = AudioObjectPropertyAddress {
mSelector: kAudioDevicePropertyNominalSampleRate,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
// Set the sample rate property
AudioObjectSetPropertyData(
self.id,
&address as *const AudioObjectPropertyAddress,
0,
std::ptr::null(),
std::mem::size_of::<f64>() as u32,
&sample_rate_to_set as *const f64 as *const std::ffi::c_void,
)
};
// Update the audio_stats with the actual sample rate that was set if successful
if status == 0 {
audio_stats.sample_rate = sample_rate_to_set;
// Verify the actual sample rate by reading it back
unsafe {
let address = AudioObjectPropertyAddress {
mSelector: kAudioDevicePropertyNominalSampleRate,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut actual_rate: f64 = 0.0;
let mut data_size = std::mem::size_of::<f64>() as u32;
let status = AudioObjectGetPropertyData(
self.id,
&address as *const AudioObjectPropertyAddress,
0,
std::ptr::null(),
&mut data_size as *mut u32,
&mut actual_rate as *mut f64 as *mut std::ffi::c_void,
);
if status == 0 {
// Update with the verified rate
audio_stats.sample_rate = actual_rate;
}
}
}
Ok(audio_stats)
}
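    // Editor's sketch (not part of this commit): a minimal, standalone version of
    // the sample-rate selection policy implemented above, reusing the local
    // AudioValueRange struct. It keeps the preferred rate when any reported range
    // contains it, otherwise falls back to a common rate, otherwise to the highest
    // maximum the device reports. The function name is illustrative only.
    #[allow(dead_code)]
    fn choose_sample_rate(preferred: f64, ranges: &[AudioValueRange]) -> f64 {
        // Keep the preferred rate if any available range contains it
        if ranges
            .iter()
            .any(|r| preferred >= r.mMinimum && preferred <= r.mMaximum)
        {
            return preferred;
        }
        // Otherwise try common rates in order of preference
        let common_rates = [48000.0, 44100.0, 96000.0, 88200.0, 24000.0, 22050.0];
        for &rate in &common_rates {
            if ranges.iter().any(|r| rate >= r.mMinimum && rate <= r.mMaximum) {
                return rate;
            }
        }
        // Fall back to the highest maximum reported by the device (never below preferred)
        ranges.iter().map(|r| r.mMaximum).fold(preferred, f64::max)
    }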
// Activates an audio device by creating a dummy IO proc
fn activate_audio_device(&self, device_id: AudioObjectID) -> Result<AudioDeviceIOProcID> {
// Create a simple no-op dummy proc
let dummy_block = RcBlock::new(
|_: *mut c_void, _: *mut c_void, _: *mut c_void, _: *mut c_void, _: *mut c_void| {
// No-op function that just returns success
kAudioHardwareNoError as i32
},
);
let mut dummy_proc_id: AudioDeviceIOProcID = None;
// Create the IO proc with our dummy block
let status = unsafe {
AudioDeviceCreateIOProcIDWithBlock(
&mut dummy_proc_id,
device_id,
ptr::null_mut(),
(&*dummy_block.copy() as *const Block<dyn Fn(_, _, _, _, _) -> i32>)
.cast_mut()
.cast(),
)
};
if status != 0 {
return Err(CoreAudioError::CreateIOProcIDWithBlockFailed(status).into());
}
// Start the device to activate it
let status = unsafe { AudioDeviceStart(device_id, dummy_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceStartFailed(status).into());
}
// Return the proc ID for later cleanup
Ok(dummy_proc_id)
}
pub fn start(
&mut self,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
) -> Result<AudioTapStream> {
// Read and log the audio format before starting the device
let mut audio_stats = AudioStats {
sample_rate: 44100.0,
channels: 1, // Always set to 1 channel (mono)
};
if let Ok(audio_format) = read_audio_stream_basic_description(self.tap_id) {
// Store the audio format information
audio_stats.sample_rate = audio_format.0.mSampleRate;
// Always use 1 channel regardless of what the system reports
audio_stats.channels = 1;
}
// Configure the aggregate device and get audio stats before starting
let audio_stats = self.configure_aggregate_device()?;
self.audio_stats = Some(audio_stats);
let audio_stats_clone = audio_stats;
@@ -242,7 +490,7 @@ impl AggregateDevice {
}] = mBuffers;
// Only create slice if we have valid data
if !mData.is_null() && *mDataByteSize > 0 {
// Calculate total number of samples (accounting for interleaved stereo)
// Calculate total number of samples (total bytes / bytes per sample)
let total_samples = *mDataByteSize as usize / 4; // 4 bytes per f32
// Create a slice of all samples
@@ -253,27 +501,18 @@ impl AggregateDevice {
let channel_count = *mNumberChannels as usize;
// Process the audio based on channel count
let mut processed_samples: Vec<f32>;
let processed_samples: Vec<f32>;
if channel_count > 1 {
// For stereo, samples are interleaved: [L, R, L, R, ...]
// We need to average each pair to get mono
let frame_count = total_samples / channel_count;
processed_samples = Vec::with_capacity(frame_count);
for i in 0..frame_count {
let mut frame_sum = 0.0;
for c in 0..channel_count {
frame_sum += samples[i * channel_count + c];
}
processed_samples.push(frame_sum / (channel_count as f32));
}
processed_samples = process_mixed_audio(samples, channel_count);
} else {
// Already mono, just copy the samples
// For mono, just copy the samples
processed_samples = samples.to_vec();
}
// Pass the processed samples to the callback
// Send the processed audio data to JavaScript
audio_stream_callback.call(
Ok(processed_samples.into()),
ThreadsafeFunctionCallMode::NonBlocking,
@@ -309,7 +548,11 @@ impl AggregateDevice {
device_id: self.id,
in_proc_id,
stop_called: false,
audio_stats: audio_stats_clone,
audio_stats: audio_stats_clone, // Use the updated audio_stats with the actual sample rate
input_device_id: self.input_device_id,
output_device_id: self.output_device_id,
input_proc_id: self.input_proc_id,
output_proc_id: self.output_proc_id,
})
}
@@ -325,15 +568,29 @@ impl AggregateDevice {
let aggregate_device_uid_string = aggregate_device_uid.to_string();
// Sub-device UID key and dictionary
let sub_device_output_dict = CFDictionary::from_CFType_pairs(&[(
cfstring_from_bytes_with_nul(kAudioSubDeviceUIDKey).as_CFType(),
system_output_uid.as_CFType(),
)]);
let sub_device_output_dict = CFDictionary::from_CFType_pairs(&[
(
cfstring_from_bytes_with_nul(kAudioSubDeviceUIDKey).as_CFType(),
system_output_uid.as_CFType(),
),
// Explicitly mark this as an output device
(
CFString::new("com.apple.audio.roles").as_CFType(),
CFString::new("output").as_CFType(),
),
]);
let sub_device_input_dict = CFDictionary::from_CFType_pairs(&[(
cfstring_from_bytes_with_nul(kAudioSubDeviceUIDKey).as_CFType(),
default_input_uid.as_CFType(),
)]);
let sub_device_input_dict = CFDictionary::from_CFType_pairs(&[
(
cfstring_from_bytes_with_nul(kAudioSubDeviceUIDKey).as_CFType(),
default_input_uid.as_CFType(),
),
// Explicitly mark this as an input device
(
CFString::new("com.apple.audio.roles").as_CFType(),
CFString::new("input").as_CFType(),
),
]);
let tap_device_dict = CFDictionary::from_CFType_pairs(&[
(
@@ -346,6 +603,7 @@ impl AggregateDevice {
),
]);
// Put input device first in the list to prioritize it
let capture_device_list = vec![sub_device_input_dict, sub_device_output_dict];
// Sub-device list
@@ -353,7 +611,8 @@ impl AggregateDevice {
let tap_list = CFArray::from_CFTypes(&[tap_device_dict]);
// Create the aggregate device description dictionary
// Create the aggregate device description dictionary with a balanced
// configuration
let description_dict = CFDictionary::from_CFType_pairs(&[
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceNameKey).as_CFType(),
@@ -365,7 +624,9 @@ impl AggregateDevice {
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceMainSubDeviceKey).as_CFType(),
system_output_uid.as_CFType(),
// Use a balanced approach that includes both input and output
// but prioritize input for microphone capture
default_input_uid.as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceIsPrivateKey).as_CFType(),
@@ -398,6 +659,10 @@ pub struct AudioTapStream {
in_proc_id: AudioDeviceIOProcID,
stop_called: bool,
audio_stats: AudioStats,
input_device_id: Option<AudioObjectID>,
output_device_id: Option<AudioObjectID>,
input_proc_id: Option<AudioDeviceIOProcID>,
output_proc_id: Option<AudioDeviceIOProcID>,
}
#[napi]
@@ -408,22 +673,47 @@ impl AudioTapStream {
return Ok(());
}
self.stop_called = true;
// Stop the main aggregate device
let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceStopFailed(status).into());
}
// Stop the input device if it was activated
if let Some(input_id) = self.input_device_id {
if let Some(proc_id) = self.input_proc_id {
let _ = unsafe { AudioDeviceStop(input_id, proc_id) };
let _ = unsafe { AudioDeviceDestroyIOProcID(input_id, proc_id) };
}
}
// Stop the output device if it was activated
if let Some(output_id) = self.output_device_id {
if let Some(proc_id) = self.output_proc_id {
let _ = unsafe { AudioDeviceStop(output_id, proc_id) };
let _ = unsafe { AudioDeviceDestroyIOProcID(output_id, proc_id) };
}
}
// Destroy the main IO proc
let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceDestroyIOProcIDFailed(status).into());
}
// Destroy the aggregate device
let status = unsafe { AudioHardwareDestroyAggregateDevice(self.device_id) };
if status != 0 {
return Err(CoreAudioError::AudioHardwareDestroyAggregateDeviceFailed(status).into());
}
// Destroy the process tap
let status = unsafe { AudioHardwareDestroyProcessTap(self.device_id) };
if status != 0 {
return Err(CoreAudioError::AudioHardwareDestroyProcessTapFailed(status).into());
}
Ok(())
}
@@ -445,3 +735,21 @@ fn cfstring_from_bytes_with_nul(bytes: &'static [u8]) -> CFString {
.as_ref(),
)
}
// Downmix interleaved multi-channel audio to mono by averaging each frame's channels
fn process_mixed_audio(samples: &[f32], channel_count: usize) -> Vec<f32> {
// For stereo or multi-channel audio, we need to mix down to mono
let samples_per_channel = samples.len() / channel_count;
let mut mixed_samples = Vec::with_capacity(samples_per_channel);
for i in 0..samples_per_channel {
let mut sample_sum = 0.0;
for c in 0..channel_count {
sample_sum += samples[i * channel_count + c];
}
// Average the samples from all channels
mixed_samples.push(sample_sum / channel_count as f32);
}
mixed_samples
}
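For reference, a minimal usage sketch (not part of this commit) of how process_mixed_audio downmixes interleaved stereo: with channel_count = 2, each [L, R] frame is averaged into a single mono sample. The test module name is illustrative and assumes the test sits in the same file as the function.

#[cfg(test)]
mod downmix_sketch {
    use super::process_mixed_audio;

    #[test]
    fn averages_interleaved_stereo_to_mono() {
        // Two interleaved stereo frames: (0.25, 0.75) and (-1.0, 1.0)
        let samples = [0.25_f32, 0.75, -1.0, 1.0];
        let mono = process_mixed_audio(&samples, 2);
        // Each frame collapses to the mean of its two channels
        assert_eq!(mono, vec![0.5, 0.0]);
    }
}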