From 0442c0e2b6753abab86cf71c482551c7e42ecb39 Mon Sep 17 00:00:00 2001 From: pengx17 Date: Mon, 21 Apr 2025 08:35:32 +0000 Subject: [PATCH] feat(electron): adapts to default audio input/output device changes (#11815) Enable automatic adoption of new default audio devices upon detection. 1. add `AggregateDeviceManager` to watch for audio device changes, creating & maintaining `AggregateDevice` instances and cleaning up 2. use a workaround of `FastFixedIn` to deal with resampling delay issue (this is low quality and have some artifacts in the resampled audio) fix AF-2536 --- .../electron/src/main/recording/feature.ts | 14 +- .../apps/electron/src/main/recording/types.ts | 4 +- .../media-capture-playground/server/main.ts | 269 ++++--- packages/frontend/native/index.d.ts | 8 +- packages/frontend/native/index.js | 2 +- .../media_capture/src/macos/audio_buffer.rs | 62 +- .../src/macos/screen_capture_kit.rs | 68 +- .../media_capture/src/macos/tap_audio.rs | 716 ++++++++++++++++-- .../native/media_capture/src/macos/utils.rs | 36 +- 9 files changed, 890 insertions(+), 289 deletions(-) diff --git a/packages/frontend/apps/electron/src/main/recording/feature.ts b/packages/frontend/apps/electron/src/main/recording/feature.ts index d967c92936..670a391a73 100644 --- a/packages/frontend/apps/electron/src/main/recording/feature.ts +++ b/packages/frontend/apps/electron/src/main/recording/feature.ts @@ -287,7 +287,7 @@ export function createRecording(status: RecordingStatus) { app: status.app, appGroup: status.appGroup, file, - stream, + session: stream, }; recordings.set(status.id, recording); @@ -308,8 +308,8 @@ export async function getRecording(id: number) { app: recording.app, startTime: recording.startTime, filepath: rawFilePath, - sampleRate: recording.stream.sampleRate, - numberOfChannels: recording.stream.channels, + sampleRate: recording.session.sampleRate, + numberOfChannels: recording.session.channels, }; } @@ -342,7 +342,7 @@ function setupRecordingListeners() { } else if (status?.status === 'stopped') { const recording = recordings.get(status.id); if (recording) { - recording.stream.stop(); + recording.session.stop(); } } else if ( status?.status === 'create-block-success' || @@ -564,7 +564,7 @@ export async function stopRecording(id: number) { return; } - const { file, stream } = recording; + const { file, session: stream } = recording; // First stop the audio stream to prevent more data coming in try { @@ -742,8 +742,8 @@ export function serializeRecordingStatus( startTime: status.startTime, filepath: status.filepath ?? (recording ? String(recording.file.path) : undefined), - sampleRate: recording?.stream.sampleRate, - numberOfChannels: recording?.stream.channels, + sampleRate: recording?.session.sampleRate, + numberOfChannels: recording?.session.channels, }; } diff --git a/packages/frontend/apps/electron/src/main/recording/types.ts b/packages/frontend/apps/electron/src/main/recording/types.ts index d5826cf2e5..b1d504acc9 100644 --- a/packages/frontend/apps/electron/src/main/recording/types.ts +++ b/packages/frontend/apps/electron/src/main/recording/types.ts @@ -1,6 +1,6 @@ import type { WriteStream } from 'node:fs'; -import type { AudioTapStream, TappableApplication } from '@affine/native'; +import type { AudioCaptureSession, TappableApplication } from '@affine/native'; export interface TappableAppInfo { rawInstance: TappableApplication; @@ -27,7 +27,7 @@ export interface Recording { appGroup?: AppGroupInfo; // the buffered file that is being recorded streamed to file: WriteStream; - stream: AudioTapStream; + session: AudioCaptureSession; startTime: number; filepath?: string; // the filepath of the recording (only available when status is ready) } diff --git a/packages/frontend/media-capture-playground/server/main.ts b/packages/frontend/media-capture-playground/server/main.ts index d8b450aff2..0e9a87d1c1 100644 --- a/packages/frontend/media-capture-playground/server/main.ts +++ b/packages/frontend/media-capture-playground/server/main.ts @@ -4,13 +4,18 @@ import path from 'node:path'; import { type Application, - type AudioTapStream, + type AudioCaptureSession, ShareableContent, type TappableApplication, } from '@affine/native'; import type { FSWatcher } from 'chokidar'; import chokidar from 'chokidar'; -import express from 'express'; +import express, { + type NextFunction, + type Request, + type RequestHandler, + type Response, +} from 'express'; import rateLimit from 'express-rate-limit'; import fs from 'fs-extra'; import { debounce } from 'lodash-es'; @@ -33,7 +38,7 @@ interface Recording { app: TappableApplication | null; appGroup: Application | null; buffers: Float32Array[]; - stream: AudioTapStream; + session: AudioCaptureSession; startTime: number; isWriting: boolean; isGlobal?: boolean; @@ -90,15 +95,19 @@ const io = new Server(httpServer, { }); // Add CORS headers middleware -app.use((req, res, next) => { - res.header('Access-Control-Allow-Origin', '*'); - res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS'); - res.header( +app.use((req: Request, res: Response, next: NextFunction): void => { + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader( + 'Access-Control-Allow-Methods', + 'GET, POST, PUT, DELETE, OPTIONS' + ); + res.setHeader( 'Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept' ); if (req.method === 'OPTIONS') { - return res.sendStatus(200); + res.status(200).end(); + return; } next(); }); @@ -108,16 +117,18 @@ app.use(express.json()); // Update the static file serving to handle the new folder structure app.use( '/recordings', - (req, res, next) => { + (req: Request, res: Response, next: NextFunction): void => { // Extract the folder name from the path const parts = req.path.split('/'); if (parts.length < 2) { - return res.status(400).json({ error: 'Invalid request path' }); + res.status(400).json({ error: 'Invalid request path' }); + return; } const folderName = parts[1]; if (!validateAndSanitizeFolderName(folderName)) { - return res.status(400).json({ error: 'Invalid folder name format' }); + res.status(400).json({ error: 'Invalid folder name format' }); + return; } if (req.path.endsWith('.mp3')) { @@ -141,7 +152,11 @@ const upload = multer({ }); // Recording management -async function saveRecording(recording: Recording): Promise { +async function saveRecording( + recording: Recording, + sampleRate: number, + channels: number +): Promise { try { recording.isWriting = true; const app = recording.isGlobal ? null : recording.appGroup || recording.app; @@ -154,9 +169,8 @@ async function saveRecording(recording: Recording): Promise { const recordingEndTime = Date.now(); const recordingDuration = (recordingEndTime - recording.startTime) / 1000; - // Get the actual sample rate from the stream's audio stats - const actualSampleRate = recording.stream.sampleRate; - const channelCount = recording.stream.channels; + const actualSampleRate = sampleRate; + const channelCount = channels; const expectedSamples = recordingDuration * actualSampleRate; if (recording.isGlobal) { @@ -303,7 +317,7 @@ async function startRecording(app: TappableApplication) { ); const buffers: Float32Array[] = []; - const stream = app.tapAudio((err, samples) => { + const session = app.tapAudio((err, samples) => { if (err) { console.error(`❌ Audio stream error for ${rootApp.name}:`, err); return; @@ -318,7 +332,7 @@ async function startRecording(app: TappableApplication) { app, appGroup: rootApp, buffers, - stream, + session, startTime: Date.now(), isWriting: false, }); @@ -347,8 +361,22 @@ async function stopRecording(processId: number) { `⏱️ Recording duration: ${((Date.now() - recording.startTime) / 1000).toFixed(2)}s` ); - recording.stream.stop(); - const filename = await saveRecording(recording); + let sampleRate = 0; + let channels = 0; + try { + // Get properties BEFORE stopping the session + sampleRate = recording.session.sampleRate; + channels = recording.session.channels; + } catch (e) { + console.error('❌ Failed to get session properties before stopping:', e); + // Handle error appropriately, maybe use default values or skip saving? + // For now, log and continue, saveRecording might fail later if values are 0. + } + + recording.session.stop(); // Stop the session + + // Pass the retrieved values to saveRecording + const filename = await saveRecording(recording, sampleRate, channels); recordingMap.delete(processId); if (filename) { @@ -654,11 +682,6 @@ const rateLimiter = rateLimit({ message: { error: 'Too many requests, please try again later.' }, }); -app.get('/permissions', (req, res) => { - const permission = shareableContent.checkRecordingPermissions(); - res.json({ permission }); -}); - app.get('/apps', async (_req, res) => { const apps = await getAllApps(); listenToAppStateChanges(apps); @@ -691,11 +714,15 @@ function validateAndSanitizeFolderName(folderName: string): string | null { return sanitized; } -app.delete('/recordings/:foldername', rateLimiter, async (req, res) => { +app.delete('/recordings/:foldername', rateLimiter, (async ( + req: Request, + res: Response +): Promise => { const foldername = validateAndSanitizeFolderName(req.params.foldername); if (!foldername) { console.error('❌ Invalid folder name format:', req.params.foldername); - return res.status(400).json({ error: 'Invalid folder name format' }); + res.status(400).json({ error: 'Invalid folder name format' }); + return; } // Construct the path safely using path.join to avoid path traversal @@ -712,7 +739,8 @@ app.delete('/recordings/:foldername', rateLimiter, async (req, res) => { recordingDirPath, requestedFile: foldername, }); - return res.status(403).json({ error: 'Access denied' }); + res.status(403).json({ error: 'Access denied' }); + return; } console.log(`🗑️ Deleting recording folder: ${foldername}`); @@ -736,7 +764,7 @@ app.delete('/recordings/:foldername', rateLimiter, async (req, res) => { }); } } -}); +}) as RequestHandler); app.get('/apps/:process_id/icon', (req, res) => { const processId = parseInt(req.params.process_id); @@ -778,100 +806,101 @@ app.post('/apps/:process_id/stop', async (req, res) => { }); // Update transcription endpoint to use folder validation -app.post( - '/recordings/:foldername/transcribe', - rateLimiter, - async (req, res) => { - const foldername = validateAndSanitizeFolderName(req.params.foldername); - if (!foldername) { - console.error('❌ Invalid folder name format:', req.params.foldername); - return res.status(400).json({ error: 'Invalid folder name format' }); - } - - const recordingDir = `${RECORDING_DIR}/${foldername}`; - - try { - // Check if directory exists - await fs.access(recordingDir); - - const transcriptionWavPath = `${recordingDir}/transcription.wav`; - const transcriptionMetadataPath = `${recordingDir}/transcription.json`; - - // Check if transcription file exists - await fs.access(transcriptionWavPath); - - // Create initial transcription metadata - const initialMetadata: TranscriptionMetadata = { - transcriptionStartTime: Date.now(), - transcriptionEndTime: 0, - transcriptionStatus: 'pending', - }; - await fs.writeJson(transcriptionMetadataPath, initialMetadata); - - // Notify clients that transcription has started - io.emit('apps:recording-transcription-start', { filename: foldername }); - - const transcription = await gemini(transcriptionWavPath, { - mode: 'transcript', - }); - - // Update transcription metadata with results - const metadata: TranscriptionMetadata = { - transcriptionStartTime: initialMetadata.transcriptionStartTime, - transcriptionEndTime: Date.now(), - transcriptionStatus: 'completed', - transcription: transcription ?? undefined, - }; - - await fs.writeJson(transcriptionMetadataPath, metadata); - - // Notify clients that transcription is complete - io.emit('apps:recording-transcription-end', { - filename: foldername, - success: true, - transcription, - }); - - res.json({ success: true }); - } catch (error) { - console.error('❌ Error during transcription:', error); - - // Update transcription metadata with error - const metadata: TranscriptionMetadata = { - transcriptionStartTime: Date.now(), - transcriptionEndTime: Date.now(), - transcriptionStatus: 'error', - error: error instanceof Error ? error.message : 'Unknown error', - }; - - await fs - .writeJson(`${recordingDir}/transcription.json`, metadata) - .catch(err => { - console.error('❌ Error saving transcription metadata:', err); - }); - - // Notify clients of transcription error - io.emit('apps:recording-transcription-end', { - filename: foldername, - success: false, - error: error instanceof Error ? error.message : 'Unknown error', - }); - - res.status(500).json({ - error: error instanceof Error ? error.message : 'Unknown error', - }); - } +app.post('/recordings/:foldername/transcribe', rateLimiter, (async ( + req: Request, + res: Response +): Promise => { + const foldername = validateAndSanitizeFolderName(req.params.foldername); + if (!foldername) { + console.error('❌ Invalid folder name format:', req.params.foldername); + res.status(400).json({ error: 'Invalid folder name format' }); + return; } -); + + const recordingDir = `${RECORDING_DIR}/${foldername}`; + + try { + // Check if directory exists + await fs.access(recordingDir); + + const transcriptionWavPath = `${recordingDir}/transcription.wav`; + const transcriptionMetadataPath = `${recordingDir}/transcription.json`; + + // Check if transcription file exists + await fs.access(transcriptionWavPath); + + // Create initial transcription metadata + const initialMetadata: TranscriptionMetadata = { + transcriptionStartTime: Date.now(), + transcriptionEndTime: 0, + transcriptionStatus: 'pending', + }; + await fs.writeJson(transcriptionMetadataPath, initialMetadata); + + // Notify clients that transcription has started + io.emit('apps:recording-transcription-start', { filename: foldername }); + + const transcription = await gemini(transcriptionWavPath, { + mode: 'transcript', + }); + + // Update transcription metadata with results + const metadata: TranscriptionMetadata = { + transcriptionStartTime: initialMetadata.transcriptionStartTime, + transcriptionEndTime: Date.now(), + transcriptionStatus: 'completed', + transcription: transcription ?? undefined, + }; + + await fs.writeJson(transcriptionMetadataPath, metadata); + + // Notify clients that transcription is complete + io.emit('apps:recording-transcription-end', { + filename: foldername, + success: true, + transcription, + }); + + res.json({ success: true }); + } catch (error) { + console.error('❌ Error during transcription:', error); + + // Update transcription metadata with error + const metadata: TranscriptionMetadata = { + transcriptionStartTime: Date.now(), + transcriptionEndTime: Date.now(), + transcriptionStatus: 'error', + error: error instanceof Error ? error.message : 'Unknown error', + }; + + await fs + .writeJson(`${recordingDir}/transcription.json`, metadata) + .catch(err => { + console.error('❌ Error saving transcription metadata:', err); + }); + + // Notify clients of transcription error + io.emit('apps:recording-transcription-end', { + filename: foldername, + success: false, + error: error instanceof Error ? error.message : 'Unknown error', + }); + + res.status(500).json({ + error: error instanceof Error ? error.message : 'Unknown error', + }); + } +}) as RequestHandler); app.post( '/transcribe', rateLimiter, - upload.single('audio') as any, - async (req, res) => { + upload.single('audio') as unknown as RequestHandler, + (async (req: Request, res: Response): Promise => { try { if (!req.file) { - return res.status(400).json({ error: 'No audio file provided' }); + res.status(400).json({ error: 'No audio file provided' }); + return; } // Notify clients that transcription has started @@ -896,7 +925,7 @@ app.post( error: error instanceof Error ? error.message : 'Unknown error', }); } - } + }) as RequestHandler ); async function startGlobalRecording() { @@ -910,7 +939,7 @@ async function startGlobalRecording() { console.log('🎙️ Starting global recording'); const buffers: Float32Array[] = []; - const stream = ShareableContent.tapGlobalAudio( + const session = ShareableContent.tapGlobalAudio( null, (err: Error | null, samples: Float32Array) => { if (err) { @@ -928,7 +957,7 @@ async function startGlobalRecording() { app: null, appGroup: null, buffers, - stream, + session, startTime: Date.now(), isWriting: false, isGlobal: true, @@ -963,9 +992,7 @@ httpServer.listen(PORT, () => { console.log(` 🎙️ Media Capture Server started successfully: - Port: ${PORT} -- Recordings directory: ${RECORDING_DIR} -- Sample rate: 44.1kHz -- Channels: Mono +- Recordings directory: ${path.join(process.cwd(), RECORDING_DIR)} `); }); diff --git a/packages/frontend/native/index.d.ts b/packages/frontend/native/index.d.ts index 7d096e7f18..31c0b3f2d6 100644 --- a/packages/frontend/native/index.d.ts +++ b/packages/frontend/native/index.d.ts @@ -17,10 +17,11 @@ export declare class ApplicationStateChangedSubscriber { unsubscribe(): void } -export declare class AudioTapStream { +export declare class AudioCaptureSession { stop(): void get sampleRate(): number get channels(): number + get actualSampleRate(): number } export declare class DocStorage { @@ -75,8 +76,7 @@ export declare class ShareableContent { applications(): Array applicationWithProcessId(processId: number): Application | null tappableApplicationWithProcessId(processId: number): TappableApplication | null - checkRecordingPermissions(): RecordingPermissions - static tapGlobalAudio(excludedProcesses: Array | undefined | null, audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioTapStream + static tapGlobalAudio(excludedProcesses: Array | undefined | null, audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioCaptureSession } export declare class SqliteConnection { @@ -127,7 +127,7 @@ export declare class TappableApplication { get objectId(): number get icon(): Buffer get isRunning(): boolean - tapAudio(audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioTapStream + tapAudio(audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioCaptureSession } export interface Blob { diff --git a/packages/frontend/native/index.js b/packages/frontend/native/index.js index d722c9592b..ee8de356f3 100644 --- a/packages/frontend/native/index.js +++ b/packages/frontend/native/index.js @@ -377,7 +377,7 @@ if (!nativeBinding) { module.exports.Application = nativeBinding.Application module.exports.ApplicationListChangedSubscriber = nativeBinding.ApplicationListChangedSubscriber module.exports.ApplicationStateChangedSubscriber = nativeBinding.ApplicationStateChangedSubscriber -module.exports.AudioTapStream = nativeBinding.AudioTapStream +module.exports.AudioCaptureSession = nativeBinding.AudioCaptureSession module.exports.DocStorage = nativeBinding.DocStorage module.exports.DocStoragePool = nativeBinding.DocStoragePool module.exports.RecordingPermissions = nativeBinding.RecordingPermissions diff --git a/packages/frontend/native/media_capture/src/macos/audio_buffer.rs b/packages/frontend/native/media_capture/src/macos/audio_buffer.rs index 93e95692cb..38cc74bb8d 100644 --- a/packages/frontend/native/media_capture/src/macos/audio_buffer.rs +++ b/packages/frontend/native/media_capture/src/macos/audio_buffer.rs @@ -202,37 +202,49 @@ impl InputAndOutputAudioBufferList { input_sample_rate: f64, output_sample_rate: f64, ) -> Result, CoreAudioError> { - let [AudioBuffer { - mData: m_data_input, - mNumberChannels: m_number_channels_input, - mDataByteSize: m_data_byte_size_input, - }, AudioBuffer { - mData: m_data_output, - mNumberChannels: m_number_channels_output, - mDataByteSize: m_data_byte_size_output, - }] = self.0.mBuffers; - let Some(processed_samples_input) = process_audio_frame( - m_data_input, - m_data_byte_size_input, - m_number_channels_input, + let mut mixed_samples = Vec::new(); + + // Directly access buffers from the list + let [input_buffer, output_buffer] = self.0.mBuffers; + + if let Some(processed_input) = process_audio_frame( + input_buffer.mData, + input_buffer.mDataByteSize, + input_buffer.mNumberChannels, input_sample_rate, target_sample_rate, - ) else { - return Err(CoreAudioError::ProcessAudioFrameFailed("input")); - }; + ) { + mixed_samples = processed_input; + } - let Some(processed_samples_output) = process_audio_frame( - m_data_output, - m_data_byte_size_output, - m_number_channels_output, + if let Some(processed_output) = process_audio_frame( + output_buffer.mData, + output_buffer.mDataByteSize, + output_buffer.mNumberChannels, output_sample_rate, target_sample_rate, - ) else { - return Err(CoreAudioError::ProcessAudioFrameFailed("output")); - }; + ) { + if mixed_samples.is_empty() { + mixed_samples = processed_output; + } else { + let len1 = mixed_samples.len(); + let len2 = processed_output.len(); + if len1 < len2 { + mixed_samples.resize(len2, 0.0); + } else if len2 < len1 { + let mut padded_output = processed_output; + padded_output.resize(len1, 0.0); + for (sample1, sample2) in mixed_samples.iter_mut().zip(padded_output.iter()) { + *sample1 = (*sample1 + *sample2) / 2.0; + } + return Ok(mixed_samples); + } - // Use the extracted mixing function with the const weights - let mixed_samples = mix_audio_samples(&processed_samples_input, &processed_samples_output); + for (sample1, sample2) in mixed_samples.iter_mut().zip(processed_output.iter()) { + *sample1 = (*sample1 + *sample2) / 2.0; + } + } + } Ok(mixed_samples) } diff --git a/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs b/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs index c98de4ab9a..6f59ab4e59 100644 --- a/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs +++ b/packages/frontend/native/media_capture/src/macos/screen_capture_kit.rs @@ -38,7 +38,7 @@ use uuid::Uuid; use crate::{ error::CoreAudioError, pid::{audio_process_list, get_process_property}, - tap_audio::{AggregateDevice, AudioTapStream}, + tap_audio::{AggregateDeviceManager, AudioCaptureSession}, }; #[repr(C)] @@ -89,12 +89,6 @@ static APPLICATION_STATE_CHANGED_LISTENER_BLOCKS: LazyLock< static NSRUNNING_APPLICATION_CLASS: LazyLock> = LazyLock::new(|| AnyClass::get(c"NSRunningApplication")); -static AVCAPTUREDEVICE_CLASS: LazyLock> = - LazyLock::new(|| AnyClass::get(c"AVCaptureDevice")); - -static SCSTREAM_CLASS: LazyLock> = - LazyLock::new(|| AnyClass::get(c"SCStream")); - #[napi] pub struct Application { pub(crate) process_id: i32, @@ -445,10 +439,13 @@ impl TappableApplication { pub fn tap_audio( &self, audio_stream_callback: Arc>, - ) -> Result { - // Use the new method that takes a TappableApplication directly - let mut device = AggregateDevice::new(self)?; - device.start(audio_stream_callback) + ) -> Result { + // Use AggregateDeviceManager instead of AggregateDevice directly + // This provides automatic default device change detection + let mut device_manager = AggregateDeviceManager::new(self)?; + device_manager.start_capture(audio_stream_callback)?; + let boxed_manager = Box::new(device_manager); + Ok(AudioCaptureSession::new(boxed_manager)) } } @@ -742,46 +739,21 @@ impl ShareableContent { } } - #[napi] - pub fn check_recording_permissions(&self) -> Result { - let av_capture_class = AVCAPTUREDEVICE_CLASS - .as_ref() - .ok_or_else(|| Error::new(Status::GenericFailure, "AVCaptureDevice class not found"))?; - - let sc_stream_class = SCSTREAM_CLASS - .as_ref() - .ok_or_else(|| Error::new(Status::GenericFailure, "SCStream class not found"))?; - - let media_type = NSString::from_str("com.apple.avfoundation.avcapturedevice.built-in_audio"); - - let audio_status: i32 = unsafe { - msg_send![ - *av_capture_class, - authorizationStatusForMediaType: &*media_type - ] - }; - - let screen_status: bool = unsafe { msg_send![*sc_stream_class, isScreenCaptureAuthorized] }; - - Ok(RecordingPermissions { - // AVAuthorizationStatusAuthorized = 3 - audio: audio_status == 3, - screen: screen_status, - }) - } - #[napi] pub fn tap_global_audio( excluded_processes: Option>, audio_stream_callback: Arc>, - ) -> Result { - let mut device = AggregateDevice::create_global_tap_but_exclude_processes( - &excluded_processes - .unwrap_or_default() - .iter() - .map(|app| app.object_id) - .collect::>(), - )?; - device.start(audio_stream_callback) + ) -> Result { + let excluded_object_ids = excluded_processes + .unwrap_or_default() + .iter() + .map(|app| app.object_id) + .collect::>(); + + // Use the new AggregateDeviceManager for automatic device adaptation + let mut device_manager = AggregateDeviceManager::new_global(&excluded_object_ids)?; + device_manager.start_capture(audio_stream_callback)?; + let boxed_manager = Box::new(device_manager); + Ok(AudioCaptureSession::new(boxed_manager)) } } diff --git a/packages/frontend/native/media_capture/src/macos/tap_audio.rs b/packages/frontend/native/media_capture/src/macos/tap_audio.rs index be0b9ed0e5..95aa10538f 100644 --- a/packages/frontend/native/media_capture/src/macos/tap_audio.rs +++ b/packages/frontend/native/media_capture/src/macos/tap_audio.rs @@ -12,12 +12,16 @@ use coreaudio::sys::{ kAudioAggregateDeviceIsStackedKey, kAudioAggregateDeviceMainSubDeviceKey, kAudioAggregateDeviceNameKey, kAudioAggregateDeviceSubDeviceListKey, kAudioAggregateDeviceTapAutoStartKey, kAudioAggregateDeviceTapListKey, - kAudioAggregateDeviceUIDKey, kAudioDevicePropertyNominalSampleRate, kAudioHardwareBadDeviceError, + kAudioAggregateDeviceUIDKey, kAudioDevicePropertyDeviceIsAlive, + kAudioDevicePropertyNominalSampleRate, kAudioHardwareBadDeviceError, kAudioHardwareBadStreamError, kAudioHardwareNoError, kAudioHardwarePropertyDefaultInputDevice, - kAudioHardwarePropertyDefaultOutputDevice, kAudioSubDeviceUIDKey, kAudioSubTapUIDKey, - AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID, AudioDeviceIOProcID, - AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice, - AudioHardwareDestroyAggregateDevice, AudioObjectID, AudioTimeStamp, OSStatus, + kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyElementMain, + kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, kAudioSubDeviceUIDKey, + kAudioSubTapUIDKey, AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID, + AudioDeviceIOProcID, AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice, + AudioHardwareDestroyAggregateDevice, AudioObjectAddPropertyListenerBlock, + AudioObjectGetPropertyDataSize, AudioObjectID, AudioObjectPropertyAddress, + AudioObjectRemovePropertyListenerBlock, AudioTimeStamp, OSStatus, }; use napi::{ bindgen_prelude::Float32Array, @@ -164,6 +168,7 @@ impl AggregateDevice { output_proc_id: None, }; + // Restore the activation logic as it seems necessary for audio flow // Configure the aggregate device to ensure proper handling of both input and // output device.get_aggregate_device_stats()?; @@ -234,65 +239,78 @@ impl AggregateDevice { Ok(dummy_proc_id) } + /// Implementation for the AggregateDevice to start processing audio pub fn start( &mut self, audio_stream_callback: Arc>, + // Add original_audio_stats to ensure consistent target rate + original_audio_stats: AudioStats, ) -> Result { - let mut audio_stats = self.get_aggregate_device_stats()?; + let mut current_audio_stats = self.get_aggregate_device_stats()?; let queue = create_audio_tap_queue(); let mut in_proc_id: AudioDeviceIOProcID = None; + + // Get the current input and output sample rates let mut input_device_sample_rate: f64 = 0.0; get_global_main_property( self.input_device_id, kAudioDevicePropertyNominalSampleRate, &mut input_device_sample_rate, )?; - let output_sample_rate = audio_stats.sample_rate; - let target_sample_rate = input_device_sample_rate.max(output_sample_rate); - audio_stats.sample_rate = target_sample_rate; + let output_sample_rate = current_audio_stats.sample_rate; + // Use the consistent original sample rate as the target for the IO block + let target_sample_rate = original_audio_stats.sample_rate; - let audio_stats_clone = audio_stats; - self.audio_stats = Some(audio_stats); + // Update the device's reported stats to the consistent one + current_audio_stats.sample_rate = target_sample_rate; + current_audio_stats.channels = original_audio_stats.channels; + self.audio_stats = Some(current_audio_stats); + + // Use the consistent stats for the stream object returned + let audio_stats_for_stream = current_audio_stats; let in_io_block: RcBlock< dyn Fn(*mut c_void, *mut c_void, *mut c_void, *mut c_void, *mut c_void) -> i32, - > = RcBlock::new( - move |_in_now: *mut c_void, - in_input_data: *mut c_void, - in_input_time: *mut c_void, - _in_output_data: *mut c_void, - _in_output_time: *mut c_void| { - let AudioTimeStamp { mSampleTime, .. } = unsafe { &*in_input_time.cast() }; + >; + { + in_io_block = RcBlock::new( + move |_in_now: *mut c_void, + in_input_data: *mut c_void, + in_input_time: *mut c_void, + _in_output_data: *mut c_void, + _in_output_time: *mut c_void| { + let AudioTimeStamp { mSampleTime, .. } = unsafe { &*in_input_time.cast() }; - // ignore pre-roll - if *mSampleTime < 0.0 { - return kAudioHardwareNoError as i32; - } - let Ok(dua_audio_buffer_list) = - (unsafe { InputAndOutputAudioBufferList::from_raw(in_input_data) }) - else { - return kAudioHardwareBadDeviceError as i32; - }; + // ignore pre-roll + if *mSampleTime < 0.0 { + return kAudioHardwareNoError as i32; + } + let Ok(dua_audio_buffer_list) = + (unsafe { InputAndOutputAudioBufferList::from_raw(in_input_data) }) + else { + return kAudioHardwareBadDeviceError as i32; + }; - let Ok(mixed_samples) = dua_audio_buffer_list.mix_input_and_output( - target_sample_rate, - input_device_sample_rate, - output_sample_rate, - ) else { - return kAudioHardwareBadStreamError as i32; - }; + let Ok(mixed_samples) = dua_audio_buffer_list.mix_input_and_output( + target_sample_rate, + input_device_sample_rate, + output_sample_rate, + ) else { + return kAudioHardwareBadStreamError as i32; + }; - // Send the processed audio data to JavaScript - audio_stream_callback.call( - Ok(mixed_samples.into()), - ThreadsafeFunctionCallMode::NonBlocking, - ); + // Send the processed audio data to JavaScript + audio_stream_callback.call( + Ok(mixed_samples.into()), + ThreadsafeFunctionCallMode::NonBlocking, + ); - kAudioHardwareNoError as i32 - }, - ); + kAudioHardwareNoError as i32 + }, + ); + } let status = unsafe { AudioDeviceCreateIOProcIDWithBlock( @@ -310,8 +328,11 @@ impl AggregateDevice { if status != 0 { return Err(CoreAudioError::CreateIOProcIDWithBlockFailed(status).into()); } + let status = unsafe { AudioDeviceStart(self.id, in_proc_id) }; if status != 0 { + // Attempt to clean up the IO proc if start failed + let _cleanup_status = unsafe { AudioDeviceDestroyIOProcID(self.id, in_proc_id) }; return Err(CoreAudioError::AudioDeviceStartFailed(status).into()); } @@ -319,7 +340,7 @@ impl AggregateDevice { device_id: self.id, in_proc_id, stop_called: false, - audio_stats: audio_stats_clone, // Use the updated audio_stats with the actual sample rate + audio_stats: audio_stats_for_stream, input_device_id: self.input_device_id, output_device_id: self.output_device_id, input_proc_id: self.input_proc_id, @@ -386,7 +407,6 @@ impl AggregateDevice { } } -#[napi] pub struct AudioTapStream { device_id: AudioObjectID, in_proc_id: AudioDeviceIOProcID, @@ -398,61 +418,621 @@ pub struct AudioTapStream { output_proc_id: Option, } -#[napi] impl AudioTapStream { - #[napi] pub fn stop(&mut self) -> Result<()> { if self.stop_called { return Ok(()); } + self.stop_called = true; - // Stop the main aggregate device - let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) }; - if status != 0 { - return Err(CoreAudioError::AudioDeviceStopFailed(status).into()); + // Check if device exists before attempting to stop it + let mut device_exists = false; + let mut dummy_size: u32 = 0; + let device_check_status = unsafe { + AudioObjectGetPropertyDataSize( + self.device_id, + &AudioObjectPropertyAddress { + mSelector: kAudioDevicePropertyDeviceIsAlive, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain, + }, + 0, + ptr::null(), + &mut dummy_size, + ) + }; + + if device_check_status == 0 { + device_exists = true; + } + + // Stop the main aggregate device - ignore errors as device might already be + // stopped or disconnected + if device_exists { + let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) }; + // Don't fail the whole stop process if this fails, just log the error and + // continue cleanup + if status != 0 { + // kAudioHardwareBadDeviceError (560227702 / 0x2166616E in hex) indicates the + // device is gone, which is expected in some scenarios (like device + // unplug). Treat this as non-existent. + if status == kAudioHardwareBadDeviceError as i32 { + device_exists = false; // Treat as non-existent for subsequent steps + } + } } // Stop the input device if it was activated if let Some(proc_id) = self.input_proc_id { - let _ = unsafe { AudioDeviceStop(self.input_device_id, proc_id) }; - let _ = unsafe { AudioDeviceDestroyIOProcID(self.input_device_id, proc_id) }; + // Ignore errors as device might be disconnected + let status = unsafe { AudioDeviceStop(self.input_device_id, proc_id) }; + if status != 0 { + println!( + "DEBUG: WARNING: Input device stop failed with status: {}", + status + ); + } + + let status = unsafe { AudioDeviceDestroyIOProcID(self.input_device_id, proc_id) }; + if status != 0 { + println!( + "DEBUG: WARNING: Input device destroy IO proc failed with status: {}", + status + ); + } } // Stop the output device if it was activated if let Some(proc_id) = self.output_proc_id { - let _ = unsafe { AudioDeviceStop(self.output_device_id, proc_id) }; - let _ = unsafe { AudioDeviceDestroyIOProcID(self.output_device_id, proc_id) }; + // Ignore errors as device might be disconnected + let status = unsafe { AudioDeviceStop(self.output_device_id, proc_id) }; + if status != 0 { + println!( + "DEBUG: WARNING: Output device stop failed with status: {}", + status + ); + } + + let status = unsafe { AudioDeviceDestroyIOProcID(self.output_device_id, proc_id) }; + if status != 0 { + println!( + "DEBUG: WARNING: Output device destroy IO proc failed with status: {}", + status + ); + } } - // Destroy the main IO proc - let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) }; - if status != 0 { - return Err(CoreAudioError::AudioDeviceDestroyIOProcIDFailed(status).into()); + // Destroy the main IO proc if device still exists + if device_exists { + let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) }; + if status != 0 { + println!( + "DEBUG: WARNING: Destroy IO proc failed with status: {}", + status + ); + } } - - // Destroy the aggregate device let status = unsafe { AudioHardwareDestroyAggregateDevice(self.device_id) }; if status != 0 { - return Err(CoreAudioError::AudioHardwareDestroyAggregateDeviceFailed(status).into()); + println!( + "DEBUG: WARNING: AudioHardwareDestroyAggregateDevice failed with status: {}", + status + ); } - // Destroy the process tap + // Destroy the process tap - don't fail if this fails let status = unsafe { AudioHardwareDestroyProcessTap(self.device_id) }; if status != 0 { - return Err(CoreAudioError::AudioHardwareDestroyProcessTapFailed(status).into()); + println!( + "DEBUG: WARNING: AudioHardwareDestroyProcessTap failed with status: {}", + status + ); + } + + // Always return success to prevent errors from bubbling up to JavaScript + // since we've made a best effort to clean up + Ok(()) + } + + pub fn get_sample_rate(&self) -> f64 { + self.audio_stats.sample_rate + } + + /// Gets the actual sample rate of the current device + /// + /// This can be different from the original sample rate if the default device + /// has changed. The original sample rate is maintained for consistency in + /// audio processing, but applications might need to know the actual device + /// sample rate for certain operations. + pub fn get_actual_sample_rate(&self) -> Result { + let device_id = self.output_device_id; + let mut actual_sample_rate: f64 = 0.0; + let status = unsafe { + let address = AudioObjectPropertyAddress { + mSelector: kAudioDevicePropertyNominalSampleRate, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain, + }; + + let mut size = std::mem::size_of::() as u32; + coreaudio::sys::AudioObjectGetPropertyData( + device_id, + &address, + 0, + ptr::null(), + &mut size, + &mut actual_sample_rate as *mut f64 as *mut c_void, + ) + }; + + if status != 0 { + return Err(CoreAudioError::GetPropertyDataFailed(status).into()); + } + + Ok(actual_sample_rate) + } + + pub fn get_channels(&self) -> u32 { + self.audio_stats.channels + } + + /// Mark the stream as stopped without performing actual cleanup + /// This is used when the device is known to be in an invalid state + pub fn mark_stopped_without_cleanup(&mut self) { + self.stop_called = true; + } +} + +/// A manager for audio device handling that automatically adapts to device +/// changes +pub struct AggregateDeviceManager { + device: AggregateDevice, + default_devices_listener: Option<*mut c_void>, + is_app_specific: bool, + app_id: Option, + excluded_processes: Vec, + active_stream: Option>>>, + audio_callback: Option>>, + original_audio_stats: Option, +} + +impl AggregateDeviceManager { + /// Creates a new AggregateDeviceManager for a specific application + pub fn new(app: &TappableApplication) -> Result { + let device = AggregateDevice::new(app)?; + + Ok(Self { + device, + default_devices_listener: None, + is_app_specific: true, + app_id: Some(app.object_id), + excluded_processes: Vec::new(), + active_stream: None, + audio_callback: None, + original_audio_stats: None, + }) + } + + /// Creates a new AggregateDeviceManager for global audio with option to + /// exclude processes + pub fn new_global(excluded_processes: &[AudioObjectID]) -> Result { + let device = AggregateDevice::create_global_tap_but_exclude_processes(excluded_processes)?; + Ok(Self { + device, + default_devices_listener: None, + is_app_specific: false, + app_id: None, + excluded_processes: excluded_processes.to_vec(), + active_stream: None, + audio_callback: None, + original_audio_stats: None, + }) + } + + /// This sets up the initial stream and listeners. + pub fn start_capture( + &mut self, + audio_stream_callback: Arc>, + ) -> Result<()> { + // Store the callback for potential device switch later + self.audio_callback = Some(audio_stream_callback.clone()); + + // Create a shared reference for the active stream + let stream_mutex = Arc::new(std::sync::Mutex::new(None)); + self.active_stream = Some(stream_mutex.clone()); + + // Start the initial stream + // Pass the initially determined consistent audio stats + let original_audio_stats = self + .device + .get_aggregate_device_stats() + .unwrap_or(AudioStats { + sample_rate: 48000.0, // Match fallback in setup_device_change_listeners + channels: 1, + }); + self.original_audio_stats = Some(original_audio_stats); // Store for listener use + + let initial_audio_tap_stream = self + .device + .start(audio_stream_callback.clone(), original_audio_stats)?; // Pass clone of callback + + // Setup device change listeners AFTER getting initial stats and stream + self.setup_device_change_listeners()?; + + // Store a reference to the stream + if let Ok(mut stream_guard) = stream_mutex.lock() { + *stream_guard = Some(initial_audio_tap_stream); + } else { + println!("DEBUG: Failed to lock stream_mutex to store AudioTapStream reference"); + // If we can't store the initial stream, something is wrong. + // Attempt to stop the stream we just created? Or just return error? + // For now, return an error. + return Err(napi::Error::from_reason( + "Failed to lock internal stream mutex during startup", + )); } Ok(()) } - #[napi(getter)] - pub fn get_sample_rate(&self) -> f64 { - self.audio_stats.sample_rate + /// Sets up listeners for default device changes + fn setup_device_change_listeners(&mut self) -> Result<()> { + // We need to clean up any existing listeners first + self.cleanup_device_listeners(); + + // Create a weak reference to self to avoid circular references + let stream_arc = self.active_stream.clone(); + let callback_arc = self.audio_callback.clone(); + let is_app_specific = self.is_app_specific; + let app_id = self.app_id; + let excluded_processes = self.excluded_processes.clone(); + + // Retrieve the stored original audio stats + let Some(original_audio_stats) = self.original_audio_stats else { + return Err(napi::Error::from_reason( + "Internal error: Original audio stats not available for listener.", + )); + }; + + // Create a block that will handle device changes + let device_changed_block = RcBlock::new( + move |_in_number_addresses: u32, _in_addresses: *mut c_void| { + // Skip if we don't have all required information + let Some(stream_mutex) = stream_arc.as_ref() else { + return; + }; + let Some(callback) = callback_arc.as_ref() else { + return; + }; + + // Try to lock the stream mutex + let Ok(mut stream_guard) = stream_mutex.lock() else { + return; + }; + + // Create a new device with updated default devices + let result: Result = (|| { + if is_app_specific { + if let Some(id) = app_id { + let app = TappableApplication::new(id)?; + AggregateDevice::new(&app) + } else { + Err(CoreAudioError::CreateProcessTapFailed(0).into()) + } + } else { + AggregateDevice::create_global_tap_but_exclude_processes(&excluded_processes) + } + })(); + + // If we successfully created a new device, stop the old stream and start a new + // one + match result { + Ok(mut new_device) => { + // Stop and drop the old stream if it exists + if let Some(mut old_stream) = stream_guard.take() { + // Explicitly drop the old stream's Box before creating the new device. + // The drop implementation handles cleanup. + // We call stop() directly. + let stop_result = old_stream.stop(); + match stop_result { + Ok(_) => {} + Err(e) => println!( + "DEBUG: Error stopping old stream (proceeding anyway): {}", + e + ), + }; + drop(old_stream); // Ensure it's dropped now + } + + match new_device.start(callback.clone(), original_audio_stats) { + Ok(new_stream) => { + // Use the existing stream_guard which already holds the lock + *stream_guard = Some(new_stream); + } + Err(e) => { + println!("DEBUG: Failed to start new stream: {}", e); + } + } + } + Err(e) => { + println!("DEBUG: Failed to create new device: {}", e); + } + } + }, + ); + + // Create pointers to the device_changed_block that can be used in C functions + let block_ptr = &*device_changed_block as *const Block; + let block_ptr_cast = block_ptr.cast_mut().cast(); + + // Register listeners for both input and output device changes + unsafe { + let status = AudioObjectAddPropertyListenerBlock( + kAudioObjectSystemObject, + &AudioObjectPropertyAddress { + mSelector: kAudioHardwarePropertyDefaultInputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain, + }, + ptr::null_mut(), + block_ptr_cast, + ); + + if status != 0 { + println!( + "DEBUG: Failed to register input device listener, status: {}", + status + ); + return Err(CoreAudioError::AddPropertyListenerBlockFailed(status).into()); + } + + let status = AudioObjectAddPropertyListenerBlock( + kAudioObjectSystemObject, + &AudioObjectPropertyAddress { + mSelector: kAudioHardwarePropertyDefaultOutputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain, + }, + ptr::null_mut(), + block_ptr_cast, + ); + + if status != 0 { + println!( + "DEBUG: Failed to register output device listener, status: {}", + status + ); + // Clean up the first listener if the second one fails + AudioObjectRemovePropertyListenerBlock( + kAudioObjectSystemObject, + &AudioObjectPropertyAddress { + mSelector: kAudioHardwarePropertyDefaultInputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain, + }, + ptr::null_mut(), + block_ptr_cast, + ); + return Err(CoreAudioError::AddPropertyListenerBlockFailed(status).into()); + } + } + + // Store the listener pointer for cleanup + self.default_devices_listener = Some(block_ptr_cast); + + Ok(()) + } + + /// Cleans up device change listeners + fn cleanup_device_listeners(&mut self) { + if let Some(listener) = self.default_devices_listener.take() { + unsafe { + // Remove input device change listener + let status = AudioObjectRemovePropertyListenerBlock( + kAudioObjectSystemObject, + &AudioObjectPropertyAddress { + mSelector: kAudioHardwarePropertyDefaultInputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain, + }, + ptr::null_mut(), + listener, + ); + if status != 0 { + println!( + "DEBUG: Failed to remove input device listener, status: {}", + status + ); + } + + let status = AudioObjectRemovePropertyListenerBlock( + kAudioObjectSystemObject, + &AudioObjectPropertyAddress { + mSelector: kAudioHardwarePropertyDefaultOutputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain, + }, + ptr::null_mut(), + listener, + ); + if status != 0 { + println!( + "DEBUG: Failed to remove output device listener, status: {}", + status + ); + } + } + } + } + + /// Stops the active stream and cleans up listeners. + pub fn stop_capture(&mut self) -> Result<()> { + self.cleanup_device_listeners(); + + let stream_to_stop = if let Some(stream_mutex) = &self.active_stream { + if let Ok(mut stream_guard) = stream_mutex.lock() { + stream_guard.take() // Take ownership from the Option>> + } else { + println!("DEBUG: Failed to lock stream mutex during stop"); + None + } + } else { + None + }; + + if let Some(mut stream) = stream_to_stop { + match stream.stop() { + Ok(_) => {} + Err(e) => println!( + "DEBUG: Error stopping stream in stop_capture (ignored): {}", + e + ), + } + // Explicitly drop here after stopping + drop(stream); + } + + // Clear related fields + self.active_stream = None; + self.audio_callback = None; + self.original_audio_stats = None; + + Ok(()) + } + + /// Gets the stats of the currently active stream, if any. + pub fn get_current_stats(&self) -> Option { + if let Some(stream_mutex) = &self.active_stream { + if let Ok(stream_guard) = stream_mutex.lock() { + // Borrow the stream Option, then map to get stats + stream_guard.as_ref().map(|stream| stream.audio_stats) + } else { + println!("DEBUG: Failed to lock stream mutex for get_current_stats"); + None + } + } else { + None + } + } + + /// Gets the actual sample rate of the currently active stream's output + /// device. + pub fn get_current_actual_sample_rate(&self) -> Result> { + let maybe_stream_ref = if let Some(stream_mutex) = &self.active_stream { + match stream_mutex.lock() { + Ok(guard) => guard, + Err(_) => { + println!("DEBUG: Failed to lock stream mutex for get_current_actual_sample_rate"); + // Return Ok(None) or an error? Let's return None. + return Ok(None); + } + } + } else { + return Ok(None); // No active stream manager + }; + + if let Some(stream) = maybe_stream_ref.as_ref() { + // Call the existing non-napi method on AudioTapStream + match stream.get_actual_sample_rate() { + Ok(rate) => Ok(Some(rate)), + Err(e) => { + println!("DEBUG: Error getting actual sample rate from stream: {}", e); + // Propagate the error + Err(e) + } + } + } else { + Ok(None) // No active stream + } + } +} + +impl Drop for AggregateDeviceManager { + fn drop(&mut self) { + // Call stop_capture which handles listener cleanup and stream stopping + match self.stop_capture() { + Ok(_) => {} + Err(e) => println!("DEBUG: Error during stop_capture in Drop (ignored): {}", e), + } + } +} + +// NEW NAPI Struct: AudioCaptureSession +#[napi] +pub struct AudioCaptureSession { + // Use Option> to allow taking ownership in stop() + manager: Option>, +} + +#[napi] +impl AudioCaptureSession { + // Constructor called internally, not directly via NAPI + pub(crate) fn new(manager: Box) -> Self { + Self { + manager: Some(manager), + } + } + + #[napi] + pub fn stop(&mut self) -> Result<()> { + if let Some(manager) = self.manager.take() { + // manager.stop_capture() will be called by its Drop impl + // We just need to drop the manager here. + drop(manager); + Ok(()) + } else { + println!("DEBUG: AudioCaptureSession.stop() called, but manager was already taken"); + // Return Ok even if called multiple times, idempotent behavior + Ok(()) + } } #[napi(getter)] - pub fn get_channels(&self) -> u32 { - self.audio_stats.channels + pub fn get_sample_rate(&self) -> Result { + if let Some(manager) = &self.manager { + manager + .get_current_stats() + .map(|stats| stats.sample_rate) + .ok_or_else(|| napi::Error::from_reason("No active audio stream to get sample rate from")) + } else { + Err(napi::Error::from_reason("Audio session is stopped")) + } + } + + #[napi(getter)] + pub fn get_channels(&self) -> Result { + if let Some(manager) = &self.manager { + manager + .get_current_stats() + .map(|stats| stats.channels) + .ok_or_else(|| napi::Error::from_reason("No active audio stream to get channels from")) + } else { + Err(napi::Error::from_reason("Audio session is stopped")) + } + } + + #[napi(getter)] + pub fn get_actual_sample_rate(&self) -> Result { + if let Some(manager) = &self.manager { + manager + .get_current_actual_sample_rate()? // Propagate CoreAudioError + .ok_or_else(|| { + napi::Error::from_reason("No active audio stream to get actual sample rate from") + }) + } else { + Err(napi::Error::from_reason("Audio session is stopped")) + } + } +} + +// Ensure the manager is dropped if the session object is dropped without +// calling stop() +impl Drop for AudioCaptureSession { + fn drop(&mut self) { + // Automatically calls drop on self.manager if it's Some + if let Some(manager) = self.manager.take() { + drop(manager); + } } } diff --git a/packages/frontend/native/media_capture/src/macos/utils.rs b/packages/frontend/native/media_capture/src/macos/utils.rs index b17e11b323..83858a4bdc 100644 --- a/packages/frontend/native/media_capture/src/macos/utils.rs +++ b/packages/frontend/native/media_capture/src/macos/utils.rs @@ -5,7 +5,7 @@ use coreaudio::sys::{ kAudioObjectPropertyElementMain, kAudioObjectPropertyScopeGlobal, AudioObjectGetPropertyData, AudioObjectID, AudioObjectPropertyAddress, }; -use rubato::{Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType}; +use rubato::{FastFixedIn, PolynomialDegree, Resampler}; use crate::error::CoreAudioError; @@ -81,22 +81,32 @@ pub fn process_audio_frame( }; if current_sample_rate != target_sample_rate { - let params = SincInterpolationParameters { - sinc_len: 256, - f_cutoff: 0.95, - interpolation: SincInterpolationType::Linear, - oversampling_factor: 256, - window: rubato::WindowFunction::BlackmanHarris2, - }; - let mut resampler = SincFixedIn::::new( + // TODO: may use SincFixedOut to improve the sample quality + // however, it's not working as expected if we only process samples in chunks + // e.g., even with ratio 1.0, resampling 512 samples will result in 382 samples, + // which will produce very bad quality. The reason is that the resampler is + // meant to be used for dealing with larger input size. The reduced number + // of samples is a "delay" of the resampler for better quality. + let mut resampler = match FastFixedIn::::new( target_sample_rate / current_sample_rate, 2.0, - params, + PolynomialDegree::Cubic, processed_samples.len(), 1, - ) - .ok()?; - let mut waves_out = resampler.process(&[processed_samples], None).ok()?; + ) { + Ok(r) => r, + Err(e) => { + eprintln!("Error creating resampler: {:?}", e); + return None; + } + }; + let mut waves_out = match resampler.process(&[processed_samples], None) { + Ok(w) => w, + Err(e) => { + eprintln!("Error processing audio with resampler: {:?}", e); + return None; + } + }; waves_out.pop() } else { Some(processed_samples)