feat(native): media capture (#9992)

Brooooooklyn
2025-02-25 06:51:56 +00:00
parent 2ec7de7e32
commit 5dbffba08d
46 changed files with 5791 additions and 74 deletions

View File

@@ -0,0 +1,2 @@
recordings
.env

View File

@@ -0,0 +1,43 @@
{
"name": "@affine/media-capture-playground",
"private": true,
"type": "module",
"version": "0.0.0",
"scripts": {
"dev:web": "vite",
"dev:server": "tsx --env-file=.env --watch server/main.ts"
},
"dependencies": {
"@affine/native": "workspace:*",
"@google/generative-ai": "^0.21.0",
"@tailwindcss/vite": "^4.0.6",
"@types/express": "^4",
"@types/multer": "^1",
"@types/react": "^19.0.8",
"@types/react-dom": "^19.0.3",
"@types/socket.io": "^3.0.2",
"@types/socket.io-client": "^3.0.0",
"@vitejs/plugin-react": "^4.3.4",
"chokidar": "^4.0.3",
"express": "^4.21.2",
"express-rate-limit": "^7.1.5",
"fs-extra": "^11.3.0",
"multer": "^1.4.5-lts.1",
"openai": "^4.85.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-markdown": "^9.0.3",
"rxjs": "^7.8.1",
"socket.io": "^4.7.4",
"socket.io-client": "^4.7.4",
"swr": "^2.3.2",
"tailwindcss": "^4.0.6",
"tsx": "^4.19.2",
"vite": "^6.1.0"
},
"devDependencies": {
"@types/fs-extra": "^11",
"@types/react": "^19.0.1",
"@types/react-dom": "^19.0.2"
}
}

View File

@@ -0,0 +1,200 @@
import { GoogleGenerativeAI } from '@google/generative-ai';
import {
GoogleAIFileManager,
type UploadFileResponse,
} from '@google/generative-ai/server';
const DEFAULT_MODEL = 'gemini-2.0-flash';
export interface TranscriptionResult {
title: string;
summary: string;
segments: {
speaker: string;
start_time: string;
end_time: string;
transcription: string;
}[];
}
const PROMPT_TRANSCRIPTION = `
Generate audio transcription and diarization for the recording.
The recording source is most likely from a video call with multiple speakers.
Output in JSON format with the following structure:
{
"segments": [
{
"speaker": "Speaker A",
"start_time": "MM:SS",
"end_time": "MM:SS",
"transcription": "..."
},
...
],
}
- Use consistent speaker labels throughout
- Accurate timestamps in MM:SS format
- Clean transcription with proper punctuation
- Identify speakers by name if possible, otherwise use "Speaker A/B/C"
`;
const PROMPT_SUMMARY = `
Generate a short title and summary of the conversation. The input is in the following JSON format:
{
"segments": [
{
"speaker": "Speaker A",
"start_time": "MM:SS",
"end_time": "MM:SS",
"transcription": "..."
},
...
],
}
Output in JSON format with the following structure:
{
"title": "Title of the recording",
"summary": "Summary of the conversation in markdown format"
}
1. Summary Structure:
- The summary should be inferred from the speakers' language and context
- All insights should be derived directly from speakers' language and context
- Use hierarchical organization for clear information structure
- Use markdown format for the summary. Use bullet points, lists and other markdown styles when appropriate
2. Title:
- Come up with a title for the recording.
- The title should be a short description of the recording.
- The title should be a single sentence or a few words.
`;
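// Illustrative example (values invented, not from a real recording) of the
// combined JSON the two prompts above are expected to produce:
// {
//   "title": "Weekly product sync",
//   "summary": "- Discussed the release timeline\n- Agreed on follow-up tasks",
//   "segments": [
//     {
//       "speaker": "Speaker A",
//       "start_time": "00:05",
//       "end_time": "00:12",
//       "transcription": "Let's get started with the roadmap."
//     }
//   ]
// }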
export async function gemini(
audioFilePath: string,
options?: {
model?: 'gemini-2.0-flash' | 'gemini-1.5-flash';
mode?: 'transcript' | 'summary';
}
) {
if (!process.env.GOOGLE_GEMINI_API_KEY) {
console.error('Missing GOOGLE_GEMINI_API_KEY environment variable');
throw new Error('GOOGLE_GEMINI_API_KEY is not set');
}
// Initialize GoogleGenerativeAI and FileManager with your API_KEY
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_GEMINI_API_KEY);
const fileManager = new GoogleAIFileManager(
process.env.GOOGLE_GEMINI_API_KEY
);
async function transcribe(
audioFilePath: string
): Promise<TranscriptionResult | null> {
let uploadResult: UploadFileResponse | null = null;
try {
// Upload the audio file
uploadResult = await fileManager.uploadFile(audioFilePath, {
mimeType: 'audio/wav',
displayName: 'audio_transcription.wav',
});
console.log('File uploaded:', uploadResult.file.uri);
// Initialize a Gemini model appropriate for your use case.
const model = genAI.getGenerativeModel({
model: options?.model || DEFAULT_MODEL,
generationConfig: {
responseMimeType: 'application/json',
},
});
// Generate content using a prompt and the uploaded file
const result = await model.generateContent([
{
fileData: {
fileUri: uploadResult.file.uri,
mimeType: uploadResult.file.mimeType,
},
},
{
text: PROMPT_TRANSCRIPTION,
},
]);
const text = result.response.text();
try {
const parsed = JSON.parse(text);
return parsed;
} catch (e) {
console.error('Failed to parse transcription JSON:', e);
console.error('Raw text that failed to parse:', text);
return null;
}
} catch (e) {
console.error('Error during transcription:', e);
return null;
} finally {
if (uploadResult) {
await fileManager.deleteFile(uploadResult.file.name);
}
}
}
async function summarize(transcription: TranscriptionResult) {
try {
const model = genAI.getGenerativeModel({
model: options?.model || DEFAULT_MODEL,
generationConfig: {
responseMimeType: 'application/json',
},
});
const result = await model.generateContent([
{
text: PROMPT_SUMMARY + '\n\n' + JSON.stringify(transcription),
},
]);
const text = result.response.text();
try {
const parsed = JSON.parse(text);
return parsed;
} catch (e) {
console.error('Failed to parse summary JSON:', e);
console.error('Raw text that failed to parse:', text);
return null;
}
} catch (e) {
console.error('Error during summarization:', e);
return null;
}
}
const transcription = await transcribe(audioFilePath);
if (!transcription) {
console.error('Transcription failed');
return null;
}
const summary = await summarize(transcription);
if (!summary) {
console.error('Summary generation failed');
return transcription;
}
const result = {
...transcription,
...summary,
};
console.log('Processing completed:', {
title: result.title,
segmentsCount: result.segments?.length,
});
return result;
}
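
A minimal usage sketch for the helper above (GOOGLE_GEMINI_API_KEY is assumed to be set and the recording path is a placeholder), mirroring how the playground server calls it with the 8 kHz transcription WAV:

// sketch: run transcription + summary against a local WAV file
import { gemini } from './gemini';

const result = await gemini('./recordings/example/transcription.wav', {
  model: 'gemini-2.0-flash',
});
if (result) {
  console.log(result.title, `${result.segments.length} segments`);
}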

View File

@@ -0,0 +1,759 @@
/* eslint-disable @typescript-eslint/no-misused-promises */
import { exec } from 'node:child_process';
import { createServer } from 'node:http';
import { promisify } from 'node:util';
import {
type Application,
type AudioTapStream,
ShareableContent,
} from '@affine/native';
import type { FSWatcher } from 'chokidar';
import chokidar from 'chokidar';
import express from 'express';
import rateLimit from 'express-rate-limit';
import fs from 'fs-extra';
import { Server } from 'socket.io';
import { gemini, type TranscriptionResult } from './gemini';
import { WavWriter } from './wav-writer';
// Constants
const RECORDING_DIR = './recordings';
const PORT = process.env.PORT || 6544;
// Ensure recordings directory exists
fs.ensureDirSync(RECORDING_DIR);
console.log(`📁 Ensuring recordings directory exists at ${RECORDING_DIR}`);
// Types
interface Recording {
app: Application;
appGroup: Application | null;
buffers: Float32Array[];
stream: AudioTapStream;
startTime: number;
isWriting: boolean;
}
interface RecordingStatus {
processId: number;
bundleIdentifier: string;
name: string;
startTime: number;
duration: number;
}
interface RecordingMetadata {
appName: string;
bundleIdentifier: string;
processId: number;
recordingStartTime: number;
recordingEndTime: number;
recordingDuration: number;
sampleRate: number;
totalSamples: number;
}
interface AppInfo {
app: Application;
processId: number;
processGroupId: number | null;
bundleIdentifier: string;
name: string;
running: boolean;
}
interface TranscriptionMetadata {
transcriptionStartTime: number;
transcriptionEndTime: number;
transcriptionStatus: 'not_started' | 'pending' | 'completed' | 'error';
transcription?: TranscriptionResult;
error?: string;
}
// State
const recordingMap = new Map<number, Recording>();
let appsSubscriber = () => {};
let fsWatcher: FSWatcher | null = null;
// Server setup
const app = express();
const httpServer = createServer(app);
const io = new Server(httpServer, {
cors: { origin: '*' },
});
app.use(express.json());
// Static file serving handles the per-recording folder structure
app.use(
'/recordings',
(req, res, next) => {
// Extract the folder name from the path
const parts = req.path.split('/');
if (parts.length < 2) {
return res.status(400).json({ error: 'Invalid request path' });
}
const folderName = parts[1];
if (!validateAndSanitizeFolderName(folderName)) {
return res.status(400).json({ error: 'Invalid folder name format' });
}
if (req.path.endsWith('.wav')) {
res.setHeader('Content-Type', 'audio/wav');
} else if (req.path.endsWith('.png')) {
res.setHeader('Content-Type', 'image/png');
}
next();
},
express.static(RECORDING_DIR)
);
// Recording management
async function saveRecording(recording: Recording): Promise<string | null> {
try {
recording.isWriting = true;
const app = recording.appGroup || recording.app;
const totalSamples = recording.buffers.reduce(
(acc, buf) => acc + buf.length,
0
);
const recordingEndTime = Date.now();
const recordingDuration = (recordingEndTime - recording.startTime) / 1000;
const expectedSamples = recordingDuration * 44100;
console.log(`💾 Saving recording for ${app.name}:`);
console.log(`- Process ID: ${app.processId}`);
console.log(`- Bundle ID: ${app.bundleIdentifier}`);
console.log(`- Actual duration: ${recordingDuration.toFixed(2)}s`);
console.log(`- Expected samples: ${Math.floor(expectedSamples)}`);
console.log(`- Actual samples: ${totalSamples}`);
console.log(
`- Sample ratio: ${(totalSamples / expectedSamples).toFixed(2)}`
);
// Create a buffer for the mono audio
const buffer = new Float32Array(totalSamples);
let offset = 0;
recording.buffers.forEach(buf => {
buffer.set(buf, offset);
offset += buf.length;
});
await fs.ensureDir(RECORDING_DIR);
const timestamp = Date.now();
const baseFilename = `${recording.app.bundleIdentifier}-${recording.app.processId}-${timestamp}`;
const recordingDir = `${RECORDING_DIR}/${baseFilename}`;
await fs.ensureDir(recordingDir);
const wavFilename = `${recordingDir}/recording.wav`;
const transcriptionWavFilename = `${recordingDir}/transcription.wav`;
const metadataFilename = `${recordingDir}/metadata.json`;
const iconFilename = `${recordingDir}/icon.png`;
// Save high-quality WAV file for playback (44.1kHz)
console.log(`📝 Writing high-quality WAV file to ${wavFilename}`);
const writer = new WavWriter(wavFilename, { targetSampleRate: 44100 });
writer.write(buffer);
await writer.end();
console.log('✅ High-quality WAV file written successfully');
// Save low-quality WAV file for transcription (8kHz)
console.log(
`📝 Writing transcription WAV file to ${transcriptionWavFilename}`
);
const transcriptionWriter = new WavWriter(transcriptionWavFilename, {
targetSampleRate: 8000,
});
transcriptionWriter.write(buffer);
await transcriptionWriter.end();
console.log('✅ Transcription WAV file written successfully');
// Save app icon if available
if (app.icon) {
console.log(`📝 Writing app icon to ${iconFilename}`);
await fs.writeFile(iconFilename, app.icon);
console.log('✅ App icon written successfully');
}
console.log(`📝 Writing metadata to ${metadataFilename}`);
// Save metadata (without icon)
const metadata: RecordingMetadata = {
appName: app.name,
bundleIdentifier: app.bundleIdentifier,
processId: app.processId,
recordingStartTime: recording.startTime,
recordingEndTime,
recordingDuration,
sampleRate: 44100,
totalSamples,
};
await fs.writeJson(metadataFilename, metadata, { spaces: 2 });
console.log('✅ Metadata file written successfully');
return baseFilename;
} catch (error) {
console.error('❌ Error saving recording:', error);
return null;
}
}
function getRecordingStatus(): RecordingStatus[] {
return Array.from(recordingMap.entries()).map(([processId, recording]) => ({
processId,
bundleIdentifier: recording.app.bundleIdentifier,
name: recording.app.name,
startTime: recording.startTime,
duration: Date.now() - recording.startTime,
}));
}
function emitRecordingStatus() {
io.emit('apps:recording', { recordings: getRecordingStatus() });
}
async function startRecording(app: Application) {
if (recordingMap.has(app.processId)) {
console.log(
`⚠️ Recording already in progress for ${app.name} (PID: ${app.processId})`
);
return;
}
// Find the root app of the process group
const processGroupId = await getProcessGroupId(app.processId);
const rootApp = processGroupId
? (shareableContent
.applications()
.find(a => a.processId === processGroupId) ?? app)
: app;
console.log(
`🎙️ Starting recording for ${rootApp.name} (PID: ${rootApp.processId})`
);
const buffers: Float32Array[] = [];
const stream = app.tapAudio((err, samples) => {
if (err) {
console.error(`❌ Audio stream error for ${rootApp.name}:`, err);
return;
}
const recording = recordingMap.get(app.processId);
if (recording && !recording.isWriting) {
buffers.push(new Float32Array(samples));
}
});
recordingMap.set(app.processId, {
app,
appGroup: rootApp,
buffers,
stream,
startTime: Date.now(),
isWriting: false,
});
console.log(`✅ Recording started successfully for ${rootApp.name}`);
emitRecordingStatus();
}
async function stopRecording(processId: number) {
const recording = recordingMap.get(processId);
if (!recording) {
console.log(`⚠️ No active recording found for process ID ${processId}`);
return;
}
const app = recording.appGroup || recording.app;
console.log(`⏹️ Stopping recording for ${app.name} (PID: ${app.processId})`);
console.log(
`⏱️ Recording duration: ${((Date.now() - recording.startTime) / 1000).toFixed(2)}s`
);
recording.stream.stop();
const filename = await saveRecording(recording);
recordingMap.delete(processId);
if (filename) {
console.log(`✅ Recording saved successfully to ${filename}`);
} else {
console.error(`❌ Failed to save recording for ${app.name}`);
}
emitRecordingStatus();
return filename;
}
// File management
async function getRecordings(): Promise<
{
wav: string;
metadata?: RecordingMetadata;
transcription?: TranscriptionMetadata;
}[]
> {
try {
const allItems = await fs.readdir(RECORDING_DIR);
// First filter out non-directories
const dirs = (
await Promise.all(
allItems.map(async item => {
const fullPath = `${RECORDING_DIR}/${item}`;
try {
const stat = await fs.stat(fullPath);
return stat.isDirectory() ? item : null;
} catch {
return null;
}
})
)
).filter((d): d is string => d !== null);
const recordings = await Promise.all(
dirs.map(async dir => {
try {
const recordingPath = `${RECORDING_DIR}/${dir}`;
const metadataPath = `${recordingPath}/metadata.json`;
const transcriptionPath = `${recordingPath}/transcription.json`;
let metadata: RecordingMetadata | undefined;
try {
metadata = await fs.readJson(metadataPath);
} catch {
// Metadata might not exist
}
let transcription: TranscriptionMetadata | undefined;
try {
// Check if transcription file exists
const transcriptionExists = await fs.pathExists(transcriptionPath);
if (transcriptionExists) {
transcription = await fs.readJson(transcriptionPath);
} else {
// If transcription.wav exists but no transcription.json, it means transcription is available but not started
transcription = {
transcriptionStartTime: 0,
transcriptionEndTime: 0,
transcriptionStatus: 'not_started',
};
}
} catch (error) {
console.error(`Error reading transcription for ${dir}:`, error);
}
return {
wav: dir,
metadata,
transcription,
};
} catch (error) {
console.error(`Error processing directory ${dir}:`, error);
return null;
}
})
);
// Filter out nulls and sort by recording start time
return recordings
.filter((r): r is NonNullable<typeof r> => r !== null)
.sort(
(a, b) =>
(b.metadata?.recordingStartTime ?? 0) -
(a.metadata?.recordingStartTime ?? 0)
);
} catch (error) {
console.error('Error reading recordings directory:', error);
return [];
}
}
async function setupRecordingsWatcher() {
if (fsWatcher) {
console.log('🔄 Closing existing recordings watcher');
await fsWatcher.close();
}
try {
console.log('👀 Setting up recordings watcher...');
const files = await getRecordings();
console.log(`📊 Found ${files.length} existing recordings`);
io.emit('apps:saved', { recordings: files });
fsWatcher = chokidar.watch(RECORDING_DIR, {
ignored: /(^|[/\\])\../, // ignore dotfiles
persistent: true,
ignoreInitial: true,
awaitWriteFinish: {
stabilityThreshold: 500,
pollInterval: 100,
},
});
// Handle file events
fsWatcher
.on('add', async path => {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`📝 File added: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
}
})
.on('change', async path => {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`📝 File changed: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
}
})
.on('unlink', async path => {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`🗑️ File removed: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
}
})
.on('error', error => {
console.error('❌ Error watching recordings directory:', error);
})
.on('ready', () => {
console.log('✅ Recordings watcher setup complete');
});
} catch (error) {
console.error('❌ Error setting up recordings watcher:', error);
}
}
// Process management
async function getProcessGroupId(pid: number): Promise<number | null> {
try {
const execAsync = promisify(exec);
const { stdout } = await execAsync(`ps -o pgid -p ${pid}`);
const lines = stdout.trim().split('\n');
if (lines.length < 2) return null;
const pgid = parseInt(lines[1].trim(), 10);
return isNaN(pgid) ? null : pgid;
} catch {
return null;
}
}
// Application management
const shareableContent = new ShareableContent();
async function getAllApps(): Promise<AppInfo[]> {
const apps = await Promise.all(
shareableContent.applications().map(async app => {
try {
return {
app,
processId: app.processId,
processGroupId: await getProcessGroupId(app.processId),
bundleIdentifier: app.bundleIdentifier,
name: app.name,
running: app.isRunning,
};
} catch (error) {
console.error(error);
return null;
}
})
);
const filteredApps = apps.filter(
(v): v is AppInfo =>
v !== null && !v.bundleIdentifier.startsWith('com.apple')
);
// Stop any active recordings whose app is no longer listed
await Promise.all(
Array.from(recordingMap.keys()).map(async processId => {
if (!filteredApps.some(a => a.processId === processId)) {
await stopRecording(processId);
}
})
);
return filteredApps;
}
function listenToAppStateChanges(apps: AppInfo[]) {
const subscribers = apps.map(({ app }) => {
return ShareableContent.onAppStateChanged(app, () => {
setTimeout(() => {
console.log(
`🔄 Application state changed: ${app.name} (PID: ${app.processId}) is now ${
app.isRunning ? '▶️ running' : '⏹️ stopped'
}`
);
io.emit('apps:state-changed', {
processId: app.processId,
running: app.isRunning,
});
if (!app.isRunning) {
stopRecording(app.processId).catch(error => {
console.error('❌ Error stopping recording:', error);
});
}
}, 50);
});
});
appsSubscriber();
appsSubscriber = () => {
subscribers.forEach(subscriber => subscriber.unsubscribe());
};
}
// Socket.IO setup
io.on('connection', async socket => {
console.log('🔌 New client connected');
const initialApps = await getAllApps();
console.log(`📤 Sending ${initialApps.length} applications to new client`);
socket.emit('apps:all', { apps: initialApps });
socket.emit('apps:recording', { recordings: getRecordingStatus() });
const files = await getRecordings();
console.log(`📤 Sending ${files.length} saved recordings to new client`);
socket.emit('apps:saved', { recordings: files });
listenToAppStateChanges(initialApps);
socket.on('disconnect', () => {
console.log('🔌 Client disconnected');
});
});
// Application list change listener
ShareableContent.onApplicationListChanged(() => {
(async () => {
try {
console.log('🔄 Application list changed, updating clients...');
const apps = await getAllApps();
console.log(`📢 Broadcasting ${apps.length} applications to all clients`);
io.emit('apps:all', { apps });
} catch (error) {
console.error('❌ Error handling application list change:', error);
}
})().catch(error => {
console.error('❌ Error in application list change handler:', error);
});
});
// API Routes
const rateLimiter = rateLimit({
windowMs: 1000,
max: 200,
message: { error: 'Too many requests, please try again later.' },
});
app.get('/permissions', (req, res) => {
const permission = shareableContent.checkRecordingPermissions();
res.json({ permission });
});
app.get('/apps', async (_req, res) => {
const apps = await getAllApps();
listenToAppStateChanges(apps);
res.json({ apps });
});
app.get('/apps/saved', rateLimiter, async (_req, res) => {
const files = await getRecordings();
res.json({ recordings: files });
});
// Utility function to validate and sanitize folder name
function validateAndSanitizeFolderName(folderName: string): string | null {
// Allow alphanumeric characters, hyphens, dots (for bundle IDs)
// Format: bundleId-processId-timestamp
if (!/^[\w.-]+-\d+-\d+$/.test(folderName)) {
return null;
}
// Remove any path traversal attempts
const sanitized = folderName.replace(/^\.+|\.+$/g, '').replace(/[/\\]/g, '');
return sanitized;
}
app.delete('/recordings/:foldername', rateLimiter, async (req, res) => {
const foldername = validateAndSanitizeFolderName(req.params.foldername);
if (!foldername) {
console.error('❌ Invalid folder name format:', req.params.foldername);
return res.status(400).json({ error: 'Invalid folder name format' });
}
const recordingDir = `${RECORDING_DIR}/${foldername}`;
try {
// Ensure the resolved path is within RECORDING_DIR
const resolvedPath = await fs.realpath(recordingDir);
const recordingDirPath = await fs.realpath(RECORDING_DIR);
if (!resolvedPath.startsWith(recordingDirPath)) {
console.error('❌ Path traversal attempt detected:', {
resolvedPath,
recordingDirPath,
requestedFile: foldername,
});
return res.status(403).json({ error: 'Access denied' });
}
console.log(`🗑️ Deleting recording folder: ${foldername}`);
await fs.remove(recordingDir);
console.log('✅ Recording folder deleted successfully');
res.status(200).json({ success: true });
} catch (error) {
const typedError = error as NodeJS.ErrnoException;
if (typedError.code === 'ENOENT') {
console.error('❌ Folder not found:', recordingDir);
res.status(404).json({ error: 'Folder not found' });
} else {
console.error('❌ Error deleting folder:', {
error: typedError,
code: typedError.code,
message: typedError.message,
path: recordingDir,
});
res.status(500).json({
error: `Failed to delete folder: ${typedError.message || 'Unknown error'}`,
});
}
}
});
app.get('/apps/:process_id/icon', (req, res) => {
const processId = parseInt(req.params.process_id);
try {
const app = shareableContent.applicationWithProcessId(processId);
const icon = app.icon;
res.set('Content-Type', 'image/png');
res.send(icon);
} catch {
res.status(404).json({ error: 'App icon not found' });
}
});
app.post('/apps/:process_id/record', async (req, res) => {
const processId = parseInt(req.params.process_id);
const app = shareableContent.applicationWithProcessId(processId);
await startRecording(app);
res.json({ success: true });
});
app.post('/apps/:process_id/stop', async (req, res) => {
const processId = parseInt(req.params.process_id);
await stopRecording(processId);
res.json({ success: true });
});
// Transcription endpoint (uses folder name validation)
app.post(
'/recordings/:foldername/transcribe',
rateLimiter,
async (req, res) => {
const foldername = validateAndSanitizeFolderName(req.params.foldername);
if (!foldername) {
console.error('❌ Invalid folder name format:', req.params.foldername);
return res.status(400).json({ error: 'Invalid folder name format' });
}
const recordingDir = `${RECORDING_DIR}/${foldername}`;
try {
// Check if directory exists
await fs.access(recordingDir);
const transcriptionWavPath = `${recordingDir}/transcription.wav`;
const transcriptionMetadataPath = `${recordingDir}/transcription.json`;
// Check if transcription file exists
await fs.access(transcriptionWavPath);
// Create initial transcription metadata
const initialMetadata: TranscriptionMetadata = {
transcriptionStartTime: Date.now(),
transcriptionEndTime: 0,
transcriptionStatus: 'pending',
};
await fs.writeJson(transcriptionMetadataPath, initialMetadata);
// Notify clients that transcription has started
io.emit('apps:recording-transcription-start', { filename: foldername });
const transcription = await gemini(transcriptionWavPath, {
mode: 'transcript',
});
// Update transcription metadata with results
const metadata: TranscriptionMetadata = {
transcriptionStartTime: initialMetadata.transcriptionStartTime,
transcriptionEndTime: Date.now(),
transcriptionStatus: 'completed',
transcription: transcription ?? undefined,
};
await fs.writeJson(transcriptionMetadataPath, metadata);
// Notify clients that transcription is complete
io.emit('apps:recording-transcription-end', {
filename: foldername,
success: true,
transcription,
});
res.json({ success: true });
} catch (error) {
console.error('❌ Error during transcription:', error);
// Update transcription metadata with error
const metadata: TranscriptionMetadata = {
transcriptionStartTime: Date.now(),
transcriptionEndTime: Date.now(),
transcriptionStatus: 'error',
error: error instanceof Error ? error.message : 'Unknown error',
};
await fs
.writeJson(`${recordingDir}/transcription.json`, metadata)
.catch(err => {
console.error('❌ Error saving transcription metadata:', err);
});
// Notify clients of transcription error
io.emit('apps:recording-transcription-end', {
filename: foldername,
success: false,
error: error instanceof Error ? error.message : 'Unknown error',
});
res.status(500).json({
error: error instanceof Error ? error.message : 'Unknown error',
});
}
}
);
// Start server
httpServer.listen(PORT, () => {
console.log(`
🎙️ Media Capture Server started successfully:
- Port: ${PORT}
- Recordings directory: ${RECORDING_DIR}
- Sample rate: 44.1kHz
- Channels: Mono
`);
});
// Initialize file watcher
setupRecordingsWatcher().catch(error => {
console.error('Failed to setup recordings watcher:', error);
});
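
The routes above can also be exercised without the web UI; a minimal client sketch (the process id is a placeholder taken from GET /apps, and the server is assumed to be running on the default port 6544):

// sketch: drive the record → stop → transcribe flow over the REST API
const BASE = 'http://localhost:6544';
const pid = 12345; // placeholder: pick a processId from GET /apps

await fetch(`${BASE}/apps/${pid}/record`, { method: 'POST' }); // start tapping audio
// ... record for a while ...
await fetch(`${BASE}/apps/${pid}/stop`, { method: 'POST' }); // stop and write the WAV + metadata

const { recordings } = await (await fetch(`${BASE}/apps/saved`)).json();
// kick off Gemini transcription for the most recent recording folder
await fetch(`${BASE}/recordings/${recordings[0].wav}/transcribe`, { method: 'POST' });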

View File

@@ -0,0 +1,4 @@
declare module '*.txt' {
const content: string;
export default content;
}

View File

@@ -0,0 +1,125 @@
import fs from 'fs-extra';
interface WavWriterConfig {
targetSampleRate?: number;
}
export class WavWriter {
private readonly file: fs.WriteStream;
private readonly originalSampleRate: number = 44100;
private readonly targetSampleRate: number;
private readonly numChannels = 1; // The audio is mono
private samplesWritten = 0;
private readonly tempFilePath: string;
private readonly finalFilePath: string;
constructor(finalPath: string, config: WavWriterConfig = {}) {
this.finalFilePath = finalPath;
this.tempFilePath = finalPath + '.tmp';
this.targetSampleRate = config.targetSampleRate ?? this.originalSampleRate;
this.file = fs.createWriteStream(this.tempFilePath);
this.writeHeader(); // Always write header immediately
}
private writeHeader() {
const buffer = Buffer.alloc(44); // WAV header is 44 bytes
// RIFF chunk descriptor
buffer.write('RIFF', 0);
buffer.writeUInt32LE(36, 4); // Initial file size - 8 (will be updated later)
buffer.write('WAVE', 8);
// fmt sub-chunk
buffer.write('fmt ', 12);
buffer.writeUInt32LE(16, 16); // Subchunk1Size (16 for PCM)
buffer.writeUInt16LE(3, 20); // AudioFormat (3 for IEEE float)
buffer.writeUInt16LE(this.numChannels, 22); // NumChannels
buffer.writeUInt32LE(this.targetSampleRate, 24); // SampleRate
buffer.writeUInt32LE(this.targetSampleRate * this.numChannels * 4, 28); // ByteRate
buffer.writeUInt16LE(this.numChannels * 4, 32); // BlockAlign
buffer.writeUInt16LE(32, 34); // BitsPerSample (32 for float)
// data sub-chunk
buffer.write('data', 36);
buffer.writeUInt32LE(0, 40); // Initial data size (will be updated later)
this.file.write(buffer);
}
private resample(samples: Float32Array): Float32Array {
const ratio = this.originalSampleRate / this.targetSampleRate;
const newLength = Math.floor(samples.length / ratio);
const result = new Float32Array(newLength);
for (let i = 0; i < newLength; i++) {
const position = i * ratio;
const index = Math.floor(position);
const fraction = position - index;
// Linear interpolation between adjacent samples
if (index + 1 < samples.length) {
result[i] =
samples[index] * (1 - fraction) + samples[index + 1] * fraction;
} else {
result[i] = samples[index];
}
}
return result;
}
write(samples: Float32Array) {
// Resample the input samples
const resampledData = this.resample(samples);
// Create a buffer with the correct size (4 bytes per float)
const buffer = Buffer.alloc(resampledData.length * 4);
// Write each float value properly
for (let i = 0; i < resampledData.length; i++) {
buffer.writeFloatLE(resampledData[i], i * 4);
}
this.file.write(buffer);
this.samplesWritten += resampledData.length;
}
async end(): Promise<void> {
return new Promise<void>((resolve, reject) => {
this.file.end(() => {
void this.updateHeaderAndCleanup().then(resolve).catch(reject);
});
});
}
private async updateHeaderAndCleanup(): Promise<void> {
// Read the entire temporary file
const data = await fs.promises.readFile(this.tempFilePath);
// Update the header with correct sizes
const dataSize = this.samplesWritten * 4;
const fileSize = dataSize + 36;
data.writeUInt32LE(fileSize, 4); // Update RIFF chunk size
data.writeUInt32LE(dataSize, 40); // Update data chunk size
// Write the updated file
await fs.promises.writeFile(this.finalFilePath, data);
// Clean up temp file
await fs.promises.unlink(this.tempFilePath);
}
}
/**
* Creates a Buffer from Float32Array audio data
* @param float32Array - The Float32Array containing audio samples
* @returns FileData - The audio data as a Buffer
*/
export function FileData(float32Array: Float32Array): Buffer {
const buffer = Buffer.alloc(float32Array.length * 4); // 4 bytes per float
for (let i = 0; i < float32Array.length; i++) {
buffer.writeFloatLE(float32Array[i], i * 4);
}
return buffer;
}
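
A short usage sketch of the writer (the sine tone below is illustrative; in the server the samples come from the audio tap at 44.1 kHz):

// sketch: write one second of a 440 Hz tone to an 8 kHz WAV file
import { WavWriter } from './wav-writer';

const samples = new Float32Array(44100); // input is assumed to be 44.1 kHz mono
for (let i = 0; i < samples.length; i++) {
  samples[i] = 0.5 * Math.sin((2 * Math.PI * 440 * i) / 44100);
}

const writer = new WavWriter('./tone.wav', { targetSampleRate: 8000 });
writer.write(samples); // resampled to 8 kHz internally via linear interpolation
await writer.end(); // patches the RIFF/data sizes and removes the temp file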

View File

@@ -0,0 +1,7 @@
{
"extends": "../../../tsconfig.node.json",
"compilerOptions": {
"rootDir": "./server"
},
"include": ["./server"]
}

View File

@@ -0,0 +1,10 @@
{
"extends": "../../../tsconfig.web.json",
"compilerOptions": {
"rootDir": "./web",
"outDir": "./dist",
"tsBuildInfoFile": "./dist/tsconfig.tsbuildinfo"
},
"include": ["./web", "server/types.d.ts"],
"references": [{ "path": "../native" }]
}

View File

@@ -0,0 +1,18 @@
import tailwindcss from '@tailwindcss/vite';
import react from '@vitejs/plugin-react';
import { defineConfig } from 'vite';
// https://vite.dev/config/
export default defineConfig({
plugins: [react(), tailwindcss()],
root: './web',
server: {
proxy: {
'/api': {
target: 'http://localhost:6544',
changeOrigin: true,
rewrite: path => path.replace(/^\/api/, ''),
},
},
},
});
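
With this proxy, relative /api requests from the dev web UI are forwarded to the Express server above; a sketch of the mapping (assuming the server from server/main.ts is running):

// sketch: '/api/apps' is rewritten to 'http://localhost:6544/apps' by the dev server
const res = await fetch('/api/apps');
const { apps } = await res.json();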

View File

@@ -0,0 +1,33 @@
import { AppList } from './components/app-list';
import { SavedRecordings } from './components/saved-recordings';
export function App() {
return (
<div className="h-screen bg-gray-50 overflow-hidden">
<div className="h-full p-4 flex gap-4 max-w-[1800px] mx-auto">
<div className="flex-1 flex flex-col min-h-0">
<h1 className="text-xl font-bold text-gray-900 mb-1">
Running Applications
</h1>
<p className="text-sm text-gray-500 mb-2">
Select an application to start recording its audio
</p>
<div className="flex-1 bg-white shadow-lg rounded-lg border border-gray-100 overflow-auto">
<AppList />
</div>
</div>
<div className="w-[1024px] flex flex-col min-h-0">
<h1 className="text-xl font-bold text-gray-900 mb-1">
Saved Recordings
</h1>
<p className="text-sm text-gray-500 mb-2">
Listen to and manage your recorded audio files
</p>
<div className="flex-1 bg-white shadow-lg rounded-lg border border-gray-100 p-4 overflow-auto">
<SavedRecordings />
</div>
</div>
</div>
</div>
);
}

View File

@@ -0,0 +1,122 @@
import React from 'react';
import type { AppGroup, RecordingStatus } from '../types';
import { formatDuration } from '../utils';
interface AppItemProps {
app: AppGroup;
recordings?: RecordingStatus[];
}
export function AppItem({ app, recordings }: AppItemProps) {
const [imgError, setImgError] = React.useState(false);
const [isRecording, setIsRecording] = React.useState(false);
const appName = app.rootApp.name || '';
const bundleId = app.rootApp.bundleIdentifier || '';
const firstLetter = appName.charAt(0).toUpperCase();
const isRunning = app.apps.some(a => a.running);
const recording = recordings?.find((r: RecordingStatus) =>
app.apps.some(a => a.processId === r.processId)
);
const handleRecordClick = React.useCallback(() => {
const recordingApp = app.apps.find(a => a.running);
if (!recordingApp) {
return;
}
if (isRecording) {
void fetch(`/api/apps/${recordingApp.processId}/stop`, {
method: 'POST',
})
.then(() => setIsRecording(false))
.catch(error => console.error('Failed to stop recording:', error));
} else {
void fetch(`/api/apps/${recordingApp.processId}/record`, {
method: 'POST',
})
.then(() => setIsRecording(true))
.catch(error => console.error('Failed to start recording:', error));
}
}, [app.apps, isRecording]);
React.useEffect(() => {
setIsRecording(!!recording);
}, [recording]);
const [duration, setDuration] = React.useState(0);
React.useEffect(() => {
if (recording) {
const interval = setInterval(() => {
setDuration(Date.now() - recording.startTime);
}, 1000);
return () => clearInterval(interval);
} else {
setDuration(0);
}
return () => {};
}, [recording]);
return (
<div className="flex items-center h-16 space-x-2 p-3 hover:bg-gray-50 rounded-lg transition-all duration-200 border border-transparent hover:border-gray-100">
{imgError ? (
<div className="w-8 h-8 rounded-lg bg-gray-50 border border-gray-100 flex items-center justify-center text-gray-600 font-semibold text-base">
{firstLetter}
</div>
) : (
<img
src={`/api/apps/${app.rootApp.processId}/icon`}
loading="lazy"
alt={appName}
className="w-8 h-8 object-contain rounded-lg bg-gray-50 border border-gray-100"
onError={() => setImgError(true)}
/>
)}
<div className="flex-1 min-w-0">
<div className="flex items-center space-x-1 mb-1">
{appName ? (
<span className="text-gray-900 font-medium text-sm truncate">
{appName}
</span>
) : (
<span className="text-gray-400 italic font-medium text-sm">
Unnamed Application
</span>
)}
<span className="text-xs px-1 bg-gray-50 text-gray-500 rounded border border-gray-100">
PID: {app.rootApp.processId}
</span>
<span
className={`text-xs px-2 py-0.5 rounded-full font-medium border ${recording ? 'bg-red-50 text-red-600 border-red-100 opacity-100' : 'opacity-0'}`}
>
{recording ? formatDuration(duration) : '00:00:00'}
</span>
</div>
<div className="text-xs text-gray-500 font-mono truncate opacity-80">
{bundleId}
</div>
</div>
{(isRunning || isRecording) && (
<button
onClick={handleRecordClick}
className={`h-8 min-w-[80px] flex items-center justify-center rounded-lg text-sm font-medium transition-all duration-200 ${
isRecording
? 'bg-red-50 text-red-600 hover:bg-red-100 border border-red-200'
: 'bg-blue-50 text-blue-600 hover:bg-blue-100 border border-blue-200'
}`}
>
{isRecording ? (
<>
<div className="w-1.5 h-1.5 rounded-full bg-red-500 animate-pulse mr-2" />
<span>Stop</span>
</>
) : (
<span>Record</span>
)}
</button>
)}
</div>
);
}

View File

@@ -0,0 +1,144 @@
import React from 'react';
import useSWRSubscription from 'swr/subscription';
import type { App, AppGroup, RecordingStatus } from '../types';
import { socket } from '../utils';
import { AppItem } from './app-item';
export function AppList() {
const { data: apps = [] } = useSWRSubscription('apps', (_key, { next }) => {
let apps: App[] = [];
// Initial apps fetch
fetch('/api/apps')
.then(res => res.json())
.then(data => {
apps = data.apps;
next(null, apps);
})
.catch(err => next(err));
// Subscribe to app updates
socket.on('apps:all', data => {
next(null, data.apps);
apps = data.apps;
});
socket.on('apps:state-changed', data => {
const index = apps.findIndex(a => a.processId === data.processId);
if (index !== -1) {
next(
null,
apps.toSpliced(index, 1, {
...apps[index],
running: data.running,
})
);
}
});
socket.on('connect', () => {
// Refetch on reconnect
fetch('/api/apps')
.then(res => res.json())
.then(data => next(null, data.apps))
.catch(err => next(err));
});
return () => {
socket.off('apps:all');
socket.off('apps:state-changed');
socket.off('connect');
};
});
const { data: recordings = [] } = useSWRSubscription<RecordingStatus[]>(
'recordings',
(
_key: string,
{ next }: { next: (err: Error | null, data?: RecordingStatus[]) => void }
) => {
// Subscribe to recording updates
socket.on('apps:recording', (data: { recordings: RecordingStatus[] }) => {
next(null, data.recordings);
});
return () => {
socket.off('apps:recording');
};
}
);
const appGroups: AppGroup[] = React.useMemo(() => {
const mapping = apps.reduce((acc: Record<number, AppGroup>, app: App) => {
if (!acc[app.processGroupId]) {
acc[app.processGroupId] = {
processGroupId: app.processGroupId,
apps: [],
rootApp:
apps.find((a: App) => a.processId === app.processGroupId) || app,
};
}
acc[app.processGroupId].apps.push(app);
return acc;
}, {});
return Object.values(mapping);
}, [apps]);
const runningApps = (appGroups || []).filter(app =>
app.apps.some(a => a.running)
);
const notRunningApps = (appGroups || []).filter(
app => !app.apps.some(a => a.running)
);
return (
<div className="h-full flex flex-col divide-y divide-gray-100">
<div className="p-4 relative">
<div className="flex items-center justify-between sticky top-0 bg-white z-10 mb-2">
<h2 className="text-sm font-semibold text-gray-900">
Active Applications
</h2>
<span className="text-xs px-2 py-1 bg-blue-50 rounded-full text-blue-600 font-medium">
{runningApps.length} listening
</span>
</div>
<div className="space-y-2">
{runningApps.map(app => (
<AppItem
key={app.processGroupId}
app={app}
recordings={recordings}
/>
))}
{runningApps.length === 0 && (
<div className="text-sm text-gray-500 italic bg-gray-50 rounded-xl p-4 text-center">
No applications are currently listening
</div>
)}
</div>
</div>
<div className="p-4 flex-1 relative">
<div className="flex items-center justify-between sticky top-0 bg-white z-10 mb-2">
<h2 className="text-sm font-semibold text-gray-900">
Other Applications
</h2>
<span className="text-xs px-2 py-1 bg-gray-50 rounded-full text-gray-600 font-medium">
{notRunningApps.length} available
</span>
</div>
<div className="space-y-2">
{notRunningApps.map(app => (
<AppItem
key={app.processGroupId}
app={app}
recordings={recordings}
/>
))}
{notRunningApps.length === 0 && (
<div className="text-sm text-gray-500 italic bg-gray-50 rounded-xl p-4 text-center">
No other applications found
</div>
)}
</div>
</div>
</div>
);
}

View File

@@ -0,0 +1,163 @@
import type { ReactElement } from 'react';
export function PlayIcon(): ReactElement {
return (
<svg
className="w-6 h-6 text-gray-900"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
fillRule="evenodd"
clipRule="evenodd"
d="M4.5 5.653c0-1.426 1.529-2.33 2.779-1.643l11.54 6.348c1.295.712 1.295 2.573 0 3.285L7.28 19.991c-1.25.687-2.779-.217-2.779-1.643V5.653z"
fill="currentColor"
/>
</svg>
);
}
export function PauseIcon(): ReactElement {
return (
<svg
className="w-6 h-6 text-gray-900"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
fillRule="evenodd"
clipRule="evenodd"
d="M6.75 5.25a.75.75 0 01.75-.75H9a.75.75 0 01.75.75v13.5a.75.75 0 01-.75.75H7.5a.75.75 0 01-.75-.75V5.25zm7 0a.75.75 0 01.75-.75h1.5a.75.75 0 01.75.75v13.5a.75.75 0 01-.75.75h-1.5a.75.75 0 01-.75-.75V5.25z"
fill="currentColor"
/>
</svg>
);
}
export function RewindIcon(): ReactElement {
return (
<svg
className="w-5 h-5 text-gray-600"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M12.066 11.2a1 1 0 000 1.6l5.334 4A1 1 0 0019 16V8a1 1 0 00-1.6-.8l-5.334 4zM11 8a1 1 0 00-1.6-.8l-5.334 4a1 1 0 000 1.6l5.334 4A1 1 0 0011 16V8z"
fill="currentColor"
/>
</svg>
);
}
export function ForwardIcon(): ReactElement {
return (
<svg
className="w-5 h-5 text-gray-600"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M5 8a1 1 0 011.6-.8l5.334 4a1 1 0 010 1.6L6.6 16.8A1 1 0 015 16V8zm7.066-.8a1 1 0 00-1.6.8v8a1 1 0 001.6.8l5.334-4a1 1 0 000-1.6l-5.334-4z"
fill="currentColor"
/>
</svg>
);
}
export function DeleteIcon(): ReactElement {
return (
<svg
className="w-5 h-5"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"
stroke="currentColor"
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
/>
</svg>
);
}
export function LoadingSpinner(): ReactElement {
return (
<svg className="animate-spin h-4 w-4" viewBox="0 0 24 24">
<circle
className="opacity-25"
cx="12"
cy="12"
r="10"
stroke="currentColor"
strokeWidth="4"
fill="none"
/>
<path
className="opacity-75"
fill="currentColor"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
/>
</svg>
);
}
export function ErrorIcon(): ReactElement {
return (
<svg
className="w-4 h-4 mr-1.5 flex-shrink-0"
fill="currentColor"
viewBox="0 0 20 20"
>
<path
fillRule="evenodd"
d="M10 18a8 8 0 100-16 8 8 0 000 16zM8.707 7.293a1 1 0 00-1.414 1.414L8.586 10l-1.293 1.293a1 1 0 101.414 1.414L10 11.414l1.293 1.293a1 1 0 001.414-1.414L11.414 10l1.293-1.293a1 1 0 00-1.414-1.414L10 8.586 8.707 7.293z"
clipRule="evenodd"
/>
</svg>
);
}
export function MicrophoneIcon(): ReactElement {
return (
<svg
className="w-4 h-4 mr-1.5 text-blue-500"
viewBox="0 0 20 20"
fill="currentColor"
>
<path d="M7 4a3 3 0 016 0v4a3 3 0 11-6 0V4zm4 10.93A7.001 7.001 0 0017 8a1 1 0 10-2 0A5 5 0 015 8a1 1 0 00-2 0 7.001 7.001 0 006 6.93V17H6a1 1 0 100 2h8a1 1 0 100-2h-3v-2.07z" />
</svg>
);
}
export function WarningIcon(): ReactElement {
return (
<svg className="w-4 h-4 mr-1.5" viewBox="0 0 20 20" fill="currentColor">
<path
fillRule="evenodd"
d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z"
clipRule="evenodd"
/>
</svg>
);
}
export function DefaultAppIcon(): ReactElement {
return (
<svg
xmlns="http://www.w3.org/2000/svg"
className="h-6 w-6"
viewBox="0 0 20 20"
fill="currentColor"
>
<path d="M10 2a3 3 0 00-3 3v4a3 3 0 006 0V5a3 3 0 00-3-3zm0 2a1 1 0 011 1v4a1 1 0 11-2 0V5a1 1 0 011-1z" />
<path d="M3 10a7 7 0 1014 0h-2a5 5 0 11-10 0H3z" />
</svg>
);
}

View File

@@ -0,0 +1,872 @@
import type { ReactElement } from 'react';
import React from 'react';
import ReactMarkdown from 'react-markdown';
import type { SavedRecording, TranscriptionMetadata } from '../types';
import { formatDuration, socket } from '../utils';
import {
DefaultAppIcon,
DeleteIcon,
ErrorIcon,
ForwardIcon,
LoadingSpinner,
MicrophoneIcon,
PauseIcon,
PlayIcon,
RewindIcon,
WarningIcon,
} from './icons';
interface SavedRecordingItemProps {
recording: SavedRecording;
}
// Audio player controls component
function AudioControls({
audioRef,
playbackRate,
onPlaybackRateChange,
onSeek,
onPlayPause,
}: {
audioRef: React.RefObject<HTMLAudioElement | null>;
playbackRate: number;
onPlaybackRateChange: () => void;
onSeek: (seconds: number) => void;
onPlayPause: () => void;
}): ReactElement {
const [currentTime, setCurrentTime] = React.useState('00:00');
const [duration, setDuration] = React.useState('00:00');
React.useEffect(() => {
const audio = audioRef.current;
if (!audio) return;
const formatTime = (time: number) => {
const minutes = Math.floor(time / 60);
const seconds = Math.floor(time % 60);
return `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`;
};
const updateTime = () => {
setCurrentTime(formatTime(audio.currentTime));
setDuration(formatTime(audio.duration));
};
audio.addEventListener('timeupdate', updateTime);
audio.addEventListener('loadedmetadata', updateTime);
return () => {
audio.removeEventListener('timeupdate', updateTime);
audio.removeEventListener('loadedmetadata', updateTime);
};
}, [audioRef]);
return (
<div className="flex items-center justify-between">
<div className="flex items-center space-x-2">
<button
onClick={() => onSeek(-15)}
className="p-2 hover:bg-gray-50 rounded-lg transition-all duration-200 border border-transparent hover:border-gray-100 hover:shadow-sm"
title="Back 15 seconds"
>
<RewindIcon />
</button>
<button
onClick={onPlayPause}
className="p-2 hover:bg-gray-50 rounded-lg transition-all duration-200 border border-transparent hover:border-gray-100 hover:shadow-sm"
>
{audioRef.current?.paused ? <PlayIcon /> : <PauseIcon />}
</button>
<button
onClick={() => onSeek(30)}
className="p-2 hover:bg-gray-50 rounded-lg transition-all duration-200 border border-transparent hover:border-gray-100 hover:shadow-sm"
title="Forward 30 seconds"
>
<ForwardIcon />
</button>
<div className="text-sm font-mono text-gray-500 ml-2">
{currentTime} <span className="text-gray-400">/</span> {duration}
</div>
</div>
<button
onClick={onPlaybackRateChange}
className="px-3 py-1.5 text-sm font-medium text-gray-600 bg-gray-50 hover:bg-gray-100 rounded-lg transition-all duration-200 border border-gray-100 hover:shadow-sm"
>
{playbackRate}x
</button>
</div>
);
}
// Waveform visualization component
function WaveformVisualizer({
containerRef,
waveformData,
currentTime,
fileName,
}: {
containerRef: React.RefObject<HTMLDivElement | null>;
waveformData: number[];
currentTime: number;
fileName: string;
}): ReactElement {
return (
<div
className="relative h-14 bg-gray-50 overflow-hidden rounded-lg border border-gray-100"
ref={containerRef}
>
<div className="absolute inset-0 flex items-end">
{waveformData.map((amplitude, i) => (
<div
key={`${fileName}-bar-${i}`}
className="flex-1 bg-red-400 transition-all duration-200"
style={{
height: `${Math.max(amplitude * 100, 3)}%`,
opacity:
i < Math.floor(currentTime * waveformData.length) ? 1 : 0.3,
margin: '0 0.5px',
}}
/>
))}
</div>
</div>
);
}
// TranscriptionMessage component
function TranscriptionMessage({
item,
isNewSpeaker,
isCurrentMessage,
}: {
item: {
speaker: string;
start_time: string;
transcription: string;
};
isNewSpeaker: boolean;
isCurrentMessage: boolean;
}): ReactElement {
return (
<div className="flex items-start gap-3 group transition-all duration-300 w-full">
<div className="w-[120px] flex-shrink-0">
<div className="flex flex-col items-start gap-1">
{isNewSpeaker && (
<div
className={`px-2.5 py-1 rounded-lg text-xs font-medium border transition-colors duration-300 ${
isCurrentMessage
? 'bg-blue-100 text-blue-700 border-blue-200'
: 'bg-blue-50 text-blue-600 border-blue-100'
}`}
>
{item.speaker}
</div>
)}
<div
className={`text-[11px] font-mono ml-2 transition-colors duration-300 ${
isCurrentMessage ? 'text-blue-500' : 'text-gray-400'
}`}
>
{item.start_time}
</div>
</div>
</div>
<div className="flex-1 min-w-0 w-full">
<div
className={`text-sm leading-relaxed rounded-xl px-4 py-2 border transition-all inline-flex duration-300 ${
isCurrentMessage
? 'bg-blue-50/50 text-blue-900 border-blue-200 shadow-md'
: 'bg-white text-gray-600 border-gray-100 shadow-sm hover:shadow-md'
}`}
>
{item.transcription}
</div>
</div>
</div>
);
}
// TranscriptionSummary component
function TranscriptionSummary({ summary }: { summary: string }): ReactElement {
return (
<div className="mb-6 bg-blue-50/50 rounded-xl p-4 border border-blue-100">
<div className="text-xs font-medium text-blue-600 mb-2 uppercase tracking-wider">
Summary
</div>
<div className="text-sm text-gray-700 leading-relaxed prose prose-sm max-w-none prose-headings:text-gray-900 prose-a:text-blue-600 whitespace-pre-wrap">
<ReactMarkdown>{summary}</ReactMarkdown>
</div>
</div>
);
}
// TranscriptionContent component
function TranscriptionContent({
transcriptionData,
currentAudioTime,
}: {
transcriptionData: {
segments: Array<{
speaker: string;
start_time: string;
transcription: string;
}>;
summary: string;
title: string;
};
currentAudioTime: number;
}): ReactElement {
const parseTimestamp = (timestamp: string) => {
// Handle "MM:SS" format (without hours)
const [minutes, seconds] = timestamp.split(':');
return parseInt(minutes, 10) * 60 + parseInt(seconds, 10);
};
return (
<div className="space-y-2 py-2 max-h-[400px] overflow-y-auto pr-2 scrollbar-thin scrollbar-thumb-gray-300 scrollbar-track-transparent hover:scrollbar-thumb-gray-400 w-full">
<TranscriptionSummary summary={transcriptionData.summary} />
{transcriptionData.segments.map((item, index) => {
const isNewSpeaker =
index === 0 ||
transcriptionData.segments[index - 1].speaker !== item.speaker;
const startTime = parseTimestamp(item.start_time);
// Use next segment's start time as end time, or add 3 seconds for the last segment
const endTime =
index < transcriptionData.segments.length - 1
? parseTimestamp(transcriptionData.segments[index + 1].start_time)
: startTime + 3;
const isCurrentMessage =
currentAudioTime >= startTime && currentAudioTime < endTime;
return (
<TranscriptionMessage
key={`${item.speaker}-${item.start_time}-${index}`}
item={item}
isNewSpeaker={isNewSpeaker}
isCurrentMessage={isCurrentMessage}
/>
);
})}
</div>
);
}
// TranscriptionStatus component
function TranscriptionStatus({
transcription,
transcriptionError,
currentAudioTime,
}: {
transcription?: TranscriptionMetadata;
transcriptionError: string | null;
currentAudioTime: number;
}): ReactElement | null {
if (!transcription && !transcriptionError) {
return null;
}
if (transcription?.transcriptionStatus === 'pending') {
return (
<div className="my-2">
<div className="text-sm text-gray-600 bg-gray-50/50 p-4 border border-gray-100 w-full">
<div className="font-medium text-gray-900 mb-4 flex items-center sticky top-0 bg-gray-50/50 backdrop-blur-sm z-10 py-2">
<MicrophoneIcon />
<span>Processing Audio</span>
</div>
<div className="flex items-center justify-center py-8">
<div className="flex flex-col items-center gap-3">
<LoadingSpinner />
<div className="text-sm text-gray-600">
<span className="font-medium">Starting transcription</span>
<span className="text-gray-400 animate-pulse">...</span>
</div>
<div className="text-xs text-gray-400 max-w-sm text-center">
This may take a few moments depending on the length of the
recording
</div>
</div>
</div>
</div>
</div>
);
}
if (transcriptionError) {
return (
<div className="text-xs text-red-500 m-2 flex items-center bg-red-50 rounded-lg p-2 border border-red-100">
<ErrorIcon />
{transcriptionError}
</div>
);
}
if (
transcription?.transcriptionStatus === 'completed' &&
transcription.transcription
) {
try {
const transcriptionData = transcription.transcription;
if (
!transcriptionData.segments ||
!Array.isArray(transcriptionData.segments)
) {
throw new Error('Invalid transcription data format');
}
return (
<div className="my-2">
<div className="text-sm text-gray-600 bg-gray-50/50 p-4 border border-gray-100 w-full">
<div className="font-medium text-gray-900 mb-4 flex items-center sticky top-0 bg-gray-50/50 backdrop-blur-sm z-10 py-2">
<MicrophoneIcon />
<span>Conversation Transcript</span>
</div>
{transcriptionData.title && (
<div className="mb-4 bg-blue-50/50 rounded-lg p-3 border border-blue-100">
<div className="text-xs font-medium text-blue-600 uppercase tracking-wider mb-1">
Title
</div>
<div className="text-base font-medium text-gray-900">
{transcriptionData.title}
</div>
</div>
)}
<TranscriptionContent
transcriptionData={transcriptionData}
currentAudioTime={currentAudioTime}
/>
</div>
</div>
);
} catch (error) {
return (
<div className="text-sm text-red-500 bg-red-50 rounded-lg p-2 border border-red-100 m-2">
{error instanceof Error
? error.message
: 'Failed to parse transcription data'}
</div>
);
}
}
return null;
}
// RecordingHeader component
function RecordingHeader({
metadata,
fileName,
recordingDate,
duration,
error,
isDeleting,
showDeleteConfirm,
setShowDeleteConfirm,
handleDeleteClick,
}: {
metadata: SavedRecording['metadata'];
fileName: string;
recordingDate: string;
duration: string;
error: string | null;
isDeleting: boolean;
showDeleteConfirm: boolean;
setShowDeleteConfirm: (show: boolean) => void;
handleDeleteClick: () => void;
transcriptionError: string | null;
}): ReactElement {
const [imgError, setImgError] = React.useState(false);
return (
<div className="flex items-start space-x-4 p-4 bg-gray-50/30">
<div className="relative w-12 h-12 flex-shrink-0">
{!imgError ? (
<img
src={`/api/recordings/${fileName}/icon.png`}
alt={metadata?.appName || 'Unknown Application'}
className="w-12 h-12 object-contain rounded-lg bg-gray-50 border border-gray-100 shadow-sm transition-transform duration-200 hover:scale-105"
onError={() => setImgError(true)}
/>
) : (
<div className="w-12 h-12 rounded-xl flex items-center justify-center text-gray-500 bg-gray-50 border border-gray-100 shadow-sm">
<DefaultAppIcon />
</div>
)}
</div>
<div className="flex-1 min-w-0">
<div className="flex items-center justify-between">
<div className="flex items-center space-x-2">
<span className="text-gray-900 font-semibold text-base truncate">
{metadata?.appName || 'Unknown Application'}
</span>
<span className="text-xs px-2 py-0.5 bg-blue-50 rounded-full text-blue-600 font-medium border border-blue-100">
{duration}
</span>
</div>
<div className="flex items-center">
{showDeleteConfirm ? (
<div className="flex items-center space-x-2">
<button
onClick={() => setShowDeleteConfirm(false)}
className="h-8 px-3 text-sm font-medium text-gray-600 hover:bg-gray-50 rounded-lg transition-colors border border-gray-100"
disabled={isDeleting}
>
Cancel
</button>
<button
onClick={handleDeleteClick}
className="h-8 px-3 text-sm font-medium text-red-600 hover:bg-red-50 rounded-lg transition-colors border border-red-100 disabled:opacity-50 disabled:cursor-not-allowed"
disabled={isDeleting}
>
{isDeleting ? (
<div className="flex items-center space-x-2">
<LoadingSpinner />
<span>Deleting...</span>
</div>
) : (
'Confirm'
)}
</button>
</div>
) : (
<button
onClick={() => setShowDeleteConfirm(true)}
className="h-8 w-8 flex items-center justify-center text-gray-400 hover:text-red-500 hover:bg-red-50 rounded-lg transition-colors"
title="Delete recording"
>
<DeleteIcon />
</button>
)}
</div>
</div>
<div className="text-sm text-gray-600 mt-1">{recordingDate}</div>
<div className="text-xs text-gray-400 font-mono mt-0.5 truncate">
{metadata?.bundleIdentifier || fileName}
</div>
{error && (
<div className="text-xs text-red-500 mt-2 flex items-center bg-red-50 rounded-lg p-2 border border-red-100">
<ErrorIcon />
{error}
</div>
)}
</div>
</div>
);
}
// AudioPlayer component
function AudioPlayer({
isLoading,
error,
audioRef,
playbackRate,
handlePlaybackRateChange,
handleSeek,
handlePlayPause,
containerRef,
waveformData,
currentTime,
fileName,
}: {
isLoading: boolean;
error: string | null;
audioRef: React.RefObject<HTMLAudioElement | null>;
playbackRate: number;
handlePlaybackRateChange: () => void;
handleSeek: (seconds: number) => void;
handlePlayPause: () => void;
containerRef: React.RefObject<HTMLDivElement | null>;
waveformData: number[];
currentTime: number;
fileName: string;
}): ReactElement {
return (
<div className="px-4 pb-4">
{isLoading && !error ? (
<div className="h-14 bg-gray-50 rounded-lg flex items-center justify-center border border-gray-100">
<LoadingSpinner />
<span className="ml-2 text-sm text-gray-600 font-medium">
Loading audio...
</span>
</div>
) : (
<div className="flex flex-col space-y-3">
<AudioControls
audioRef={audioRef}
playbackRate={playbackRate}
onPlaybackRateChange={handlePlaybackRateChange}
onSeek={handleSeek}
onPlayPause={handlePlayPause}
/>
<WaveformVisualizer
containerRef={containerRef}
waveformData={waveformData}
currentTime={currentTime}
fileName={fileName}
/>
</div>
)}
</div>
);
}
// TranscribeButton component
function TranscribeButton({
transcriptionStatus,
onTranscribe,
}: {
transcriptionStatus?: TranscriptionMetadata['transcriptionStatus'];
onTranscribe: () => void;
}): ReactElement {
return (
<div className="px-4 pb-4">
<div className="flex justify-end">
<button
onClick={onTranscribe}
disabled={transcriptionStatus === 'pending'}
className={`h-8 px-3 text-sm font-medium rounded-lg transition-colors border flex items-center space-x-2
${
transcriptionStatus === 'pending'
? 'bg-blue-50 text-blue-600 border-blue-200 cursor-not-allowed'
: transcriptionStatus === 'completed'
? 'text-blue-600 hover:bg-blue-50 border-blue-100'
: transcriptionStatus === 'error'
? 'text-red-600 hover:bg-red-50 border-red-100'
: 'text-blue-600 hover:bg-blue-50 border-blue-100'
}`}
>
{transcriptionStatus === 'pending' ? (
<>
<LoadingSpinner />
<span>Transcribing...</span>
</>
) : transcriptionStatus === 'completed' ? (
<>
<MicrophoneIcon />
<span>Transcribe Again</span>
</>
) : transcriptionStatus === 'error' ? (
<>
<WarningIcon />
<span>Retry Transcription</span>
</>
) : (
<>
<MicrophoneIcon />
<span>Transcribe</span>
</>
)}
</button>
</div>
</div>
);
}
// Main SavedRecordingItem component
export function SavedRecordingItem({
recording,
}: SavedRecordingItemProps): ReactElement {
const [error, setError] = React.useState<string | null>(null);
const [isLoading, setIsLoading] = React.useState(true);
const [isDeleting, setIsDeleting] = React.useState(false);
const [showDeleteConfirm, setShowDeleteConfirm] = React.useState(false);
const [playbackRate, setPlaybackRate] = React.useState(1);
const [waveformData, setWaveformData] = React.useState<number[]>([]);
const [currentTime, setCurrentTime] = React.useState(0);
const audioRef = React.useRef<HTMLAudioElement | null>(null);
const containerRef = React.useRef<HTMLDivElement | null>(null);
const [segments, setSegments] = React.useState(40);
const [currentAudioTime, setCurrentAudioTime] = React.useState(0);
const [transcriptionError, setTranscriptionError] = React.useState<
string | null
>(null);
const metadata = recording.metadata;
const fileName = recording.wav;
const recordingDate = metadata
? new Date(metadata.recordingStartTime).toLocaleString()
: 'Unknown date';
const duration = metadata
? formatDuration(metadata.recordingDuration * 1000)
: 'Unknown duration';
// Update current audio time
React.useEffect(() => {
const audio = audioRef.current;
if (audio) {
const handleTimeUpdate = () => {
setCurrentAudioTime(audio.currentTime);
};
audio.addEventListener('timeupdate', handleTimeUpdate);
return () => audio.removeEventListener('timeupdate', handleTimeUpdate);
}
return () => {};
}, []);
// Calculate number of segments based on container width
React.useEffect(() => {
const updateSegments = () => {
if (containerRef.current) {
// Each bar should be at least 2px wide (1px bar + 1px gap)
const width = containerRef.current.offsetWidth;
setSegments(Math.floor(width / 2));
}
};
updateSegments();
const resizeObserver = new ResizeObserver(updateSegments);
if (containerRef.current) {
resizeObserver.observe(containerRef.current);
}
return () => resizeObserver.disconnect();
}, []);
const processAudioData = React.useCallback(async () => {
try {
const response = await fetch(`/api/recordings/${fileName}/recording.wav`);
if (!response.ok) {
throw new Error(
`Failed to fetch audio file (${response.status}): ${response.statusText}`
);
}
const audioContext = new AudioContext();
const arrayBuffer = await response.arrayBuffer();
// Ensure we have data to process
if (!arrayBuffer || arrayBuffer.byteLength === 0) {
throw new Error('No audio data received');
}
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
const channelData = audioBuffer.getChannelData(0);
// Process the audio data in chunks to create the waveform
const numberOfSamples = channelData.length;
const samplesPerSegment = Math.floor(numberOfSamples / segments);
const waveform: number[] = [];
for (let i = 0; i < segments; i++) {
const start = i * samplesPerSegment;
const end = start + samplesPerSegment;
const segmentData = channelData.slice(start, end);
// Calculate RMS (root mean square) for better amplitude representation
const rms = Math.sqrt(
segmentData.reduce((sum, sample) => sum + sample * sample, 0) /
segmentData.length
);
waveform.push(rms);
}
// Normalize the waveform data to a 0-1 range
      const maxAmplitude = Math.max(...waveform);
      // Guard against division by zero for silent audio
      const normalizedWaveform =
        maxAmplitude > 0 ? waveform.map(amp => amp / maxAmplitude) : waveform;
setWaveformData(normalizedWaveform);
setIsLoading(false);
} catch (err) {
console.error('Error processing audio:', err);
setError(
err instanceof Error ? err.message : 'Failed to process audio data'
);
setIsLoading(false);
}
}, [fileName, segments]);
React.useEffect(() => {
const audio = audioRef.current;
if (audio) {
const handleError = (e: ErrorEvent) => {
console.error('Audio error:', e);
setError('Failed to load audio');
setIsLoading(false);
};
const handleLoadedMetadata = () => {
void processAudioData().catch(err => {
console.error('Error processing audio data:', err);
setError('Failed to process audio data');
setIsLoading(false);
});
};
const handleTimeUpdate = () => {
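        // Store playback progress as a 0-1 fraction (consumed by the waveform overlay)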
setCurrentTime(audio.currentTime / audio.duration);
};
audio.addEventListener('error', handleError as EventListener);
audio.addEventListener('loadedmetadata', handleLoadedMetadata);
audio.addEventListener('timeupdate', handleTimeUpdate);
return () => {
audio.removeEventListener('error', handleError as EventListener);
audio.removeEventListener('loadedmetadata', handleLoadedMetadata);
audio.removeEventListener('timeupdate', handleTimeUpdate);
};
}
return () => {};
}, [processAudioData]);
const handlePlayPause = React.useCallback(() => {
if (audioRef.current) {
if (audioRef.current.paused) {
void audioRef.current.play();
} else {
audioRef.current.pause();
}
}
}, []);
const handleSeek = React.useCallback((seconds: number) => {
if (audioRef.current) {
audioRef.current.currentTime += seconds;
}
}, []);
const handlePlaybackRateChange = React.useCallback(() => {
if (audioRef.current) {
const newRate = playbackRate === 1 ? 1.5 : 1;
audioRef.current.playbackRate = newRate;
setPlaybackRate(newRate);
}
}, [playbackRate]);
const handleDelete = React.useCallback(async () => {
setIsDeleting(true);
setError(null); // Clear any previous errors
try {
const response = await fetch(`/api/recordings/${recording.wav}`, {
method: 'DELETE',
});
if (!response.ok) {
let errorMessage: string;
try {
const errorData = await response.json();
errorMessage = errorData.error;
} catch {
errorMessage = `Server error (${response.status}): ${response.statusText}`;
}
throw new Error(errorMessage);
}
setShowDeleteConfirm(false);
} catch (err) {
console.error('Error deleting recording:', err);
setError(
err instanceof Error ? err.message : 'An unexpected error occurred'
);
} finally {
setIsDeleting(false);
}
}, [recording.wav]);
const handleDeleteClick = React.useCallback(() => {
void handleDelete().catch(err => {
console.error('Unexpected error during deletion:', err);
setError('An unexpected error occurred');
});
}, [handleDelete]);
  React.useEffect(() => {
    // Listen for transcription events; keep handler references so cleanup only
    // removes this item's listeners (socket.off without a handler would also
    // detach listeners registered by other SavedRecordingItem instances)
    const handleTranscriptionStart = (data: { filename: string }) => {
      if (data.filename === recording.wav) {
        setTranscriptionError(null);
      }
    };
    const handleTranscriptionEnd = (data: {
      filename: string;
      success: boolean;
      transcription?: string;
      error?: string;
    }) => {
      if (data.filename === recording.wav && !data.success) {
        setTranscriptionError(data.error || 'Transcription failed');
      }
    };
    socket.on('apps:recording-transcription-start', handleTranscriptionStart);
    socket.on('apps:recording-transcription-end', handleTranscriptionEnd);
    return () => {
      socket.off('apps:recording-transcription-start', handleTranscriptionStart);
      socket.off('apps:recording-transcription-end', handleTranscriptionEnd);
    };
  }, [recording.wav]);
const handleTranscribe = React.useCallback(async () => {
try {
const response = await fetch(
`/api/recordings/${recording.wav}/transcribe`,
{
method: 'POST',
}
);
if (!response.ok) {
const error = await response.json();
throw new Error(error.error || 'Failed to start transcription');
}
} catch (err) {
setTranscriptionError(
err instanceof Error ? err.message : 'Failed to start transcription'
);
}
}, [recording.wav]);
return (
<div className="bg-white rounded-lg shadow-sm hover:shadow-md transition-all duration-300 overflow-hidden mb-3 border border-gray-100 hover:border-gray-200">
<RecordingHeader
metadata={metadata}
fileName={fileName}
recordingDate={recordingDate}
duration={duration}
error={error}
isDeleting={isDeleting}
showDeleteConfirm={showDeleteConfirm}
setShowDeleteConfirm={setShowDeleteConfirm}
handleDeleteClick={handleDeleteClick}
transcriptionError={transcriptionError}
/>
<AudioPlayer
isLoading={isLoading}
error={error}
audioRef={audioRef as React.RefObject<HTMLAudioElement>}
playbackRate={playbackRate}
handlePlaybackRateChange={handlePlaybackRateChange}
handleSeek={handleSeek}
handlePlayPause={handlePlayPause}
containerRef={containerRef as React.RefObject<HTMLDivElement>}
waveformData={waveformData}
currentTime={currentTime}
fileName={fileName}
/>
<audio
ref={audioRef}
src={`/api/recordings/${fileName}/recording.wav`}
preload="metadata"
className="hidden"
/>
<TranscriptionStatus
transcription={recording.transcription}
transcriptionError={transcriptionError}
currentAudioTime={currentAudioTime}
/>
<TranscribeButton
transcriptionStatus={recording.transcription?.transcriptionStatus}
onTranscribe={() => void handleTranscribe()}
/>
</div>
);
}

View File

@@ -0,0 +1,41 @@
import useSWRSubscription from 'swr/subscription';
import type { SavedRecording } from '../types';
import { socket } from '../utils';
import { SavedRecordingItem } from './saved-recording-item';
export function SavedRecordings(): React.ReactElement {
const { data: recordings = [] } = useSWRSubscription<SavedRecording[]>(
'saved-recordings',
(
_key: string,
{ next }: { next: (err: Error | null, data?: SavedRecording[]) => void }
) => {
// Subscribe to saved recordings updates
      const handleSaved = (data: { recordings: SavedRecording[] }) => {
        next(null, data.recordings);
      };
      socket.on('apps:saved', handleSaved);
      fetch('/api/apps/saved')
        .then(res => res.json())
        .then(data => next(null, data.recordings))
        .catch(err => next(err));
      return () => {
        socket.off('apps:saved', handleSaved);
};
}
);
if (recordings.length === 0) {
return <p className="text-gray-500 italic text-sm">No saved recordings</p>;
}
return (
<div className="space-y-1">
{recordings.map(recording => (
<SavedRecordingItem key={recording.wav} recording={recording} />
))}
</div>
);
}

View File

@@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Media Capture Playground</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/main.tsx"></script>
</body>
</html>

View File

@@ -0,0 +1 @@
@import 'tailwindcss';

View File

@@ -0,0 +1,11 @@
import './main.css';
import { createRoot } from 'react-dom/client';
import { App } from './app';
const rootElement = document.getElementById('root');
if (!rootElement) {
throw new Error('Failed to find the root element');
}
createRoot(rootElement).render(<App />);

View File

@@ -0,0 +1,55 @@
export interface App {
processId: number;
processGroupId: number;
bundleIdentifier: string;
name: string;
running: boolean;
}
export interface AppGroup {
processGroupId: number;
rootApp: App;
apps: App[];
}
export interface RecordingStatus {
processId: number;
bundleIdentifier: string;
name: string;
startTime: number;
}
export interface RecordingMetadata {
appName: string;
bundleIdentifier: string;
processId: number;
recordingStartTime: number;
recordingEndTime: number;
recordingDuration: number;
sampleRate: number;
totalSamples: number;
icon?: Uint8Array;
}
export interface TranscriptionMetadata {
transcriptionStartTime: number;
transcriptionEndTime: number;
transcriptionStatus: 'not_started' | 'pending' | 'completed' | 'error';
transcription?: {
title: string;
segments: Array<{
speaker: string;
start_time: string;
end_time: string;
transcription: string;
}>;
summary: string;
};
error?: string;
}
export interface SavedRecording {
wav: string;
metadata?: RecordingMetadata;
transcription?: TranscriptionMetadata;
}

View File

@@ -0,0 +1,19 @@
import { io } from 'socket.io-client';
// Create a singleton socket instance
export const socket = io('http://localhost:6544');
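// NOTE: port 6544 is assumed to match the local playground server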
export function formatDuration(ms: number): string {
const seconds = Math.floor(ms / 1000);
const minutes = Math.floor(seconds / 60);
const hours = Math.floor(minutes / 60);
return `${hours.toString().padStart(2, '0')}:${(minutes % 60)
.toString()
.padStart(2, '0')}:${(seconds % 60).toString().padStart(2, '0')}`;
}
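// e.g. formatDuration(90_500) === '00:01:30'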
// Helper function to convert timestamp (MM:SS.mmm) to seconds
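// e.g. timestampToSeconds('02:15.500') === 135.5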
export function timestampToSeconds(timestamp: string): number {
const [minutes, seconds] = timestamp.split(':').map(parseFloat);
return minutes * 60 + seconds;
}

View File

@@ -1,2 +1,3 @@
*.fixture
lib
*.bin

View File

@@ -7,14 +7,15 @@ version = "0.0.0"
crate-type = ["cdylib", "rlib"]
[dependencies]
affine_common = { workspace = true }
affine_nbstore = { path = "./nbstore" }
affine_sqlite_v1 = { path = "./sqlite_v1" }
napi = { workspace = true }
napi-derive = { workspace = true }
once_cell = { workspace = true }
sqlx = { workspace = true, default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
tokio = { workspace = true, features = ["full"] }
affine_common = { workspace = true }
affine_media_capture = { path = "./media_capture" }
affine_nbstore = { path = "./nbstore" }
affine_sqlite_v1 = { path = "./sqlite_v1" }
napi = { workspace = true }
napi-derive = { workspace = true }
once_cell = { workspace = true }
sqlx = { workspace = true, default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
tokio = { workspace = true, features = ["full"] }
[build-dependencies]
napi-build = { workspace = true }

View File

@@ -0,0 +1,149 @@
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
Whisper,
WhisperFullParams,
WhisperSamplingStrategy,
} from '@napi-rs/whisper';
import { BehaviorSubject, EMPTY, Observable } from 'rxjs';
import {
distinctUntilChanged,
exhaustMap,
groupBy,
mergeMap,
switchMap,
tap,
} from 'rxjs/operators';
import { type Application, ShareableContent } from './index.js';
const rootDir = join(fileURLToPath(import.meta.url), '..');
const shareableContent = new ShareableContent();
const appList = new Set([
'com.tinyspeck.slackmacgap.helper',
'us.zoom.xos',
'org.mozilla.firefoxdeveloperedition',
]);
console.info(shareableContent.applications().map(app => app.bundleIdentifier));
const GGLM_LARGE = join(rootDir, 'ggml-large-v3-turbo.bin');
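// Assumes ggml-large-v3-turbo.bin has already been downloaded next to this script
// (e.g. from the whisper.cpp ggml model releases)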
const whisper = new Whisper(GGLM_LARGE, {
useGpu: true,
gpuDevice: 1,
});
const whisperParams = new WhisperFullParams(WhisperSamplingStrategy.Greedy);
const SAMPLE_WINDOW_MS = 3000; // 3 seconds, similar to stream.cpp's step_ms
const SAMPLES_PER_WINDOW = (SAMPLE_WINDOW_MS / 1000) * 16000; // 16kHz sample rate
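// => 3 s * 16_000 samples/s = 48_000 samples buffered before each whisper.full() pass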
// eslint-disable-next-line rxjs/finnish
const runningApplications = new BehaviorSubject(
shareableContent.applications()
);
const applicationListChangedSubscriber =
ShareableContent.onApplicationListChanged(() => {
runningApplications.next(shareableContent.applications());
});
runningApplications
.pipe(
mergeMap(apps => apps.filter(app => appList.has(app.bundleIdentifier))),
groupBy(app => app.bundleIdentifier),
mergeMap(app$ =>
app$.pipe(
exhaustMap(app =>
new Observable<[Application, boolean]>(subscriber => {
const stateSubscriber = ShareableContent.onAppStateChanged(
app,
err => {
if (err) {
subscriber.error(err);
return;
}
subscriber.next([app, app.isRunning]);
}
);
return () => {
stateSubscriber.unsubscribe();
};
}).pipe(
distinctUntilChanged(
([_, isRunningA], [__, isRunningB]) => isRunningA === isRunningB
),
switchMap(([app]) =>
!app.isRunning
? EMPTY
: new Observable(observer => {
const buffers: Float32Array[] = [];
const audioStream = app.tapAudio((err, samples) => {
if (err) {
observer.error(err);
return;
}
if (samples) {
buffers.push(samples);
observer.next(samples);
// Calculate total samples in buffer
const totalSamples = buffers.reduce(
(acc, buf) => acc + buf.length,
0
);
// Process when we have enough samples for our window
if (totalSamples >= SAMPLES_PER_WINDOW) {
// Concatenate all buffers
const concatenated = new Float32Array(totalSamples);
let offset = 0;
buffers.forEach(buf => {
concatenated.set(buf, offset);
offset += buf.length;
});
// Transcribe the audio
const result = whisper.full(
whisperParams,
concatenated
);
// Print results
console.info(result);
// Keep any remaining samples for next window
const remainingSamples =
totalSamples - SAMPLES_PER_WINDOW;
if (remainingSamples > 0) {
const lastBuffer = buffers[buffers.length - 1];
buffers.length = 0;
buffers.push(lastBuffer.slice(-remainingSamples));
} else {
buffers.length = 0;
}
}
}
});
return () => {
audioStream.stop();
};
})
)
)
)
)
),
tap({
finalize: () => {
applicationListChangedSubscriber.unsubscribe();
},
})
)
.subscribe();

View File

@@ -0,0 +1,26 @@
[package]
edition = "2021"
name = "affine_media_capture"
version = "0.0.0"
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
napi = { workspace = true, features = ["napi4"] }
napi-derive = { workspace = true, features = ["type-def"] }
rubato = { workspace = true }
[target.'cfg(target_os = "macos")'.dependencies]
block2 = { workspace = true }
core-foundation = { workspace = true, features = ["with-uuid"] }
coreaudio-rs = { workspace = true }
dispatch2 = { workspace = true }
objc2 = { workspace = true }
objc2-foundation = { workspace = true }
screencapturekit = { workspace = true }
thiserror = { workspace = true }
uuid = { workspace = true, features = ["v4"] }
[build-dependencies]
napi-build = { workspace = true }

View File

@@ -0,0 +1,3 @@
fn main() {
napi_build::setup();
}

View File

@@ -0,0 +1,4 @@
#[cfg(target_os = "macos")]
pub mod macos;
#[cfg(target_os = "macos")]
pub(crate) use macos::*;

View File

@@ -0,0 +1,282 @@
use std::{fmt::Display, mem, ptr};
use coreaudio::sys::{
kAudioHardwareNoError, kAudioObjectPropertyElementMain, kAudioObjectPropertyScopeGlobal,
kAudioTapPropertyFormat, AudioObjectGetPropertyData, AudioObjectID, AudioObjectPropertyAddress,
};
use objc2::{Encode, Encoding, RefEncode};
use crate::error::CoreAudioError;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum AudioFormatID {
LinearPcm = 0x6C70636D, // 'lpcm'
Ac3 = 0x61632D33, // 'ac-3'
Ac360958 = 0x63616333, // 'cac3'
AppleIma4 = 0x696D6134, // 'ima4'
Mpeg4Aac = 0x61616320, // 'aac '
Mpeg4Celp = 0x63656C70, // 'celp'
Mpeg4Hvxc = 0x68767863, // 'hvxc'
Mpeg4TwinVq = 0x74777671, // 'twvq'
Mace3 = 0x4D414333, // 'MAC3'
Mace6 = 0x4D414336, // 'MAC6'
ULaw = 0x756C6177, // 'ulaw'
ALaw = 0x616C6177, // 'alaw'
QDesign = 0x51444D43, // 'QDMC'
QDesign2 = 0x51444D32, // 'QDM2'
Qualcomm = 0x51636C70, // 'Qclp'
MpegLayer1 = 0x2E6D7031, // '.mp1'
MpegLayer2 = 0x2E6D7032, // '.mp2'
MpegLayer3 = 0x2E6D7033, // '.mp3'
TimeCode = 0x74696D65, // 'time'
MidiStream = 0x6D696469, // 'midi'
ParameterValueStream = 0x61707673, // 'apvs'
AppleLossless = 0x616C6163, // 'alac'
Mpeg4AacHe = 0x61616368, // 'aach'
Mpeg4AacLd = 0x6161636C, // 'aacl'
Mpeg4AacEld = 0x61616365, // 'aace'
Mpeg4AacEldSbr = 0x61616366, // 'aacf'
Mpeg4AacEldV2 = 0x61616367, // 'aacg'
Mpeg4AacHeV2 = 0x61616370, // 'aacp'
Mpeg4AacSpatial = 0x61616373, // 'aacs'
MpegdUsac = 0x75736163, // 'usac'
Amr = 0x73616D72, // 'samr'
AmrWb = 0x73617762, // 'sawb'
Audible = 0x41554442, // 'AUDB'
ILbc = 0x696C6263, // 'ilbc'
DviIntelIma = 0x6D730011,
MicrosoftGsm = 0x6D730031,
Aes3 = 0x61657333, // 'aes3'
EnhancedAc3 = 0x65632D33, // 'ec-3'
Flac = 0x666C6163, // 'flac'
Opus = 0x6F707573, // 'opus'
Apac = 0x61706163, // 'apac'
Unknown = 0x00000000,
}
impl From<u32> for AudioFormatID {
fn from(value: u32) -> Self {
match value {
0x6C70636D => Self::LinearPcm,
0x61632D33 => Self::Ac3,
0x63616333 => Self::Ac360958,
0x696D6134 => Self::AppleIma4,
0x61616320 => Self::Mpeg4Aac,
0x63656C70 => Self::Mpeg4Celp,
0x68767863 => Self::Mpeg4Hvxc,
0x74777671 => Self::Mpeg4TwinVq,
0x4D414333 => Self::Mace3,
0x4D414336 => Self::Mace6,
0x756C6177 => Self::ULaw,
0x616C6177 => Self::ALaw,
0x51444D43 => Self::QDesign,
0x51444D32 => Self::QDesign2,
0x51636C70 => Self::Qualcomm,
0x2E6D7031 => Self::MpegLayer1,
0x2E6D7032 => Self::MpegLayer2,
0x2E6D7033 => Self::MpegLayer3,
0x74696D65 => Self::TimeCode,
0x6D696469 => Self::MidiStream,
0x61707673 => Self::ParameterValueStream,
0x616C6163 => Self::AppleLossless,
0x61616368 => Self::Mpeg4AacHe,
0x6161636C => Self::Mpeg4AacLd,
0x61616365 => Self::Mpeg4AacEld,
0x61616366 => Self::Mpeg4AacEldSbr,
0x61616367 => Self::Mpeg4AacEldV2,
0x61616370 => Self::Mpeg4AacHeV2,
0x61616373 => Self::Mpeg4AacSpatial,
0x75736163 => Self::MpegdUsac,
0x73616D72 => Self::Amr,
0x73617762 => Self::AmrWb,
0x41554442 => Self::Audible,
0x696C6263 => Self::ILbc,
0x6D730011 => Self::DviIntelIma,
0x6D730031 => Self::MicrosoftGsm,
0x61657333 => Self::Aes3,
0x65632D33 => Self::EnhancedAc3,
0x666C6163 => Self::Flac,
0x6F707573 => Self::Opus,
0x61706163 => Self::Apac,
_ => Self::Unknown,
}
}
}
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct AudioFormatFlags(pub u32);
#[allow(unused)]
impl AudioFormatFlags {
pub const IS_FLOAT: u32 = 1 << 0;
pub const IS_BIG_ENDIAN: u32 = 1 << 1;
pub const IS_SIGNED_INTEGER: u32 = 1 << 2;
pub const IS_PACKED: u32 = 1 << 3;
pub const IS_ALIGNED_HIGH: u32 = 1 << 4;
pub const IS_NON_INTERLEAVED: u32 = 1 << 5;
pub const IS_NON_MIXABLE: u32 = 1 << 6;
pub const ARE_ALL_CLEAR: u32 = 0x80000000;
pub const LINEAR_PCM_IS_FLOAT: u32 = Self::IS_FLOAT;
pub const LINEAR_PCM_IS_BIG_ENDIAN: u32 = Self::IS_BIG_ENDIAN;
pub const LINEAR_PCM_IS_SIGNED_INTEGER: u32 = Self::IS_SIGNED_INTEGER;
pub const LINEAR_PCM_IS_PACKED: u32 = Self::IS_PACKED;
pub const LINEAR_PCM_IS_ALIGNED_HIGH: u32 = Self::IS_ALIGNED_HIGH;
pub const LINEAR_PCM_IS_NON_INTERLEAVED: u32 = Self::IS_NON_INTERLEAVED;
pub const LINEAR_PCM_IS_NON_MIXABLE: u32 = Self::IS_NON_MIXABLE;
pub const LINEAR_PCM_SAMPLE_FRACTION_SHIFT: u32 = 7;
pub const LINEAR_PCM_SAMPLE_FRACTION_MASK: u32 = 0x3F << Self::LINEAR_PCM_SAMPLE_FRACTION_SHIFT;
pub const LINEAR_PCM_ARE_ALL_CLEAR: u32 = Self::ARE_ALL_CLEAR;
pub const APPLE_LOSSLESS_FORMAT_FLAG_16_BIT_SOURCE_DATA: u32 = 1;
pub const APPLE_LOSSLESS_FORMAT_FLAG_20_BIT_SOURCE_DATA: u32 = 2;
pub const APPLE_LOSSLESS_FORMAT_FLAG_24_BIT_SOURCE_DATA: u32 = 3;
pub const APPLE_LOSSLESS_FORMAT_FLAG_32_BIT_SOURCE_DATA: u32 = 4;
}
impl std::fmt::Display for AudioFormatFlags {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut flags = Vec::new();
if self.0 & Self::IS_FLOAT != 0 {
flags.push("FLOAT");
}
if self.0 & Self::IS_BIG_ENDIAN != 0 {
flags.push("BIG_ENDIAN");
}
if self.0 & Self::IS_SIGNED_INTEGER != 0 {
flags.push("SIGNED_INTEGER");
}
if self.0 & Self::IS_PACKED != 0 {
flags.push("PACKED");
}
if self.0 & Self::IS_ALIGNED_HIGH != 0 {
flags.push("ALIGNED_HIGH");
}
if self.0 & Self::IS_NON_INTERLEAVED != 0 {
flags.push("NON_INTERLEAVED");
}
if self.0 & Self::IS_NON_MIXABLE != 0 {
flags.push("NON_MIXABLE");
}
if self.0 & Self::ARE_ALL_CLEAR != 0 {
flags.push("ALL_CLEAR");
}
if flags.is_empty() {
write!(f, "NONE")
} else {
write!(f, "{}", flags.join(" | "))
}
}
}
impl std::fmt::Debug for AudioFormatFlags {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "AudioFormatFlags({})", self)
}
}
impl From<u32> for AudioFormatFlags {
fn from(value: u32) -> Self {
Self(value)
}
}
/// [Apple's documentation](https://developer.apple.com/documentation/coreaudiotypes/audiostreambasicdescription?language=objc)
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_snake_case)]
pub struct AudioStreamBasicDescription {
pub mSampleRate: f64,
pub mFormatID: u32,
pub mFormatFlags: u32,
pub mBytesPerPacket: u32,
pub mFramesPerPacket: u32,
pub mBytesPerFrame: u32,
pub mChannelsPerFrame: u32,
pub mBitsPerChannel: u32,
pub mReserved: u32,
}
unsafe impl Encode for AudioStreamBasicDescription {
const ENCODING: Encoding = Encoding::Struct(
"AudioStreamBasicDescription",
&[
<f64>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
],
);
}
unsafe impl RefEncode for AudioStreamBasicDescription {
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
}
#[derive(Debug, Clone, Copy)]
#[repr(transparent)]
pub struct AudioStreamDescription(pub(crate) AudioStreamBasicDescription);
impl Display for AudioStreamDescription {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"AudioStreamBasicDescription {{ mSampleRate: {}, mFormatID: {:?}, mFormatFlags: {}, \
mBytesPerPacket: {}, mFramesPerPacket: {}, mBytesPerFrame: {}, mChannelsPerFrame: {}, \
mBitsPerChannel: {}, mReserved: {} }}",
self.0.mSampleRate,
AudioFormatID::from(self.0.mFormatID),
AudioFormatFlags(self.0.mFormatFlags),
self.0.mBytesPerPacket,
self.0.mFramesPerPacket,
self.0.mBytesPerFrame,
self.0.mChannelsPerFrame,
self.0.mBitsPerChannel,
self.0.mReserved
)
}
}
pub fn read_audio_stream_basic_description(
tap_id: AudioObjectID,
) -> std::result::Result<AudioStreamDescription, CoreAudioError> {
let mut data_size = mem::size_of::<AudioStreamBasicDescription>();
let address = AudioObjectPropertyAddress {
mSelector: kAudioTapPropertyFormat,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut data = AudioStreamBasicDescription {
mSampleRate: 0.0,
mFormatID: 0,
mFormatFlags: 0,
mBytesPerPacket: 0,
mFramesPerPacket: 0,
mBytesPerFrame: 0,
mChannelsPerFrame: 0,
mBitsPerChannel: 0,
mReserved: 0,
};
let status = unsafe {
AudioObjectGetPropertyData(
tap_id,
&address,
0,
ptr::null_mut(),
(&mut data_size as *mut usize).cast(),
(&mut data as *mut AudioStreamBasicDescription).cast(),
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::GetAudioStreamBasicDescriptionFailed(status));
}
Ok(AudioStreamDescription(data))
}

View File

@@ -0,0 +1,71 @@
use std::ptr;
use objc2::{
msg_send,
runtime::{AnyClass, AnyObject},
AllocAnyThread,
};
use objc2_foundation::{NSDictionary, NSError, NSNumber, NSString, NSUInteger, NSURL};
use crate::{
av_audio_format::AVAudioFormat, av_audio_pcm_buffer::AVAudioPCMBuffer, error::CoreAudioError,
};
#[allow(unused)]
pub(crate) struct AVAudioFile {
inner: *mut AnyObject,
}
#[allow(unused)]
impl AVAudioFile {
pub(crate) fn new(url: &str, format: &AVAudioFormat) -> Result<Self, CoreAudioError> {
let cls = AnyClass::get(c"AVAudioFile").ok_or(CoreAudioError::AVAudioFileClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocAVAudioFileFailed);
}
let url: &NSURL = &*unsafe { NSURL::fileURLWithPath(&NSString::from_str(url)) };
let settings = &*NSDictionary::from_retained_objects(
&[
&*NSString::from_str("AVFormatIDKey"),
&*NSString::from_str("AVSampleRateKey"),
&*NSString::from_str("AVNumberOfChannelsKey"),
],
&[
NSNumber::initWithUnsignedInt(
NSNumber::alloc(),
format.audio_stream_basic_description.0.mFormatID,
),
NSNumber::initWithDouble(NSNumber::alloc(), format.get_sample_rate()),
NSNumber::initWithUnsignedInt(NSNumber::alloc(), format.get_channel_count()),
],
);
let is_interleaved = format.is_interleaved();
let mut error: *mut NSError = ptr::null_mut();
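    // AVAudioCommonFormat raw value 1 == AVAudioPCMFormatFloat32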
let common_format: NSUInteger = 1;
let obj: *mut AnyObject = unsafe {
msg_send![
obj,
initForWriting: url,
settings: settings,
commonFormat: common_format,
interleaved: is_interleaved,
error: &mut error
]
};
if obj.is_null() {
return Err(CoreAudioError::InitAVAudioFileFailed);
}
Ok(Self { inner: obj })
}
pub(crate) fn write(&self, buffer: AVAudioPCMBuffer) -> Result<(), CoreAudioError> {
let mut error: *mut NSError = ptr::null_mut();
let success: bool =
unsafe { msg_send![self.inner, writeFromBuffer: buffer.inner, error: &mut error] };
if !success {
return Err(CoreAudioError::WriteAVAudioFileFailed);
}
Ok(())
}
}

View File

@@ -0,0 +1,95 @@
use objc2::{
msg_send,
runtime::{AnyClass, AnyObject},
Encode, Encoding, RefEncode,
};
use crate::{audio_stream_basic_desc::AudioStreamDescription, error::CoreAudioError};
#[derive(Debug)]
#[allow(unused)]
pub(crate) struct AVAudioFormat {
pub(crate) inner: AVAudioFormatRef,
pub(crate) audio_stream_basic_description: AudioStreamDescription,
}
#[repr(transparent)]
#[derive(Debug, Clone, Copy)]
pub(crate) struct AVAudioFormatRef(pub(crate) *mut AnyObject);
unsafe impl Encode for AVAudioFormatRef {
const ENCODING: Encoding = Encoding::Struct(
"AVAudioFormat",
&[
Encoding::Double,
Encoding::UInt,
Encoding::Pointer(&Encoding::Struct(
"AVAudioChannelLayout",
&[
Encoding::UInt,
Encoding::UInt,
Encoding::Pointer(&Encoding::Struct(
"AudioChannelLayout",
&[
Encoding::UInt,
Encoding::UInt,
Encoding::Array(
1,
&Encoding::Struct(
"AudioChannelDescription",
&[
Encoding::UInt,
Encoding::UInt,
Encoding::Array(3, &Encoding::Float),
],
),
),
Encoding::UInt,
Encoding::UInt,
],
)),
Encoding::UInt,
],
)),
Encoding::Pointer(&Encoding::Object),
],
);
}
unsafe impl RefEncode for AVAudioFormatRef {
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
}
#[allow(unused)]
impl AVAudioFormat {
pub fn new(
audio_stream_basic_description: AudioStreamDescription,
) -> Result<Self, CoreAudioError> {
let cls = AnyClass::get(c"AVAudioFormat").ok_or(CoreAudioError::AVAudioFormatClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocAVAudioFormatFailed);
}
let obj: *mut AnyObject =
unsafe { msg_send![obj, initWithStreamDescription: &audio_stream_basic_description.0] };
if obj.is_null() {
return Err(CoreAudioError::InitAVAudioFormatFailed);
}
Ok(Self {
inner: AVAudioFormatRef(obj),
audio_stream_basic_description,
})
}
pub(crate) fn get_sample_rate(&self) -> f64 {
unsafe { msg_send![self.inner.0, sampleRate] }
}
pub(crate) fn get_channel_count(&self) -> u32 {
unsafe { msg_send![self.inner.0, channelCount] }
}
pub(crate) fn is_interleaved(&self) -> bool {
unsafe { msg_send![self.inner.0, isInterleaved] }
}
}

View File

@@ -0,0 +1,35 @@
use block2::RcBlock;
use objc2::{
msg_send,
runtime::{AnyClass, AnyObject},
};
use crate::{av_audio_format::AVAudioFormat, error::CoreAudioError, tap_audio::AudioBufferList};
#[allow(unused)]
pub(crate) struct AVAudioPCMBuffer {
pub(crate) inner: *mut AnyObject,
}
#[allow(unused)]
impl AVAudioPCMBuffer {
pub(crate) fn new(
audio_format: &AVAudioFormat,
buffer_list: *const AudioBufferList,
) -> Result<Self, CoreAudioError> {
let cls =
AnyClass::get(c"AVAudioPCMBuffer").ok_or(CoreAudioError::AVAudioPCMBufferClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocAVAudioPCMBufferFailed);
}
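    // No-op deallocator: the AudioBufferList passed via bufferListNoCopy stays owned by the caller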
let deallocator = RcBlock::new(|_buffer_list: *const AudioBufferList| {});
let obj: *mut AnyObject = unsafe {
msg_send![obj, initWithPCMFormat: audio_format.inner.0, bufferListNoCopy: buffer_list, deallocator: &*deallocator]
};
if obj.is_null() {
return Err(CoreAudioError::InitAVAudioPCMBufferFailed);
}
Ok(Self { inner: obj })
}
}

View File

@@ -0,0 +1,84 @@
use core_foundation::{
base::{FromVoid, ItemRef},
string::CFString,
};
use coreaudio::sys::AudioObjectID;
use objc2::{
msg_send,
runtime::{AnyClass, AnyObject},
AllocAnyThread,
};
use objc2_foundation::{NSArray, NSNumber, NSString, NSUUID};
use crate::error::CoreAudioError;
pub(crate) struct CATapDescription {
pub(crate) inner: *mut AnyObject,
}
impl CATapDescription {
pub fn init_stereo_mixdown_of_processes(
process: AudioObjectID,
) -> std::result::Result<Self, CoreAudioError> {
let cls =
AnyClass::get(c"CATapDescription").ok_or(CoreAudioError::CATapDescriptionClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocCATapDescriptionFailed);
}
let processes_array =
NSArray::from_retained_slice(&[NSNumber::initWithUnsignedInt(NSNumber::alloc(), process)]);
let obj: *mut AnyObject =
unsafe { msg_send![obj, initStereoMixdownOfProcesses: &*processes_array] };
if obj.is_null() {
return Err(CoreAudioError::InitStereoMixdownOfProcessesFailed);
}
Ok(Self { inner: obj })
}
pub fn init_stereo_global_tap_but_exclude_processes(
processes: &[AudioObjectID],
) -> std::result::Result<Self, CoreAudioError> {
let cls =
AnyClass::get(c"CATapDescription").ok_or(CoreAudioError::CATapDescriptionClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocCATapDescriptionFailed);
}
let processes_array = NSArray::from_retained_slice(
processes
.iter()
.map(|p| NSNumber::initWithUnsignedInt(NSNumber::alloc(), *p))
.collect::<Vec<_>>()
.as_slice(),
);
let obj: *mut AnyObject =
      unsafe { msg_send![obj, initStereoGlobalTapButExcludeProcesses: &*processes_array] };
if obj.is_null() {
return Err(CoreAudioError::InitStereoMixdownOfProcessesFailed);
}
Ok(Self { inner: obj })
}
pub fn get_uuid(&self) -> std::result::Result<ItemRef<CFString>, CoreAudioError> {
let uuid: *mut NSUUID = unsafe { msg_send![self.inner, UUID] };
if uuid.is_null() {
return Err(CoreAudioError::GetCATapDescriptionUUIDFailed);
}
let uuid_string: *mut NSString = unsafe { msg_send![uuid, UUIDString] };
if uuid_string.is_null() {
return Err(CoreAudioError::ConvertUUIDToCFStringFailed);
}
Ok(unsafe { CFString::from_void(uuid_string.cast()) })
}
}
impl Drop for CATapDescription {
fn drop(&mut self) {
unsafe {
let _: () = msg_send![self.inner, release];
}
}
}

View File

@@ -0,0 +1,66 @@
use std::{mem, ptr};
use core_foundation::{base::TCFType, string::CFString};
use coreaudio::sys::{
kAudioDevicePropertyDeviceUID, kAudioHardwareNoError, kAudioObjectPropertyElementMain,
kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, AudioDeviceID,
AudioObjectGetPropertyData, AudioObjectID, AudioObjectPropertyAddress, CFStringRef,
};
use crate::error::CoreAudioError;
pub(crate) fn get_device_uid(
device_id: AudioDeviceID,
) -> std::result::Result<CFString, CoreAudioError> {
let system_output_id = get_device_audio_id(device_id)?;
let address = AudioObjectPropertyAddress {
mSelector: kAudioDevicePropertyDeviceUID,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut output_uid: CFStringRef = ptr::null_mut();
let mut data_size = mem::size_of::<CFStringRef>();
let status = unsafe {
AudioObjectGetPropertyData(
system_output_id,
&address,
0,
ptr::null_mut(),
(&mut data_size as *mut usize).cast(),
(&mut output_uid as *mut CFStringRef).cast(),
)
};
if status != 0 {
return Err(CoreAudioError::GetDeviceUidFailed(status));
}
Ok(unsafe { CFString::wrap_under_create_rule(output_uid.cast()) })
}
pub(crate) fn get_device_audio_id(
device_id: AudioDeviceID,
) -> std::result::Result<AudioObjectID, CoreAudioError> {
let mut system_output_id: AudioObjectID = 0;
let mut data_size = mem::size_of::<AudioObjectID>();
let address = AudioObjectPropertyAddress {
mSelector: device_id,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let status = unsafe {
AudioObjectGetPropertyData(
kAudioObjectSystemObject,
&address,
0,
ptr::null_mut(),
(&mut data_size as *mut usize).cast(),
(&mut system_output_id as *mut AudioObjectID).cast(),
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::GetDefaultDeviceFailed(status));
}
Ok(system_output_id)
}

View File

@@ -0,0 +1,81 @@
use thiserror::Error;
#[derive(Error, Debug)]
pub enum CoreAudioError {
#[error("Map pid {0} to AudioObjectID failed")]
PidNotFound(i32),
#[error("Create process tap failed, status: {0}")]
CreateProcessTapFailed(i32),
#[error("Get default device failed, status: {0}")]
GetDefaultDeviceFailed(i32),
#[error("Get device uid failed, status: {0}")]
GetDeviceUidFailed(i32),
#[error("Create aggregate device failed, status: {0}")]
CreateAggregateDeviceFailed(i32),
#[error("Get process object list size failed, status: {0}")]
GetProcessObjectListSizeFailed(i32),
#[error("Get process object list failed, status: {0}")]
GetProcessObjectListFailed(i32),
#[error("AudioObjectGetPropertyDataSize failed, status: {0}")]
AudioObjectGetPropertyDataSizeFailed(i32),
#[error("CATapDescription class not found")]
CATapDescriptionClassNotFound,
#[error("Alloc CATapDescription failed")]
AllocCATapDescriptionFailed,
#[error("Call initStereoMixdownOfProcesses on CATapDescription failed")]
InitStereoMixdownOfProcessesFailed,
#[error("Get UUID on CATapDescription failed")]
GetCATapDescriptionUUIDFailed,
#[error("Get mute behavior on CATapDescription failed")]
GetMuteBehaviorFailed,
#[error("Convert UUID to CFString failed")]
ConvertUUIDToCFStringFailed,
#[error("Get AudioStreamBasicDescription failed, status: {0}")]
GetAudioStreamBasicDescriptionFailed(i32),
#[error("AVAudioFormat class not found")]
AVAudioFormatClassNotFound,
#[error("Alloc AVAudioFormat failed")]
AllocAVAudioFormatFailed,
#[error("Init AVAudioFormat failed")]
InitAVAudioFormatFailed,
#[error("Create IOProcIDWithBlock failed, status: {0}")]
CreateIOProcIDWithBlockFailed(i32),
#[error("Get hardware devices failed, status: {0}")]
GetHardwareDevicesFailed(i32),
#[error("AudioDeviceStart failed, status: {0}")]
AudioDeviceStartFailed(i32),
#[error("AudioDeviceStop failed, status: {0}")]
AudioDeviceStopFailed(i32),
#[error("AudioDeviceDestroyIOProcID failed, status: {0}")]
AudioDeviceDestroyIOProcIDFailed(i32),
#[error("AudioHardwareDestroyAggregateDevice failed, status: {0}")]
AudioHardwareDestroyAggregateDeviceFailed(i32),
#[error("AudioHardwareDestroyProcessTap failed, status: {0}")]
AudioHardwareDestroyProcessTapFailed(i32),
#[error("Get aggregate device property full sub device list failed, status: {0}")]
GetAggregateDevicePropertyFullSubDeviceListFailed(i32),
#[error("Add property listener block failed, status: {0}")]
AddPropertyListenerBlockFailed(i32),
#[error("AudioObjectGetPropertyData failed, status: {0}")]
AudioObjectGetPropertyDataFailed(i32),
#[error("AVAudioFile class not found")]
AVAudioFileClassNotFound,
#[error("Alloc AVAudioFile failed")]
AllocAVAudioFileFailed,
#[error("Init AVAudioFile failed")]
InitAVAudioFileFailed,
#[error("AVAudioPCMBuffer class not found")]
AVAudioPCMBufferClassNotFound,
#[error("Alloc AVAudioPCMBuffer failed")]
AllocAVAudioPCMBufferFailed,
#[error("Init AVAudioPCMBuffer failed")]
InitAVAudioPCMBufferFailed,
#[error("Write AVAudioFile failed")]
WriteAVAudioFileFailed,
}
impl From<CoreAudioError> for napi::Error {
fn from(value: CoreAudioError) -> Self {
napi::Error::new(napi::Status::GenericFailure, value.to_string())
}
}

View File

@@ -0,0 +1,11 @@
pub mod audio_stream_basic_desc;
pub mod av_audio_file;
pub mod av_audio_format;
pub mod av_audio_pcm_buffer;
pub mod ca_tap_description;
pub mod device;
pub(crate) mod error;
pub mod pid;
pub mod queue;
pub mod screen_capture_kit;
pub mod tap_audio;

View File

@@ -0,0 +1,98 @@
use std::{mem::MaybeUninit, ptr};
use coreaudio::sys::{
kAudioHardwareNoError, kAudioHardwarePropertyProcessObjectList, kAudioObjectPropertyElementMain,
kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, AudioObjectGetPropertyData,
AudioObjectGetPropertyDataSize, AudioObjectID, AudioObjectPropertyAddress,
AudioObjectPropertySelector,
};
use crate::error::CoreAudioError;
pub fn audio_process_list() -> Result<Vec<AudioObjectID>, CoreAudioError> {
let address = AudioObjectPropertyAddress {
mSelector: kAudioHardwarePropertyProcessObjectList,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut data_size = 0u32;
let status = unsafe {
AudioObjectGetPropertyDataSize(
kAudioObjectSystemObject,
&address,
0,
ptr::null_mut(),
&mut data_size,
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::GetProcessObjectListSizeFailed(status));
}
  // data_size is in bytes; convert to a number of AudioObjectID entries
  let mut process_list: Vec<AudioObjectID> =
    vec![0; data_size as usize / std::mem::size_of::<AudioObjectID>()];
let status = unsafe {
AudioObjectGetPropertyData(
kAudioObjectSystemObject,
&address,
0,
ptr::null_mut(),
(&mut data_size as *mut u32).cast(),
process_list.as_mut_ptr().cast(),
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::GetProcessObjectListFailed(status));
}
Ok(process_list)
}
pub fn get_process_property<T: Sized>(
object: &AudioObjectID,
selector: AudioObjectPropertySelector,
) -> Result<T, CoreAudioError> {
let object_id = *object;
let address = AudioObjectPropertyAddress {
mSelector: selector,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut data_size = 0u32;
let status = unsafe {
AudioObjectGetPropertyDataSize(object_id, &address, 0, ptr::null_mut(), &mut data_size)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::AudioObjectGetPropertyDataSizeFailed(status));
}
get_property_data(object_id, &address, &mut data_size)
}
pub fn get_property_data<T: Sized>(
object_id: AudioObjectID,
address: &AudioObjectPropertyAddress,
data_size: &mut u32,
) -> Result<T, CoreAudioError> {
let mut property = MaybeUninit::<T>::uninit();
let status = unsafe {
AudioObjectGetPropertyData(
object_id,
address,
0,
ptr::null_mut(),
(data_size as *mut u32).cast(),
property.as_mut_ptr().cast(),
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::AudioObjectGetPropertyDataFailed(status));
}
Ok(unsafe { property.assume_init() })
}

View File

@@ -0,0 +1,12 @@
pub(crate) fn create_audio_tap_queue() -> *mut dispatch2::ffi::dispatch_queue_s {
let queue_attr = unsafe {
dispatch2::ffi::dispatch_queue_attr_make_with_qos_class(
dispatch2::ffi::DISPATCH_QUEUE_SERIAL,
dispatch2::ffi::dispatch_qos_class_t::QOS_CLASS_USER_INITIATED,
0,
)
};
unsafe {
dispatch2::ffi::dispatch_queue_create(c"ProcessTapRecorder".as_ptr().cast(), queue_attr)
}
}

View File

@@ -0,0 +1,623 @@
use std::{
collections::HashMap,
ffi::c_void,
ptr,
sync::{
atomic::{AtomicPtr, Ordering},
Arc, LazyLock, RwLock,
},
};
use block2::{Block, RcBlock};
use core_foundation::{
base::TCFType,
string::{CFString, CFStringRef},
};
use coreaudio::sys::{
kAudioHardwarePropertyProcessObjectList, kAudioObjectPropertyElementMain,
kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, kAudioProcessPropertyBundleID,
kAudioProcessPropertyIsRunning, kAudioProcessPropertyIsRunningInput, kAudioProcessPropertyPID,
AudioObjectAddPropertyListenerBlock, AudioObjectID, AudioObjectPropertyAddress,
AudioObjectRemovePropertyListenerBlock,
};
use napi::{
bindgen_prelude::{Buffer, Error, Float32Array, Result, Status},
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
};
use napi_derive::napi;
use objc2::{
msg_send,
rc::Retained,
runtime::{AnyClass, AnyObject},
Encode, Encoding,
};
use objc2_foundation::NSString;
use screencapturekit::shareable_content::SCShareableContent;
use uuid::Uuid;
use crate::{
error::CoreAudioError,
pid::{audio_process_list, get_process_property},
tap_audio::{AggregateDevice, AudioTapStream},
};
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct NSSize {
width: f64,
height: f64,
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct NSPoint {
x: f64,
y: f64,
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct NSRect {
origin: NSPoint,
size: NSSize,
}
unsafe impl Encode for NSSize {
const ENCODING: Encoding = Encoding::Struct("NSSize", &[f64::ENCODING, f64::ENCODING]);
}
unsafe impl Encode for NSPoint {
const ENCODING: Encoding = Encoding::Struct("NSPoint", &[f64::ENCODING, f64::ENCODING]);
}
unsafe impl Encode for NSRect {
const ENCODING: Encoding = Encoding::Struct("NSRect", &[<NSPoint>::ENCODING, <NSSize>::ENCODING]);
}
static RUNNING_APPLICATIONS: LazyLock<RwLock<Vec<AudioObjectID>>> =
LazyLock::new(|| RwLock::new(audio_process_list().expect("Failed to get running applications")));
static APPLICATION_STATE_CHANGED_SUBSCRIBERS: LazyLock<
RwLock<HashMap<AudioObjectID, HashMap<Uuid, Arc<ThreadsafeFunction<(), ()>>>>>,
> = LazyLock::new(|| RwLock::new(HashMap::new()));
static APPLICATION_STATE_CHANGED_LISTENER_BLOCKS: LazyLock<
RwLock<HashMap<AudioObjectID, AtomicPtr<c_void>>>,
> = LazyLock::new(|| RwLock::new(HashMap::new()));
static NSRUNNING_APPLICATION_CLASS: LazyLock<Option<&'static AnyClass>> =
LazyLock::new(|| AnyClass::get(c"NSRunningApplication"));
static AVCAPTUREDEVICE_CLASS: LazyLock<Option<&'static AnyClass>> =
LazyLock::new(|| AnyClass::get(c"AVCaptureDevice"));
static SCSTREAM_CLASS: LazyLock<Option<&'static AnyClass>> =
LazyLock::new(|| AnyClass::get(c"SCStream"));
struct TappableApplication {
object_id: AudioObjectID,
}
impl TappableApplication {
fn new(object_id: AudioObjectID) -> Self {
Self { object_id }
}
fn process_id(&self) -> std::result::Result<i32, CoreAudioError> {
get_process_property(&self.object_id, kAudioProcessPropertyPID)
}
fn bundle_identifier(&self) -> Result<String> {
let bundle_id: CFStringRef =
get_process_property(&self.object_id, kAudioProcessPropertyBundleID)?;
Ok(unsafe { CFString::wrap_under_get_rule(bundle_id) }.to_string())
}
fn name(&self) -> Result<String> {
let pid = self.process_id()?;
// Get NSRunningApplication class
let running_app_class = NSRUNNING_APPLICATION_CLASS.as_ref().ok_or_else(|| {
Error::new(
Status::GenericFailure,
"NSRunningApplication class not found",
)
})?;
// Get running application with PID
let running_app: *mut AnyObject =
unsafe { msg_send![*running_app_class, runningApplicationWithProcessIdentifier: pid] };
if running_app.is_null() {
return Ok(String::new());
}
// Get localized name
let name: *mut NSString = unsafe { msg_send![running_app, localizedName] };
if name.is_null() {
return Ok(String::new());
}
// Create a safe wrapper and convert to string
let name = unsafe {
Retained::from_raw(name).ok_or_else(|| {
Error::new(
Status::GenericFailure,
"Failed to create safe wrapper for localizedName",
)
})?
};
Ok(name.to_string())
}
fn icon(&self) -> Result<Vec<u8>> {
let pid = self.process_id()?;
// Get NSRunningApplication class
let running_app_class = NSRUNNING_APPLICATION_CLASS.as_ref().ok_or_else(|| {
Error::new(
Status::GenericFailure,
"NSRunningApplication class not found",
)
})?;
// Get running application with PID
let running_app: *mut AnyObject =
unsafe { msg_send![*running_app_class, runningApplicationWithProcessIdentifier: pid] };
if running_app.is_null() {
return Ok(Vec::new());
}
unsafe {
// Get original icon
let icon: *mut AnyObject = msg_send![running_app, icon];
if icon.is_null() {
return Ok(Vec::new());
}
// Create a new NSImage with 64x64 size
let nsimage_class = AnyClass::get(c"NSImage")
.ok_or_else(|| Error::new(Status::GenericFailure, "NSImage class not found"))?;
let resized_image: *mut AnyObject = msg_send![nsimage_class, alloc];
let resized_image: *mut AnyObject =
msg_send![resized_image, initWithSize: NSSize { width: 64.0, height: 64.0 }];
let _: () = msg_send![resized_image, lockFocus];
// Define drawing rectangle for 64x64 image
let draw_rect = NSRect {
origin: NSPoint { x: 0.0, y: 0.0 },
size: NSSize {
width: 64.0,
height: 64.0,
},
};
// Draw the original icon into draw_rect (using NSCompositingOperationCopy = 2)
let _: () = msg_send![icon, drawInRect: draw_rect, fromRect: NSRect { origin: NSPoint { x: 0.0, y: 0.0 }, size: NSSize { width: 0.0, height: 0.0 } }, operation: 2, fraction: 1.0];
let _: () = msg_send![resized_image, unlockFocus];
// Get TIFF representation from the downsized image
let tiff_data: *mut AnyObject = msg_send![resized_image, TIFFRepresentation];
if tiff_data.is_null() {
return Ok(Vec::new());
}
// Create bitmap image rep from TIFF
let bitmap_class = AnyClass::get(c"NSBitmapImageRep")
.ok_or_else(|| Error::new(Status::GenericFailure, "NSBitmapImageRep class not found"))?;
let bitmap: *mut AnyObject = msg_send![bitmap_class, imageRepWithData: tiff_data];
if bitmap.is_null() {
return Ok(Vec::new());
}
// Create properties dictionary with compression factor
let dict_class = AnyClass::get(c"NSMutableDictionary").ok_or_else(|| {
Error::new(
Status::GenericFailure,
"NSMutableDictionary class not found",
)
})?;
let properties: *mut AnyObject = msg_send![dict_class, dictionary];
// Add compression properties
let compression_key = NSString::from_str("NSImageCompressionFactor");
let number_class = AnyClass::get(c"NSNumber")
.ok_or_else(|| Error::new(Status::GenericFailure, "NSNumber class not found"))?;
let compression_value: *mut AnyObject = msg_send![number_class, numberWithDouble: 0.8];
let _: () = msg_send![properties, setObject: compression_value, forKey: &*compression_key];
// Get PNG data with properties
let png_data: *mut AnyObject =
msg_send![bitmap, representationUsingType: 4, properties: properties]; // 4 = PNG
if png_data.is_null() {
return Ok(Vec::new());
}
// Get bytes from NSData
let bytes: *const u8 = msg_send![png_data, bytes];
let length: usize = msg_send![png_data, length];
if bytes.is_null() {
return Ok(Vec::new());
}
// Copy bytes into a Vec<u8>
let data = std::slice::from_raw_parts(bytes, length).to_vec();
Ok(data)
}
}
}
#[napi]
pub struct Application {
inner: TappableApplication,
pub(crate) object_id: AudioObjectID,
pub(crate) process_id: i32,
pub(crate) bundle_identifier: String,
pub(crate) name: String,
}
#[napi]
impl Application {
fn new(app: TappableApplication) -> Result<Self> {
let object_id = app.object_id;
let bundle_identifier = app.bundle_identifier()?;
let name = app.name()?;
let process_id = app.process_id()?;
Ok(Self {
inner: app,
object_id,
process_id,
bundle_identifier,
name,
})
}
#[napi]
pub fn tap_global_audio(
excluded_processes: Option<Vec<&Application>>,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
) -> Result<AudioTapStream> {
let mut device = AggregateDevice::create_global_tap_but_exclude_processes(
&excluded_processes
.unwrap_or_default()
.iter()
.map(|app| app.object_id)
.collect::<Vec<_>>(),
)?;
device.start(audio_stream_callback)
}
#[napi(getter)]
pub fn process_id(&self) -> i32 {
self.process_id
}
#[napi(getter)]
pub fn bundle_identifier(&self) -> String {
self.bundle_identifier.clone()
}
#[napi(getter)]
pub fn name(&self) -> String {
self.name.clone()
}
#[napi(getter)]
pub fn icon(&self) -> Result<Buffer> {
let icon = self.inner.icon()?;
Ok(Buffer::from(icon))
}
#[napi(getter)]
pub fn get_is_running(&self) -> Result<bool> {
Ok(get_process_property(
&self.object_id,
kAudioProcessPropertyIsRunningInput,
)?)
}
#[napi]
pub fn tap_audio(
&self,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
) -> Result<AudioTapStream> {
let mut device = AggregateDevice::new(self)?;
device.start(audio_stream_callback)
}
}
#[napi]
pub struct ApplicationListChangedSubscriber {
listener_block: *const Block<dyn Fn(u32, *mut c_void)>,
}
#[napi]
impl ApplicationListChangedSubscriber {
#[napi]
pub fn unsubscribe(&self) -> Result<()> {
let status = unsafe {
AudioObjectRemovePropertyListenerBlock(
kAudioObjectSystemObject,
&AudioObjectPropertyAddress {
mSelector: kAudioHardwarePropertyProcessObjectList,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
},
ptr::null_mut(),
self.listener_block.cast_mut().cast(),
)
};
if status != 0 {
return Err(Error::new(
Status::GenericFailure,
"Failed to remove property listener",
));
}
Ok(())
}
}
#[napi]
pub struct ApplicationStateChangedSubscriber {
id: Uuid,
object_id: AudioObjectID,
}
#[napi]
impl ApplicationStateChangedSubscriber {
#[napi]
pub fn unsubscribe(&self) {
if let Ok(mut lock) = APPLICATION_STATE_CHANGED_SUBSCRIBERS.write() {
if let Some(subscribers) = lock.get_mut(&self.object_id) {
subscribers.remove(&self.id);
if subscribers.is_empty() {
lock.remove(&self.object_id);
if let Some(listener_block) = APPLICATION_STATE_CHANGED_LISTENER_BLOCKS
.write()
.ok()
.as_mut()
.and_then(|map| map.remove(&self.object_id))
{
unsafe {
AudioObjectRemovePropertyListenerBlock(
self.object_id,
&AudioObjectPropertyAddress {
mSelector: kAudioProcessPropertyIsRunning,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
},
ptr::null_mut(),
listener_block.load(Ordering::Relaxed),
);
}
}
}
}
}
}
}
#[napi]
pub struct ShareableContent {
_inner: SCShareableContent,
}
#[napi]
#[derive(Default)]
pub struct RecordingPermissions {
pub audio: bool,
pub screen: bool,
}
#[napi]
impl ShareableContent {
#[napi]
pub fn on_application_list_changed(
callback: Arc<ThreadsafeFunction<(), ()>>,
) -> Result<ApplicationListChangedSubscriber> {
let callback_block: RcBlock<dyn Fn(u32, *mut c_void)> =
RcBlock::new(move |_in_number_addresses, _in_addresses: *mut c_void| {
if let Err(err) = RUNNING_APPLICATIONS
.write()
.map_err(|_| {
Error::new(
Status::GenericFailure,
"Poisoned RwLock while writing RunningApplications",
)
})
.and_then(|mut running_applications| {
audio_process_list().map_err(From::from).map(|apps| {
*running_applications = apps;
})
})
{
callback.call(Err(err), ThreadsafeFunctionCallMode::NonBlocking);
} else {
callback.call(Ok(()), ThreadsafeFunctionCallMode::NonBlocking);
}
});
let listener_block = &*callback_block as *const Block<dyn Fn(u32, *mut c_void)>;
let status = unsafe {
AudioObjectAddPropertyListenerBlock(
kAudioObjectSystemObject,
&AudioObjectPropertyAddress {
mSelector: kAudioHardwarePropertyProcessObjectList,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
},
ptr::null_mut(),
listener_block.cast_mut().cast(),
)
};
if status != 0 {
return Err(Error::new(
Status::GenericFailure,
"Failed to add property listener",
));
}
Ok(ApplicationListChangedSubscriber { listener_block })
}
#[napi]
pub fn on_app_state_changed(
app: &Application,
callback: Arc<ThreadsafeFunction<(), ()>>,
) -> Result<ApplicationStateChangedSubscriber> {
let id = Uuid::new_v4();
let mut lock = APPLICATION_STATE_CHANGED_SUBSCRIBERS.write().map_err(|_| {
Error::new(
Status::GenericFailure,
"Poisoned RwLock while writing ApplicationStateChangedSubscribers",
)
})?;
if let Some(subscribers) = lock.get_mut(&app.object_id) {
subscribers.insert(id, callback);
} else {
let object_id = app.object_id;
let list_change: RcBlock<dyn Fn(u32, *mut c_void)> =
RcBlock::new(move |in_number_addresses, in_addresses: *mut c_void| {
let addresses = unsafe {
std::slice::from_raw_parts(
in_addresses as *mut AudioObjectPropertyAddress,
in_number_addresses as usize,
)
};
for address in addresses {
if address.mSelector == kAudioProcessPropertyIsRunning {
if let Some(subscribers) = APPLICATION_STATE_CHANGED_SUBSCRIBERS
.read()
.ok()
.as_ref()
.and_then(|map| map.get(&object_id))
{
for callback in subscribers.values() {
callback.call(Ok(()), ThreadsafeFunctionCallMode::NonBlocking);
}
}
}
}
});
let address = AudioObjectPropertyAddress {
mSelector: kAudioProcessPropertyIsRunning,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let listener_block = &*list_change as *const Block<dyn Fn(u32, *mut c_void)>;
let status = unsafe {
AudioObjectAddPropertyListenerBlock(
app.object_id,
&address,
ptr::null_mut(),
listener_block.cast_mut().cast(),
)
};
if status != 0 {
return Err(Error::new(
Status::GenericFailure,
"Failed to add property listener",
));
}
let subscribers = {
let mut map = HashMap::new();
map.insert(id, callback);
map
};
lock.insert(app.object_id, subscribers);
}
Ok(ApplicationStateChangedSubscriber {
id,
object_id: app.object_id,
})
}
#[napi(constructor)]
pub fn new() -> Result<Self> {
Ok(Self {
_inner: SCShareableContent::get().map_err(|err| Error::new(Status::GenericFailure, err))?,
})
}
#[napi]
pub fn applications(&self) -> Result<Vec<Application>> {
RUNNING_APPLICATIONS
.read()
.map_err(|_| {
Error::new(
Status::GenericFailure,
"Poisoned RwLock while reading RunningApplications",
)
})?
.iter()
.filter_map(|id| {
let app = TappableApplication::new(*id);
if !app.bundle_identifier().ok()?.is_empty() {
Some(Application::new(app))
} else {
None
}
})
.collect()
}
#[napi]
pub fn application_with_process_id(&self, process_id: u32) -> Result<Application> {
// Find the AudioObjectID for the given process ID
let audio_object_id = {
let running_apps = RUNNING_APPLICATIONS.read().map_err(|_| {
Error::new(
Status::GenericFailure,
"Poisoned RwLock while reading RunningApplications",
)
})?;
*running_apps
.iter()
.find(|&&id| {
let app = TappableApplication::new(id);
app
.process_id()
.map(|pid| pid as u32 == process_id)
.unwrap_or(false)
})
.ok_or_else(|| {
Error::new(
Status::GenericFailure,
format!("No application found with process ID {}", process_id),
)
})?
};
let app = TappableApplication::new(audio_object_id);
Application::new(app)
}
#[napi]
pub fn check_recording_permissions(&self) -> Result<RecordingPermissions> {
let av_capture_class = AVCAPTUREDEVICE_CLASS
.as_ref()
.ok_or_else(|| Error::new(Status::GenericFailure, "AVCaptureDevice class not found"))?;
let sc_stream_class = SCSTREAM_CLASS
.as_ref()
.ok_or_else(|| Error::new(Status::GenericFailure, "SCStream class not found"))?;
let media_type = NSString::from_str("com.apple.avfoundation.avcapturedevice.built-in_audio");
let audio_status: i32 = unsafe {
msg_send![
*av_capture_class,
authorizationStatusForMediaType: &*media_type
]
};
let screen_status: bool = unsafe { msg_send![*sc_stream_class, isScreenCaptureAuthorized] };
Ok(RecordingPermissions {
// AVAuthorizationStatusAuthorized = 3
audio: audio_status == 3,
screen: screen_status,
})
}
}

View File

@@ -0,0 +1,360 @@
use std::{ffi::c_void, sync::Arc};
use block2::{Block, RcBlock};
use core_foundation::{
array::CFArray,
base::{CFType, ItemRef, TCFType},
boolean::CFBoolean,
dictionary::CFDictionary,
string::CFString,
uuid::CFUUID,
};
use coreaudio::sys::{
kAudioAggregateDeviceIsPrivateKey, kAudioAggregateDeviceIsStackedKey,
kAudioAggregateDeviceMainSubDeviceKey, kAudioAggregateDeviceNameKey,
kAudioAggregateDeviceSubDeviceListKey, kAudioAggregateDeviceTapAutoStartKey,
kAudioAggregateDeviceTapListKey, kAudioAggregateDeviceUIDKey, kAudioHardwareNoError,
kAudioHardwarePropertyDefaultInputDevice, kAudioHardwarePropertyDefaultSystemOutputDevice,
kAudioSubDeviceUIDKey, kAudioSubTapDriftCompensationKey, kAudioSubTapUIDKey,
AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID, AudioDeviceIOProcID,
AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice,
AudioHardwareDestroyAggregateDevice, AudioObjectID, AudioTimeStamp, OSStatus,
};
use napi::{
bindgen_prelude::Float32Array,
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
Result,
};
use napi_derive::napi;
use objc2::{runtime::AnyObject, Encode, Encoding, RefEncode};
use crate::{
ca_tap_description::CATapDescription, device::get_device_uid, error::CoreAudioError,
queue::create_audio_tap_queue, screen_capture_kit::Application,
};
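// AudioHardwareCreateProcessTap / AudioHardwareDestroyProcessTap belong to CoreAudio's
// process-tap API (AudioHardwareTapping.h, available on recent macOS releases); they are
// declared manually here because the `coreaudio` sys bindings in use do not cover them.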
extern "C" {
fn AudioHardwareCreateProcessTap(
inDescription: *mut AnyObject,
outTapID: *mut AudioObjectID,
) -> OSStatus;
fn AudioHardwareDestroyProcessTap(tapID: AudioObjectID) -> OSStatus;
}
/// [Apple's documentation](https://developer.apple.com/documentation/coreaudiotypes/audiobuffer?language=objc)
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_snake_case)]
pub struct AudioBuffer {
pub mNumberChannels: u32,
pub mDataByteSize: u32,
pub mData: *mut c_void,
}
unsafe impl Encode for AudioBuffer {
const ENCODING: Encoding = Encoding::Struct(
"AudioBuffer",
&[<u32>::ENCODING, <u32>::ENCODING, <*mut c_void>::ENCODING],
);
}
unsafe impl RefEncode for AudioBuffer {
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
}
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_snake_case)]
pub struct AudioBufferList {
pub mNumberBuffers: u32,
pub mBuffers: [AudioBuffer; 1],
}
unsafe impl Encode for AudioBufferList {
const ENCODING: Encoding = Encoding::Struct(
"AudioBufferList",
&[<u32>::ENCODING, <[AudioBuffer; 1]>::ENCODING],
);
}
unsafe impl RefEncode for AudioBufferList {
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
}
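/// Owns a CoreAudio process tap together with the aggregate device that hosts it.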
pub struct AggregateDevice {
pub tap_id: AudioObjectID,
pub id: AudioObjectID,
}
impl AggregateDevice {
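/// Creates a process tap that mixes the target application's output down to stereo and wraps
/// it, together with the default input and output devices, in a private aggregate device.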
pub fn new(app: &Application) -> Result<Self> {
let mut tap_id: AudioObjectID = 0;
let tap_description = CATapDescription::init_stereo_mixdown_of_processes(app.object_id)?;
let status = unsafe { AudioHardwareCreateProcessTap(tap_description.inner, &mut tap_id) };
if status != 0 {
return Err(CoreAudioError::CreateProcessTapFailed(status).into());
}
let description_dict = Self::create_aggregate_description(tap_id, tap_description.get_uuid()?)?;
let mut aggregate_device_id: AudioObjectID = 0;
let status = unsafe {
AudioHardwareCreateAggregateDevice(
description_dict.as_concrete_TypeRef().cast(),
&mut aggregate_device_id,
)
};
// Check the status and return the appropriate result
if status != 0 {
return Err(CoreAudioError::CreateAggregateDeviceFailed(status).into());
}
Ok(Self {
tap_id,
id: aggregate_device_id,
})
}
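/// Same as `new`, but taps system-wide audio while excluding the given process objects.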
pub fn create_global_tap_but_exclude_processes(processes: &[AudioObjectID]) -> Result<Self> {
let mut tap_id: AudioObjectID = 0;
let tap_description =
CATapDescription::init_stereo_global_tap_but_exclude_processes(processes)?;
let status = unsafe { AudioHardwareCreateProcessTap(tap_description.inner, &mut tap_id) };
if status != 0 {
return Err(CoreAudioError::CreateProcessTapFailed(status).into());
}
let description_dict = Self::create_aggregate_description(tap_id, tap_description.get_uuid()?)?;
let mut aggregate_device_id: AudioObjectID = 0;
let status = unsafe {
AudioHardwareCreateAggregateDevice(
description_dict.as_concrete_TypeRef().cast(),
&mut aggregate_device_id,
)
};
// Check the status and return the appropriate result
if status != 0 {
return Err(CoreAudioError::CreateAggregateDeviceFailed(status).into());
}
Ok(Self {
tap_id,
id: aggregate_device_id,
})
}
pub fn start(
&mut self,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
) -> Result<AudioTapStream> {
let queue = create_audio_tap_queue();
let mut in_proc_id: AudioDeviceIOProcID = None;
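// The block mirrors the five-pointer layout of the CoreAudio device IO callback
// (inNow, inInputData, inInputTime, outOutputData, inOutputTime); only the input side is read.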
let in_io_block: RcBlock<
dyn Fn(*mut c_void, *mut c_void, *mut c_void, *mut c_void, *mut c_void) -> i32,
> = RcBlock::new(
move |_in_now: *mut c_void,
in_input_data: *mut c_void,
in_input_time: *mut c_void,
_out_output_data: *mut c_void,
_in_output_time: *mut c_void| {
let AudioTimeStamp { mSampleTime, .. } = unsafe { &*in_input_time.cast() };
// ignore pre-roll
if *mSampleTime < 0.0 {
return kAudioHardwareNoError as i32;
}
let AudioBufferList { mBuffers, .. } =
unsafe { &mut *in_input_data.cast::<AudioBufferList>() };
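// The tap was created as a stereo mixdown, so the buffer list is expected to hold a single interleaved buffer.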
let [AudioBuffer {
mData,
mNumberChannels,
mDataByteSize,
}] = mBuffers;
// Only create slice if we have valid data
if !mData.is_null() && *mDataByteSize > 0 {
// Calculate total number of samples (accounting for interleaved stereo)
let total_samples = *mDataByteSize as usize / 4; // 4 bytes per f32
// Create a slice of all samples
let samples: &[f32] =
unsafe { std::slice::from_raw_parts(mData.cast::<f32>(), total_samples) };
// Convert to mono if needed
let mono_samples: Vec<f32> = if *mNumberChannels > 1 {
samples
.chunks(*mNumberChannels as usize)
.map(|chunk| chunk.iter().sum::<f32>() / *mNumberChannels as f32)
.collect()
} else {
samples.to_vec()
};
audio_stream_callback.call(
Ok(mono_samples.into()),
ThreadsafeFunctionCallMode::NonBlocking,
);
}
kAudioHardwareNoError as i32
},
);
let status = unsafe {
AudioDeviceCreateIOProcIDWithBlock(
&mut in_proc_id,
self.id,
queue.cast(),
(&*in_io_block
as *const Block<
dyn Fn(*mut c_void, *mut c_void, *mut c_void, *mut c_void, *mut c_void) -> i32,
>)
.cast_mut()
.cast(),
)
};
if status != 0 {
return Err(CoreAudioError::CreateIOProcIDWithBlockFailed(status).into());
}
let status = unsafe { AudioDeviceStart(self.id, in_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceStartFailed(status).into());
}
Ok(AudioTapStream {
device_id: self.id,
tap_id: self.tap_id,
in_proc_id,
stop_called: false,
})
}
fn create_aggregate_description(
tap_id: AudioObjectID,
tap_uuid_string: ItemRef<CFString>,
) -> Result<CFDictionary<CFType, CFType>> {
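// The dictionary describes a private (hidden from other applications), non-stacked aggregate
// device whose sub-devices are the default input and the default system output, with the
// process tap attached and set to auto-start.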
let system_output_uid = get_device_uid(kAudioHardwarePropertyDefaultSystemOutputDevice)?;
let default_input_uid = get_device_uid(kAudioHardwarePropertyDefaultInputDevice)?;
let aggregate_device_name = CFString::new(&format!("Tap-{}", tap_id));
let aggregate_device_uid: uuid::Uuid = CFUUID::new().into();
let aggregate_device_uid_string = aggregate_device_uid.to_string();
// Sub-device UID key and dictionary
let sub_device_output_dict = CFDictionary::from_CFType_pairs(&[(
cfstring_from_bytes_with_nul(kAudioSubDeviceUIDKey).as_CFType(),
system_output_uid.as_CFType(),
)]);
let sub_device_input_dict = CFDictionary::from_CFType_pairs(&[(
cfstring_from_bytes_with_nul(kAudioSubDeviceUIDKey).as_CFType(),
default_input_uid.as_CFType(),
)]);
let tap_device_dict = CFDictionary::from_CFType_pairs(&[
(
cfstring_from_bytes_with_nul(kAudioSubTapDriftCompensationKey).as_CFType(),
CFBoolean::false_value().as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioSubTapUIDKey).as_CFType(),
tap_uuid_string.as_CFType(),
),
]);
let capture_device_list = vec![sub_device_input_dict, sub_device_output_dict];
// Sub-device list
let sub_device_list = CFArray::from_CFTypes(&capture_device_list);
let tap_list = CFArray::from_CFTypes(&[tap_device_dict]);
// Create the aggregate device description dictionary
let description_dict = CFDictionary::from_CFType_pairs(&[
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceNameKey).as_CFType(),
aggregate_device_name.as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceUIDKey).as_CFType(),
CFString::new(aggregate_device_uid_string.as_str()).as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceMainSubDeviceKey).as_CFType(),
system_output_uid.as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceIsPrivateKey).as_CFType(),
CFBoolean::true_value().as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceIsStackedKey).as_CFType(),
CFBoolean::false_value().as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceTapAutoStartKey).as_CFType(),
CFBoolean::true_value().as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceSubDeviceListKey).as_CFType(),
sub_device_list.as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceTapListKey).as_CFType(),
tap_list.as_CFType(),
),
]);
Ok(description_dict)
}
}
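/// Handle returned by `AggregateDevice::start`; exposed to JavaScript so the capture can be stopped and torn down.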
#[napi]
pub struct AudioTapStream {
device_id: AudioObjectID,
tap_id: AudioObjectID,
in_proc_id: AudioDeviceIOProcID,
stop_called: bool,
}
#[napi]
impl AudioTapStream {
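/// Stops capture and tears everything down in order: stop the device, destroy the IO proc,
/// destroy the aggregate device, then destroy the process tap. Calling `stop` again is a no-op.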
#[napi]
pub fn stop(&mut self) -> Result<()> {
if self.stop_called {
return Ok(());
}
self.stop_called = true;
let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceStopFailed(status).into());
}
let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceDestroyIOProcIDFailed(status).into());
}
let status = unsafe { AudioHardwareDestroyAggregateDevice(self.device_id) };
if status != 0 {
return Err(CoreAudioError::AudioHardwareDestroyAggregateDeviceFailed(status).into());
}
// The process tap is a separate object from the aggregate device, so destroy it with its own id.
let status = unsafe { AudioHardwareDestroyProcessTap(self.tap_id) };
if status != 0 {
return Err(CoreAudioError::AudioHardwareDestroyProcessTapFailed(status).into());
}
Ok(())
}
}
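// The aggregate-device keys exported by `coreaudio::sys` are NUL-terminated byte strings,
// so convert them through `CStr` before building a `CFString`.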
fn cfstring_from_bytes_with_nul(bytes: &'static [u8]) -> CFString {
CFString::new(
unsafe { std::ffi::CStr::from_bytes_with_nul_unchecked(bytes) }
.to_string_lossy()
.as_ref(),
)
}

View File

@@ -26,8 +26,10 @@
},
"devDependencies": {
"@napi-rs/cli": "3.0.0-alpha.70",
"@napi-rs/whisper": "^0.0.4",
"@types/node": "^22.0.0",
"ava": "^6.2.0",
"rxjs": "^7.8.1",
"ts-node": "^10.9.2",
"typescript": "^5.7.2"
},

View File

@@ -1,4 +1,6 @@
pub mod hashcash;
#[allow(unused_imports)]
pub use affine_media_capture::*;
pub use affine_nbstore::*;
pub use affine_sqlite_v1::*;

View File

@@ -3,6 +3,6 @@
"compilerOptions": {
"outDir": "./dist"
},
"include": ["index.d.ts"],
"include": ["index.d.ts", "media-capture-example.ts"],
"references": []
}