feat(native): media capture (#9992)

Brooooooklyn
2025-02-25 06:51:56 +00:00
parent 2ec7de7e32
commit 5dbffba08d
46 changed files with 5791 additions and 74 deletions

View File

@@ -0,0 +1,2 @@
recordings
.env

View File

@@ -0,0 +1,43 @@
{
"name": "@affine/media-capture-playground",
"private": true,
"type": "module",
"version": "0.0.0",
"scripts": {
"dev:web": "vite",
"dev:server": "tsx --env-file=.env --watch server/main.ts"
},
"dependencies": {
"@affine/native": "workspace:*",
"@google/generative-ai": "^0.21.0",
"@tailwindcss/vite": "^4.0.6",
"@types/express": "^4",
"@types/multer": "^1",
"@types/react": "^19.0.8",
"@types/react-dom": "^19.0.3",
"@types/socket.io": "^3.0.2",
"@types/socket.io-client": "^3.0.0",
"@vitejs/plugin-react": "^4.3.4",
"chokidar": "^4.0.3",
"express": "^4.21.2",
"express-rate-limit": "^7.1.5",
"fs-extra": "^11.3.0",
"multer": "^1.4.5-lts.1",
"openai": "^4.85.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-markdown": "^9.0.3",
"rxjs": "^7.8.1",
"socket.io": "^4.7.4",
"socket.io-client": "^4.7.4",
"swr": "^2.3.2",
"tailwindcss": "^4.0.6",
"tsx": "^4.19.2",
"vite": "^6.1.0"
},
"devDependencies": {
"@types/fs-extra": "^11",
"@types/react": "^19.0.1",
"@types/react-dom": "^19.0.2"
}
}

View File

@@ -0,0 +1,200 @@
import { GoogleGenerativeAI } from '@google/generative-ai';
import {
GoogleAIFileManager,
type UploadFileResponse,
} from '@google/generative-ai/server';
const DEFAULT_MODEL = 'gemini-2.0-flash';
export interface TranscriptionResult {
title: string;
summary: string;
segments: {
speaker: string;
start_time: string;
end_time: string;
transcription: string;
}[];
}
const PROMPT_TRANSCRIPTION = `
Generate audio transcription and diarization for the recording.
The recording source is most likely from a video call with multiple speakers.
Output in JSON format with the following structure:
{
"segments": [
{
"speaker": "Speaker A",
"start_time": "MM:SS",
"end_time": "MM:SS",
"transcription": "..."
},
...
],
}
- Use consistent speaker labels throughout
- Accurate timestamps in MM:SS format
- Clean transcription with proper punctuation
- Identify speakers by name if possible, otherwise use "Speaker A/B/C"
`;
const PROMPT_SUMMARY = `
Generate a short title and summary of the conversation. The input is in the following JSON format:
{
"segments": [
{
"speaker": "Speaker A",
"start_time": "MM:SS",
"end_time": "MM:SS",
"transcription": "..."
},
...
],
}
Output in JSON format with the following structure:
{
"title": "Title of the recording",
"summary": "Summary of the conversation in markdown format"
}
1. Summary Structure:
- The summary should be inferred from the speakers' language and context
- All insights should be derived directly from speakers' language and context
- Use hierarchical organization for clear information structure
- Use markdown format for the summary. Use bullet points, lists and other markdown styles when appropriate
2. Title:
- Come up with a title for the recording.
- The title should be a short description of the recording.
- The title should be a single sentence or a few words.
`;
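// Illustrative example (values invented, not from a real recording) of the
// combined JSON the two prompts above are expected to produce:
// {
//   "title": "Weekly product sync",
//   "summary": "- Discussed the release timeline\n- Agreed on follow-up tasks",
//   "segments": [
//     {
//       "speaker": "Speaker A",
//       "start_time": "00:05",
//       "end_time": "00:12",
//       "transcription": "Let's get started with the roadmap."
//     }
//   ]
// }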
export async function gemini(
audioFilePath: string,
options?: {
model?: 'gemini-2.0-flash' | 'gemini-1.5-flash';
mode?: 'transcript' | 'summary';
}
) {
if (!process.env.GOOGLE_GEMINI_API_KEY) {
console.error('Missing GOOGLE_GEMINI_API_KEY environment variable');
throw new Error('GOOGLE_GEMINI_API_KEY is not set');
}
// Initialize GoogleGenerativeAI and FileManager with your API_KEY
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_GEMINI_API_KEY);
const fileManager = new GoogleAIFileManager(
process.env.GOOGLE_GEMINI_API_KEY
);
async function transcribe(
audioFilePath: string
): Promise<TranscriptionResult | null> {
let uploadResult: UploadFileResponse | null = null;
try {
// Upload the audio file
uploadResult = await fileManager.uploadFile(audioFilePath, {
mimeType: 'audio/wav',
displayName: 'audio_transcription.wav',
});
console.log('File uploaded:', uploadResult.file.uri);
// Initialize a Gemini model appropriate for your use case.
const model = genAI.getGenerativeModel({
model: options?.model || DEFAULT_MODEL,
generationConfig: {
responseMimeType: 'application/json',
},
});
// Generate content using a prompt and the uploaded file
const result = await model.generateContent([
{
fileData: {
fileUri: uploadResult.file.uri,
mimeType: uploadResult.file.mimeType,
},
},
{
text: PROMPT_TRANSCRIPTION,
},
]);
const text = result.response.text();
try {
const parsed = JSON.parse(text);
return parsed;
} catch (e) {
console.error('Failed to parse transcription JSON:', e);
console.error('Raw text that failed to parse:', text);
return null;
}
} catch (e) {
console.error('Error during transcription:', e);
return null;
} finally {
if (uploadResult) {
await fileManager.deleteFile(uploadResult.file.name);
}
}
}
async function summarize(transcription: TranscriptionResult) {
try {
const model = genAI.getGenerativeModel({
model: options?.model || DEFAULT_MODEL,
generationConfig: {
responseMimeType: 'application/json',
},
});
const result = await model.generateContent([
{
text: PROMPT_SUMMARY + '\n\n' + JSON.stringify(transcription),
},
]);
const text = result.response.text();
try {
const parsed = JSON.parse(text);
return parsed;
} catch (e) {
console.error('Failed to parse summary JSON:', e);
console.error('Raw text that failed to parse:', text);
return null;
}
} catch (e) {
console.error('Error during summarization:', e);
return null;
}
}
const transcription = await transcribe(audioFilePath);
if (!transcription) {
console.error('Transcription failed');
return null;
}
const summary = await summarize(transcription);
if (!summary) {
console.error('Summary generation failed');
return transcription;
}
const result = {
...transcription,
...summary,
};
console.log('Processing completed:', {
title: result.title,
segmentsCount: result.segments?.length,
});
return result;
}
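
A minimal usage sketch for the helper above (GOOGLE_GEMINI_API_KEY is assumed to be set and the recording path is a placeholder), mirroring how the playground server calls it with the 8 kHz transcription WAV:

// sketch: run transcription + summary against a local WAV file
import { gemini } from './gemini';

const result = await gemini('./recordings/example/transcription.wav', {
  model: 'gemini-2.0-flash',
});
if (result) {
  console.log(result.title, `${result.segments.length} segments`);
}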

View File

@@ -0,0 +1,759 @@
/* eslint-disable @typescript-eslint/no-misused-promises */
import { exec } from 'node:child_process';
import { createServer } from 'node:http';
import { promisify } from 'node:util';
import {
type Application,
type AudioTapStream,
ShareableContent,
} from '@affine/native';
import type { FSWatcher } from 'chokidar';
import chokidar from 'chokidar';
import express from 'express';
import rateLimit from 'express-rate-limit';
import fs from 'fs-extra';
import { Server } from 'socket.io';
import { gemini, type TranscriptionResult } from './gemini';
import { WavWriter } from './wav-writer';
// Constants
const RECORDING_DIR = './recordings';
const PORT = process.env.PORT || 6544;
// Ensure recordings directory exists
fs.ensureDirSync(RECORDING_DIR);
console.log(`📁 Ensuring recordings directory exists at ${RECORDING_DIR}`);
// Types
interface Recording {
app: Application;
appGroup: Application | null;
buffers: Float32Array[];
stream: AudioTapStream;
startTime: number;
isWriting: boolean;
}
interface RecordingStatus {
processId: number;
bundleIdentifier: string;
name: string;
startTime: number;
duration: number;
}
interface RecordingMetadata {
appName: string;
bundleIdentifier: string;
processId: number;
recordingStartTime: number;
recordingEndTime: number;
recordingDuration: number;
sampleRate: number;
totalSamples: number;
}
interface AppInfo {
app: Application;
processId: number;
processGroupId: number | null;
bundleIdentifier: string;
name: string;
running: boolean;
}
interface TranscriptionMetadata {
transcriptionStartTime: number;
transcriptionEndTime: number;
transcriptionStatus: 'not_started' | 'pending' | 'completed' | 'error';
transcription?: TranscriptionResult;
error?: string;
}
// State
const recordingMap = new Map<number, Recording>();
let appsSubscriber = () => {};
let fsWatcher: FSWatcher | null = null;
// Server setup
const app = express();
const httpServer = createServer(app);
const io = new Server(httpServer, {
cors: { origin: '*' },
});
app.use(express.json());
// Static file serving handles the per-recording folder structure
app.use(
'/recordings',
(req, res, next) => {
// Extract the folder name from the path
const parts = req.path.split('/');
if (parts.length < 2) {
return res.status(400).json({ error: 'Invalid request path' });
}
const folderName = parts[1];
if (!validateAndSanitizeFolderName(folderName)) {
return res.status(400).json({ error: 'Invalid folder name format' });
}
if (req.path.endsWith('.wav')) {
res.setHeader('Content-Type', 'audio/wav');
} else if (req.path.endsWith('.png')) {
res.setHeader('Content-Type', 'image/png');
}
next();
},
express.static(RECORDING_DIR)
);
// Recording management
async function saveRecording(recording: Recording): Promise<string | null> {
try {
recording.isWriting = true;
const app = recording.appGroup || recording.app;
const totalSamples = recording.buffers.reduce(
(acc, buf) => acc + buf.length,
0
);
const recordingEndTime = Date.now();
const recordingDuration = (recordingEndTime - recording.startTime) / 1000;
const expectedSamples = recordingDuration * 44100;
console.log(`💾 Saving recording for ${app.name}:`);
console.log(`- Process ID: ${app.processId}`);
console.log(`- Bundle ID: ${app.bundleIdentifier}`);
console.log(`- Actual duration: ${recordingDuration.toFixed(2)}s`);
console.log(`- Expected samples: ${Math.floor(expectedSamples)}`);
console.log(`- Actual samples: ${totalSamples}`);
console.log(
`- Sample ratio: ${(totalSamples / expectedSamples).toFixed(2)}`
);
// Create a buffer for the mono audio
const buffer = new Float32Array(totalSamples);
let offset = 0;
recording.buffers.forEach(buf => {
buffer.set(buf, offset);
offset += buf.length;
});
await fs.ensureDir(RECORDING_DIR);
const timestamp = Date.now();
const baseFilename = `${recording.app.bundleIdentifier}-${recording.app.processId}-${timestamp}`;
const recordingDir = `${RECORDING_DIR}/${baseFilename}`;
await fs.ensureDir(recordingDir);
const wavFilename = `${recordingDir}/recording.wav`;
const transcriptionWavFilename = `${recordingDir}/transcription.wav`;
const metadataFilename = `${recordingDir}/metadata.json`;
const iconFilename = `${recordingDir}/icon.png`;
// Save high-quality WAV file for playback (44.1kHz)
console.log(`📝 Writing high-quality WAV file to ${wavFilename}`);
const writer = new WavWriter(wavFilename, { targetSampleRate: 44100 });
writer.write(buffer);
await writer.end();
console.log('✅ High-quality WAV file written successfully');
// Save low-quality WAV file for transcription (8kHz)
console.log(
`📝 Writing transcription WAV file to ${transcriptionWavFilename}`
);
const transcriptionWriter = new WavWriter(transcriptionWavFilename, {
targetSampleRate: 8000,
});
transcriptionWriter.write(buffer);
await transcriptionWriter.end();
console.log('✅ Transcription WAV file written successfully');
// Save app icon if available
if (app.icon) {
console.log(`📝 Writing app icon to ${iconFilename}`);
await fs.writeFile(iconFilename, app.icon);
console.log('✅ App icon written successfully');
}
console.log(`📝 Writing metadata to ${metadataFilename}`);
// Save metadata (without icon)
const metadata: RecordingMetadata = {
appName: app.name,
bundleIdentifier: app.bundleIdentifier,
processId: app.processId,
recordingStartTime: recording.startTime,
recordingEndTime,
recordingDuration,
sampleRate: 44100,
totalSamples,
};
await fs.writeJson(metadataFilename, metadata, { spaces: 2 });
console.log('✅ Metadata file written successfully');
return baseFilename;
} catch (error) {
console.error('❌ Error saving recording:', error);
return null;
}
}
function getRecordingStatus(): RecordingStatus[] {
return Array.from(recordingMap.entries()).map(([processId, recording]) => ({
processId,
bundleIdentifier: recording.app.bundleIdentifier,
name: recording.app.name,
startTime: recording.startTime,
duration: Date.now() - recording.startTime,
}));
}
function emitRecordingStatus() {
io.emit('apps:recording', { recordings: getRecordingStatus() });
}
async function startRecording(app: Application) {
if (recordingMap.has(app.processId)) {
console.log(
`⚠️ Recording already in progress for ${app.name} (PID: ${app.processId})`
);
return;
}
// Find the root app of the process group
const processGroupId = await getProcessGroupId(app.processId);
const rootApp = processGroupId
? (shareableContent
.applications()
.find(a => a.processId === processGroupId) ?? app)
: app;
console.log(
`🎙️ Starting recording for ${rootApp.name} (PID: ${rootApp.processId})`
);
const buffers: Float32Array[] = [];
const stream = app.tapAudio((err, samples) => {
if (err) {
console.error(`❌ Audio stream error for ${rootApp.name}:`, err);
return;
}
const recording = recordingMap.get(app.processId);
if (recording && !recording.isWriting) {
buffers.push(new Float32Array(samples));
}
});
recordingMap.set(app.processId, {
app,
appGroup: rootApp,
buffers,
stream,
startTime: Date.now(),
isWriting: false,
});
console.log(`✅ Recording started successfully for ${rootApp.name}`);
emitRecordingStatus();
}
async function stopRecording(processId: number) {
const recording = recordingMap.get(processId);
if (!recording) {
console.log(`⚠️ No active recording found for process ID ${processId}`);
return;
}
const app = recording.appGroup || recording.app;
console.log(`⏹️ Stopping recording for ${app.name} (PID: ${app.processId})`);
console.log(
`⏱️ Recording duration: ${((Date.now() - recording.startTime) / 1000).toFixed(2)}s`
);
recording.stream.stop();
const filename = await saveRecording(recording);
recordingMap.delete(processId);
if (filename) {
console.log(`✅ Recording saved successfully to ${filename}`);
} else {
console.error(`❌ Failed to save recording for ${app.name}`);
}
emitRecordingStatus();
return filename;
}
// File management
async function getRecordings(): Promise<
{
wav: string;
metadata?: RecordingMetadata;
transcription?: TranscriptionMetadata;
}[]
> {
try {
const allItems = await fs.readdir(RECORDING_DIR);
// First filter out non-directories
const dirs = (
await Promise.all(
allItems.map(async item => {
const fullPath = `${RECORDING_DIR}/${item}`;
try {
const stat = await fs.stat(fullPath);
return stat.isDirectory() ? item : null;
} catch {
return null;
}
})
)
).filter((d): d is string => d !== null);
const recordings = await Promise.all(
dirs.map(async dir => {
try {
const recordingPath = `${RECORDING_DIR}/${dir}`;
const metadataPath = `${recordingPath}/metadata.json`;
const transcriptionPath = `${recordingPath}/transcription.json`;
let metadata: RecordingMetadata | undefined;
try {
metadata = await fs.readJson(metadataPath);
} catch {
// Metadata might not exist
}
let transcription: TranscriptionMetadata | undefined;
try {
// Check if transcription file exists
const transcriptionExists = await fs.pathExists(transcriptionPath);
if (transcriptionExists) {
transcription = await fs.readJson(transcriptionPath);
} else {
// If transcription.wav exists but no transcription.json, it means transcription is available but not started
transcription = {
transcriptionStartTime: 0,
transcriptionEndTime: 0,
transcriptionStatus: 'not_started',
};
}
} catch (error) {
console.error(`Error reading transcription for ${dir}:`, error);
}
return {
wav: dir,
metadata,
transcription,
};
} catch (error) {
console.error(`Error processing directory ${dir}:`, error);
return null;
}
})
);
// Filter out nulls and sort by recording start time
return recordings
.filter((r): r is NonNullable<typeof r> => r !== null)
.sort(
(a, b) =>
(b.metadata?.recordingStartTime ?? 0) -
(a.metadata?.recordingStartTime ?? 0)
);
} catch (error) {
console.error('Error reading recordings directory:', error);
return [];
}
}
async function setupRecordingsWatcher() {
if (fsWatcher) {
console.log('🔄 Closing existing recordings watcher');
await fsWatcher.close();
}
try {
console.log('👀 Setting up recordings watcher...');
const files = await getRecordings();
console.log(`📊 Found ${files.length} existing recordings`);
io.emit('apps:saved', { recordings: files });
fsWatcher = chokidar.watch(RECORDING_DIR, {
ignored: /(^|[/\\])\../, // ignore dotfiles
persistent: true,
ignoreInitial: true,
awaitWriteFinish: {
stabilityThreshold: 500,
pollInterval: 100,
},
});
// Handle file events
fsWatcher
.on('add', async path => {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`📝 File added: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
}
})
.on('change', async path => {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`📝 File changed: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
}
})
.on('unlink', async path => {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`🗑️ File removed: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
}
})
.on('error', error => {
console.error('❌ Error watching recordings directory:', error);
})
.on('ready', () => {
console.log('✅ Recordings watcher setup complete');
});
} catch (error) {
console.error('❌ Error setting up recordings watcher:', error);
}
}
// Process management
async function getProcessGroupId(pid: number): Promise<number | null> {
try {
const execAsync = promisify(exec);
const { stdout } = await execAsync(`ps -o pgid -p ${pid}`);
const lines = stdout.trim().split('\n');
if (lines.length < 2) return null;
const pgid = parseInt(lines[1].trim(), 10);
return isNaN(pgid) ? null : pgid;
} catch {
return null;
}
}
// Application management
const shareableContent = new ShareableContent();
async function getAllApps(): Promise<AppInfo[]> {
const apps = await Promise.all(
shareableContent.applications().map(async app => {
try {
return {
app,
processId: app.processId,
processGroupId: await getProcessGroupId(app.processId),
bundleIdentifier: app.bundleIdentifier,
name: app.name,
running: app.isRunning,
};
} catch (error) {
console.error(error);
return null;
}
})
);
const filteredApps = apps.filter(
(v): v is AppInfo =>
v !== null && !v.bundleIdentifier.startsWith('com.apple')
);
// Stop any active recordings whose app is no longer listed
await Promise.all(
Array.from(recordingMap.keys()).map(async processId => {
if (!filteredApps.some(a => a.processId === processId)) {
await stopRecording(processId);
}
})
);
return filteredApps;
}
function listenToAppStateChanges(apps: AppInfo[]) {
const subscribers = apps.map(({ app }) => {
return ShareableContent.onAppStateChanged(app, () => {
setTimeout(() => {
console.log(
`🔄 Application state changed: ${app.name} (PID: ${app.processId}) is now ${
app.isRunning ? '▶️ running' : '⏹️ stopped'
}`
);
io.emit('apps:state-changed', {
processId: app.processId,
running: app.isRunning,
});
if (!app.isRunning) {
stopRecording(app.processId).catch(error => {
console.error('❌ Error stopping recording:', error);
});
}
}, 50);
});
});
appsSubscriber();
appsSubscriber = () => {
subscribers.forEach(subscriber => subscriber.unsubscribe());
};
}
// Socket.IO setup
io.on('connection', async socket => {
console.log('🔌 New client connected');
const initialApps = await getAllApps();
console.log(`📤 Sending ${initialApps.length} applications to new client`);
socket.emit('apps:all', { apps: initialApps });
socket.emit('apps:recording', { recordings: getRecordingStatus() });
const files = await getRecordings();
console.log(`📤 Sending ${files.length} saved recordings to new client`);
socket.emit('apps:saved', { recordings: files });
listenToAppStateChanges(initialApps);
socket.on('disconnect', () => {
console.log('🔌 Client disconnected');
});
});
// Application list change listener
ShareableContent.onApplicationListChanged(() => {
(async () => {
try {
console.log('🔄 Application list changed, updating clients...');
const apps = await getAllApps();
console.log(`📢 Broadcasting ${apps.length} applications to all clients`);
io.emit('apps:all', { apps });
} catch (error) {
console.error('❌ Error handling application list change:', error);
}
})().catch(error => {
console.error('❌ Error in application list change handler:', error);
});
});
// API Routes
const rateLimiter = rateLimit({
windowMs: 1000,
max: 200,
message: { error: 'Too many requests, please try again later.' },
});
app.get('/permissions', (req, res) => {
const permission = shareableContent.checkRecordingPermissions();
res.json({ permission });
});
app.get('/apps', async (_req, res) => {
const apps = await getAllApps();
listenToAppStateChanges(apps);
res.json({ apps });
});
app.get('/apps/saved', rateLimiter, async (_req, res) => {
const files = await getRecordings();
res.json({ recordings: files });
});
// Utility function to validate and sanitize folder name
function validateAndSanitizeFolderName(folderName: string): string | null {
// Allow alphanumeric characters, hyphens, dots (for bundle IDs)
// Format: bundleId-processId-timestamp
if (!/^[\w.-]+-\d+-\d+$/.test(folderName)) {
return null;
}
// Remove any path traversal attempts
const sanitized = folderName.replace(/^\.+|\.+$/g, '').replace(/[/\\]/g, '');
return sanitized;
}
app.delete('/recordings/:foldername', rateLimiter, async (req, res) => {
const foldername = validateAndSanitizeFolderName(req.params.foldername);
if (!foldername) {
console.error('❌ Invalid folder name format:', req.params.foldername);
return res.status(400).json({ error: 'Invalid folder name format' });
}
const recordingDir = `${RECORDING_DIR}/${foldername}`;
try {
// Ensure the resolved path is within RECORDING_DIR
const resolvedPath = await fs.realpath(recordingDir);
const recordingDirPath = await fs.realpath(RECORDING_DIR);
if (!resolvedPath.startsWith(recordingDirPath)) {
console.error('❌ Path traversal attempt detected:', {
resolvedPath,
recordingDirPath,
requestedFile: foldername,
});
return res.status(403).json({ error: 'Access denied' });
}
console.log(`🗑️ Deleting recording folder: ${foldername}`);
await fs.remove(recordingDir);
console.log('✅ Recording folder deleted successfully');
res.status(200).json({ success: true });
} catch (error) {
const typedError = error as NodeJS.ErrnoException;
if (typedError.code === 'ENOENT') {
console.error('❌ Folder not found:', recordingDir);
res.status(404).json({ error: 'Folder not found' });
} else {
console.error('❌ Error deleting folder:', {
error: typedError,
code: typedError.code,
message: typedError.message,
path: recordingDir,
});
res.status(500).json({
error: `Failed to delete folder: ${typedError.message || 'Unknown error'}`,
});
}
}
});
app.get('/apps/:process_id/icon', (req, res) => {
const processId = parseInt(req.params.process_id);
try {
const app = shareableContent.applicationWithProcessId(processId);
const icon = app.icon;
res.set('Content-Type', 'image/png');
res.send(icon);
} catch {
res.status(404).json({ error: 'App icon not found' });
}
});
app.post('/apps/:process_id/record', async (req, res) => {
const processId = parseInt(req.params.process_id);
const app = shareableContent.applicationWithProcessId(processId);
await startRecording(app);
res.json({ success: true });
});
app.post('/apps/:process_id/stop', async (req, res) => {
const processId = parseInt(req.params.process_id);
await stopRecording(processId);
res.json({ success: true });
});
// Transcription endpoint (uses folder name validation)
app.post(
'/recordings/:foldername/transcribe',
rateLimiter,
async (req, res) => {
const foldername = validateAndSanitizeFolderName(req.params.foldername);
if (!foldername) {
console.error('❌ Invalid folder name format:', req.params.foldername);
return res.status(400).json({ error: 'Invalid folder name format' });
}
const recordingDir = `${RECORDING_DIR}/${foldername}`;
try {
// Check if directory exists
await fs.access(recordingDir);
const transcriptionWavPath = `${recordingDir}/transcription.wav`;
const transcriptionMetadataPath = `${recordingDir}/transcription.json`;
// Check if transcription file exists
await fs.access(transcriptionWavPath);
// Create initial transcription metadata
const initialMetadata: TranscriptionMetadata = {
transcriptionStartTime: Date.now(),
transcriptionEndTime: 0,
transcriptionStatus: 'pending',
};
await fs.writeJson(transcriptionMetadataPath, initialMetadata);
// Notify clients that transcription has started
io.emit('apps:recording-transcription-start', { filename: foldername });
const transcription = await gemini(transcriptionWavPath, {
mode: 'transcript',
});
// Update transcription metadata with results
const metadata: TranscriptionMetadata = {
transcriptionStartTime: initialMetadata.transcriptionStartTime,
transcriptionEndTime: Date.now(),
transcriptionStatus: 'completed',
transcription: transcription ?? undefined,
};
await fs.writeJson(transcriptionMetadataPath, metadata);
// Notify clients that transcription is complete
io.emit('apps:recording-transcription-end', {
filename: foldername,
success: true,
transcription,
});
res.json({ success: true });
} catch (error) {
console.error('❌ Error during transcription:', error);
// Update transcription metadata with error
const metadata: TranscriptionMetadata = {
transcriptionStartTime: Date.now(),
transcriptionEndTime: Date.now(),
transcriptionStatus: 'error',
error: error instanceof Error ? error.message : 'Unknown error',
};
await fs
.writeJson(`${recordingDir}/transcription.json`, metadata)
.catch(err => {
console.error('❌ Error saving transcription metadata:', err);
});
// Notify clients of transcription error
io.emit('apps:recording-transcription-end', {
filename: foldername,
success: false,
error: error instanceof Error ? error.message : 'Unknown error',
});
res.status(500).json({
error: error instanceof Error ? error.message : 'Unknown error',
});
}
}
);
// Start server
httpServer.listen(PORT, () => {
console.log(`
🎙️ Media Capture Server started successfully:
- Port: ${PORT}
- Recordings directory: ${RECORDING_DIR}
- Sample rate: 44.1kHz
- Channels: Mono
`);
});
// Initialize file watcher
setupRecordingsWatcher().catch(error => {
console.error('Failed to setup recordings watcher:', error);
});
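
The routes above can also be exercised without the web UI; a minimal client sketch (the process id is a placeholder taken from GET /apps, and the server is assumed to be running on the default port 6544):

// sketch: drive the record → stop → transcribe flow over the REST API
const BASE = 'http://localhost:6544';
const pid = 12345; // placeholder: pick a processId from GET /apps

await fetch(`${BASE}/apps/${pid}/record`, { method: 'POST' }); // start tapping audio
// ... record for a while ...
await fetch(`${BASE}/apps/${pid}/stop`, { method: 'POST' }); // stop and write the WAV + metadata

const { recordings } = await (await fetch(`${BASE}/apps/saved`)).json();
// kick off Gemini transcription for the most recent recording folder
await fetch(`${BASE}/recordings/${recordings[0].wav}/transcribe`, { method: 'POST' });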

View File

@@ -0,0 +1,4 @@
declare module '*.txt' {
const content: string;
export default content;
}

View File

@@ -0,0 +1,125 @@
import fs from 'fs-extra';
interface WavWriterConfig {
targetSampleRate?: number;
}
export class WavWriter {
private readonly file: fs.WriteStream;
private readonly originalSampleRate: number = 44100;
private readonly targetSampleRate: number;
private readonly numChannels = 1; // The audio is mono
private samplesWritten = 0;
private readonly tempFilePath: string;
private readonly finalFilePath: string;
constructor(finalPath: string, config: WavWriterConfig = {}) {
this.finalFilePath = finalPath;
this.tempFilePath = finalPath + '.tmp';
this.targetSampleRate = config.targetSampleRate ?? this.originalSampleRate;
this.file = fs.createWriteStream(this.tempFilePath);
this.writeHeader(); // Always write header immediately
}
private writeHeader() {
const buffer = Buffer.alloc(44); // WAV header is 44 bytes
// RIFF chunk descriptor
buffer.write('RIFF', 0);
buffer.writeUInt32LE(36, 4); // Initial file size - 8 (will be updated later)
buffer.write('WAVE', 8);
// fmt sub-chunk
buffer.write('fmt ', 12);
buffer.writeUInt32LE(16, 16); // Subchunk1Size (16 for PCM)
buffer.writeUInt16LE(3, 20); // AudioFormat (3 for IEEE float)
buffer.writeUInt16LE(this.numChannels, 22); // NumChannels
buffer.writeUInt32LE(this.targetSampleRate, 24); // SampleRate
buffer.writeUInt32LE(this.targetSampleRate * this.numChannels * 4, 28); // ByteRate
buffer.writeUInt16LE(this.numChannels * 4, 32); // BlockAlign
buffer.writeUInt16LE(32, 34); // BitsPerSample (32 for float)
// data sub-chunk
buffer.write('data', 36);
buffer.writeUInt32LE(0, 40); // Initial data size (will be updated later)
this.file.write(buffer);
}
private resample(samples: Float32Array): Float32Array {
const ratio = this.originalSampleRate / this.targetSampleRate;
const newLength = Math.floor(samples.length / ratio);
const result = new Float32Array(newLength);
for (let i = 0; i < newLength; i++) {
const position = i * ratio;
const index = Math.floor(position);
const fraction = position - index;
// Linear interpolation between adjacent samples
if (index + 1 < samples.length) {
result[i] =
samples[index] * (1 - fraction) + samples[index + 1] * fraction;
} else {
result[i] = samples[index];
}
}
return result;
}
write(samples: Float32Array) {
// Resample the input samples
const resampledData = this.resample(samples);
// Create a buffer with the correct size (4 bytes per float)
const buffer = Buffer.alloc(resampledData.length * 4);
// Write each float value properly
for (let i = 0; i < resampledData.length; i++) {
buffer.writeFloatLE(resampledData[i], i * 4);
}
this.file.write(buffer);
this.samplesWritten += resampledData.length;
}
async end(): Promise<void> {
return new Promise<void>((resolve, reject) => {
this.file.end(() => {
void this.updateHeaderAndCleanup().then(resolve).catch(reject);
});
});
}
private async updateHeaderAndCleanup(): Promise<void> {
// Read the entire temporary file
const data = await fs.promises.readFile(this.tempFilePath);
// Update the header with correct sizes
const dataSize = this.samplesWritten * 4;
const fileSize = dataSize + 36;
data.writeUInt32LE(fileSize, 4); // Update RIFF chunk size
data.writeUInt32LE(dataSize, 40); // Update data chunk size
// Write the updated file
await fs.promises.writeFile(this.finalFilePath, data);
// Clean up temp file
await fs.promises.unlink(this.tempFilePath);
}
}
/**
* Creates a Buffer from Float32Array audio data
* @param float32Array - The Float32Array containing audio samples
* @returns FileData - The audio data as a Buffer
*/
export function FileData(float32Array: Float32Array): Buffer {
const buffer = Buffer.alloc(float32Array.length * 4); // 4 bytes per float
for (let i = 0; i < float32Array.length; i++) {
buffer.writeFloatLE(float32Array[i], i * 4);
}
return buffer;
}
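
A short usage sketch of the writer (the sine tone below is illustrative; in the server the samples come from the audio tap at 44.1 kHz):

// sketch: write one second of a 440 Hz tone to an 8 kHz WAV file
import { WavWriter } from './wav-writer';

const samples = new Float32Array(44100); // input is assumed to be 44.1 kHz mono
for (let i = 0; i < samples.length; i++) {
  samples[i] = 0.5 * Math.sin((2 * Math.PI * 440 * i) / 44100);
}

const writer = new WavWriter('./tone.wav', { targetSampleRate: 8000 });
writer.write(samples); // resampled to 8 kHz internally via linear interpolation
await writer.end(); // patches the RIFF/data sizes and removes the temp file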

View File

@@ -0,0 +1,7 @@
{
"extends": "../../../tsconfig.node.json",
"compilerOptions": {
"rootDir": "./server"
},
"include": ["./server"]
}

View File

@@ -0,0 +1,10 @@
{
"extends": "../../../tsconfig.web.json",
"compilerOptions": {
"rootDir": "./web",
"outDir": "./dist",
"tsBuildInfoFile": "./dist/tsconfig.tsbuildinfo"
},
"include": ["./web", "server/types.d.ts"],
"references": [{ "path": "../native" }]
}

View File

@@ -0,0 +1,18 @@
import tailwindcss from '@tailwindcss/vite';
import react from '@vitejs/plugin-react';
import { defineConfig } from 'vite';
// https://vite.dev/config/
export default defineConfig({
plugins: [react(), tailwindcss()],
root: './web',
server: {
proxy: {
'/api': {
target: 'http://localhost:6544',
changeOrigin: true,
rewrite: path => path.replace(/^\/api/, ''),
},
},
},
});
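
With this proxy, relative /api requests from the dev web UI are forwarded to the Express server above; a sketch of the mapping (assuming the server from server/main.ts is running):

// sketch: '/api/apps' is rewritten to 'http://localhost:6544/apps' by the dev server
const res = await fetch('/api/apps');
const { apps } = await res.json();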

View File

@@ -0,0 +1,33 @@
import { AppList } from './components/app-list';
import { SavedRecordings } from './components/saved-recordings';
export function App() {
return (
<div className="h-screen bg-gray-50 overflow-hidden">
<div className="h-full p-4 flex gap-4 max-w-[1800px] mx-auto">
<div className="flex-1 flex flex-col min-h-0">
<h1 className="text-xl font-bold text-gray-900 mb-1">
Running Applications
</h1>
<p className="text-sm text-gray-500 mb-2">
Select an application to start recording its audio
</p>
<div className="flex-1 bg-white shadow-lg rounded-lg border border-gray-100 overflow-auto">
<AppList />
</div>
</div>
<div className="w-[1024px] flex flex-col min-h-0">
<h1 className="text-xl font-bold text-gray-900 mb-1">
Saved Recordings
</h1>
<p className="text-sm text-gray-500 mb-2">
Listen to and manage your recorded audio files
</p>
<div className="flex-1 bg-white shadow-lg rounded-lg border border-gray-100 p-4 overflow-auto">
<SavedRecordings />
</div>
</div>
</div>
</div>
);
}

View File

@@ -0,0 +1,122 @@
import React from 'react';
import type { AppGroup, RecordingStatus } from '../types';
import { formatDuration } from '../utils';
interface AppItemProps {
app: AppGroup;
recordings?: RecordingStatus[];
}
export function AppItem({ app, recordings }: AppItemProps) {
const [imgError, setImgError] = React.useState(false);
const [isRecording, setIsRecording] = React.useState(false);
const appName = app.rootApp.name || '';
const bundleId = app.rootApp.bundleIdentifier || '';
const firstLetter = appName.charAt(0).toUpperCase();
const isRunning = app.apps.some(a => a.running);
const recording = recordings?.find((r: RecordingStatus) =>
app.apps.some(a => a.processId === r.processId)
);
const handleRecordClick = React.useCallback(() => {
const recordingApp = app.apps.find(a => a.running);
if (!recordingApp) {
return;
}
if (isRecording) {
void fetch(`/api/apps/${recordingApp.processId}/stop`, {
method: 'POST',
})
.then(() => setIsRecording(false))
.catch(error => console.error('Failed to stop recording:', error));
} else {
void fetch(`/api/apps/${recordingApp.processId}/record`, {
method: 'POST',
})
.then(() => setIsRecording(true))
.catch(error => console.error('Failed to start recording:', error));
}
}, [app.apps, isRecording]);
React.useEffect(() => {
setIsRecording(!!recording);
}, [recording]);
const [duration, setDuration] = React.useState(0);
React.useEffect(() => {
if (recording) {
const interval = setInterval(() => {
setDuration(Date.now() - recording.startTime);
}, 1000);
return () => clearInterval(interval);
} else {
setDuration(0);
}
return () => {};
}, [recording]);
return (
<div className="flex items-center h-16 space-x-2 p-3 hover:bg-gray-50 rounded-lg transition-all duration-200 border border-transparent hover:border-gray-100">
{imgError ? (
<div className="w-8 h-8 rounded-lg bg-gray-50 border border-gray-100 flex items-center justify-center text-gray-600 font-semibold text-base">
{firstLetter}
</div>
) : (
<img
src={`/api/apps/${app.rootApp.processId}/icon`}
loading="lazy"
alt={appName}
className="w-8 h-8 object-contain rounded-lg bg-gray-50 border border-gray-100"
onError={() => setImgError(true)}
/>
)}
<div className="flex-1 min-w-0">
<div className="flex items-center space-x-1 mb-1">
{appName ? (
<span className="text-gray-900 font-medium text-sm truncate">
{appName}
</span>
) : (
<span className="text-gray-400 italic font-medium text-sm">
Unnamed Application
</span>
)}
<span className="text-xs px-1 bg-gray-50 text-gray-500 rounded border border-gray-100">
PID: {app.rootApp.processId}
</span>
<span
className={`text-xs px-2 py-0.5 rounded-full font-medium border ${recording ? 'bg-red-50 text-red-600 border-red-100 opacity-100' : 'opacity-0'}`}
>
{recording ? formatDuration(duration) : '00:00:00'}
</span>
</div>
<div className="text-xs text-gray-500 font-mono truncate opacity-80">
{bundleId}
</div>
</div>
{(isRunning || isRecording) && (
<button
onClick={handleRecordClick}
className={`h-8 min-w-[80px] flex items-center justify-center rounded-lg text-sm font-medium transition-all duration-200 ${
isRecording
? 'bg-red-50 text-red-600 hover:bg-red-100 border border-red-200'
: 'bg-blue-50 text-blue-600 hover:bg-blue-100 border border-blue-200'
}`}
>
{isRecording ? (
<>
<div className="w-1.5 h-1.5 rounded-full bg-red-500 animate-pulse mr-2" />
<span>Stop</span>
</>
) : (
<span>Record</span>
)}
</button>
)}
</div>
);
}

View File

@@ -0,0 +1,144 @@
import React from 'react';
import useSWRSubscription from 'swr/subscription';
import type { App, AppGroup, RecordingStatus } from '../types';
import { socket } from '../utils';
import { AppItem } from './app-item';
export function AppList() {
const { data: apps = [] } = useSWRSubscription('apps', (_key, { next }) => {
let apps: App[] = [];
// Initial apps fetch
fetch('/api/apps')
.then(res => res.json())
.then(data => {
apps = data.apps;
next(null, apps);
})
.catch(err => next(err));
// Subscribe to app updates
socket.on('apps:all', data => {
next(null, data.apps);
apps = data.apps;
});
socket.on('apps:state-changed', data => {
const index = apps.findIndex(a => a.processId === data.processId);
if (index !== -1) {
next(
null,
apps.toSpliced(index, 1, {
...apps[index],
running: data.running,
})
);
}
});
socket.on('connect', () => {
// Refetch on reconnect
fetch('/api/apps')
.then(res => res.json())
.then(data => next(null, data.apps))
.catch(err => next(err));
});
return () => {
socket.off('apps:all');
socket.off('apps:state-changed');
socket.off('connect');
};
});
const { data: recordings = [] } = useSWRSubscription<RecordingStatus[]>(
'recordings',
(
_key: string,
{ next }: { next: (err: Error | null, data?: RecordingStatus[]) => void }
) => {
// Subscribe to recording updates
socket.on('apps:recording', (data: { recordings: RecordingStatus[] }) => {
next(null, data.recordings);
});
return () => {
socket.off('apps:recording');
};
}
);
const appGroups: AppGroup[] = React.useMemo(() => {
const mapping = apps.reduce((acc: Record<number, AppGroup>, app: App) => {
if (!acc[app.processGroupId]) {
acc[app.processGroupId] = {
processGroupId: app.processGroupId,
apps: [],
rootApp:
apps.find((a: App) => a.processId === app.processGroupId) || app,
};
}
acc[app.processGroupId].apps.push(app);
return acc;
}, {});
return Object.values(mapping);
}, [apps]);
const runningApps = (appGroups || []).filter(app =>
app.apps.some(a => a.running)
);
const notRunningApps = (appGroups || []).filter(
app => !app.apps.some(a => a.running)
);
return (
<div className="h-full flex flex-col divide-y divide-gray-100">
<div className="p-4 relative">
<div className="flex items-center justify-between sticky top-0 bg-white z-10 mb-2">
<h2 className="text-sm font-semibold text-gray-900">
Active Applications
</h2>
<span className="text-xs px-2 py-1 bg-blue-50 rounded-full text-blue-600 font-medium">
{runningApps.length} listening
</span>
</div>
<div className="space-y-2">
{runningApps.map(app => (
<AppItem
key={app.processGroupId}
app={app}
recordings={recordings}
/>
))}
{runningApps.length === 0 && (
<div className="text-sm text-gray-500 italic bg-gray-50 rounded-xl p-4 text-center">
No applications are currently listening
</div>
)}
</div>
</div>
<div className="p-4 flex-1 relative">
<div className="flex items-center justify-between sticky top-0 bg-white z-10 mb-2">
<h2 className="text-sm font-semibold text-gray-900">
Other Applications
</h2>
<span className="text-xs px-2 py-1 bg-gray-50 rounded-full text-gray-600 font-medium">
{notRunningApps.length} available
</span>
</div>
<div className="space-y-2">
{notRunningApps.map(app => (
<AppItem
key={app.processGroupId}
app={app}
recordings={recordings}
/>
))}
{notRunningApps.length === 0 && (
<div className="text-sm text-gray-500 italic bg-gray-50 rounded-xl p-4 text-center">
No other applications found
</div>
)}
</div>
</div>
</div>
);
}

View File

@@ -0,0 +1,163 @@
import type { ReactElement } from 'react';
export function PlayIcon(): ReactElement {
return (
<svg
className="w-6 h-6 text-gray-900"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
fillRule="evenodd"
clipRule="evenodd"
d="M4.5 5.653c0-1.426 1.529-2.33 2.779-1.643l11.54 6.348c1.295.712 1.295 2.573 0 3.285L7.28 19.991c-1.25.687-2.779-.217-2.779-1.643V5.653z"
fill="currentColor"
/>
</svg>
);
}
export function PauseIcon(): ReactElement {
return (
<svg
className="w-6 h-6 text-gray-900"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
fillRule="evenodd"
clipRule="evenodd"
d="M6.75 5.25a.75.75 0 01.75-.75H9a.75.75 0 01.75.75v13.5a.75.75 0 01-.75.75H7.5a.75.75 0 01-.75-.75V5.25zm7 0a.75.75 0 01.75-.75h1.5a.75.75 0 01.75.75v13.5a.75.75 0 01-.75.75h-1.5a.75.75 0 01-.75-.75V5.25z"
fill="currentColor"
/>
</svg>
);
}
export function RewindIcon(): ReactElement {
return (
<svg
className="w-5 h-5 text-gray-600"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M12.066 11.2a1 1 0 000 1.6l5.334 4A1 1 0 0019 16V8a1 1 0 00-1.6-.8l-5.334 4zM11 8a1 1 0 00-1.6-.8l-5.334 4a1 1 0 000 1.6l5.334 4A1 1 0 0011 16V8z"
fill="currentColor"
/>
</svg>
);
}
export function ForwardIcon(): ReactElement {
return (
<svg
className="w-5 h-5 text-gray-600"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M5 8a1 1 0 011.6-.8l5.334 4a1 1 0 010 1.6L6.6 16.8A1 1 0 015 16V8zm7.066-.8a1 1 0 00-1.6.8v8a1 1 0 001.6.8l5.334-4a1 1 0 000-1.6l-5.334-4z"
fill="currentColor"
/>
</svg>
);
}
export function DeleteIcon(): ReactElement {
return (
<svg
className="w-5 h-5"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"
stroke="currentColor"
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
/>
</svg>
);
}
export function LoadingSpinner(): ReactElement {
return (
<svg className="animate-spin h-4 w-4" viewBox="0 0 24 24">
<circle
className="opacity-25"
cx="12"
cy="12"
r="10"
stroke="currentColor"
strokeWidth="4"
fill="none"
/>
<path
className="opacity-75"
fill="currentColor"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
/>
</svg>
);
}
export function ErrorIcon(): ReactElement {
return (
<svg
className="w-4 h-4 mr-1.5 flex-shrink-0"
fill="currentColor"
viewBox="0 0 20 20"
>
<path
fillRule="evenodd"
d="M10 18a8 8 0 100-16 8 8 0 000 16zM8.707 7.293a1 1 0 00-1.414 1.414L8.586 10l-1.293 1.293a1 1 0 101.414 1.414L10 11.414l1.293 1.293a1 1 0 001.414-1.414L11.414 10l1.293-1.293a1 1 0 00-1.414-1.414L10 8.586 8.707 7.293z"
clipRule="evenodd"
/>
</svg>
);
}
export function MicrophoneIcon(): ReactElement {
return (
<svg
className="w-4 h-4 mr-1.5 text-blue-500"
viewBox="0 0 20 20"
fill="currentColor"
>
<path d="M7 4a3 3 0 016 0v4a3 3 0 11-6 0V4zm4 10.93A7.001 7.001 0 0017 8a1 1 0 10-2 0A5 5 0 015 8a1 1 0 00-2 0 7.001 7.001 0 006 6.93V17H6a1 1 0 100 2h8a1 1 0 100-2h-3v-2.07z" />
</svg>
);
}
export function WarningIcon(): ReactElement {
return (
<svg className="w-4 h-4 mr-1.5" viewBox="0 0 20 20" fill="currentColor">
<path
fillRule="evenodd"
d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z"
clipRule="evenodd"
/>
</svg>
);
}
export function DefaultAppIcon(): ReactElement {
return (
<svg
xmlns="http://www.w3.org/2000/svg"
className="h-6 w-6"
viewBox="0 0 20 20"
fill="currentColor"
>
<path d="M10 2a3 3 0 00-3 3v4a3 3 0 006 0V5a3 3 0 00-3-3zm0 2a1 1 0 011 1v4a1 1 0 11-2 0V5a1 1 0 011-1z" />
<path d="M3 10a7 7 0 1014 0h-2a5 5 0 11-10 0H3z" />
</svg>
);
}

View File

@@ -0,0 +1,872 @@
import type { ReactElement } from 'react';
import React from 'react';
import ReactMarkdown from 'react-markdown';
import type { SavedRecording, TranscriptionMetadata } from '../types';
import { formatDuration, socket } from '../utils';
import {
DefaultAppIcon,
DeleteIcon,
ErrorIcon,
ForwardIcon,
LoadingSpinner,
MicrophoneIcon,
PauseIcon,
PlayIcon,
RewindIcon,
WarningIcon,
} from './icons';
interface SavedRecordingItemProps {
recording: SavedRecording;
}
// Audio player controls component
function AudioControls({
audioRef,
playbackRate,
onPlaybackRateChange,
onSeek,
onPlayPause,
}: {
audioRef: React.RefObject<HTMLAudioElement | null>;
playbackRate: number;
onPlaybackRateChange: () => void;
onSeek: (seconds: number) => void;
onPlayPause: () => void;
}): ReactElement {
const [currentTime, setCurrentTime] = React.useState('00:00');
const [duration, setDuration] = React.useState('00:00');
React.useEffect(() => {
const audio = audioRef.current;
if (!audio) return;
const formatTime = (time: number) => {
const minutes = Math.floor(time / 60);
const seconds = Math.floor(time % 60);
return `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`;
};
const updateTime = () => {
setCurrentTime(formatTime(audio.currentTime));
setDuration(formatTime(audio.duration));
};
audio.addEventListener('timeupdate', updateTime);
audio.addEventListener('loadedmetadata', updateTime);
return () => {
audio.removeEventListener('timeupdate', updateTime);
audio.removeEventListener('loadedmetadata', updateTime);
};
}, [audioRef]);
return (
<div className="flex items-center justify-between">
<div className="flex items-center space-x-2">
<button
onClick={() => onSeek(-15)}
className="p-2 hover:bg-gray-50 rounded-lg transition-all duration-200 border border-transparent hover:border-gray-100 hover:shadow-sm"
title="Back 15 seconds"
>
<RewindIcon />
</button>
<button
onClick={onPlayPause}
className="p-2 hover:bg-gray-50 rounded-lg transition-all duration-200 border border-transparent hover:border-gray-100 hover:shadow-sm"
>
{audioRef.current?.paused ? <PlayIcon /> : <PauseIcon />}
</button>
<button
onClick={() => onSeek(30)}
className="p-2 hover:bg-gray-50 rounded-lg transition-all duration-200 border border-transparent hover:border-gray-100 hover:shadow-sm"
title="Forward 30 seconds"
>
<ForwardIcon />
</button>
<div className="text-sm font-mono text-gray-500 ml-2">
{currentTime} <span className="text-gray-400">/</span> {duration}
</div>
</div>
<button
onClick={onPlaybackRateChange}
className="px-3 py-1.5 text-sm font-medium text-gray-600 bg-gray-50 hover:bg-gray-100 rounded-lg transition-all duration-200 border border-gray-100 hover:shadow-sm"
>
{playbackRate}x
</button>
</div>
);
}
// Waveform visualization component
function WaveformVisualizer({
containerRef,
waveformData,
currentTime,
fileName,
}: {
containerRef: React.RefObject<HTMLDivElement | null>;
waveformData: number[];
currentTime: number;
fileName: string;
}): ReactElement {
return (
<div
className="relative h-14 bg-gray-50 overflow-hidden rounded-lg border border-gray-100"
ref={containerRef}
>
<div className="absolute inset-0 flex items-end">
{waveformData.map((amplitude, i) => (
<div
key={`${fileName}-bar-${i}`}
className="flex-1 bg-red-400 transition-all duration-200"
style={{
height: `${Math.max(amplitude * 100, 3)}%`,
opacity:
i < Math.floor(currentTime * waveformData.length) ? 1 : 0.3,
margin: '0 0.5px',
}}
/>
))}
</div>
</div>
);
}
// TranscriptionMessage component
function TranscriptionMessage({
item,
isNewSpeaker,
isCurrentMessage,
}: {
item: {
speaker: string;
start_time: string;
transcription: string;
};
isNewSpeaker: boolean;
isCurrentMessage: boolean;
}): ReactElement {
return (
<div className="flex items-start gap-3 group transition-all duration-300 w-full">
<div className="w-[120px] flex-shrink-0">
<div className="flex flex-col items-start gap-1">
{isNewSpeaker && (
<div
className={`px-2.5 py-1 rounded-lg text-xs font-medium border transition-colors duration-300 ${
isCurrentMessage
? 'bg-blue-100 text-blue-700 border-blue-200'
: 'bg-blue-50 text-blue-600 border-blue-100'
}`}
>
{item.speaker}
</div>
)}
<div
className={`text-[11px] font-mono ml-2 transition-colors duration-300 ${
isCurrentMessage ? 'text-blue-500' : 'text-gray-400'
}`}
>
{item.start_time}
</div>
</div>
</div>
<div className="flex-1 min-w-0 w-full">
<div
className={`text-sm leading-relaxed rounded-xl px-4 py-2 border transition-all inline-flex duration-300 ${
isCurrentMessage
? 'bg-blue-50/50 text-blue-900 border-blue-200 shadow-md'
: 'bg-white text-gray-600 border-gray-100 shadow-sm hover:shadow-md'
}`}
>
{item.transcription}
</div>
</div>
</div>
);
}
// TranscriptionSummary component
function TranscriptionSummary({ summary }: { summary: string }): ReactElement {
return (
<div className="mb-6 bg-blue-50/50 rounded-xl p-4 border border-blue-100">
<div className="text-xs font-medium text-blue-600 mb-2 uppercase tracking-wider">
Summary
</div>
<div className="text-sm text-gray-700 leading-relaxed prose prose-sm max-w-none prose-headings:text-gray-900 prose-a:text-blue-600 whitespace-pre-wrap">
<ReactMarkdown>{summary}</ReactMarkdown>
</div>
</div>
);
}
// TranscriptionContent component
function TranscriptionContent({
transcriptionData,
currentAudioTime,
}: {
transcriptionData: {
segments: Array<{
speaker: string;
start_time: string;
transcription: string;
}>;
summary: string;
title: string;
};
currentAudioTime: number;
}): ReactElement {
const parseTimestamp = (timestamp: string) => {
// Handle "MM:SS" format (without hours)
const [minutes, seconds] = timestamp.split(':');
return parseInt(minutes, 10) * 60 + parseInt(seconds, 10);
};
return (
<div className="space-y-2 py-2 max-h-[400px] overflow-y-auto pr-2 scrollbar-thin scrollbar-thumb-gray-300 scrollbar-track-transparent hover:scrollbar-thumb-gray-400 w-full">
<TranscriptionSummary summary={transcriptionData.summary} />
{transcriptionData.segments.map((item, index) => {
const isNewSpeaker =
index === 0 ||
transcriptionData.segments[index - 1].speaker !== item.speaker;
const startTime = parseTimestamp(item.start_time);
// Use next segment's start time as end time, or add 3 seconds for the last segment
const endTime =
index < transcriptionData.segments.length - 1
? parseTimestamp(transcriptionData.segments[index + 1].start_time)
: startTime + 3;
const isCurrentMessage =
currentAudioTime >= startTime && currentAudioTime < endTime;
return (
<TranscriptionMessage
key={`${item.speaker}-${item.start_time}-${index}`}
item={item}
isNewSpeaker={isNewSpeaker}
isCurrentMessage={isCurrentMessage}
/>
);
})}
</div>
);
}
// TranscriptionStatus component
function TranscriptionStatus({
transcription,
transcriptionError,
currentAudioTime,
}: {
transcription?: TranscriptionMetadata;
transcriptionError: string | null;
currentAudioTime: number;
}): ReactElement | null {
if (!transcription && !transcriptionError) {
return null;
}
if (transcription?.transcriptionStatus === 'pending') {
return (
<div className="my-2">
<div className="text-sm text-gray-600 bg-gray-50/50 p-4 border border-gray-100 w-full">
<div className="font-medium text-gray-900 mb-4 flex items-center sticky top-0 bg-gray-50/50 backdrop-blur-sm z-10 py-2">
<MicrophoneIcon />
<span>Processing Audio</span>
</div>
<div className="flex items-center justify-center py-8">
<div className="flex flex-col items-center gap-3">
<LoadingSpinner />
<div className="text-sm text-gray-600">
<span className="font-medium">Starting transcription</span>
<span className="text-gray-400 animate-pulse">...</span>
</div>
<div className="text-xs text-gray-400 max-w-sm text-center">
This may take a few moments depending on the length of the
recording
</div>
</div>
</div>
</div>
</div>
);
}
if (transcriptionError) {
return (
<div className="text-xs text-red-500 m-2 flex items-center bg-red-50 rounded-lg p-2 border border-red-100">
<ErrorIcon />
{transcriptionError}
</div>
);
}
if (
transcription?.transcriptionStatus === 'completed' &&
transcription.transcription
) {
try {
const transcriptionData = transcription.transcription;
if (
!transcriptionData.segments ||
!Array.isArray(transcriptionData.segments)
) {
throw new Error('Invalid transcription data format');
}
return (
<div className="my-2">
<div className="text-sm text-gray-600 bg-gray-50/50 p-4 border border-gray-100 w-full">
<div className="font-medium text-gray-900 mb-4 flex items-center sticky top-0 bg-gray-50/50 backdrop-blur-sm z-10 py-2">
<MicrophoneIcon />
<span>Conversation Transcript</span>
</div>
{transcriptionData.title && (
<div className="mb-4 bg-blue-50/50 rounded-lg p-3 border border-blue-100">
<div className="text-xs font-medium text-blue-600 uppercase tracking-wider mb-1">
Title
</div>
<div className="text-base font-medium text-gray-900">
{transcriptionData.title}
</div>
</div>
)}
<TranscriptionContent
transcriptionData={transcriptionData}
currentAudioTime={currentAudioTime}
/>
</div>
</div>
);
} catch (error) {
return (
<div className="text-sm text-red-500 bg-red-50 rounded-lg p-2 border border-red-100 m-2">
{error instanceof Error
? error.message
: 'Failed to parse transcription data'}
</div>
);
}
}
return null;
}
// RecordingHeader component
function RecordingHeader({
metadata,
fileName,
recordingDate,
duration,
error,
isDeleting,
showDeleteConfirm,
setShowDeleteConfirm,
handleDeleteClick,
}: {
metadata: SavedRecording['metadata'];
fileName: string;
recordingDate: string;
duration: string;
error: string | null;
isDeleting: boolean;
showDeleteConfirm: boolean;
setShowDeleteConfirm: (show: boolean) => void;
handleDeleteClick: () => void;
transcriptionError: string | null;
}): ReactElement {
const [imgError, setImgError] = React.useState(false);
return (
<div className="flex items-start space-x-4 p-4 bg-gray-50/30">
<div className="relative w-12 h-12 flex-shrink-0">
{!imgError ? (
<img
src={`/api/recordings/${fileName}/icon.png`}
alt={metadata?.appName || 'Unknown Application'}
className="w-12 h-12 object-contain rounded-lg bg-gray-50 border border-gray-100 shadow-sm transition-transform duration-200 hover:scale-105"
onError={() => setImgError(true)}
/>
) : (
<div className="w-12 h-12 rounded-xl flex items-center justify-center text-gray-500 bg-gray-50 border border-gray-100 shadow-sm">
<DefaultAppIcon />
</div>
)}
</div>
<div className="flex-1 min-w-0">
<div className="flex items-center justify-between">
<div className="flex items-center space-x-2">
<span className="text-gray-900 font-semibold text-base truncate">
{metadata?.appName || 'Unknown Application'}
</span>
<span className="text-xs px-2 py-0.5 bg-blue-50 rounded-full text-blue-600 font-medium border border-blue-100">
{duration}
</span>
</div>
<div className="flex items-center">
{showDeleteConfirm ? (
<div className="flex items-center space-x-2">
<button
onClick={() => setShowDeleteConfirm(false)}
className="h-8 px-3 text-sm font-medium text-gray-600 hover:bg-gray-50 rounded-lg transition-colors border border-gray-100"
disabled={isDeleting}
>
Cancel
</button>
<button
onClick={handleDeleteClick}
className="h-8 px-3 text-sm font-medium text-red-600 hover:bg-red-50 rounded-lg transition-colors border border-red-100 disabled:opacity-50 disabled:cursor-not-allowed"
disabled={isDeleting}
>
{isDeleting ? (
<div className="flex items-center space-x-2">
<LoadingSpinner />
<span>Deleting...</span>
</div>
) : (
'Confirm'
)}
</button>
</div>
) : (
<button
onClick={() => setShowDeleteConfirm(true)}
className="h-8 w-8 flex items-center justify-center text-gray-400 hover:text-red-500 hover:bg-red-50 rounded-lg transition-colors"
title="Delete recording"
>
<DeleteIcon />
</button>
)}
</div>
</div>
<div className="text-sm text-gray-600 mt-1">{recordingDate}</div>
<div className="text-xs text-gray-400 font-mono mt-0.5 truncate">
{metadata?.bundleIdentifier || fileName}
</div>
{error && (
<div className="text-xs text-red-500 mt-2 flex items-center bg-red-50 rounded-lg p-2 border border-red-100">
<ErrorIcon />
{error}
</div>
)}
</div>
</div>
);
}
// AudioPlayer component
function AudioPlayer({
isLoading,
error,
audioRef,
playbackRate,
handlePlaybackRateChange,
handleSeek,
handlePlayPause,
containerRef,
waveformData,
currentTime,
fileName,
}: {
isLoading: boolean;
error: string | null;
audioRef: React.RefObject<HTMLAudioElement | null>;
playbackRate: number;
handlePlaybackRateChange: () => void;
handleSeek: (seconds: number) => void;
handlePlayPause: () => void;
containerRef: React.RefObject<HTMLDivElement | null>;
waveformData: number[];
currentTime: number;
fileName: string;
}): ReactElement {
return (
<div className="px-4 pb-4">
{isLoading && !error ? (
<div className="h-14 bg-gray-50 rounded-lg flex items-center justify-center border border-gray-100">
<LoadingSpinner />
<span className="ml-2 text-sm text-gray-600 font-medium">
Loading audio...
</span>
</div>
) : (
<div className="flex flex-col space-y-3">
<AudioControls
audioRef={audioRef}
playbackRate={playbackRate}
onPlaybackRateChange={handlePlaybackRateChange}
onSeek={handleSeek}
onPlayPause={handlePlayPause}
/>
<WaveformVisualizer
containerRef={containerRef}
waveformData={waveformData}
currentTime={currentTime}
fileName={fileName}
/>
</div>
)}
</div>
);
}
// TranscribeButton component
function TranscribeButton({
transcriptionStatus,
onTranscribe,
}: {
transcriptionStatus?: TranscriptionMetadata['transcriptionStatus'];
onTranscribe: () => void;
}): ReactElement {
return (
<div className="px-4 pb-4">
<div className="flex justify-end">
<button
onClick={onTranscribe}
disabled={transcriptionStatus === 'pending'}
className={`h-8 px-3 text-sm font-medium rounded-lg transition-colors border flex items-center space-x-2
${
transcriptionStatus === 'pending'
? 'bg-blue-50 text-blue-600 border-blue-200 cursor-not-allowed'
: transcriptionStatus === 'completed'
? 'text-blue-600 hover:bg-blue-50 border-blue-100'
: transcriptionStatus === 'error'
? 'text-red-600 hover:bg-red-50 border-red-100'
: 'text-blue-600 hover:bg-blue-50 border-blue-100'
}`}
>
{transcriptionStatus === 'pending' ? (
<>
<LoadingSpinner />
<span>Transcribing...</span>
</>
) : transcriptionStatus === 'completed' ? (
<>
<MicrophoneIcon />
<span>Transcribe Again</span>
</>
) : transcriptionStatus === 'error' ? (
<>
<WarningIcon />
<span>Retry Transcription</span>
</>
) : (
<>
<MicrophoneIcon />
<span>Transcribe</span>
</>
)}
</button>
</div>
</div>
);
}
// Main SavedRecordingItem component
export function SavedRecordingItem({
recording,
}: SavedRecordingItemProps): ReactElement {
const [error, setError] = React.useState<string | null>(null);
const [isLoading, setIsLoading] = React.useState(true);
const [isDeleting, setIsDeleting] = React.useState(false);
const [showDeleteConfirm, setShowDeleteConfirm] = React.useState(false);
const [playbackRate, setPlaybackRate] = React.useState(1);
const [waveformData, setWaveformData] = React.useState<number[]>([]);
const [currentTime, setCurrentTime] = React.useState(0);
const audioRef = React.useRef<HTMLAudioElement | null>(null);
const containerRef = React.useRef<HTMLDivElement | null>(null);
const [segments, setSegments] = React.useState(40);
const [currentAudioTime, setCurrentAudioTime] = React.useState(0);
const [transcriptionError, setTranscriptionError] = React.useState<
string | null
>(null);
const metadata = recording.metadata;
const fileName = recording.wav;
const recordingDate = metadata
? new Date(metadata.recordingStartTime).toLocaleString()
: 'Unknown date';
const duration = metadata
? formatDuration(metadata.recordingDuration * 1000)
: 'Unknown duration';
// Update current audio time
React.useEffect(() => {
const audio = audioRef.current;
if (audio) {
const handleTimeUpdate = () => {
setCurrentAudioTime(audio.currentTime);
};
audio.addEventListener('timeupdate', handleTimeUpdate);
return () => audio.removeEventListener('timeupdate', handleTimeUpdate);
}
return () => {};
}, []);
// Calculate number of segments based on container width
React.useEffect(() => {
const updateSegments = () => {
if (containerRef.current) {
// Each bar should be at least 2px wide (1px bar + 1px gap)
const width = containerRef.current.offsetWidth;
setSegments(Math.floor(width / 2));
}
};
updateSegments();
const resizeObserver = new ResizeObserver(updateSegments);
if (containerRef.current) {
resizeObserver.observe(containerRef.current);
}
return () => resizeObserver.disconnect();
}, []);
const processAudioData = React.useCallback(async () => {
try {
const response = await fetch(`/api/recordings/${fileName}/recording.wav`);
if (!response.ok) {
throw new Error(
`Failed to fetch audio file (${response.status}): ${response.statusText}`
);
}
const audioContext = new AudioContext();
const arrayBuffer = await response.arrayBuffer();
// Ensure we have data to process
if (!arrayBuffer || arrayBuffer.byteLength === 0) {
throw new Error('No audio data received');
}
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
const channelData = audioBuffer.getChannelData(0);
// Process the audio data in chunks to create the waveform
const numberOfSamples = channelData.length;
const samplesPerSegment = Math.floor(numberOfSamples / segments);
const waveform: number[] = [];
for (let i = 0; i < segments; i++) {
const start = i * samplesPerSegment;
const end = start + samplesPerSegment;
const segmentData = channelData.slice(start, end);
// Calculate RMS (root mean square) for better amplitude representation
const rms = Math.sqrt(
segmentData.reduce((sum, sample) => sum + sample * sample, 0) /
segmentData.length
);
waveform.push(rms);
}
// Normalize the waveform data to a 0-1 range
      const maxAmplitude = Math.max(...waveform);
      // Guard against division by zero for silent audio
      const normalizedWaveform =
        maxAmplitude > 0 ? waveform.map(amp => amp / maxAmplitude) : waveform;
setWaveformData(normalizedWaveform);
setIsLoading(false);
} catch (err) {
console.error('Error processing audio:', err);
setError(
err instanceof Error ? err.message : 'Failed to process audio data'
);
setIsLoading(false);
}
}, [fileName, segments]);
React.useEffect(() => {
const audio = audioRef.current;
if (audio) {
const handleError = (e: ErrorEvent) => {
console.error('Audio error:', e);
setError('Failed to load audio');
setIsLoading(false);
};
const handleLoadedMetadata = () => {
void processAudioData().catch(err => {
console.error('Error processing audio data:', err);
setError('Failed to process audio data');
setIsLoading(false);
});
};
const handleTimeUpdate = () => {
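        // Store playback progress as a 0-1 fraction (consumed by the waveform overlay)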
setCurrentTime(audio.currentTime / audio.duration);
};
audio.addEventListener('error', handleError as EventListener);
audio.addEventListener('loadedmetadata', handleLoadedMetadata);
audio.addEventListener('timeupdate', handleTimeUpdate);
return () => {
audio.removeEventListener('error', handleError as EventListener);
audio.removeEventListener('loadedmetadata', handleLoadedMetadata);
audio.removeEventListener('timeupdate', handleTimeUpdate);
};
}
return () => {};
}, [processAudioData]);
const handlePlayPause = React.useCallback(() => {
if (audioRef.current) {
if (audioRef.current.paused) {
void audioRef.current.play();
} else {
audioRef.current.pause();
}
}
}, []);
const handleSeek = React.useCallback((seconds: number) => {
if (audioRef.current) {
audioRef.current.currentTime += seconds;
}
}, []);
const handlePlaybackRateChange = React.useCallback(() => {
if (audioRef.current) {
const newRate = playbackRate === 1 ? 1.5 : 1;
audioRef.current.playbackRate = newRate;
setPlaybackRate(newRate);
}
}, [playbackRate]);
const handleDelete = React.useCallback(async () => {
setIsDeleting(true);
setError(null); // Clear any previous errors
try {
const response = await fetch(`/api/recordings/${recording.wav}`, {
method: 'DELETE',
});
if (!response.ok) {
let errorMessage: string;
try {
const errorData = await response.json();
errorMessage = errorData.error;
} catch {
errorMessage = `Server error (${response.status}): ${response.statusText}`;
}
throw new Error(errorMessage);
}
setShowDeleteConfirm(false);
} catch (err) {
console.error('Error deleting recording:', err);
setError(
err instanceof Error ? err.message : 'An unexpected error occurred'
);
} finally {
setIsDeleting(false);
}
}, [recording.wav]);
const handleDeleteClick = React.useCallback(() => {
void handleDelete().catch(err => {
console.error('Unexpected error during deletion:', err);
setError('An unexpected error occurred');
});
}, [handleDelete]);
  React.useEffect(() => {
    // Listen for transcription events; keep handler references so cleanup only
    // removes this item's listeners (socket.off without a handler would also
    // detach listeners registered by other SavedRecordingItem instances)
    const handleTranscriptionStart = (data: { filename: string }) => {
      if (data.filename === recording.wav) {
        setTranscriptionError(null);
      }
    };
    const handleTranscriptionEnd = (data: {
      filename: string;
      success: boolean;
      transcription?: string;
      error?: string;
    }) => {
      if (data.filename === recording.wav && !data.success) {
        setTranscriptionError(data.error || 'Transcription failed');
      }
    };
    socket.on('apps:recording-transcription-start', handleTranscriptionStart);
    socket.on('apps:recording-transcription-end', handleTranscriptionEnd);
    return () => {
      socket.off('apps:recording-transcription-start', handleTranscriptionStart);
      socket.off('apps:recording-transcription-end', handleTranscriptionEnd);
    };
  }, [recording.wav]);
const handleTranscribe = React.useCallback(async () => {
try {
const response = await fetch(
`/api/recordings/${recording.wav}/transcribe`,
{
method: 'POST',
}
);
if (!response.ok) {
const error = await response.json();
throw new Error(error.error || 'Failed to start transcription');
}
} catch (err) {
setTranscriptionError(
err instanceof Error ? err.message : 'Failed to start transcription'
);
}
}, [recording.wav]);
return (
<div className="bg-white rounded-lg shadow-sm hover:shadow-md transition-all duration-300 overflow-hidden mb-3 border border-gray-100 hover:border-gray-200">
<RecordingHeader
metadata={metadata}
fileName={fileName}
recordingDate={recordingDate}
duration={duration}
error={error}
isDeleting={isDeleting}
showDeleteConfirm={showDeleteConfirm}
setShowDeleteConfirm={setShowDeleteConfirm}
handleDeleteClick={handleDeleteClick}
transcriptionError={transcriptionError}
/>
<AudioPlayer
isLoading={isLoading}
error={error}
audioRef={audioRef as React.RefObject<HTMLAudioElement>}
playbackRate={playbackRate}
handlePlaybackRateChange={handlePlaybackRateChange}
handleSeek={handleSeek}
handlePlayPause={handlePlayPause}
containerRef={containerRef as React.RefObject<HTMLDivElement>}
waveformData={waveformData}
currentTime={currentTime}
fileName={fileName}
/>
<audio
ref={audioRef}
src={`/api/recordings/${fileName}/recording.wav`}
preload="metadata"
className="hidden"
/>
<TranscriptionStatus
transcription={recording.transcription}
transcriptionError={transcriptionError}
currentAudioTime={currentAudioTime}
/>
<TranscribeButton
transcriptionStatus={recording.transcription?.transcriptionStatus}
onTranscribe={() => void handleTranscribe()}
/>
</div>
);
}

View File

@@ -0,0 +1,41 @@
import useSWRSubscription from 'swr/subscription';
import type { SavedRecording } from '../types';
import { socket } from '../utils';
import { SavedRecordingItem } from './saved-recording-item';
export function SavedRecordings(): React.ReactElement {
const { data: recordings = [] } = useSWRSubscription<SavedRecording[]>(
'saved-recordings',
(
_key: string,
{ next }: { next: (err: Error | null, data?: SavedRecording[]) => void }
) => {
// Subscribe to saved recordings updates
      const handleSaved = (data: { recordings: SavedRecording[] }) => {
        next(null, data.recordings);
      };
      socket.on('apps:saved', handleSaved);
      fetch('/api/apps/saved')
        .then(res => res.json())
        .then(data => next(null, data.recordings))
        .catch(err => next(err));
      return () => {
        socket.off('apps:saved', handleSaved);
};
}
);
if (recordings.length === 0) {
return <p className="text-gray-500 italic text-sm">No saved recordings</p>;
}
return (
<div className="space-y-1">
{recordings.map(recording => (
<SavedRecordingItem key={recording.wav} recording={recording} />
))}
</div>
);
}

View File

@@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Media Capture Playground</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/main.tsx"></script>
</body>
</html>

View File

@@ -0,0 +1 @@
@import 'tailwindcss';

View File

@@ -0,0 +1,11 @@
import './main.css';
import { createRoot } from 'react-dom/client';
import { App } from './app';
const rootElement = document.getElementById('root');
if (!rootElement) {
throw new Error('Failed to find the root element');
}
createRoot(rootElement).render(<App />);

View File

@@ -0,0 +1,55 @@
export interface App {
processId: number;
processGroupId: number;
bundleIdentifier: string;
name: string;
running: boolean;
}
export interface AppGroup {
processGroupId: number;
rootApp: App;
apps: App[];
}
export interface RecordingStatus {
processId: number;
bundleIdentifier: string;
name: string;
startTime: number;
}
export interface RecordingMetadata {
appName: string;
bundleIdentifier: string;
processId: number;
recordingStartTime: number;
recordingEndTime: number;
recordingDuration: number;
sampleRate: number;
totalSamples: number;
icon?: Uint8Array;
}
export interface TranscriptionMetadata {
transcriptionStartTime: number;
transcriptionEndTime: number;
transcriptionStatus: 'not_started' | 'pending' | 'completed' | 'error';
transcription?: {
title: string;
segments: Array<{
speaker: string;
start_time: string;
end_time: string;
transcription: string;
}>;
summary: string;
};
error?: string;
}
export interface SavedRecording {
wav: string;
metadata?: RecordingMetadata;
transcription?: TranscriptionMetadata;
}

View File

@@ -0,0 +1,19 @@
import { io } from 'socket.io-client';
// Create a singleton socket instance
export const socket = io('http://localhost:6544');
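// NOTE: port 6544 is assumed to match the local playground server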
export function formatDuration(ms: number): string {
const seconds = Math.floor(ms / 1000);
const minutes = Math.floor(seconds / 60);
const hours = Math.floor(minutes / 60);
return `${hours.toString().padStart(2, '0')}:${(minutes % 60)
.toString()
.padStart(2, '0')}:${(seconds % 60).toString().padStart(2, '0')}`;
}
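// e.g. formatDuration(90_500) === '00:01:30'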
// Helper function to convert timestamp (MM:SS.mmm) to seconds
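// e.g. timestampToSeconds('02:15.500') === 135.5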
export function timestampToSeconds(timestamp: string): number {
const [minutes, seconds] = timestamp.split(':').map(parseFloat);
return minutes * 60 + seconds;
}

View File

@@ -1,2 +1,3 @@
*.fixture
lib
*.bin

View File

@@ -7,14 +7,15 @@ version = "0.0.0"
crate-type = ["cdylib", "rlib"]
[dependencies]
affine_common = { workspace = true }
affine_nbstore = { path = "./nbstore" }
affine_sqlite_v1 = { path = "./sqlite_v1" }
napi = { workspace = true }
napi-derive = { workspace = true }
once_cell = { workspace = true }
sqlx = { workspace = true, default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
tokio = { workspace = true, features = ["full"] }
affine_common = { workspace = true }
affine_media_capture = { path = "./media_capture" }
affine_nbstore = { path = "./nbstore" }
affine_sqlite_v1 = { path = "./sqlite_v1" }
napi = { workspace = true }
napi-derive = { workspace = true }
once_cell = { workspace = true }
sqlx = { workspace = true, default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
tokio = { workspace = true, features = ["full"] }
[build-dependencies]
napi-build = { workspace = true }

View File

@@ -0,0 +1,149 @@
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
Whisper,
WhisperFullParams,
WhisperSamplingStrategy,
} from '@napi-rs/whisper';
import { BehaviorSubject, EMPTY, Observable } from 'rxjs';
import {
distinctUntilChanged,
exhaustMap,
groupBy,
mergeMap,
switchMap,
tap,
} from 'rxjs/operators';
import { type Application, ShareableContent } from './index.js';
const rootDir = join(fileURLToPath(import.meta.url), '..');
const shareableContent = new ShareableContent();
const appList = new Set([
'com.tinyspeck.slackmacgap.helper',
'us.zoom.xos',
'org.mozilla.firefoxdeveloperedition',
]);
console.info(shareableContent.applications().map(app => app.bundleIdentifier));
const GGLM_LARGE = join(rootDir, 'ggml-large-v3-turbo.bin');
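// Assumes ggml-large-v3-turbo.bin has already been downloaded next to this script
// (e.g. from the whisper.cpp ggml model releases)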
const whisper = new Whisper(GGLM_LARGE, {
useGpu: true,
gpuDevice: 1,
});
const whisperParams = new WhisperFullParams(WhisperSamplingStrategy.Greedy);
const SAMPLE_WINDOW_MS = 3000; // 3 seconds, similar to stream.cpp's step_ms
const SAMPLES_PER_WINDOW = (SAMPLE_WINDOW_MS / 1000) * 16000; // 16kHz sample rate
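// => 3 s * 16_000 samples/s = 48_000 samples buffered before each whisper.full() pass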
// eslint-disable-next-line rxjs/finnish
const runningApplications = new BehaviorSubject(
shareableContent.applications()
);
const applicationListChangedSubscriber =
ShareableContent.onApplicationListChanged(() => {
runningApplications.next(shareableContent.applications());
});
runningApplications
.pipe(
mergeMap(apps => apps.filter(app => appList.has(app.bundleIdentifier))),
groupBy(app => app.bundleIdentifier),
mergeMap(app$ =>
app$.pipe(
exhaustMap(app =>
new Observable<[Application, boolean]>(subscriber => {
const stateSubscriber = ShareableContent.onAppStateChanged(
app,
err => {
if (err) {
subscriber.error(err);
return;
}
subscriber.next([app, app.isRunning]);
}
);
return () => {
stateSubscriber.unsubscribe();
};
}).pipe(
distinctUntilChanged(
([_, isRunningA], [__, isRunningB]) => isRunningA === isRunningB
),
switchMap(([app]) =>
!app.isRunning
? EMPTY
: new Observable(observer => {
const buffers: Float32Array[] = [];
const audioStream = app.tapAudio((err, samples) => {
if (err) {
observer.error(err);
return;
}
if (samples) {
buffers.push(samples);
observer.next(samples);
// Calculate total samples in buffer
const totalSamples = buffers.reduce(
(acc, buf) => acc + buf.length,
0
);
// Process when we have enough samples for our window
if (totalSamples >= SAMPLES_PER_WINDOW) {
// Concatenate all buffers
const concatenated = new Float32Array(totalSamples);
let offset = 0;
buffers.forEach(buf => {
concatenated.set(buf, offset);
offset += buf.length;
});
// Transcribe the audio
const result = whisper.full(
whisperParams,
concatenated
);
// Print results
console.info(result);
// Keep any remaining samples for next window
const remainingSamples =
totalSamples - SAMPLES_PER_WINDOW;
if (remainingSamples > 0) {
const lastBuffer = buffers[buffers.length - 1];
buffers.length = 0;
buffers.push(lastBuffer.slice(-remainingSamples));
} else {
buffers.length = 0;
}
}
}
});
return () => {
audioStream.stop();
};
})
)
)
)
)
),
tap({
finalize: () => {
applicationListChangedSubscriber.unsubscribe();
},
})
)
.subscribe();

View File

@@ -0,0 +1,26 @@
[package]
edition = "2021"
name = "affine_media_capture"
version = "0.0.0"
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
napi = { workspace = true, features = ["napi4"] }
napi-derive = { workspace = true, features = ["type-def"] }
rubato = { workspace = true }
[target.'cfg(target_os = "macos")'.dependencies]
block2 = { workspace = true }
core-foundation = { workspace = true, features = ["with-uuid"] }
coreaudio-rs = { workspace = true }
dispatch2 = { workspace = true }
objc2 = { workspace = true }
objc2-foundation = { workspace = true }
screencapturekit = { workspace = true }
thiserror = { workspace = true }
uuid = { workspace = true, features = ["v4"] }
[build-dependencies]
napi-build = { workspace = true }

View File

@@ -0,0 +1,3 @@
fn main() {
napi_build::setup();
}

View File

@@ -0,0 +1,4 @@
#[cfg(target_os = "macos")]
pub mod macos;
#[cfg(target_os = "macos")]
pub(crate) use macos::*;

View File

@@ -0,0 +1,282 @@
use std::{fmt::Display, mem, ptr};
use coreaudio::sys::{
kAudioHardwareNoError, kAudioObjectPropertyElementMain, kAudioObjectPropertyScopeGlobal,
kAudioTapPropertyFormat, AudioObjectGetPropertyData, AudioObjectID, AudioObjectPropertyAddress,
};
use objc2::{Encode, Encoding, RefEncode};
use crate::error::CoreAudioError;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum AudioFormatID {
LinearPcm = 0x6C70636D, // 'lpcm'
Ac3 = 0x61632D33, // 'ac-3'
Ac360958 = 0x63616333, // 'cac3'
AppleIma4 = 0x696D6134, // 'ima4'
Mpeg4Aac = 0x61616320, // 'aac '
Mpeg4Celp = 0x63656C70, // 'celp'
Mpeg4Hvxc = 0x68767863, // 'hvxc'
Mpeg4TwinVq = 0x74777671, // 'twvq'
Mace3 = 0x4D414333, // 'MAC3'
Mace6 = 0x4D414336, // 'MAC6'
ULaw = 0x756C6177, // 'ulaw'
ALaw = 0x616C6177, // 'alaw'
QDesign = 0x51444D43, // 'QDMC'
QDesign2 = 0x51444D32, // 'QDM2'
Qualcomm = 0x51636C70, // 'Qclp'
MpegLayer1 = 0x2E6D7031, // '.mp1'
MpegLayer2 = 0x2E6D7032, // '.mp2'
MpegLayer3 = 0x2E6D7033, // '.mp3'
TimeCode = 0x74696D65, // 'time'
MidiStream = 0x6D696469, // 'midi'
ParameterValueStream = 0x61707673, // 'apvs'
AppleLossless = 0x616C6163, // 'alac'
Mpeg4AacHe = 0x61616368, // 'aach'
Mpeg4AacLd = 0x6161636C, // 'aacl'
Mpeg4AacEld = 0x61616365, // 'aace'
Mpeg4AacEldSbr = 0x61616366, // 'aacf'
Mpeg4AacEldV2 = 0x61616367, // 'aacg'
Mpeg4AacHeV2 = 0x61616370, // 'aacp'
Mpeg4AacSpatial = 0x61616373, // 'aacs'
MpegdUsac = 0x75736163, // 'usac'
Amr = 0x73616D72, // 'samr'
AmrWb = 0x73617762, // 'sawb'
Audible = 0x41554442, // 'AUDB'
ILbc = 0x696C6263, // 'ilbc'
DviIntelIma = 0x6D730011,
MicrosoftGsm = 0x6D730031,
Aes3 = 0x61657333, // 'aes3'
EnhancedAc3 = 0x65632D33, // 'ec-3'
Flac = 0x666C6163, // 'flac'
Opus = 0x6F707573, // 'opus'
Apac = 0x61706163, // 'apac'
Unknown = 0x00000000,
}
impl From<u32> for AudioFormatID {
fn from(value: u32) -> Self {
match value {
0x6C70636D => Self::LinearPcm,
0x61632D33 => Self::Ac3,
0x63616333 => Self::Ac360958,
0x696D6134 => Self::AppleIma4,
0x61616320 => Self::Mpeg4Aac,
0x63656C70 => Self::Mpeg4Celp,
0x68767863 => Self::Mpeg4Hvxc,
0x74777671 => Self::Mpeg4TwinVq,
0x4D414333 => Self::Mace3,
0x4D414336 => Self::Mace6,
0x756C6177 => Self::ULaw,
0x616C6177 => Self::ALaw,
0x51444D43 => Self::QDesign,
0x51444D32 => Self::QDesign2,
0x51636C70 => Self::Qualcomm,
0x2E6D7031 => Self::MpegLayer1,
0x2E6D7032 => Self::MpegLayer2,
0x2E6D7033 => Self::MpegLayer3,
0x74696D65 => Self::TimeCode,
0x6D696469 => Self::MidiStream,
0x61707673 => Self::ParameterValueStream,
0x616C6163 => Self::AppleLossless,
0x61616368 => Self::Mpeg4AacHe,
0x6161636C => Self::Mpeg4AacLd,
0x61616365 => Self::Mpeg4AacEld,
0x61616366 => Self::Mpeg4AacEldSbr,
0x61616367 => Self::Mpeg4AacEldV2,
0x61616370 => Self::Mpeg4AacHeV2,
0x61616373 => Self::Mpeg4AacSpatial,
0x75736163 => Self::MpegdUsac,
0x73616D72 => Self::Amr,
0x73617762 => Self::AmrWb,
0x41554442 => Self::Audible,
0x696C6263 => Self::ILbc,
0x6D730011 => Self::DviIntelIma,
0x6D730031 => Self::MicrosoftGsm,
0x61657333 => Self::Aes3,
0x65632D33 => Self::EnhancedAc3,
0x666C6163 => Self::Flac,
0x6F707573 => Self::Opus,
0x61706163 => Self::Apac,
_ => Self::Unknown,
}
}
}
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct AudioFormatFlags(pub u32);
#[allow(unused)]
impl AudioFormatFlags {
pub const IS_FLOAT: u32 = 1 << 0;
pub const IS_BIG_ENDIAN: u32 = 1 << 1;
pub const IS_SIGNED_INTEGER: u32 = 1 << 2;
pub const IS_PACKED: u32 = 1 << 3;
pub const IS_ALIGNED_HIGH: u32 = 1 << 4;
pub const IS_NON_INTERLEAVED: u32 = 1 << 5;
pub const IS_NON_MIXABLE: u32 = 1 << 6;
pub const ARE_ALL_CLEAR: u32 = 0x80000000;
pub const LINEAR_PCM_IS_FLOAT: u32 = Self::IS_FLOAT;
pub const LINEAR_PCM_IS_BIG_ENDIAN: u32 = Self::IS_BIG_ENDIAN;
pub const LINEAR_PCM_IS_SIGNED_INTEGER: u32 = Self::IS_SIGNED_INTEGER;
pub const LINEAR_PCM_IS_PACKED: u32 = Self::IS_PACKED;
pub const LINEAR_PCM_IS_ALIGNED_HIGH: u32 = Self::IS_ALIGNED_HIGH;
pub const LINEAR_PCM_IS_NON_INTERLEAVED: u32 = Self::IS_NON_INTERLEAVED;
pub const LINEAR_PCM_IS_NON_MIXABLE: u32 = Self::IS_NON_MIXABLE;
pub const LINEAR_PCM_SAMPLE_FRACTION_SHIFT: u32 = 7;
pub const LINEAR_PCM_SAMPLE_FRACTION_MASK: u32 = 0x3F << Self::LINEAR_PCM_SAMPLE_FRACTION_SHIFT;
pub const LINEAR_PCM_ARE_ALL_CLEAR: u32 = Self::ARE_ALL_CLEAR;
pub const APPLE_LOSSLESS_FORMAT_FLAG_16_BIT_SOURCE_DATA: u32 = 1;
pub const APPLE_LOSSLESS_FORMAT_FLAG_20_BIT_SOURCE_DATA: u32 = 2;
pub const APPLE_LOSSLESS_FORMAT_FLAG_24_BIT_SOURCE_DATA: u32 = 3;
pub const APPLE_LOSSLESS_FORMAT_FLAG_32_BIT_SOURCE_DATA: u32 = 4;
}
impl std::fmt::Display for AudioFormatFlags {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut flags = Vec::new();
if self.0 & Self::IS_FLOAT != 0 {
flags.push("FLOAT");
}
if self.0 & Self::IS_BIG_ENDIAN != 0 {
flags.push("BIG_ENDIAN");
}
if self.0 & Self::IS_SIGNED_INTEGER != 0 {
flags.push("SIGNED_INTEGER");
}
if self.0 & Self::IS_PACKED != 0 {
flags.push("PACKED");
}
if self.0 & Self::IS_ALIGNED_HIGH != 0 {
flags.push("ALIGNED_HIGH");
}
if self.0 & Self::IS_NON_INTERLEAVED != 0 {
flags.push("NON_INTERLEAVED");
}
if self.0 & Self::IS_NON_MIXABLE != 0 {
flags.push("NON_MIXABLE");
}
if self.0 & Self::ARE_ALL_CLEAR != 0 {
flags.push("ALL_CLEAR");
}
if flags.is_empty() {
write!(f, "NONE")
} else {
write!(f, "{}", flags.join(" | "))
}
}
}
impl std::fmt::Debug for AudioFormatFlags {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "AudioFormatFlags({})", self)
}
}
impl From<u32> for AudioFormatFlags {
fn from(value: u32) -> Self {
Self(value)
}
}
/// [Apple's documentation](https://developer.apple.com/documentation/coreaudiotypes/audiostreambasicdescription?language=objc)
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_snake_case)]
pub struct AudioStreamBasicDescription {
pub mSampleRate: f64,
pub mFormatID: u32,
pub mFormatFlags: u32,
pub mBytesPerPacket: u32,
pub mFramesPerPacket: u32,
pub mBytesPerFrame: u32,
pub mChannelsPerFrame: u32,
pub mBitsPerChannel: u32,
pub mReserved: u32,
}
unsafe impl Encode for AudioStreamBasicDescription {
const ENCODING: Encoding = Encoding::Struct(
"AudioStreamBasicDescription",
&[
<f64>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
<u32>::ENCODING,
],
);
}
unsafe impl RefEncode for AudioStreamBasicDescription {
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
}
#[derive(Debug, Clone, Copy)]
#[repr(transparent)]
pub struct AudioStreamDescription(pub(crate) AudioStreamBasicDescription);
impl Display for AudioStreamDescription {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"AudioStreamBasicDescription {{ mSampleRate: {}, mFormatID: {:?}, mFormatFlags: {}, \
mBytesPerPacket: {}, mFramesPerPacket: {}, mBytesPerFrame: {}, mChannelsPerFrame: {}, \
mBitsPerChannel: {}, mReserved: {} }}",
self.0.mSampleRate,
AudioFormatID::from(self.0.mFormatID),
AudioFormatFlags(self.0.mFormatFlags),
self.0.mBytesPerPacket,
self.0.mFramesPerPacket,
self.0.mBytesPerFrame,
self.0.mChannelsPerFrame,
self.0.mBitsPerChannel,
self.0.mReserved
)
}
}
pub fn read_audio_stream_basic_description(
tap_id: AudioObjectID,
) -> std::result::Result<AudioStreamDescription, CoreAudioError> {
let mut data_size = mem::size_of::<AudioStreamBasicDescription>();
let address = AudioObjectPropertyAddress {
mSelector: kAudioTapPropertyFormat,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut data = AudioStreamBasicDescription {
mSampleRate: 0.0,
mFormatID: 0,
mFormatFlags: 0,
mBytesPerPacket: 0,
mFramesPerPacket: 0,
mBytesPerFrame: 0,
mChannelsPerFrame: 0,
mBitsPerChannel: 0,
mReserved: 0,
};
let status = unsafe {
AudioObjectGetPropertyData(
tap_id,
&address,
0,
ptr::null_mut(),
(&mut data_size as *mut usize).cast(),
(&mut data as *mut AudioStreamBasicDescription).cast(),
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::GetAudioStreamBasicDescriptionFailed(status));
}
Ok(AudioStreamDescription(data))
}

View File

@@ -0,0 +1,71 @@
use std::ptr;
use objc2::{
msg_send,
runtime::{AnyClass, AnyObject},
AllocAnyThread,
};
use objc2_foundation::{NSDictionary, NSError, NSNumber, NSString, NSUInteger, NSURL};
use crate::{
av_audio_format::AVAudioFormat, av_audio_pcm_buffer::AVAudioPCMBuffer, error::CoreAudioError,
};
#[allow(unused)]
pub(crate) struct AVAudioFile {
inner: *mut AnyObject,
}
#[allow(unused)]
impl AVAudioFile {
pub(crate) fn new(url: &str, format: &AVAudioFormat) -> Result<Self, CoreAudioError> {
let cls = AnyClass::get(c"AVAudioFile").ok_or(CoreAudioError::AVAudioFileClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocAVAudioFileFailed);
}
let url: &NSURL = &*unsafe { NSURL::fileURLWithPath(&NSString::from_str(url)) };
let settings = &*NSDictionary::from_retained_objects(
&[
&*NSString::from_str("AVFormatIDKey"),
&*NSString::from_str("AVSampleRateKey"),
&*NSString::from_str("AVNumberOfChannelsKey"),
],
&[
NSNumber::initWithUnsignedInt(
NSNumber::alloc(),
format.audio_stream_basic_description.0.mFormatID,
),
NSNumber::initWithDouble(NSNumber::alloc(), format.get_sample_rate()),
NSNumber::initWithUnsignedInt(NSNumber::alloc(), format.get_channel_count()),
],
);
let is_interleaved = format.is_interleaved();
let mut error: *mut NSError = ptr::null_mut();
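    // AVAudioCommonFormat raw value 1 == AVAudioPCMFormatFloat32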
let common_format: NSUInteger = 1;
let obj: *mut AnyObject = unsafe {
msg_send![
obj,
initForWriting: url,
settings: settings,
commonFormat: common_format,
interleaved: is_interleaved,
error: &mut error
]
};
if obj.is_null() {
return Err(CoreAudioError::InitAVAudioFileFailed);
}
Ok(Self { inner: obj })
}
pub(crate) fn write(&self, buffer: AVAudioPCMBuffer) -> Result<(), CoreAudioError> {
let mut error: *mut NSError = ptr::null_mut();
let success: bool =
unsafe { msg_send![self.inner, writeFromBuffer: buffer.inner, error: &mut error] };
if !success {
return Err(CoreAudioError::WriteAVAudioFileFailed);
}
Ok(())
}
}

View File

@@ -0,0 +1,95 @@
use objc2::{
msg_send,
runtime::{AnyClass, AnyObject},
Encode, Encoding, RefEncode,
};
use crate::{audio_stream_basic_desc::AudioStreamDescription, error::CoreAudioError};
#[derive(Debug)]
#[allow(unused)]
pub(crate) struct AVAudioFormat {
pub(crate) inner: AVAudioFormatRef,
pub(crate) audio_stream_basic_description: AudioStreamDescription,
}
#[repr(transparent)]
#[derive(Debug, Clone, Copy)]
pub(crate) struct AVAudioFormatRef(pub(crate) *mut AnyObject);
unsafe impl Encode for AVAudioFormatRef {
const ENCODING: Encoding = Encoding::Struct(
"AVAudioFormat",
&[
Encoding::Double,
Encoding::UInt,
Encoding::Pointer(&Encoding::Struct(
"AVAudioChannelLayout",
&[
Encoding::UInt,
Encoding::UInt,
Encoding::Pointer(&Encoding::Struct(
"AudioChannelLayout",
&[
Encoding::UInt,
Encoding::UInt,
Encoding::Array(
1,
&Encoding::Struct(
"AudioChannelDescription",
&[
Encoding::UInt,
Encoding::UInt,
Encoding::Array(3, &Encoding::Float),
],
),
),
Encoding::UInt,
Encoding::UInt,
],
)),
Encoding::UInt,
],
)),
Encoding::Pointer(&Encoding::Object),
],
);
}
unsafe impl RefEncode for AVAudioFormatRef {
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
}
#[allow(unused)]
impl AVAudioFormat {
pub fn new(
audio_stream_basic_description: AudioStreamDescription,
) -> Result<Self, CoreAudioError> {
let cls = AnyClass::get(c"AVAudioFormat").ok_or(CoreAudioError::AVAudioFormatClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocAVAudioFormatFailed);
}
let obj: *mut AnyObject =
unsafe { msg_send![obj, initWithStreamDescription: &audio_stream_basic_description.0] };
if obj.is_null() {
return Err(CoreAudioError::InitAVAudioFormatFailed);
}
Ok(Self {
inner: AVAudioFormatRef(obj),
audio_stream_basic_description,
})
}
pub(crate) fn get_sample_rate(&self) -> f64 {
unsafe { msg_send![self.inner.0, sampleRate] }
}
pub(crate) fn get_channel_count(&self) -> u32 {
unsafe { msg_send![self.inner.0, channelCount] }
}
pub(crate) fn is_interleaved(&self) -> bool {
unsafe { msg_send![self.inner.0, isInterleaved] }
}
}

View File

@@ -0,0 +1,35 @@
use block2::RcBlock;
use objc2::{
msg_send,
runtime::{AnyClass, AnyObject},
};
use crate::{av_audio_format::AVAudioFormat, error::CoreAudioError, tap_audio::AudioBufferList};
#[allow(unused)]
pub(crate) struct AVAudioPCMBuffer {
pub(crate) inner: *mut AnyObject,
}
#[allow(unused)]
impl AVAudioPCMBuffer {
pub(crate) fn new(
audio_format: &AVAudioFormat,
buffer_list: *const AudioBufferList,
) -> Result<Self, CoreAudioError> {
let cls =
AnyClass::get(c"AVAudioPCMBuffer").ok_or(CoreAudioError::AVAudioPCMBufferClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocAVAudioPCMBufferFailed);
}
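    // No-op deallocator: the AudioBufferList passed via bufferListNoCopy stays owned by the caller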
let deallocator = RcBlock::new(|_buffer_list: *const AudioBufferList| {});
let obj: *mut AnyObject = unsafe {
msg_send![obj, initWithPCMFormat: audio_format.inner.0, bufferListNoCopy: buffer_list, deallocator: &*deallocator]
};
if obj.is_null() {
return Err(CoreAudioError::InitAVAudioPCMBufferFailed);
}
Ok(Self { inner: obj })
}
}

View File

@@ -0,0 +1,84 @@
use core_foundation::{
base::{FromVoid, ItemRef},
string::CFString,
};
use coreaudio::sys::AudioObjectID;
use objc2::{
msg_send,
runtime::{AnyClass, AnyObject},
AllocAnyThread,
};
use objc2_foundation::{NSArray, NSNumber, NSString, NSUUID};
use crate::error::CoreAudioError;
pub(crate) struct CATapDescription {
pub(crate) inner: *mut AnyObject,
}
impl CATapDescription {
pub fn init_stereo_mixdown_of_processes(
process: AudioObjectID,
) -> std::result::Result<Self, CoreAudioError> {
let cls =
AnyClass::get(c"CATapDescription").ok_or(CoreAudioError::CATapDescriptionClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocCATapDescriptionFailed);
}
let processes_array =
NSArray::from_retained_slice(&[NSNumber::initWithUnsignedInt(NSNumber::alloc(), process)]);
let obj: *mut AnyObject =
unsafe { msg_send![obj, initStereoMixdownOfProcesses: &*processes_array] };
if obj.is_null() {
return Err(CoreAudioError::InitStereoMixdownOfProcessesFailed);
}
Ok(Self { inner: obj })
}
pub fn init_stereo_global_tap_but_exclude_processes(
processes: &[AudioObjectID],
) -> std::result::Result<Self, CoreAudioError> {
let cls =
AnyClass::get(c"CATapDescription").ok_or(CoreAudioError::CATapDescriptionClassNotFound)?;
let obj: *mut AnyObject = unsafe { msg_send![cls, alloc] };
if obj.is_null() {
return Err(CoreAudioError::AllocCATapDescriptionFailed);
}
let processes_array = NSArray::from_retained_slice(
processes
.iter()
.map(|p| NSNumber::initWithUnsignedInt(NSNumber::alloc(), *p))
.collect::<Vec<_>>()
.as_slice(),
);
let obj: *mut AnyObject =
      unsafe { msg_send![obj, initStereoGlobalTapButExcludeProcesses: &*processes_array] };
if obj.is_null() {
return Err(CoreAudioError::InitStereoMixdownOfProcessesFailed);
}
Ok(Self { inner: obj })
}
pub fn get_uuid(&self) -> std::result::Result<ItemRef<CFString>, CoreAudioError> {
let uuid: *mut NSUUID = unsafe { msg_send![self.inner, UUID] };
if uuid.is_null() {
return Err(CoreAudioError::GetCATapDescriptionUUIDFailed);
}
let uuid_string: *mut NSString = unsafe { msg_send![uuid, UUIDString] };
if uuid_string.is_null() {
return Err(CoreAudioError::ConvertUUIDToCFStringFailed);
}
Ok(unsafe { CFString::from_void(uuid_string.cast()) })
}
}
impl Drop for CATapDescription {
fn drop(&mut self) {
unsafe {
let _: () = msg_send![self.inner, release];
}
}
}

View File

@@ -0,0 +1,66 @@
use std::{mem, ptr};
use core_foundation::{base::TCFType, string::CFString};
use coreaudio::sys::{
kAudioDevicePropertyDeviceUID, kAudioHardwareNoError, kAudioObjectPropertyElementMain,
kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, AudioDeviceID,
AudioObjectGetPropertyData, AudioObjectID, AudioObjectPropertyAddress, CFStringRef,
};
use crate::error::CoreAudioError;
pub(crate) fn get_device_uid(
device_id: AudioDeviceID,
) -> std::result::Result<CFString, CoreAudioError> {
let system_output_id = get_device_audio_id(device_id)?;
let address = AudioObjectPropertyAddress {
mSelector: kAudioDevicePropertyDeviceUID,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut output_uid: CFStringRef = ptr::null_mut();
let mut data_size = mem::size_of::<CFStringRef>();
let status = unsafe {
AudioObjectGetPropertyData(
system_output_id,
&address,
0,
ptr::null_mut(),
(&mut data_size as *mut usize).cast(),
(&mut output_uid as *mut CFStringRef).cast(),
)
};
if status != 0 {
return Err(CoreAudioError::GetDeviceUidFailed(status));
}
Ok(unsafe { CFString::wrap_under_create_rule(output_uid.cast()) })
}
pub(crate) fn get_device_audio_id(
device_id: AudioDeviceID,
) -> std::result::Result<AudioObjectID, CoreAudioError> {
let mut system_output_id: AudioObjectID = 0;
let mut data_size = mem::size_of::<AudioObjectID>();
let address = AudioObjectPropertyAddress {
mSelector: device_id,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let status = unsafe {
AudioObjectGetPropertyData(
kAudioObjectSystemObject,
&address,
0,
ptr::null_mut(),
(&mut data_size as *mut usize).cast(),
(&mut system_output_id as *mut AudioObjectID).cast(),
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::GetDefaultDeviceFailed(status));
}
Ok(system_output_id)
}

View File

@@ -0,0 +1,81 @@
use thiserror::Error;
#[derive(Error, Debug)]
pub enum CoreAudioError {
#[error("Map pid {0} to AudioObjectID failed")]
PidNotFound(i32),
#[error("Create process tap failed, status: {0}")]
CreateProcessTapFailed(i32),
#[error("Get default device failed, status: {0}")]
GetDefaultDeviceFailed(i32),
#[error("Get device uid failed, status: {0}")]
GetDeviceUidFailed(i32),
#[error("Create aggregate device failed, status: {0}")]
CreateAggregateDeviceFailed(i32),
#[error("Get process object list size failed, status: {0}")]
GetProcessObjectListSizeFailed(i32),
#[error("Get process object list failed, status: {0}")]
GetProcessObjectListFailed(i32),
#[error("AudioObjectGetPropertyDataSize failed, status: {0}")]
AudioObjectGetPropertyDataSizeFailed(i32),
#[error("CATapDescription class not found")]
CATapDescriptionClassNotFound,
#[error("Alloc CATapDescription failed")]
AllocCATapDescriptionFailed,
#[error("Call initStereoMixdownOfProcesses on CATapDescription failed")]
InitStereoMixdownOfProcessesFailed,
#[error("Get UUID on CATapDescription failed")]
GetCATapDescriptionUUIDFailed,
#[error("Get mute behavior on CATapDescription failed")]
GetMuteBehaviorFailed,
#[error("Convert UUID to CFString failed")]
ConvertUUIDToCFStringFailed,
#[error("Get AudioStreamBasicDescription failed, status: {0}")]
GetAudioStreamBasicDescriptionFailed(i32),
#[error("AVAudioFormat class not found")]
AVAudioFormatClassNotFound,
#[error("Alloc AVAudioFormat failed")]
AllocAVAudioFormatFailed,
#[error("Init AVAudioFormat failed")]
InitAVAudioFormatFailed,
#[error("Create IOProcIDWithBlock failed, status: {0}")]
CreateIOProcIDWithBlockFailed(i32),
#[error("Get hardware devices failed, status: {0}")]
GetHardwareDevicesFailed(i32),
#[error("AudioDeviceStart failed, status: {0}")]
AudioDeviceStartFailed(i32),
#[error("AudioDeviceStop failed, status: {0}")]
AudioDeviceStopFailed(i32),
#[error("AudioDeviceDestroyIOProcID failed, status: {0}")]
AudioDeviceDestroyIOProcIDFailed(i32),
#[error("AudioHardwareDestroyAggregateDevice failed, status: {0}")]
AudioHardwareDestroyAggregateDeviceFailed(i32),
#[error("AudioHardwareDestroyProcessTap failed, status: {0}")]
AudioHardwareDestroyProcessTapFailed(i32),
#[error("Get aggregate device property full sub device list failed, status: {0}")]
GetAggregateDevicePropertyFullSubDeviceListFailed(i32),
#[error("Add property listener block failed, status: {0}")]
AddPropertyListenerBlockFailed(i32),
#[error("AudioObjectGetPropertyData failed, status: {0}")]
AudioObjectGetPropertyDataFailed(i32),
#[error("AVAudioFile class not found")]
AVAudioFileClassNotFound,
#[error("Alloc AVAudioFile failed")]
AllocAVAudioFileFailed,
#[error("Init AVAudioFile failed")]
InitAVAudioFileFailed,
#[error("AVAudioPCMBuffer class not found")]
AVAudioPCMBufferClassNotFound,
#[error("Alloc AVAudioPCMBuffer failed")]
AllocAVAudioPCMBufferFailed,
#[error("Init AVAudioPCMBuffer failed")]
InitAVAudioPCMBufferFailed,
#[error("Write AVAudioFile failed")]
WriteAVAudioFileFailed,
}
impl From<CoreAudioError> for napi::Error {
fn from(value: CoreAudioError) -> Self {
napi::Error::new(napi::Status::GenericFailure, value.to_string())
}
}

View File

@@ -0,0 +1,11 @@
pub mod audio_stream_basic_desc;
pub mod av_audio_file;
pub mod av_audio_format;
pub mod av_audio_pcm_buffer;
pub mod ca_tap_description;
pub mod device;
pub(crate) mod error;
pub mod pid;
pub mod queue;
pub mod screen_capture_kit;
pub mod tap_audio;

View File

@@ -0,0 +1,98 @@
use std::{mem::MaybeUninit, ptr};
use coreaudio::sys::{
kAudioHardwareNoError, kAudioHardwarePropertyProcessObjectList, kAudioObjectPropertyElementMain,
kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, AudioObjectGetPropertyData,
AudioObjectGetPropertyDataSize, AudioObjectID, AudioObjectPropertyAddress,
AudioObjectPropertySelector,
};
use crate::error::CoreAudioError;
pub fn audio_process_list() -> Result<Vec<AudioObjectID>, CoreAudioError> {
let address = AudioObjectPropertyAddress {
mSelector: kAudioHardwarePropertyProcessObjectList,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut data_size = 0u32;
let status = unsafe {
AudioObjectGetPropertyDataSize(
kAudioObjectSystemObject,
&address,
0,
ptr::null_mut(),
&mut data_size,
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::GetProcessObjectListSizeFailed(status));
}
  // data_size is in bytes; convert to a number of AudioObjectID entries
  let mut process_list: Vec<AudioObjectID> =
    vec![0; data_size as usize / std::mem::size_of::<AudioObjectID>()];
let status = unsafe {
AudioObjectGetPropertyData(
kAudioObjectSystemObject,
&address,
0,
ptr::null_mut(),
(&mut data_size as *mut u32).cast(),
process_list.as_mut_ptr().cast(),
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::GetProcessObjectListFailed(status));
}
Ok(process_list)
}
pub fn get_process_property<T: Sized>(
object: &AudioObjectID,
selector: AudioObjectPropertySelector,
) -> Result<T, CoreAudioError> {
let object_id = *object;
let address = AudioObjectPropertyAddress {
mSelector: selector,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let mut data_size = 0u32;
let status = unsafe {
AudioObjectGetPropertyDataSize(object_id, &address, 0, ptr::null_mut(), &mut data_size)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::AudioObjectGetPropertyDataSizeFailed(status));
}
get_property_data(object_id, &address, &mut data_size)
}
pub fn get_property_data<T: Sized>(
object_id: AudioObjectID,
address: &AudioObjectPropertyAddress,
data_size: &mut u32,
) -> Result<T, CoreAudioError> {
let mut property = MaybeUninit::<T>::uninit();
let status = unsafe {
AudioObjectGetPropertyData(
object_id,
address,
0,
ptr::null_mut(),
(data_size as *mut u32).cast(),
property.as_mut_ptr().cast(),
)
};
if status != kAudioHardwareNoError as i32 {
return Err(CoreAudioError::AudioObjectGetPropertyDataFailed(status));
}
Ok(unsafe { property.assume_init() })
}

View File

@@ -0,0 +1,12 @@
pub(crate) fn create_audio_tap_queue() -> *mut dispatch2::ffi::dispatch_queue_s {
let queue_attr = unsafe {
dispatch2::ffi::dispatch_queue_attr_make_with_qos_class(
dispatch2::ffi::DISPATCH_QUEUE_SERIAL,
dispatch2::ffi::dispatch_qos_class_t::QOS_CLASS_USER_INITIATED,
0,
)
};
unsafe {
dispatch2::ffi::dispatch_queue_create(c"ProcessTapRecorder".as_ptr().cast(), queue_attr)
}
}

View File

@@ -0,0 +1,623 @@
use std::{
collections::HashMap,
ffi::c_void,
ptr,
sync::{
atomic::{AtomicPtr, Ordering},
Arc, LazyLock, RwLock,
},
};
use block2::{Block, RcBlock};
use core_foundation::{
base::TCFType,
string::{CFString, CFStringRef},
};
use coreaudio::sys::{
kAudioHardwarePropertyProcessObjectList, kAudioObjectPropertyElementMain,
kAudioObjectPropertyScopeGlobal, kAudioObjectSystemObject, kAudioProcessPropertyBundleID,
kAudioProcessPropertyIsRunning, kAudioProcessPropertyIsRunningInput, kAudioProcessPropertyPID,
AudioObjectAddPropertyListenerBlock, AudioObjectID, AudioObjectPropertyAddress,
AudioObjectRemovePropertyListenerBlock,
};
use napi::{
bindgen_prelude::{Buffer, Error, Float32Array, Result, Status},
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
};
use napi_derive::napi;
use objc2::{
msg_send,
rc::Retained,
runtime::{AnyClass, AnyObject},
Encode, Encoding,
};
use objc2_foundation::NSString;
use screencapturekit::shareable_content::SCShareableContent;
use uuid::Uuid;
use crate::{
error::CoreAudioError,
pid::{audio_process_list, get_process_property},
tap_audio::{AggregateDevice, AudioTapStream},
};
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct NSSize {
width: f64,
height: f64,
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct NSPoint {
x: f64,
y: f64,
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct NSRect {
origin: NSPoint,
size: NSSize,
}
unsafe impl Encode for NSSize {
const ENCODING: Encoding = Encoding::Struct("NSSize", &[f64::ENCODING, f64::ENCODING]);
}
unsafe impl Encode for NSPoint {
const ENCODING: Encoding = Encoding::Struct("NSPoint", &[f64::ENCODING, f64::ENCODING]);
}
unsafe impl Encode for NSRect {
const ENCODING: Encoding = Encoding::Struct("NSRect", &[<NSPoint>::ENCODING, <NSSize>::ENCODING]);
}
static RUNNING_APPLICATIONS: LazyLock<RwLock<Vec<AudioObjectID>>> =
LazyLock::new(|| RwLock::new(audio_process_list().expect("Failed to get running applications")));
static APPLICATION_STATE_CHANGED_SUBSCRIBERS: LazyLock<
RwLock<HashMap<AudioObjectID, HashMap<Uuid, Arc<ThreadsafeFunction<(), ()>>>>>,
> = LazyLock::new(|| RwLock::new(HashMap::new()));
static APPLICATION_STATE_CHANGED_LISTENER_BLOCKS: LazyLock<
RwLock<HashMap<AudioObjectID, AtomicPtr<c_void>>>,
> = LazyLock::new(|| RwLock::new(HashMap::new()));
static NSRUNNING_APPLICATION_CLASS: LazyLock<Option<&'static AnyClass>> =
LazyLock::new(|| AnyClass::get(c"NSRunningApplication"));
static AVCAPTUREDEVICE_CLASS: LazyLock<Option<&'static AnyClass>> =
LazyLock::new(|| AnyClass::get(c"AVCaptureDevice"));
static SCSTREAM_CLASS: LazyLock<Option<&'static AnyClass>> =
LazyLock::new(|| AnyClass::get(c"SCStream"));
struct TappableApplication {
object_id: AudioObjectID,
}
impl TappableApplication {
fn new(object_id: AudioObjectID) -> Self {
Self { object_id }
}
fn process_id(&self) -> std::result::Result<i32, CoreAudioError> {
get_process_property(&self.object_id, kAudioProcessPropertyPID)
}
fn bundle_identifier(&self) -> Result<String> {
let bundle_id: CFStringRef =
get_process_property(&self.object_id, kAudioProcessPropertyBundleID)?;
Ok(unsafe { CFString::wrap_under_get_rule(bundle_id) }.to_string())
}
fn name(&self) -> Result<String> {
let pid = self.process_id()?;
// Get NSRunningApplication class
let running_app_class = NSRUNNING_APPLICATION_CLASS.as_ref().ok_or_else(|| {
Error::new(
Status::GenericFailure,
"NSRunningApplication class not found",
)
})?;
// Get running application with PID
let running_app: *mut AnyObject =
unsafe { msg_send![*running_app_class, runningApplicationWithProcessIdentifier: pid] };
if running_app.is_null() {
return Ok(String::new());
}
// Get localized name
let name: *mut NSString = unsafe { msg_send![running_app, localizedName] };
if name.is_null() {
return Ok(String::new());
}
// Create a safe wrapper and convert to string
let name = unsafe {
Retained::from_raw(name).ok_or_else(|| {
Error::new(
Status::GenericFailure,
"Failed to create safe wrapper for localizedName",
)
})?
};
Ok(name.to_string())
}
fn icon(&self) -> Result<Vec<u8>> {
let pid = self.process_id()?;
// Get NSRunningApplication class
let running_app_class = NSRUNNING_APPLICATION_CLASS.as_ref().ok_or_else(|| {
Error::new(
Status::GenericFailure,
"NSRunningApplication class not found",
)
})?;
// Get running application with PID
let running_app: *mut AnyObject =
unsafe { msg_send![*running_app_class, runningApplicationWithProcessIdentifier: pid] };
if running_app.is_null() {
return Ok(Vec::new());
}
unsafe {
// Get original icon
let icon: *mut AnyObject = msg_send![running_app, icon];
if icon.is_null() {
return Ok(Vec::new());
}
// Create a new NSImage with 64x64 size
let nsimage_class = AnyClass::get(c"NSImage")
.ok_or_else(|| Error::new(Status::GenericFailure, "NSImage class not found"))?;
let resized_image: *mut AnyObject = msg_send![nsimage_class, alloc];
let resized_image: *mut AnyObject =
msg_send![resized_image, initWithSize: NSSize { width: 64.0, height: 64.0 }];
let _: () = msg_send![resized_image, lockFocus];
// Define drawing rectangle for 64x64 image
let draw_rect = NSRect {
origin: NSPoint { x: 0.0, y: 0.0 },
size: NSSize {
width: 64.0,
height: 64.0,
},
};
// Draw the original icon into draw_rect (using NSCompositingOperationCopy = 2)
let _: () = msg_send![icon, drawInRect: draw_rect, fromRect: NSRect { origin: NSPoint { x: 0.0, y: 0.0 }, size: NSSize { width: 0.0, height: 0.0 } }, operation: 2, fraction: 1.0];
let _: () = msg_send![resized_image, unlockFocus];
// Get TIFF representation from the downsized image
let tiff_data: *mut AnyObject = msg_send![resized_image, TIFFRepresentation];
if tiff_data.is_null() {
return Ok(Vec::new());
}
// Create bitmap image rep from TIFF
let bitmap_class = AnyClass::get(c"NSBitmapImageRep")
.ok_or_else(|| Error::new(Status::GenericFailure, "NSBitmapImageRep class not found"))?;
let bitmap: *mut AnyObject = msg_send![bitmap_class, imageRepWithData: tiff_data];
if bitmap.is_null() {
return Ok(Vec::new());
}
// Create properties dictionary with compression factor
let dict_class = AnyClass::get(c"NSMutableDictionary").ok_or_else(|| {
Error::new(
Status::GenericFailure,
"NSMutableDictionary class not found",
)
})?;
let properties: *mut AnyObject = msg_send![dict_class, dictionary];
// Add compression properties
let compression_key = NSString::from_str("NSImageCompressionFactor");
let number_class = AnyClass::get(c"NSNumber")
.ok_or_else(|| Error::new(Status::GenericFailure, "NSNumber class not found"))?;
let compression_value: *mut AnyObject = msg_send![number_class, numberWithDouble: 0.8];
let _: () = msg_send![properties, setObject: compression_value, forKey: &*compression_key];
// Get PNG data with properties
let png_data: *mut AnyObject =
msg_send![bitmap, representationUsingType: 4, properties: properties]; // 4 = PNG
if png_data.is_null() {
return Ok(Vec::new());
}
// Get bytes from NSData
let bytes: *const u8 = msg_send![png_data, bytes];
let length: usize = msg_send![png_data, length];
if bytes.is_null() {
return Ok(Vec::new());
}
// Copy bytes into a Vec<u8>
let data = std::slice::from_raw_parts(bytes, length).to_vec();
Ok(data)
}
}
}
#[napi]
pub struct Application {
inner: TappableApplication,
pub(crate) object_id: AudioObjectID,
pub(crate) process_id: i32,
pub(crate) bundle_identifier: String,
pub(crate) name: String,
}
#[napi]
impl Application {
fn new(app: TappableApplication) -> Result<Self> {
let object_id = app.object_id;
let bundle_identifier = app.bundle_identifier()?;
let name = app.name()?;
let process_id = app.process_id()?;
Ok(Self {
inner: app,
object_id,
process_id,
bundle_identifier,
name,
})
}
#[napi]
pub fn tap_global_audio(
excluded_processes: Option<Vec<&Application>>,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
) -> Result<AudioTapStream> {
let mut device = AggregateDevice::create_global_tap_but_exclude_processes(
&excluded_processes
.unwrap_or_default()
.iter()
.map(|app| app.object_id)
.collect::<Vec<_>>(),
)?;
device.start(audio_stream_callback)
}
#[napi(getter)]
pub fn process_id(&self) -> i32 {
self.process_id
}
#[napi(getter)]
pub fn bundle_identifier(&self) -> String {
self.bundle_identifier.clone()
}
#[napi(getter)]
pub fn name(&self) -> String {
self.name.clone()
}
#[napi(getter)]
pub fn icon(&self) -> Result<Buffer> {
let icon = self.inner.icon()?;
Ok(Buffer::from(icon))
}
#[napi(getter)]
pub fn get_is_running(&self) -> Result<bool> {
Ok(get_process_property(
&self.object_id,
kAudioProcessPropertyIsRunningInput,
)?)
}
#[napi]
pub fn tap_audio(
&self,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
) -> Result<AudioTapStream> {
let mut device = AggregateDevice::new(self)?;
device.start(audio_stream_callback)
}
}
#[napi]
pub struct ApplicationListChangedSubscriber {
listener_block: *const Block<dyn Fn(u32, *mut c_void)>,
}
#[napi]
impl ApplicationListChangedSubscriber {
#[napi]
pub fn unsubscribe(&self) -> Result<()> {
let status = unsafe {
AudioObjectRemovePropertyListenerBlock(
kAudioObjectSystemObject,
&AudioObjectPropertyAddress {
mSelector: kAudioHardwarePropertyProcessObjectList,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
},
ptr::null_mut(),
self.listener_block.cast_mut().cast(),
)
};
if status != 0 {
return Err(Error::new(
Status::GenericFailure,
"Failed to remove property listener",
));
}
Ok(())
}
}
#[napi]
pub struct ApplicationStateChangedSubscriber {
id: Uuid,
object_id: AudioObjectID,
}
#[napi]
impl ApplicationStateChangedSubscriber {
#[napi]
pub fn unsubscribe(&self) {
if let Ok(mut lock) = APPLICATION_STATE_CHANGED_SUBSCRIBERS.write() {
if let Some(subscribers) = lock.get_mut(&self.object_id) {
subscribers.remove(&self.id);
if subscribers.is_empty() {
lock.remove(&self.object_id);
if let Some(listener_block) = APPLICATION_STATE_CHANGED_LISTENER_BLOCKS
.write()
.ok()
.as_mut()
.and_then(|map| map.remove(&self.object_id))
{
unsafe {
AudioObjectRemovePropertyListenerBlock(
self.object_id,
&AudioObjectPropertyAddress {
mSelector: kAudioProcessPropertyIsRunning,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
},
ptr::null_mut(),
listener_block.load(Ordering::Relaxed),
);
}
}
}
}
}
}
}
#[napi]
pub struct ShareableContent {
_inner: SCShareableContent,
}
#[napi]
#[derive(Default)]
pub struct RecordingPermissions {
pub audio: bool,
pub screen: bool,
}
#[napi]
impl ShareableContent {
#[napi]
pub fn on_application_list_changed(
callback: Arc<ThreadsafeFunction<(), ()>>,
) -> Result<ApplicationListChangedSubscriber> {
let callback_block: RcBlock<dyn Fn(u32, *mut c_void)> =
RcBlock::new(move |_in_number_addresses, _in_addresses: *mut c_void| {
if let Err(err) = RUNNING_APPLICATIONS
.write()
.map_err(|_| {
Error::new(
Status::GenericFailure,
"Poisoned RwLock while writing RunningApplications",
)
})
.and_then(|mut running_applications| {
audio_process_list().map_err(From::from).map(|apps| {
*running_applications = apps;
})
})
{
callback.call(Err(err), ThreadsafeFunctionCallMode::NonBlocking);
} else {
callback.call(Ok(()), ThreadsafeFunctionCallMode::NonBlocking);
}
});
let listener_block = &*callback_block as *const Block<dyn Fn(u32, *mut c_void)>;
let status = unsafe {
AudioObjectAddPropertyListenerBlock(
kAudioObjectSystemObject,
&AudioObjectPropertyAddress {
mSelector: kAudioHardwarePropertyProcessObjectList,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
},
ptr::null_mut(),
listener_block.cast_mut().cast(),
)
};
if status != 0 {
return Err(Error::new(
Status::GenericFailure,
"Failed to add property listener",
));
}
Ok(ApplicationListChangedSubscriber { listener_block })
}
#[napi]
pub fn on_app_state_changed(
app: &Application,
callback: Arc<ThreadsafeFunction<(), ()>>,
) -> Result<ApplicationStateChangedSubscriber> {
let id = Uuid::new_v4();
let mut lock = APPLICATION_STATE_CHANGED_SUBSCRIBERS.write().map_err(|_| {
Error::new(
Status::GenericFailure,
"Poisoned RwLock while writing ApplicationStateChangedSubscribers",
)
})?;
if let Some(subscribers) = lock.get_mut(&app.object_id) {
subscribers.insert(id, callback);
} else {
let object_id = app.object_id;
let list_change: RcBlock<dyn Fn(u32, *mut c_void)> =
RcBlock::new(move |in_number_addresses, in_addresses: *mut c_void| {
let addresses = unsafe {
std::slice::from_raw_parts(
in_addresses as *mut AudioObjectPropertyAddress,
in_number_addresses as usize,
)
};
for address in addresses {
if address.mSelector == kAudioProcessPropertyIsRunning {
if let Some(subscribers) = APPLICATION_STATE_CHANGED_SUBSCRIBERS
.read()
.ok()
.as_ref()
.and_then(|map| map.get(&object_id))
{
for callback in subscribers.values() {
callback.call(Ok(()), ThreadsafeFunctionCallMode::NonBlocking);
}
}
}
}
});
let address = AudioObjectPropertyAddress {
mSelector: kAudioProcessPropertyIsRunning,
mScope: kAudioObjectPropertyScopeGlobal,
mElement: kAudioObjectPropertyElementMain,
};
let listener_block = &*list_change as *const Block<dyn Fn(u32, *mut c_void)>;
let status = unsafe {
AudioObjectAddPropertyListenerBlock(
app.object_id,
&address,
ptr::null_mut(),
listener_block.cast_mut().cast(),
)
};
if status != 0 {
return Err(Error::new(
Status::GenericFailure,
"Failed to add property listener",
));
}
let subscribers = {
let mut map = HashMap::new();
map.insert(id, callback);
map
};
lock.insert(app.object_id, subscribers);
}
Ok(ApplicationStateChangedSubscriber {
id,
object_id: app.object_id,
})
}
#[napi(constructor)]
pub fn new() -> Result<Self> {
Ok(Self {
_inner: SCShareableContent::get().map_err(|err| Error::new(Status::GenericFailure, err))?,
})
}
#[napi]
pub fn applications(&self) -> Result<Vec<Application>> {
RUNNING_APPLICATIONS
.read()
.map_err(|_| {
Error::new(
Status::GenericFailure,
"Poisoned RwLock while reading RunningApplications",
)
})?
.iter()
.filter_map(|id| {
let app = TappableApplication::new(*id);
if !app.bundle_identifier().ok()?.is_empty() {
Some(Application::new(app))
} else {
None
}
})
.collect()
}
#[napi]
pub fn application_with_process_id(&self, process_id: u32) -> Result<Application> {
// Find the AudioObjectID for the given process ID
let audio_object_id = {
let running_apps = RUNNING_APPLICATIONS.read().map_err(|_| {
Error::new(
Status::GenericFailure,
"Poisoned RwLock while reading RunningApplications",
)
})?;
*running_apps
.iter()
.find(|&&id| {
let app = TappableApplication::new(id);
app
.process_id()
.map(|pid| pid as u32 == process_id)
.unwrap_or(false)
})
.ok_or_else(|| {
Error::new(
Status::GenericFailure,
format!("No application found with process ID {}", process_id),
)
})?
};
let app = TappableApplication::new(audio_object_id);
Application::new(app)
}
#[napi]
pub fn check_recording_permissions(&self) -> Result<RecordingPermissions> {
let av_capture_class = AVCAPTUREDEVICE_CLASS
.as_ref()
.ok_or_else(|| Error::new(Status::GenericFailure, "AVCaptureDevice class not found"))?;
let sc_stream_class = SCSTREAM_CLASS
.as_ref()
.ok_or_else(|| Error::new(Status::GenericFailure, "SCStream class not found"))?;
let media_type = NSString::from_str("com.apple.avfoundation.avcapturedevice.built-in_audio");
let audio_status: i32 = unsafe {
msg_send![
*av_capture_class,
authorizationStatusForMediaType: &*media_type
]
};
let screen_status: bool = unsafe { msg_send![*sc_stream_class, isScreenCaptureAuthorized] };
Ok(RecordingPermissions {
// AVAuthorizationStatusAuthorized = 3
audio: audio_status == 3,
screen: screen_status,
})
}
}

View File

@@ -0,0 +1,360 @@
use std::{ffi::c_void, sync::Arc};
use block2::{Block, RcBlock};
use core_foundation::{
array::CFArray,
base::{CFType, ItemRef, TCFType},
boolean::CFBoolean,
dictionary::CFDictionary,
string::CFString,
uuid::CFUUID,
};
use coreaudio::sys::{
kAudioAggregateDeviceIsPrivateKey, kAudioAggregateDeviceIsStackedKey,
kAudioAggregateDeviceMainSubDeviceKey, kAudioAggregateDeviceNameKey,
kAudioAggregateDeviceSubDeviceListKey, kAudioAggregateDeviceTapAutoStartKey,
kAudioAggregateDeviceTapListKey, kAudioAggregateDeviceUIDKey, kAudioHardwareNoError,
kAudioHardwarePropertyDefaultInputDevice, kAudioHardwarePropertyDefaultSystemOutputDevice,
kAudioSubDeviceUIDKey, kAudioSubTapDriftCompensationKey, kAudioSubTapUIDKey,
AudioDeviceCreateIOProcIDWithBlock, AudioDeviceDestroyIOProcID, AudioDeviceIOProcID,
AudioDeviceStart, AudioDeviceStop, AudioHardwareCreateAggregateDevice,
AudioHardwareDestroyAggregateDevice, AudioObjectID, AudioTimeStamp, OSStatus,
};
use napi::{
bindgen_prelude::Float32Array,
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
Result,
};
use napi_derive::napi;
use objc2::{runtime::AnyObject, Encode, Encoding, RefEncode};
use crate::{
ca_tap_description::CATapDescription, device::get_device_uid, error::CoreAudioError,
queue::create_audio_tap_queue, screen_capture_kit::Application,
};
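// AudioHardwareCreateProcessTap / AudioHardwareDestroyProcessTap belong to CoreAudio's
// process-tap API (AudioHardwareTapping.h, available on recent macOS releases); they are
// declared manually here because the `coreaudio` sys bindings in use do not cover them.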
extern "C" {
fn AudioHardwareCreateProcessTap(
inDescription: *mut AnyObject,
outTapID: *mut AudioObjectID,
) -> OSStatus;
fn AudioHardwareDestroyProcessTap(tapID: AudioObjectID) -> OSStatus;
}
/// [Apple's documentation](https://developer.apple.com/documentation/coreaudiotypes/audiobuffer?language=objc)
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_snake_case)]
pub struct AudioBuffer {
pub mNumberChannels: u32,
pub mDataByteSize: u32,
pub mData: *mut c_void,
}
unsafe impl Encode for AudioBuffer {
const ENCODING: Encoding = Encoding::Struct(
"AudioBuffer",
&[<u32>::ENCODING, <u32>::ENCODING, <*mut c_void>::ENCODING],
);
}
unsafe impl RefEncode for AudioBuffer {
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
}
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_snake_case)]
pub struct AudioBufferList {
pub mNumberBuffers: u32,
pub mBuffers: [AudioBuffer; 1],
}
unsafe impl Encode for AudioBufferList {
const ENCODING: Encoding = Encoding::Struct(
"AudioBufferList",
&[<u32>::ENCODING, <[AudioBuffer; 1]>::ENCODING],
);
}
unsafe impl RefEncode for AudioBufferList {
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
}
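/// Owns a CoreAudio process tap together with the aggregate device that hosts it.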
pub struct AggregateDevice {
pub tap_id: AudioObjectID,
pub id: AudioObjectID,
}
impl AggregateDevice {
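/// Creates a process tap that mixes the target application's output down to stereo and wraps
/// it, together with the default input and output devices, in a private aggregate device.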
pub fn new(app: &Application) -> Result<Self> {
let mut tap_id: AudioObjectID = 0;
let tap_description = CATapDescription::init_stereo_mixdown_of_processes(app.object_id)?;
let status = unsafe { AudioHardwareCreateProcessTap(tap_description.inner, &mut tap_id) };
if status != 0 {
return Err(CoreAudioError::CreateProcessTapFailed(status).into());
}
let description_dict = Self::create_aggregate_description(tap_id, tap_description.get_uuid()?)?;
let mut aggregate_device_id: AudioObjectID = 0;
let status = unsafe {
AudioHardwareCreateAggregateDevice(
description_dict.as_concrete_TypeRef().cast(),
&mut aggregate_device_id,
)
};
// Check the status and return the appropriate result
if status != 0 {
return Err(CoreAudioError::CreateAggregateDeviceFailed(status).into());
}
Ok(Self {
tap_id,
id: aggregate_device_id,
})
}
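/// Same as `new`, but taps system-wide audio while excluding the given process objects.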
pub fn create_global_tap_but_exclude_processes(processes: &[AudioObjectID]) -> Result<Self> {
let mut tap_id: AudioObjectID = 0;
let tap_description =
CATapDescription::init_stereo_global_tap_but_exclude_processes(processes)?;
let status = unsafe { AudioHardwareCreateProcessTap(tap_description.inner, &mut tap_id) };
if status != 0 {
return Err(CoreAudioError::CreateProcessTapFailed(status).into());
}
let description_dict = Self::create_aggregate_description(tap_id, tap_description.get_uuid()?)?;
let mut aggregate_device_id: AudioObjectID = 0;
let status = unsafe {
AudioHardwareCreateAggregateDevice(
description_dict.as_concrete_TypeRef().cast(),
&mut aggregate_device_id,
)
};
// Check the status and return the appropriate result
if status != 0 {
return Err(CoreAudioError::CreateAggregateDeviceFailed(status).into());
}
Ok(Self {
tap_id,
id: aggregate_device_id,
})
}
pub fn start(
&mut self,
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
) -> Result<AudioTapStream> {
let queue = create_audio_tap_queue();
let mut in_proc_id: AudioDeviceIOProcID = None;
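// The block mirrors the five-pointer layout of the CoreAudio device IO callback
// (inNow, inInputData, inInputTime, outOutputData, inOutputTime); only the input side is read.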
let in_io_block: RcBlock<
dyn Fn(*mut c_void, *mut c_void, *mut c_void, *mut c_void, *mut c_void) -> i32,
> = RcBlock::new(
move |_in_now: *mut c_void,
in_input_data: *mut c_void,
in_input_time: *mut c_void,
_out_output_data: *mut c_void,
_in_output_time: *mut c_void| {
let AudioTimeStamp { mSampleTime, .. } = unsafe { &*in_input_time.cast() };
// ignore pre-roll
if *mSampleTime < 0.0 {
return kAudioHardwareNoError as i32;
}
let AudioBufferList { mBuffers, .. } =
unsafe { &mut *in_input_data.cast::<AudioBufferList>() };
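// The tap was created as a stereo mixdown, so the buffer list is expected to hold a single interleaved buffer.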
let [AudioBuffer {
mData,
mNumberChannels,
mDataByteSize,
}] = mBuffers;
// Only create slice if we have valid data
if !mData.is_null() && *mDataByteSize > 0 {
// Calculate total number of samples (accounting for interleaved stereo)
let total_samples = *mDataByteSize as usize / 4; // 4 bytes per f32
// Create a slice of all samples
let samples: &[f32] =
unsafe { std::slice::from_raw_parts(mData.cast::<f32>(), total_samples) };
// Convert to mono if needed
let mono_samples: Vec<f32> = if *mNumberChannels > 1 {
samples
.chunks(*mNumberChannels as usize)
.map(|chunk| chunk.iter().sum::<f32>() / *mNumberChannels as f32)
.collect()
} else {
samples.to_vec()
};
audio_stream_callback.call(
Ok(mono_samples.into()),
ThreadsafeFunctionCallMode::NonBlocking,
);
}
kAudioHardwareNoError as i32
},
);
let status = unsafe {
AudioDeviceCreateIOProcIDWithBlock(
&mut in_proc_id,
self.id,
queue.cast(),
(&*in_io_block
as *const Block<
dyn Fn(*mut c_void, *mut c_void, *mut c_void, *mut c_void, *mut c_void) -> i32,
>)
.cast_mut()
.cast(),
)
};
if status != 0 {
return Err(CoreAudioError::CreateIOProcIDWithBlockFailed(status).into());
}
let status = unsafe { AudioDeviceStart(self.id, in_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceStartFailed(status).into());
}
Ok(AudioTapStream {
device_id: self.id,
tap_id: self.tap_id,
in_proc_id,
stop_called: false,
})
}
fn create_aggregate_description(
tap_id: AudioObjectID,
tap_uuid_string: ItemRef<CFString>,
) -> Result<CFDictionary<CFType, CFType>> {
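// The dictionary describes a private (hidden from other applications), non-stacked aggregate
// device whose sub-devices are the default input and the default system output, with the
// process tap attached and set to auto-start.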
let system_output_uid = get_device_uid(kAudioHardwarePropertyDefaultSystemOutputDevice)?;
let default_input_uid = get_device_uid(kAudioHardwarePropertyDefaultInputDevice)?;
let aggregate_device_name = CFString::new(&format!("Tap-{}", tap_id));
let aggregate_device_uid: uuid::Uuid = CFUUID::new().into();
let aggregate_device_uid_string = aggregate_device_uid.to_string();
// Sub-device UID key and dictionary
let sub_device_output_dict = CFDictionary::from_CFType_pairs(&[(
cfstring_from_bytes_with_nul(kAudioSubDeviceUIDKey).as_CFType(),
system_output_uid.as_CFType(),
)]);
let sub_device_input_dict = CFDictionary::from_CFType_pairs(&[(
cfstring_from_bytes_with_nul(kAudioSubDeviceUIDKey).as_CFType(),
default_input_uid.as_CFType(),
)]);
let tap_device_dict = CFDictionary::from_CFType_pairs(&[
(
cfstring_from_bytes_with_nul(kAudioSubTapDriftCompensationKey).as_CFType(),
CFBoolean::false_value().as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioSubTapUIDKey).as_CFType(),
tap_uuid_string.as_CFType(),
),
]);
let capture_device_list = vec![sub_device_input_dict, sub_device_output_dict];
// Sub-device list
let sub_device_list = CFArray::from_CFTypes(&capture_device_list);
let tap_list = CFArray::from_CFTypes(&[tap_device_dict]);
// Create the aggregate device description dictionary
let description_dict = CFDictionary::from_CFType_pairs(&[
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceNameKey).as_CFType(),
aggregate_device_name.as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceUIDKey).as_CFType(),
CFString::new(aggregate_device_uid_string.as_str()).as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceMainSubDeviceKey).as_CFType(),
system_output_uid.as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceIsPrivateKey).as_CFType(),
CFBoolean::true_value().as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceIsStackedKey).as_CFType(),
CFBoolean::false_value().as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceTapAutoStartKey).as_CFType(),
CFBoolean::true_value().as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceSubDeviceListKey).as_CFType(),
sub_device_list.as_CFType(),
),
(
cfstring_from_bytes_with_nul(kAudioAggregateDeviceTapListKey).as_CFType(),
tap_list.as_CFType(),
),
]);
Ok(description_dict)
}
}
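/// Handle returned by `AggregateDevice::start`; exposed to JavaScript so the capture can be stopped and torn down.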
#[napi]
pub struct AudioTapStream {
device_id: AudioObjectID,
tap_id: AudioObjectID,
in_proc_id: AudioDeviceIOProcID,
stop_called: bool,
}
#[napi]
impl AudioTapStream {
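/// Stops capture and tears everything down in order: stop the device, destroy the IO proc,
/// destroy the aggregate device, then destroy the process tap. Calling `stop` again is a no-op.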
#[napi]
pub fn stop(&mut self) -> Result<()> {
if self.stop_called {
return Ok(());
}
self.stop_called = true;
let status = unsafe { AudioDeviceStop(self.device_id, self.in_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceStopFailed(status).into());
}
let status = unsafe { AudioDeviceDestroyIOProcID(self.device_id, self.in_proc_id) };
if status != 0 {
return Err(CoreAudioError::AudioDeviceDestroyIOProcIDFailed(status).into());
}
let status = unsafe { AudioHardwareDestroyAggregateDevice(self.device_id) };
if status != 0 {
return Err(CoreAudioError::AudioHardwareDestroyAggregateDeviceFailed(status).into());
}
// The process tap is a separate object from the aggregate device, so destroy it with its own id.
let status = unsafe { AudioHardwareDestroyProcessTap(self.tap_id) };
if status != 0 {
return Err(CoreAudioError::AudioHardwareDestroyProcessTapFailed(status).into());
}
Ok(())
}
}
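// The aggregate-device keys exported by `coreaudio::sys` are NUL-terminated byte strings,
// so convert them through `CStr` before building a `CFString`.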
fn cfstring_from_bytes_with_nul(bytes: &'static [u8]) -> CFString {
CFString::new(
unsafe { std::ffi::CStr::from_bytes_with_nul_unchecked(bytes) }
.to_string_lossy()
.as_ref(),
)
}

View File

@@ -26,8 +26,10 @@
},
"devDependencies": {
"@napi-rs/cli": "3.0.0-alpha.70",
"@napi-rs/whisper": "^0.0.4",
"@types/node": "^22.0.0",
"ava": "^6.2.0",
"rxjs": "^7.8.1",
"ts-node": "^10.9.2",
"typescript": "^5.7.2"
},

View File

@@ -1,4 +1,6 @@
pub mod hashcash;
#[allow(unused_imports)]
pub use affine_media_capture::*;
pub use affine_nbstore::*;
pub use affine_sqlite_v1::*;

View File

@@ -3,6 +3,6 @@
"compilerOptions": {
"outDir": "./dist"
},
"include": ["index.d.ts"],
"include": ["index.d.ts", "media-capture-example.ts"],
"references": []
}