mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-21 00:07:01 +08:00
fix(native): expose tapped audio stats (#10524)
Need to encode the audio based on the sample's sample rate & channels. Also fixed that global audio tap not receiving any samples at all.
This commit is contained in:
@@ -98,8 +98,8 @@ export async function gemini(
|
|||||||
try {
|
try {
|
||||||
// Upload the audio file
|
// Upload the audio file
|
||||||
uploadResult = await fileManager.uploadFile(audioFilePath, {
|
uploadResult = await fileManager.uploadFile(audioFilePath, {
|
||||||
mimeType: 'audio/wav',
|
mimeType: 'audio/mp3',
|
||||||
displayName: 'audio_transcription.wav',
|
displayName: 'audio_transcription.mp3',
|
||||||
});
|
});
|
||||||
console.log('File uploaded:', uploadResult.file.uri);
|
console.log('File uploaded:', uploadResult.file.uri);
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import { createServer } from 'node:http';
|
|||||||
import {
|
import {
|
||||||
type Application,
|
type Application,
|
||||||
type AudioTapStream,
|
type AudioTapStream,
|
||||||
|
Bitrate,
|
||||||
|
Mp3Encoder,
|
||||||
ShareableContent,
|
ShareableContent,
|
||||||
type TappableApplication,
|
type TappableApplication,
|
||||||
} from '@affine/native';
|
} from '@affine/native';
|
||||||
@@ -15,7 +17,6 @@ import fs from 'fs-extra';
|
|||||||
import { Server } from 'socket.io';
|
import { Server } from 'socket.io';
|
||||||
|
|
||||||
import { gemini, type TranscriptionResult } from './gemini';
|
import { gemini, type TranscriptionResult } from './gemini';
|
||||||
import { WavWriter } from './wav-writer';
|
|
||||||
|
|
||||||
// Constants
|
// Constants
|
||||||
const RECORDING_DIR = './recordings';
|
const RECORDING_DIR = './recordings';
|
||||||
@@ -51,6 +52,7 @@ interface RecordingMetadata {
|
|||||||
recordingEndTime: number;
|
recordingEndTime: number;
|
||||||
recordingDuration: number;
|
recordingDuration: number;
|
||||||
sampleRate: number;
|
sampleRate: number;
|
||||||
|
channels: number;
|
||||||
totalSamples: number;
|
totalSamples: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,7 +102,9 @@ app.use(
|
|||||||
return res.status(400).json({ error: 'Invalid folder name format' });
|
return res.status(400).json({ error: 'Invalid folder name format' });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (req.path.endsWith('.wav')) {
|
if (req.path.endsWith('.mp3')) {
|
||||||
|
res.setHeader('Content-Type', 'audio/mpeg');
|
||||||
|
} else if (req.path.endsWith('.wav')) {
|
||||||
res.setHeader('Content-Type', 'audio/wav');
|
res.setHeader('Content-Type', 'audio/wav');
|
||||||
} else if (req.path.endsWith('.png')) {
|
} else if (req.path.endsWith('.png')) {
|
||||||
res.setHeader('Content-Type', 'image/png');
|
res.setHeader('Content-Type', 'image/png');
|
||||||
@@ -123,19 +127,25 @@ async function saveRecording(recording: Recording): Promise<string | null> {
|
|||||||
|
|
||||||
const recordingEndTime = Date.now();
|
const recordingEndTime = Date.now();
|
||||||
const recordingDuration = (recordingEndTime - recording.startTime) / 1000;
|
const recordingDuration = (recordingEndTime - recording.startTime) / 1000;
|
||||||
const expectedSamples = recordingDuration * 44100;
|
|
||||||
|
// Get the actual sample rate from the stream's audio stats
|
||||||
|
const actualSampleRate = recording.stream.sampleRate;
|
||||||
|
const channelCount = recording.stream.channels;
|
||||||
|
const expectedSamples = recordingDuration * actualSampleRate;
|
||||||
|
|
||||||
console.log(`💾 Saving recording for ${app.name}:`);
|
console.log(`💾 Saving recording for ${app.name}:`);
|
||||||
console.log(`- Process ID: ${app.processId}`);
|
console.log(`- Process ID: ${app.processId}`);
|
||||||
console.log(`- Bundle ID: ${app.bundleIdentifier}`);
|
console.log(`- Bundle ID: ${app.bundleIdentifier}`);
|
||||||
console.log(`- Actual duration: ${recordingDuration.toFixed(2)}s`);
|
console.log(`- Actual duration: ${recordingDuration.toFixed(2)}s`);
|
||||||
|
console.log(`- Sample rate: ${actualSampleRate}Hz`);
|
||||||
|
console.log(`- Channels: ${channelCount}`);
|
||||||
console.log(`- Expected samples: ${Math.floor(expectedSamples)}`);
|
console.log(`- Expected samples: ${Math.floor(expectedSamples)}`);
|
||||||
console.log(`- Actual samples: ${totalSamples}`);
|
console.log(`- Actual samples: ${totalSamples}`);
|
||||||
console.log(
|
console.log(
|
||||||
`- Sample ratio: ${(totalSamples / expectedSamples).toFixed(2)}`
|
`- Sample ratio: ${(totalSamples / expectedSamples).toFixed(2)}`
|
||||||
);
|
);
|
||||||
|
|
||||||
// Create a buffer for the mono audio
|
// Create a buffer for the audio
|
||||||
const buffer = new Float32Array(totalSamples);
|
const buffer = new Float32Array(totalSamples);
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
recording.buffers.forEach(buf => {
|
recording.buffers.forEach(buf => {
|
||||||
@@ -150,28 +160,33 @@ async function saveRecording(recording: Recording): Promise<string | null> {
|
|||||||
const recordingDir = `${RECORDING_DIR}/${baseFilename}`;
|
const recordingDir = `${RECORDING_DIR}/${baseFilename}`;
|
||||||
await fs.ensureDir(recordingDir);
|
await fs.ensureDir(recordingDir);
|
||||||
|
|
||||||
const wavFilename = `${recordingDir}/recording.wav`;
|
const mp3Filename = `${recordingDir}/recording.mp3`;
|
||||||
const transcriptionWavFilename = `${recordingDir}/transcription.wav`;
|
const transcriptionMp3Filename = `${recordingDir}/transcription.mp3`;
|
||||||
const metadataFilename = `${recordingDir}/metadata.json`;
|
const metadataFilename = `${recordingDir}/metadata.json`;
|
||||||
const iconFilename = `${recordingDir}/icon.png`;
|
const iconFilename = `${recordingDir}/icon.png`;
|
||||||
|
|
||||||
// Save high-quality WAV file for playback (44.1kHz)
|
// Save MP3 file with the actual sample rate from the stream
|
||||||
console.log(`📝 Writing high-quality WAV file to ${wavFilename}`);
|
console.log(`📝 Writing MP3 file to ${mp3Filename}`);
|
||||||
const writer = new WavWriter(wavFilename, { targetSampleRate: 44100 });
|
const mp3Encoder = new Mp3Encoder({
|
||||||
writer.write(buffer);
|
channels: channelCount,
|
||||||
await writer.end();
|
sampleRate: actualSampleRate,
|
||||||
console.log('✅ High-quality WAV file written successfully');
|
|
||||||
|
|
||||||
// Save low-quality WAV file for transcription (8kHz)
|
|
||||||
console.log(
|
|
||||||
`📝 Writing transcription WAV file to ${transcriptionWavFilename}`
|
|
||||||
);
|
|
||||||
const transcriptionWriter = new WavWriter(transcriptionWavFilename, {
|
|
||||||
targetSampleRate: 8000,
|
|
||||||
});
|
});
|
||||||
transcriptionWriter.write(buffer);
|
const mp3Data = mp3Encoder.encode(buffer);
|
||||||
await transcriptionWriter.end();
|
await fs.writeFile(mp3Filename, mp3Data);
|
||||||
console.log('✅ Transcription WAV file written successfully');
|
console.log('✅ MP3 file written successfully');
|
||||||
|
|
||||||
|
// Save low-quality MP3 file for transcription (8kHz)
|
||||||
|
console.log(
|
||||||
|
`📝 Writing transcription MP3 file to ${transcriptionMp3Filename}`
|
||||||
|
);
|
||||||
|
const transcriptionMp3Encoder = new Mp3Encoder({
|
||||||
|
channels: channelCount,
|
||||||
|
bitrate: Bitrate.Kbps8,
|
||||||
|
sampleRate: actualSampleRate,
|
||||||
|
});
|
||||||
|
const transcriptionMp3Data = transcriptionMp3Encoder.encode(buffer);
|
||||||
|
await fs.writeFile(transcriptionMp3Filename, transcriptionMp3Data);
|
||||||
|
console.log('✅ Transcription MP3 file written successfully');
|
||||||
|
|
||||||
// Save app icon if available
|
// Save app icon if available
|
||||||
if (app.icon) {
|
if (app.icon) {
|
||||||
@@ -181,7 +196,7 @@ async function saveRecording(recording: Recording): Promise<string | null> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
console.log(`📝 Writing metadata to ${metadataFilename}`);
|
console.log(`📝 Writing metadata to ${metadataFilename}`);
|
||||||
// Save metadata (without icon)
|
// Save metadata with the actual sample rate from the stream
|
||||||
const metadata: RecordingMetadata = {
|
const metadata: RecordingMetadata = {
|
||||||
appName: app.name,
|
appName: app.name,
|
||||||
bundleIdentifier: app.bundleIdentifier,
|
bundleIdentifier: app.bundleIdentifier,
|
||||||
@@ -189,7 +204,8 @@ async function saveRecording(recording: Recording): Promise<string | null> {
|
|||||||
recordingStartTime: recording.startTime,
|
recordingStartTime: recording.startTime,
|
||||||
recordingEndTime,
|
recordingEndTime,
|
||||||
recordingDuration,
|
recordingDuration,
|
||||||
sampleRate: 44100,
|
sampleRate: actualSampleRate,
|
||||||
|
channels: channelCount,
|
||||||
totalSamples,
|
totalSamples,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -296,7 +312,7 @@ async function stopRecording(processId: number) {
|
|||||||
// File management
|
// File management
|
||||||
async function getRecordings(): Promise<
|
async function getRecordings(): Promise<
|
||||||
{
|
{
|
||||||
wav: string;
|
mp3: string;
|
||||||
metadata?: RecordingMetadata;
|
metadata?: RecordingMetadata;
|
||||||
transcription?: TranscriptionMetadata;
|
transcription?: TranscriptionMetadata;
|
||||||
}[]
|
}[]
|
||||||
@@ -340,7 +356,7 @@ async function getRecordings(): Promise<
|
|||||||
if (transcriptionExists) {
|
if (transcriptionExists) {
|
||||||
transcription = await fs.readJson(transcriptionPath);
|
transcription = await fs.readJson(transcriptionPath);
|
||||||
} else {
|
} else {
|
||||||
// If transcription.wav exists but no transcription.json, it means transcription is available but not started
|
// If transcription.mp3 exists but no transcription.json, it means transcription is available but not started
|
||||||
transcription = {
|
transcription = {
|
||||||
transcriptionStartTime: 0,
|
transcriptionStartTime: 0,
|
||||||
transcriptionEndTime: 0,
|
transcriptionEndTime: 0,
|
||||||
@@ -352,7 +368,7 @@ async function getRecordings(): Promise<
|
|||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
wav: dir,
|
mp3: dir,
|
||||||
metadata,
|
metadata,
|
||||||
transcription,
|
transcription,
|
||||||
};
|
};
|
||||||
@@ -402,21 +418,21 @@ async function setupRecordingsWatcher() {
|
|||||||
// Handle file events
|
// Handle file events
|
||||||
fsWatcher
|
fsWatcher
|
||||||
.on('add', async path => {
|
.on('add', async path => {
|
||||||
if (path.endsWith('.wav') || path.endsWith('.json')) {
|
if (path.endsWith('.mp3') || path.endsWith('.json')) {
|
||||||
console.log(`📝 File added: ${path}`);
|
console.log(`📝 File added: ${path}`);
|
||||||
const files = await getRecordings();
|
const files = await getRecordings();
|
||||||
io.emit('apps:saved', { recordings: files });
|
io.emit('apps:saved', { recordings: files });
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.on('change', async path => {
|
.on('change', async path => {
|
||||||
if (path.endsWith('.wav') || path.endsWith('.json')) {
|
if (path.endsWith('.mp3') || path.endsWith('.json')) {
|
||||||
console.log(`📝 File changed: ${path}`);
|
console.log(`📝 File changed: ${path}`);
|
||||||
const files = await getRecordings();
|
const files = await getRecordings();
|
||||||
io.emit('apps:saved', { recordings: files });
|
io.emit('apps:saved', { recordings: files });
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.on('unlink', async path => {
|
.on('unlink', async path => {
|
||||||
if (path.endsWith('.wav') || path.endsWith('.json')) {
|
if (path.endsWith('.mp3') || path.endsWith('.json')) {
|
||||||
console.log(`🗑️ File removed: ${path}`);
|
console.log(`🗑️ File removed: ${path}`);
|
||||||
const files = await getRecordings();
|
const files = await getRecordings();
|
||||||
io.emit('apps:saved', { recordings: files });
|
io.emit('apps:saved', { recordings: files });
|
||||||
@@ -702,11 +718,11 @@ app.post(
|
|||||||
// Check if directory exists
|
// Check if directory exists
|
||||||
await fs.access(recordingDir);
|
await fs.access(recordingDir);
|
||||||
|
|
||||||
const transcriptionWavPath = `${recordingDir}/transcription.wav`;
|
const transcriptionMp3Path = `${recordingDir}/transcription.mp3`;
|
||||||
const transcriptionMetadataPath = `${recordingDir}/transcription.json`;
|
const transcriptionMetadataPath = `${recordingDir}/transcription.json`;
|
||||||
|
|
||||||
// Check if transcription file exists
|
// Check if transcription file exists
|
||||||
await fs.access(transcriptionWavPath);
|
await fs.access(transcriptionMp3Path);
|
||||||
|
|
||||||
// Create initial transcription metadata
|
// Create initial transcription metadata
|
||||||
const initialMetadata: TranscriptionMetadata = {
|
const initialMetadata: TranscriptionMetadata = {
|
||||||
@@ -719,7 +735,7 @@ app.post(
|
|||||||
// Notify clients that transcription has started
|
// Notify clients that transcription has started
|
||||||
io.emit('apps:recording-transcription-start', { filename: foldername });
|
io.emit('apps:recording-transcription-start', { filename: foldername });
|
||||||
|
|
||||||
const transcription = await gemini(transcriptionWavPath, {
|
const transcription = await gemini(transcriptionMp3Path, {
|
||||||
mode: 'transcript',
|
mode: 'transcript',
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1,125 +0,0 @@
|
|||||||
import fs from 'fs-extra';
|
|
||||||
|
|
||||||
interface WavWriterConfig {
|
|
||||||
targetSampleRate?: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
export class WavWriter {
|
|
||||||
private readonly file: fs.WriteStream;
|
|
||||||
private readonly originalSampleRate: number = 44100;
|
|
||||||
private readonly targetSampleRate: number;
|
|
||||||
private readonly numChannels = 1; // The audio is mono
|
|
||||||
private samplesWritten = 0;
|
|
||||||
private readonly tempFilePath: string;
|
|
||||||
private readonly finalFilePath: string;
|
|
||||||
|
|
||||||
constructor(finalPath: string, config: WavWriterConfig = {}) {
|
|
||||||
this.finalFilePath = finalPath;
|
|
||||||
this.tempFilePath = finalPath + '.tmp';
|
|
||||||
this.targetSampleRate = config.targetSampleRate ?? this.originalSampleRate;
|
|
||||||
this.file = fs.createWriteStream(this.tempFilePath);
|
|
||||||
this.writeHeader(); // Always write header immediately
|
|
||||||
}
|
|
||||||
|
|
||||||
private writeHeader() {
|
|
||||||
const buffer = Buffer.alloc(44); // WAV header is 44 bytes
|
|
||||||
|
|
||||||
// RIFF chunk descriptor
|
|
||||||
buffer.write('RIFF', 0);
|
|
||||||
buffer.writeUInt32LE(36, 4); // Initial file size - 8 (will be updated later)
|
|
||||||
buffer.write('WAVE', 8);
|
|
||||||
|
|
||||||
// fmt sub-chunk
|
|
||||||
buffer.write('fmt ', 12);
|
|
||||||
buffer.writeUInt32LE(16, 16); // Subchunk1Size (16 for PCM)
|
|
||||||
buffer.writeUInt16LE(3, 20); // AudioFormat (3 for IEEE float)
|
|
||||||
buffer.writeUInt16LE(this.numChannels, 22); // NumChannels
|
|
||||||
buffer.writeUInt32LE(this.targetSampleRate, 24); // SampleRate
|
|
||||||
buffer.writeUInt32LE(this.targetSampleRate * this.numChannels * 4, 28); // ByteRate
|
|
||||||
buffer.writeUInt16LE(this.numChannels * 4, 32); // BlockAlign
|
|
||||||
buffer.writeUInt16LE(32, 34); // BitsPerSample (32 for float)
|
|
||||||
|
|
||||||
// data sub-chunk
|
|
||||||
buffer.write('data', 36);
|
|
||||||
buffer.writeUInt32LE(0, 40); // Initial data size (will be updated later)
|
|
||||||
|
|
||||||
this.file.write(buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
private resample(samples: Float32Array): Float32Array {
|
|
||||||
const ratio = this.originalSampleRate / this.targetSampleRate;
|
|
||||||
const newLength = Math.floor(samples.length / ratio);
|
|
||||||
const result = new Float32Array(newLength);
|
|
||||||
|
|
||||||
for (let i = 0; i < newLength; i++) {
|
|
||||||
const position = i * ratio;
|
|
||||||
const index = Math.floor(position);
|
|
||||||
const fraction = position - index;
|
|
||||||
|
|
||||||
// Linear interpolation between adjacent samples
|
|
||||||
if (index + 1 < samples.length) {
|
|
||||||
result[i] =
|
|
||||||
samples[index] * (1 - fraction) + samples[index + 1] * fraction;
|
|
||||||
} else {
|
|
||||||
result[i] = samples[index];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
write(samples: Float32Array) {
|
|
||||||
// Resample the input samples
|
|
||||||
const resampledData = this.resample(samples);
|
|
||||||
|
|
||||||
// Create a buffer with the correct size (4 bytes per float)
|
|
||||||
const buffer = Buffer.alloc(resampledData.length * 4);
|
|
||||||
|
|
||||||
// Write each float value properly
|
|
||||||
for (let i = 0; i < resampledData.length; i++) {
|
|
||||||
buffer.writeFloatLE(resampledData[i], i * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
this.file.write(buffer);
|
|
||||||
this.samplesWritten += resampledData.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
async end(): Promise<void> {
|
|
||||||
return new Promise<void>((resolve, reject) => {
|
|
||||||
this.file.end(() => {
|
|
||||||
void this.updateHeaderAndCleanup().then(resolve).catch(reject);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private async updateHeaderAndCleanup(): Promise<void> {
|
|
||||||
// Read the entire temporary file
|
|
||||||
const data = await fs.promises.readFile(this.tempFilePath);
|
|
||||||
|
|
||||||
// Update the header with correct sizes
|
|
||||||
const dataSize = this.samplesWritten * 4;
|
|
||||||
const fileSize = dataSize + 36;
|
|
||||||
|
|
||||||
data.writeUInt32LE(fileSize, 4); // Update RIFF chunk size
|
|
||||||
data.writeUInt32LE(dataSize, 40); // Update data chunk size
|
|
||||||
|
|
||||||
// Write the updated file
|
|
||||||
await fs.promises.writeFile(this.finalFilePath, data);
|
|
||||||
|
|
||||||
// Clean up temp file
|
|
||||||
await fs.promises.unlink(this.tempFilePath);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a Buffer from Float32Array audio data
|
|
||||||
* @param float32Array - The Float32Array containing audio samples
|
|
||||||
* @returns FileData - The audio data as a Buffer
|
|
||||||
*/
|
|
||||||
export function FileData(float32Array: Float32Array): Buffer {
|
|
||||||
const buffer = Buffer.alloc(float32Array.length * 4); // 4 bytes per float
|
|
||||||
for (let i = 0; i < float32Array.length; i++) {
|
|
||||||
buffer.writeFloatLE(float32Array[i], i * 4);
|
|
||||||
}
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
@@ -584,7 +584,8 @@ export function SavedRecordingItem({
|
|||||||
>(null);
|
>(null);
|
||||||
|
|
||||||
const metadata = recording.metadata;
|
const metadata = recording.metadata;
|
||||||
const fileName = recording.wav;
|
// Ensure we have a valid filename, fallback to an empty string if undefined
|
||||||
|
const fileName = recording.mp3 || '';
|
||||||
const recordingDate = metadata
|
const recordingDate = metadata
|
||||||
? new Date(metadata.recordingStartTime).toLocaleString()
|
? new Date(metadata.recordingStartTime).toLocaleString()
|
||||||
: 'Unknown date';
|
: 'Unknown date';
|
||||||
@@ -626,7 +627,12 @@ export function SavedRecordingItem({
|
|||||||
|
|
||||||
const processAudioData = React.useCallback(async () => {
|
const processAudioData = React.useCallback(async () => {
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`/api/recordings/${fileName}/recording.wav`);
|
// Check if fileName is empty
|
||||||
|
if (!fileName) {
|
||||||
|
throw new Error('Invalid recording filename');
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await fetch(`/api/recordings/${fileName}/recording.mp3`);
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Failed to fetch audio file (${response.status}): ${response.statusText}`
|
`Failed to fetch audio file (${response.status}): ${response.statusText}`
|
||||||
@@ -741,7 +747,12 @@ export function SavedRecordingItem({
|
|||||||
setError(null); // Clear any previous errors
|
setError(null); // Clear any previous errors
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`/api/recordings/${recording.wav}`, {
|
// Check if filename is valid
|
||||||
|
if (!recording.mp3) {
|
||||||
|
throw new Error('Invalid recording filename');
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await fetch(`/api/recordings/${recording.mp3}`, {
|
||||||
method: 'DELETE',
|
method: 'DELETE',
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -765,7 +776,7 @@ export function SavedRecordingItem({
|
|||||||
} finally {
|
} finally {
|
||||||
setIsDeleting(false);
|
setIsDeleting(false);
|
||||||
}
|
}
|
||||||
}, [recording.wav]);
|
}, [recording.mp3]);
|
||||||
|
|
||||||
const handleDeleteClick = React.useCallback(() => {
|
const handleDeleteClick = React.useCallback(() => {
|
||||||
void handleDelete().catch(err => {
|
void handleDelete().catch(err => {
|
||||||
@@ -779,7 +790,7 @@ export function SavedRecordingItem({
|
|||||||
socket.on(
|
socket.on(
|
||||||
'apps:recording-transcription-start',
|
'apps:recording-transcription-start',
|
||||||
(data: { filename: string }) => {
|
(data: { filename: string }) => {
|
||||||
if (data.filename === recording.wav) {
|
if (recording.mp3 && data.filename === recording.mp3) {
|
||||||
setTranscriptionError(null);
|
setTranscriptionError(null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -793,7 +804,7 @@ export function SavedRecordingItem({
|
|||||||
transcription?: string;
|
transcription?: string;
|
||||||
error?: string;
|
error?: string;
|
||||||
}) => {
|
}) => {
|
||||||
if (data.filename === recording.wav && !data.success) {
|
if (recording.mp3 && data.filename === recording.mp3 && !data.success) {
|
||||||
setTranscriptionError(data.error || 'Transcription failed');
|
setTranscriptionError(data.error || 'Transcription failed');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -803,12 +814,17 @@ export function SavedRecordingItem({
|
|||||||
socket.off('apps:recording-transcription-start');
|
socket.off('apps:recording-transcription-start');
|
||||||
socket.off('apps:recording-transcription-end');
|
socket.off('apps:recording-transcription-end');
|
||||||
};
|
};
|
||||||
}, [recording.wav]);
|
}, [recording.mp3]);
|
||||||
|
|
||||||
const handleTranscribe = React.useCallback(async () => {
|
const handleTranscribe = React.useCallback(async () => {
|
||||||
try {
|
try {
|
||||||
|
// Check if filename is valid
|
||||||
|
if (!recording.mp3) {
|
||||||
|
throw new Error('Invalid recording filename');
|
||||||
|
}
|
||||||
|
|
||||||
const response = await fetch(
|
const response = await fetch(
|
||||||
`/api/recordings/${recording.wav}/transcribe`,
|
`/api/recordings/${recording.mp3}/transcribe`,
|
||||||
{
|
{
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
}
|
}
|
||||||
@@ -823,7 +839,7 @@ export function SavedRecordingItem({
|
|||||||
err instanceof Error ? err.message : 'Failed to start transcription'
|
err instanceof Error ? err.message : 'Failed to start transcription'
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}, [recording.wav]);
|
}, [recording.mp3]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="bg-white rounded-lg shadow-sm hover:shadow-md transition-all duration-300 overflow-hidden mb-3 border border-gray-100 hover:border-gray-200">
|
<div className="bg-white rounded-lg shadow-sm hover:shadow-md transition-all duration-300 overflow-hidden mb-3 border border-gray-100 hover:border-gray-200">
|
||||||
@@ -854,7 +870,7 @@ export function SavedRecordingItem({
|
|||||||
/>
|
/>
|
||||||
<audio
|
<audio
|
||||||
ref={audioRef}
|
ref={audioRef}
|
||||||
src={`/api/recordings/${fileName}/recording.wav`}
|
src={fileName ? `/api/recordings/${fileName}/recording.mp3` : ''}
|
||||||
preload="metadata"
|
preload="metadata"
|
||||||
className="hidden"
|
className="hidden"
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ export function SavedRecordings(): React.ReactElement {
|
|||||||
return (
|
return (
|
||||||
<div className="space-y-1">
|
<div className="space-y-1">
|
||||||
{recordings.map(recording => (
|
{recordings.map(recording => (
|
||||||
<SavedRecordingItem key={recording.wav} recording={recording} />
|
<SavedRecordingItem key={recording.mp3} recording={recording} />
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -27,8 +27,10 @@ export interface RecordingMetadata {
|
|||||||
recordingEndTime: number;
|
recordingEndTime: number;
|
||||||
recordingDuration: number;
|
recordingDuration: number;
|
||||||
sampleRate: number;
|
sampleRate: number;
|
||||||
|
channels: number;
|
||||||
totalSamples: number;
|
totalSamples: number;
|
||||||
icon?: Uint8Array;
|
icon?: Uint8Array;
|
||||||
|
mp3: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface TranscriptionMetadata {
|
export interface TranscriptionMetadata {
|
||||||
@@ -49,7 +51,7 @@ export interface TranscriptionMetadata {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface SavedRecording {
|
export interface SavedRecording {
|
||||||
wav: string;
|
mp3: string;
|
||||||
metadata?: RecordingMetadata;
|
metadata?: RecordingMetadata;
|
||||||
transcription?: TranscriptionMetadata;
|
transcription?: TranscriptionMetadata;
|
||||||
}
|
}
|
||||||
|
|||||||
2
packages/frontend/native/index.d.ts
vendored
2
packages/frontend/native/index.d.ts
vendored
@@ -19,6 +19,8 @@ export declare class ApplicationStateChangedSubscriber {
|
|||||||
|
|
||||||
export declare class AudioTapStream {
|
export declare class AudioTapStream {
|
||||||
stop(): void
|
stop(): void
|
||||||
|
get sampleRate(): number
|
||||||
|
get channels(): number
|
||||||
}
|
}
|
||||||
|
|
||||||
export declare class DocStorage {
|
export declare class DocStorage {
|
||||||
|
|||||||
@@ -54,9 +54,9 @@ impl CATapDescription {
|
|||||||
.as_slice(),
|
.as_slice(),
|
||||||
);
|
);
|
||||||
let obj: *mut AnyObject =
|
let obj: *mut AnyObject =
|
||||||
unsafe { msg_send![obj, initStereoMixdownOfProcesses: &*processes_array] };
|
unsafe { msg_send![obj, initStereoGlobalTapButExcludeProcesses: &*processes_array] };
|
||||||
if obj.is_null() {
|
if obj.is_null() {
|
||||||
return Err(CoreAudioError::InitStereoMixdownOfProcessesFailed);
|
return Err(CoreAudioError::InitStereoGlobalTapButExcludeProcessesFailed);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Self { inner: obj })
|
Ok(Self { inner: obj })
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ pub enum CoreAudioError {
|
|||||||
AllocCATapDescriptionFailed,
|
AllocCATapDescriptionFailed,
|
||||||
#[error("Call initStereoMixdownOfProcesses on CATapDescription failed")]
|
#[error("Call initStereoMixdownOfProcesses on CATapDescription failed")]
|
||||||
InitStereoMixdownOfProcessesFailed,
|
InitStereoMixdownOfProcessesFailed,
|
||||||
|
#[error("Call initStereoGlobalTapButExcludeProcesses on CATapDescription failed")]
|
||||||
|
InitStereoGlobalTapButExcludeProcessesFailed,
|
||||||
#[error("Get UUID on CATapDescription failed")]
|
#[error("Get UUID on CATapDescription failed")]
|
||||||
GetCATapDescriptionUUIDFailed,
|
GetCATapDescriptionUUIDFailed,
|
||||||
#[error("Get mute behavior on CATapDescription failed")]
|
#[error("Get mute behavior on CATapDescription failed")]
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ use napi_derive::napi;
|
|||||||
use objc2::{runtime::AnyObject, Encode, Encoding, RefEncode};
|
use objc2::{runtime::AnyObject, Encode, Encoding, RefEncode};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
audio_stream_basic_desc::read_audio_stream_basic_description,
|
||||||
ca_tap_description::CATapDescription, device::get_device_uid, error::CoreAudioError,
|
ca_tap_description::CATapDescription, device::get_device_uid, error::CoreAudioError,
|
||||||
queue::create_audio_tap_queue, screen_capture_kit::TappableApplication,
|
queue::create_audio_tap_queue, screen_capture_kit::TappableApplication,
|
||||||
};
|
};
|
||||||
@@ -82,9 +83,17 @@ unsafe impl RefEncode for AudioBufferList {
|
|||||||
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
|
const ENCODING_REF: Encoding = Encoding::Pointer(&Self::ENCODING);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Audio statistics structure to track audio format information
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
pub struct AudioStats {
|
||||||
|
pub sample_rate: f64,
|
||||||
|
pub channels: u32,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct AggregateDevice {
|
pub struct AggregateDevice {
|
||||||
pub tap_id: AudioObjectID,
|
pub tap_id: AudioObjectID,
|
||||||
pub id: AudioObjectID,
|
pub id: AudioObjectID,
|
||||||
|
pub audio_stats: Option<AudioStats>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AggregateDevice {
|
impl AggregateDevice {
|
||||||
@@ -118,6 +127,7 @@ impl AggregateDevice {
|
|||||||
Ok(Self {
|
Ok(Self {
|
||||||
tap_id,
|
tap_id,
|
||||||
id: aggregate_device_id,
|
id: aggregate_device_id,
|
||||||
|
audio_stats: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -149,6 +159,7 @@ impl AggregateDevice {
|
|||||||
Ok(Self {
|
Ok(Self {
|
||||||
tap_id,
|
tap_id,
|
||||||
id: aggregate_device_id,
|
id: aggregate_device_id,
|
||||||
|
audio_stats: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -181,6 +192,7 @@ impl AggregateDevice {
|
|||||||
Ok(Self {
|
Ok(Self {
|
||||||
tap_id,
|
tap_id,
|
||||||
id: aggregate_device_id,
|
id: aggregate_device_id,
|
||||||
|
audio_stats: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -188,6 +200,22 @@ impl AggregateDevice {
|
|||||||
&mut self,
|
&mut self,
|
||||||
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
|
audio_stream_callback: Arc<ThreadsafeFunction<Float32Array, (), Float32Array, true>>,
|
||||||
) -> Result<AudioTapStream> {
|
) -> Result<AudioTapStream> {
|
||||||
|
// Read and log the audio format before starting the device
|
||||||
|
let mut audio_stats = AudioStats {
|
||||||
|
sample_rate: 44100.0,
|
||||||
|
channels: 1, // Always set to 1 channel (mono)
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Ok(audio_format) = read_audio_stream_basic_description(self.tap_id) {
|
||||||
|
// Store the audio format information
|
||||||
|
audio_stats.sample_rate = audio_format.0.mSampleRate;
|
||||||
|
// Always use 1 channel regardless of what the system reports
|
||||||
|
audio_stats.channels = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.audio_stats = Some(audio_stats);
|
||||||
|
let audio_stats_clone = audio_stats;
|
||||||
|
|
||||||
let queue = create_audio_tap_queue();
|
let queue = create_audio_tap_queue();
|
||||||
let mut in_proc_id: AudioDeviceIOProcID = None;
|
let mut in_proc_id: AudioDeviceIOProcID = None;
|
||||||
|
|
||||||
@@ -221,18 +249,33 @@ impl AggregateDevice {
|
|||||||
let samples: &[f32] =
|
let samples: &[f32] =
|
||||||
unsafe { std::slice::from_raw_parts(mData.cast::<f32>(), total_samples) };
|
unsafe { std::slice::from_raw_parts(mData.cast::<f32>(), total_samples) };
|
||||||
|
|
||||||
// Convert to mono if needed
|
// Check the channel count and data format
|
||||||
let mono_samples: Vec<f32> = if *mNumberChannels > 1 {
|
let channel_count = *mNumberChannels as usize;
|
||||||
samples
|
|
||||||
.chunks(*mNumberChannels as usize)
|
|
||||||
.map(|chunk| chunk.iter().sum::<f32>() / *mNumberChannels as f32)
|
|
||||||
.collect()
|
|
||||||
} else {
|
|
||||||
samples.to_vec()
|
|
||||||
};
|
|
||||||
|
|
||||||
|
// Process the audio based on channel count
|
||||||
|
let mut processed_samples: Vec<f32>;
|
||||||
|
|
||||||
|
if channel_count > 1 {
|
||||||
|
// For stereo, samples are interleaved: [L, R, L, R, ...]
|
||||||
|
// We need to average each pair to get mono
|
||||||
|
let frame_count = total_samples / channel_count;
|
||||||
|
processed_samples = Vec::with_capacity(frame_count);
|
||||||
|
|
||||||
|
for i in 0..frame_count {
|
||||||
|
let mut frame_sum = 0.0;
|
||||||
|
for c in 0..channel_count {
|
||||||
|
frame_sum += samples[i * channel_count + c];
|
||||||
|
}
|
||||||
|
processed_samples.push(frame_sum / (channel_count as f32));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Already mono, just copy the samples
|
||||||
|
processed_samples = samples.to_vec();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pass the processed samples to the callback
|
||||||
audio_stream_callback.call(
|
audio_stream_callback.call(
|
||||||
Ok(mono_samples.into()),
|
Ok(processed_samples.into()),
|
||||||
ThreadsafeFunctionCallMode::NonBlocking,
|
ThreadsafeFunctionCallMode::NonBlocking,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -266,6 +309,7 @@ impl AggregateDevice {
|
|||||||
device_id: self.id,
|
device_id: self.id,
|
||||||
in_proc_id,
|
in_proc_id,
|
||||||
stop_called: false,
|
stop_called: false,
|
||||||
|
audio_stats: audio_stats_clone,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -353,6 +397,7 @@ pub struct AudioTapStream {
|
|||||||
device_id: AudioObjectID,
|
device_id: AudioObjectID,
|
||||||
in_proc_id: AudioDeviceIOProcID,
|
in_proc_id: AudioDeviceIOProcID,
|
||||||
stop_called: bool,
|
stop_called: bool,
|
||||||
|
audio_stats: AudioStats,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
@@ -381,6 +426,16 @@ impl AudioTapStream {
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi(getter)]
|
||||||
|
pub fn get_sample_rate(&self) -> f64 {
|
||||||
|
self.audio_stats.sample_rate
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi(getter)]
|
||||||
|
pub fn get_channels(&self) -> u32 {
|
||||||
|
self.audio_stats.channels
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cfstring_from_bytes_with_nul(bytes: &'static [u8]) -> CFString {
|
fn cfstring_from_bytes_with_nul(bytes: &'static [u8]) -> CFString {
|
||||||
|
|||||||
Reference in New Issue
Block a user