chore: remove lame encoder (#11529)

This commit is contained in:
darkskygit
2025-04-08 05:02:30 +00:00
parent 49c6ad7c04
commit 558e84138c
15 changed files with 111 additions and 418 deletions

View File

@@ -0,0 +1,58 @@
/**
 * Encodes floating-point PCM samples as a 16-bit PCM WAV (RIFF) file.
 *
 * The result is the canonical 44-byte header followed by the samples as
 * little-endian signed 16-bit integers. Samples are written in the order
 * given, so multi-channel audio must already be interleaved.
 *
 * @param samples - PCM samples, nominally in [-1, 1]. Values outside that
 *   range are clamped; `NaN` is written as 0 (silence).
 * @param options - Stream parameters. `sampleRate` defaults to 44100 and
 *   `numChannels` defaults to 1 when omitted.
 * @returns A new `ArrayBuffer` holding the complete WAV file.
 * @throws RangeError if `numChannels` or `sampleRate` is not a positive
 *   integer, or if any derived header field would overflow its fixed-width
 *   slot (which would silently produce a corrupt file).
 */
export function createWavBuffer(
  samples: Float32Array,
  options: {
    sampleRate?: number;
    numChannels?: number;
  } = {}
): ArrayBuffer {
  const { sampleRate = 44100, numChannels = 1 } = options;
  const headerSize = 44;
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  const dataSize = samples.length * bytesPerSample;

  // The header stores these values in 16/32-bit fields; DataView setters wrap
  // out-of-range numbers instead of failing, so reject them up front.
  if (
    !Number.isInteger(numChannels) ||
    numChannels < 1 ||
    blockAlign > 0xffff
  ) {
    throw new RangeError(
      `numChannels must be a positive integer that fits the WAV header, got ${numChannels}`
    );
  }
  if (
    !Number.isInteger(sampleRate) ||
    sampleRate < 1 ||
    byteRate > 0xffffffff
  ) {
    throw new RangeError(
      `sampleRate must be a positive integer that fits the WAV header, got ${sampleRate}`
    );
  }
  if (36 + dataSize > 0xffffffff) {
    throw new RangeError(
      `audio data is too large for a WAV file: ${dataSize} bytes`
    );
  }

  const buffer = new ArrayBuffer(headerSize + dataSize);
  const view = new DataView(buffer);

  // "RIFF" chunk descriptor
  writeString(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true); // File size - 8
  writeString(view, 8, 'WAVE');

  // "fmt " sub-chunk
  writeString(view, 12, 'fmt ');
  view.setUint32(16, 16, true); // Sub-chunk size
  view.setUint16(20, 1, true); // Audio format (1 = PCM)
  view.setUint16(22, numChannels, true); // Channels
  view.setUint32(24, sampleRate, true); // Sample rate
  view.setUint32(28, byteRate, true); // Byte rate
  view.setUint16(32, blockAlign, true); // Block align
  view.setUint16(34, bitsPerSample, true); // Bits per sample

  // "data" sub-chunk
  writeString(view, 36, 'data');
  view.setUint32(40, dataSize, true); // Sub-chunk size

  // Audio data: clamp to [-1, 1], then scale asymmetrically so that -1 maps
  // to -32768 and +1 maps to 32767. setInt16 drops the fractional part.
  for (let i = 0; i < samples.length; i++) {
    const clamped = Math.max(-1, Math.min(1, samples[i]));
    view.setInt16(
      headerSize + i * bytesPerSample,
      clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff,
      true
    );
  }

  return buffer;
}

/**
 * Writes `string` into `view` starting at `offset`, one byte per UTF-16 code
 * unit. Intended for the ASCII chunk tags of the WAV header; code units above
 * 255 would be truncated to their low byte by `setUint8`.
 */
function writeString(view: DataView, offset: number, string: string): void {
  for (let i = 0; i < string.length; i++) {
    view.setUint8(offset + i, string.charCodeAt(i));
  }
}

View File

@@ -98,8 +98,8 @@ export async function gemini(
try {
// Upload the audio file
uploadResult = await fileManager.uploadFile(audioFilePath, {
mimeType: 'audio/mp3',
displayName: 'audio_transcription.mp3',
mimeType: 'audio/wav',
displayName: 'audio_transcription.wav',
});
console.log('File uploaded:', uploadResult.file.uri);

View File

@@ -5,8 +5,6 @@ import path from 'node:path';
import {
type Application,
type AudioTapStream,
Bitrate,
Mp3Encoder,
ShareableContent,
type TappableApplication,
} from '@affine/native';
@@ -19,6 +17,7 @@ import { debounce } from 'lodash-es';
import multer from 'multer';
import { Server } from 'socket.io';
import { createWavBuffer } from './encode';
import { gemini, type TranscriptionResult } from './gemini';
// Constants
@@ -206,36 +205,34 @@ async function saveRecording(recording: Recording): Promise<string | null> {
const recordingDir = path.join(RECORDING_DIR, sanitizedFilename);
await fs.ensureDir(recordingDir);
const mp3Filename = path.join(recordingDir, 'recording.mp3');
const transcriptionMp3Filename = path.join(
const wavFilename = path.join(recordingDir, 'recording.wav');
const transcriptionWavFilename = path.join(
recordingDir,
'transcription.mp3'
'transcription.wav'
);
const metadataFilename = path.join(recordingDir, 'metadata.json');
const iconFilename = path.join(recordingDir, 'icon.png');
// Save MP3 file with the actual sample rate from the stream
console.log(`📝 Writing MP3 file to ${mp3Filename}`);
const mp3Encoder = new Mp3Encoder({
channels: channelCount,
sampleRate: actualSampleRate,
});
const mp3Data = mp3Encoder.encode(buffer);
await fs.writeFile(mp3Filename, mp3Data);
console.log('✅ MP3 file written successfully');
// Save low-quality MP3 file for transcription (8kHz)
console.log(
`📝 Writing transcription MP3 file to ${transcriptionMp3Filename}`
console.log(`📝 Muxing Wav buffer ${wavFilename}`);
const wavBuffer = new Uint8Array(
createWavBuffer(buffer, {
sampleRate: actualSampleRate,
numChannels: channelCount,
})
);
const transcriptionMp3Encoder = new Mp3Encoder({
channels: channelCount,
bitrate: Bitrate.Kbps8,
sampleRate: actualSampleRate,
});
const transcriptionMp3Data = transcriptionMp3Encoder.encode(buffer);
await fs.writeFile(transcriptionMp3Filename, transcriptionMp3Data);
console.log('✅ Transcription MP3 file written successfully');
// Save Wav file with the actual sample rate from the stream
console.log(`📝 Writing Wav file to ${wavFilename}`);
await fs.writeFile(wavFilename, wavBuffer);
console.log('✅ Wav file written successfully');
// Save the transcription copy; it is the same WAV data as recording.wav (no separate low-bitrate encoding)
console.log(
`📝 Writing transcription wav file to ${transcriptionWavFilename}`
);
await fs.writeFile(transcriptionWavFilename, wavBuffer);
console.log('✅ Transcription Wav file written successfully');
// Save app icon if available
if (app?.icon) {
@@ -367,7 +364,7 @@ async function stopRecording(processId: number) {
// File management
async function getRecordings(): Promise<
{
mp3: string;
wav: string;
metadata?: RecordingMetadata;
transcription?: TranscriptionMetadata;
}[]
@@ -411,7 +408,7 @@ async function getRecordings(): Promise<
if (transcriptionExists) {
transcription = await fs.readJson(transcriptionPath);
} else {
// If transcription.mp3 exists but no transcription.json, it means transcription is available but not started
// If transcription.wav exists but no transcription.json, it means transcription is available but not started
transcription = {
transcriptionStartTime: 0,
transcriptionEndTime: 0,
@@ -423,7 +420,7 @@ async function getRecordings(): Promise<
}
return {
mp3: dir,
wav: dir,
metadata,
transcription,
};
@@ -473,21 +470,21 @@ async function setupRecordingsWatcher() {
// Handle file events
fsWatcher
.on('add', async path => {
if (path.endsWith('.mp3') || path.endsWith('.json')) {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`📝 File added: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
}
})
.on('change', async path => {
if (path.endsWith('.mp3') || path.endsWith('.json')) {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`📝 File changed: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
}
})
.on('unlink', async path => {
if (path.endsWith('.mp3') || path.endsWith('.json')) {
if (path.endsWith('.wav') || path.endsWith('.json')) {
console.log(`🗑️ File removed: ${path}`);
const files = await getRecordings();
io.emit('apps:saved', { recordings: files });
@@ -797,11 +794,11 @@ app.post(
// Check if directory exists
await fs.access(recordingDir);
const transcriptionMp3Path = `${recordingDir}/transcription.mp3`;
const transcriptionWavPath = `${recordingDir}/transcription.wav`;
const transcriptionMetadataPath = `${recordingDir}/transcription.json`;
// Check if transcription file exists
await fs.access(transcriptionMp3Path);
await fs.access(transcriptionWavPath);
// Create initial transcription metadata
const initialMetadata: TranscriptionMetadata = {
@@ -814,7 +811,7 @@ app.post(
// Notify clients that transcription has started
io.emit('apps:recording-transcription-start', { filename: foldername });
const transcription = await gemini(transcriptionMp3Path, {
const transcription = await gemini(transcriptionWavPath, {
mode: 'transcript',
});

View File

@@ -591,7 +591,7 @@ export function SavedRecordingItem({
const metadata = recording.metadata;
// Ensure we have a valid filename, fallback to an empty string if undefined
const fileName = recording.mp3 || '';
const fileName = recording.wav || '';
const recordingDate = metadata
? new Date(metadata.recordingStartTime).toLocaleString()
: 'Unknown date';
@@ -638,7 +638,7 @@ export function SavedRecordingItem({
throw new Error('Invalid recording filename');
}
const response = await fetch(`/api/recordings/${fileName}/recording.mp3`);
const response = await fetch(`/api/recordings/${fileName}/recording.wav`);
if (!response.ok) {
throw new Error(
`Failed to fetch audio file (${response.status}): ${response.statusText}`
@@ -754,11 +754,11 @@ export function SavedRecordingItem({
try {
// Check if filename is valid
if (!recording.mp3) {
if (!recording.wav) {
throw new Error('Invalid recording filename');
}
const response = await fetch(`/api/recordings/${recording.mp3}`, {
const response = await fetch(`/api/recordings/${recording.wav}`, {
method: 'DELETE',
});
@@ -782,7 +782,7 @@ export function SavedRecordingItem({
} finally {
setIsDeleting(false);
}
}, [recording.mp3]);
}, [recording.wav]);
const handleDeleteClick = React.useCallback(() => {
void handleDelete().catch(err => {
@@ -796,7 +796,7 @@ export function SavedRecordingItem({
socket.on(
'apps:recording-transcription-start',
(data: { filename: string }) => {
if (recording.mp3 && data.filename === recording.mp3) {
if (recording.wav && data.filename === recording.wav) {
setTranscriptionError(null);
}
}
@@ -810,7 +810,7 @@ export function SavedRecordingItem({
transcription?: string;
error?: string;
}) => {
if (recording.mp3 && data.filename === recording.mp3 && !data.success) {
if (recording.wav && data.filename === recording.wav && !data.success) {
setTranscriptionError(data.error || 'Transcription failed');
}
}
@@ -820,17 +820,17 @@ export function SavedRecordingItem({
socket.off('apps:recording-transcription-start');
socket.off('apps:recording-transcription-end');
};
}, [recording.mp3]);
}, [recording.wav]);
const handleTranscribe = React.useCallback(async () => {
try {
// Check if filename is valid
if (!recording.mp3) {
if (!recording.wav) {
throw new Error('Invalid recording filename');
}
const response = await fetch(
`/api/recordings/${recording.mp3}/transcribe`,
`/api/recordings/${recording.wav}/transcribe`,
{
method: 'POST',
}
@@ -845,7 +845,7 @@ export function SavedRecordingItem({
err instanceof Error ? err.message : 'Failed to start transcription'
);
}
}, [recording.mp3]);
}, [recording.wav]);
return (
<div className="bg-white rounded-lg shadow-sm hover:shadow-md transition-all duration-300 overflow-hidden mb-3 border border-gray-100 hover:border-gray-200">
@@ -876,7 +876,7 @@ export function SavedRecordingItem({
/>
<audio
ref={audioRef}
src={fileName ? `/api/recordings/${fileName}/recording.mp3` : ''}
src={fileName ? `/api/recordings/${fileName}/recording.wav` : ''}
preload="metadata"
className="hidden"
/>

View File

@@ -34,7 +34,7 @@ export function SavedRecordings(): React.ReactElement {
return (
<div className="space-y-1">
{recordings.map(recording => (
<SavedRecordingItem key={recording.mp3} recording={recording} />
<SavedRecordingItem key={recording.wav} recording={recording} />
))}
</div>
);

View File

@@ -54,7 +54,7 @@ export interface TranscriptionMetadata {
}
export interface SavedRecording {
mp3: string;
wav: string;
metadata?: RecordingMetadata;
transcription?: TranscriptionMetadata;
}

View File

@@ -1,23 +0,0 @@
import { readFile, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { tmpdir } from 'node:os';
import test from 'ava';
import { decodeAudio, Mp3Encoder } from '../index.js';
// Directory containing this test file, derived from the module URL.
const __dirname = join(fileURLToPath(import.meta.url), '..');

// WAV fixture loaded once at module evaluation and shared with the test below.
const fixturePath = join(__dirname, 'fixtures', 'recording.wav');
const wav = await readFile(fixturePath);

test('convert wav to mp3', async t => {
  const decoded = await decodeAudio(wav);
  const encoder = new Mp3Encoder({ channels: 1 });

  // The test only checks that encoding and writing the result do not throw.
  const encodeAndWrite = async (): Promise<void> => {
    const encoded = encoder.encode(decoded);
    await writeFile(join(tmpdir(), 'recording.mp3'), encoded);
  };

  await t.notThrowsAsync(encodeAndWrite);
});

View File

@@ -63,11 +63,6 @@ export declare class DocStoragePool {
getBlobUploadedAt(universalId: string, peer: string, blobId: string): Promise<Date | null>
}
export declare class Mp3Encoder {
constructor(options: EncodeOptions)
encode(input: Float32Array): Uint8Array
}
export declare class RecordingPermissions {
audio: boolean
screen: boolean
@@ -135,42 +130,6 @@ export declare class TappableApplication {
tapAudio(audioStreamCallback: ((err: Error | null, arg: Float32Array) => void)): AudioTapStream
}
/**Enumeration of valid values for `set_brate` */
export declare enum Bitrate {
/**8_000 */
Kbps8 = 8,
/**16_000 */
Kbps16 = 16,
/**24_000 */
Kbps24 = 24,
/**32_000 */
Kbps32 = 32,
/**40_000 */
Kbps40 = 40,
/**48_000 */
Kbps48 = 48,
/**64_000 */
Kbps64 = 64,
/**80_000 */
Kbps80 = 80,
/**96_000 */
Kbps96 = 96,
/**112_000 */
Kbps112 = 112,
/**128_000 */
Kbps128 = 128,
/**160_000 */
Kbps160 = 160,
/**192_000 */
Kbps192 = 192,
/**224_000 */
Kbps224 = 224,
/**256_000 */
Kbps256 = 256,
/**320_000 */
Kbps320 = 320
}
export interface Blob {
key: string
data: Uint8Array
@@ -212,14 +171,6 @@ export interface DocUpdate {
bin: Uint8Array
}
export interface EncodeOptions {
channels: number
quality?: Quality
bitrate?: Bitrate
sampleRate?: number
mode?: Mode
}
export interface InsertRow {
docId?: string
data: Uint8Array
@@ -234,42 +185,6 @@ export interface ListedBlob {
export declare function mintChallengeResponse(resource: string, bits?: number | undefined | null): Promise<string>
/** MPEG mode */
export declare enum Mode {
Mono = 0,
Stereo = 1,
JointStereo = 2,
DualChannel = 3,
NotSet = 4
}
/**
*Possible quality parameter.
*From best(0) to worst(9)
*/
export declare enum Quality {
/**Best possible quality */
Best = 0,
/**Second best */
SecondBest = 1,
/**Close to best */
NearBest = 2,
/**Very nice */
VeryNice = 3,
/**Nice */
Nice = 4,
/**Good */
Good = 5,
/**Decent */
Decent = 6,
/**Okayish */
Ok = 7,
/**Almost worst */
SecondWorst = 8,
/**Worst */
Worst = 9
}
export interface SetBlob {
key: string
data: Uint8Array

View File

@@ -35,7 +35,11 @@ const isMuslFromFilesystem = () => {
}
const isMuslFromReport = () => {
const report = typeof process.report.getReport === 'function' ? process.report.getReport() : null
let report = null
if (typeof process.report?.getReport === 'function') {
process.report.excludeNetwork = true
report = process.report.getReport()
}
if (!report) {
return null
}
@@ -376,16 +380,12 @@ module.exports.ApplicationStateChangedSubscriber = nativeBinding.ApplicationStat
module.exports.AudioTapStream = nativeBinding.AudioTapStream
module.exports.DocStorage = nativeBinding.DocStorage
module.exports.DocStoragePool = nativeBinding.DocStoragePool
module.exports.Mp3Encoder = nativeBinding.Mp3Encoder
module.exports.RecordingPermissions = nativeBinding.RecordingPermissions
module.exports.ShareableContent = nativeBinding.ShareableContent
module.exports.SqliteConnection = nativeBinding.SqliteConnection
module.exports.TappableApplication = nativeBinding.TappableApplication
module.exports.Bitrate = nativeBinding.Bitrate
module.exports.decodeAudio = nativeBinding.decodeAudio
module.exports.decodeAudioSync = nativeBinding.decodeAudioSync
module.exports.mintChallengeResponse = nativeBinding.mintChallengeResponse
module.exports.Mode = nativeBinding.Mode
module.exports.Quality = nativeBinding.Quality
module.exports.ValidationResult = nativeBinding.ValidationResult
module.exports.verifyChallengeResponse = nativeBinding.verifyChallengeResponse

View File

@@ -7,7 +7,6 @@ version = "0.0.0"
crate-type = ["cdylib", "rlib"]
[dependencies]
mp3lame-encoder = { workspace = true, features = ["std"] }
napi = { workspace = true, features = ["napi4"] }
napi-derive = { workspace = true, features = ["type-def"] }
rubato = { workspace = true }

View File

@@ -3,4 +3,3 @@ pub mod macos;
#[cfg(target_os = "macos")]
pub(crate) use macos::*;
pub mod audio_decoder;
pub mod mp3;

View File

@@ -1,219 +0,0 @@
use mp3lame_encoder::{Builder, Encoder, FlushNoGap, MonoPcm};
use napi::bindgen_prelude::{Result, Uint8Array};
use napi_derive::napi;
use thiserror::Error;
/// Errors produced while configuring or running the LAME MP3 encoder.
///
/// The `#[error(...)]` text of each variant is its `Display` output, which is
/// what ends up as the message of the `napi::Error` these are converted into.
#[derive(Error, Debug)]
pub enum LameError {
/// `Builder::new()` returned `None`.
#[error("Create builder failed")]
CreateBuilderFailed,
/// A builder setter or `Builder::build()` rejected the configuration.
#[error("Failed to create encoder")]
BuildError(#[from] mp3lame_encoder::BuildError),
/// Encoding or flushing the encoder failed.
#[error("Failed to encode")]
EncodeError(#[from] mp3lame_encoder::EncodeError),
}
/// Converts a [`LameError`] into a generic-failure N-API error carrying the
/// error's `Display` text, so `?` works in functions returning `napi::Result`.
impl From<LameError> for napi::Error {
  fn from(value: LameError) -> Self {
    let message = value.to_string();
    Self::new(napi::Status::GenericFailure, message)
  }
}
// N-API-exported quality preset. Each variant is converted to the variant of
// the same name in `mp3lame_encoder::Quality` by the `From` impl that follows.
#[napi]
#[derive(Debug, Clone)]
///Possible quality parameter.
///From best(0) to worst(9)
pub enum Quality {
///Best possible quality
Best = 0,
///Second best
SecondBest = 1,
///Close to best
NearBest = 2,
///Very nice
VeryNice = 3,
///Nice
Nice = 4,
///Good
Good = 5,
///Decent
Decent = 6,
///Okayish
Ok = 7,
///Almost worst
SecondWorst = 8,
///Worst
Worst = 9,
}
/// Maps the N-API [`Quality`] preset onto the identically named variant of
/// `mp3lame_encoder::Quality`.
impl From<Quality> for mp3lame_encoder::Quality {
  fn from(value: Quality) -> Self {
    // `Self` is `mp3lame_encoder::Quality`; `Quality` is the local N-API enum.
    match value {
      Quality::Best => Self::Best,
      Quality::SecondBest => Self::SecondBest,
      Quality::NearBest => Self::NearBest,
      Quality::VeryNice => Self::VeryNice,
      Quality::Nice => Self::Nice,
      Quality::Good => Self::Good,
      Quality::Decent => Self::Decent,
      Quality::Ok => Self::Ok,
      Quality::SecondWorst => Self::SecondWorst,
      Quality::Worst => Self::Worst,
    }
  }
}
// N-API-exported bitrate selection. Every discriminant equals the number in
// the variant name (e.g. `Kbps128 = 128`). Values are converted to
// `mp3lame_encoder::Bitrate` and handed to `Builder::set_brate`.
#[napi]
#[repr(u16)]
#[derive(Debug, Clone)]
///Enumeration of valid values for `set_brate`
pub enum Bitrate {
///8_000
Kbps8 = 8,
///16_000
Kbps16 = 16,
///24_000
Kbps24 = 24,
///32_000
Kbps32 = 32,
///40_000
Kbps40 = 40,
///48_000
Kbps48 = 48,
///64_000
Kbps64 = 64,
///80_000
Kbps80 = 80,
///96_000
Kbps96 = 96,
///112_000
Kbps112 = 112,
///128_000
Kbps128 = 128,
///160_000
Kbps160 = 160,
///192_000
Kbps192 = 192,
///224_000
Kbps224 = 224,
///256_000
Kbps256 = 256,
///320_000
Kbps320 = 320,
}
impl From<Bitrate> for mp3lame_encoder::Bitrate {
fn from(value: Bitrate) -> Self {
match value {
Bitrate::Kbps8 => mp3lame_encoder::Bitrate::Kbps8,
Bitrate::Kbps16 => mp3lame_encoder::Bitrate::Kbps16,
Bitrate::Kbps24 => mp3lame_encoder::Bitrate::Kbps24,
Bitrate::Kbps32 => mp3lame_encoder::Bitrate::Kbps32,
Bitrate::Kbps40 => mp3lame_encoder::Bitrate::Kbps40,
Bitrate::Kbps48 => mp3lame_encoder::Bitrate::Kbps48,
Bitrate::Kbps64 => mp3lame_encoder::Bitrate::Kbps64,
Bitrate::Kbps80 => mp3lame_encoder::Bitrate::Kbps80,
Bitrate::Kbps96 => mp3lame_encoder::Bitrate::Kbps96,
Bitrate::Kbps112 => mp3lame_encoder::Bitrate::Kbps112,
Bitrate::Kbps128 => mp3lame_encoder::Bitrate::Kbps128,
Bitrate::Kbps160 => mp3lame_encoder::Bitrate::Kbps160,
Bitrate::Kbps192 => mp3lame_encoder::Bitrate::Kbps192,
Bitrate::Kbps224 => mp3lame_encoder::Bitrate::Kbps224,
Bitrate::Kbps256 => mp3lame_encoder::Bitrate::Kbps256,
Bitrate::Kbps320 => mp3lame_encoder::Bitrate::Kbps320,
}
}
}
// N-API-exported channel mode. No explicit discriminants are given, so the
// variants are numbered in declaration order. Converted to
// `mp3lame_encoder::Mode` and handed to `Builder::set_mode`.
#[napi]
#[derive(Debug, Clone)]
/// MPEG mode
pub enum Mode {
Mono,
Stereo,
JointStereo,
DualChannel,
NotSet,
}
/// Maps the N-API [`Mode`] onto the corresponding `mp3lame_encoder::Mode`
/// variant.
impl From<Mode> for mp3lame_encoder::Mode {
  fn from(value: Mode) -> Self {
    // `Self` is `mp3lame_encoder::Mode`; `Mode` is the local N-API enum.
    match value {
      Mode::Mono => Self::Mono,
      Mode::Stereo => Self::Stereo,
      Mode::JointStereo => Self::JointStereo,
      // The target variant really is spelled `DaulChannel` here; do not "fix" it locally.
      Mode::DualChannel => Self::DaulChannel,
      Mode::NotSet => Self::NotSet,
    }
  }
}
// Options object accepted by the `Mp3Encoder` constructor.
// NOTE(review): `object_to_js = false` presumably means this type is only ever
// converted from JS into Rust, never back — confirm against napi-derive docs.
#[napi(object, object_to_js = false)]
#[derive(Debug, Clone)]
pub struct EncodeOptions {
// Channel count; the constructor casts this to `u8` before passing it to
// `Builder::set_num_channels`.
pub channels: u32,
// The remaining fields are optional: the constructor only calls the matching
// builder setter when the value is `Some`, otherwise the builder is left as is.
pub quality: Option<Quality>,
pub bitrate: Option<Bitrate>,
pub sample_rate: Option<u32>,
pub mode: Option<Mode>,
}
// N-API class wrapping a configured `mp3lame_encoder::Encoder`. The encoder is
// built once in the constructor and reused by every `encode` call.
#[napi]
pub struct Mp3Encoder {
// Underlying LAME encoder; `encode` needs `&mut` access to it.
encoder: Encoder,
}
#[napi]
impl Mp3Encoder {
#[napi(constructor)]
pub fn new(options: EncodeOptions) -> Result<Self> {
let mut builder = Builder::new().ok_or(LameError::CreateBuilderFailed)?;
builder
.set_num_channels(options.channels as u8)
.map_err(LameError::BuildError)?;
if let Some(quality) = options.quality {
builder
.set_quality(quality.into())
.map_err(LameError::BuildError)?;
}
if let Some(bitrate) = options.bitrate {
builder
.set_brate(bitrate.into())
.map_err(LameError::BuildError)?;
}
if let Some(sample_rate) = options.sample_rate {
builder
.set_sample_rate(sample_rate)
.map_err(LameError::BuildError)?;
}
if let Some(mode) = options.mode {
builder
.set_mode(mode.into())
.map_err(LameError::BuildError)?;
}
Ok(Self {
encoder: builder.build().map_err(LameError::BuildError)?,
})
}
#[napi]
pub fn encode(&mut self, input: &[f32]) -> Result<Uint8Array> {
let mut output = Vec::with_capacity(input.len());
output.reserve(mp3lame_encoder::max_required_buffer_size(input.len()));
let encoded_size = self
.encoder
.encode(MonoPcm(input), output.spare_capacity_mut())
.map_err(LameError::EncodeError)?;
unsafe {
output.set_len(output.len().wrapping_add(encoded_size));
}
let encoded_size = self
.encoder
.flush::<FlushNoGap>(output.spare_capacity_mut())
.map_err(LameError::EncodeError)?;
unsafe {
output.set_len(output.len().wrapping_add(encoded_size));
}
Ok(output.into())
}
}