mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-04 08:38:34 +00:00
150 lines
4.6 KiB
TypeScript
150 lines
4.6 KiB
TypeScript
import { join } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
import {
|
|
Whisper,
|
|
WhisperFullParams,
|
|
WhisperSamplingStrategy,
|
|
} from '@napi-rs/whisper';
|
|
import { BehaviorSubject, EMPTY, Observable } from 'rxjs';
|
|
import {
|
|
distinctUntilChanged,
|
|
exhaustMap,
|
|
groupBy,
|
|
mergeMap,
|
|
switchMap,
|
|
tap,
|
|
} from 'rxjs/operators';
|
|
|
|
import { type Application, ShareableContent } from './index.js';
|
|
|
|
// Directory that contains this module; sibling files are resolved against it.
const rootDir = join(fileURLToPath(import.meta.url), '..');

// Entry point to the application-listing API imported from ./index.js.
const shareableContent = new ShareableContent();

// Bundle identifiers of the applications this script is interested in.
const appList = new Set<string>()
  .add('com.tinyspeck.slackmacgap.helper')
  .add('us.zoom.xos')
  .add('org.mozilla.firefoxdeveloperedition');

// Log the bundle identifier of every application reported at startup.
console.info(
  Array.from(shareableContent.applications(), app => app.bundleIdentifier)
);
|
|
|
|
// Path of the model file, resolved relative to this module's directory.
const GGLM_LARGE = join(rootDir, 'ggml-large-v3-turbo.bin');

// Whisper instance created from that model file, with GPU use enabled and
// GPU device index 1 selected.
const whisper = new Whisper(GGLM_LARGE, { gpuDevice: 1, useGpu: true });

// Transcription parameters built with the greedy sampling strategy.
const whisperParams = new WhisperFullParams(WhisperSamplingStrategy.Greedy);
|
|
|
|
// Length of one transcription window: 3 seconds, similar to stream.cpp's step_ms.
const SAMPLE_WINDOW_MS = 3000;
// Number of samples in one window, assuming a 16kHz sample rate.
const SAMPLES_PER_WINDOW = (16000 * SAMPLE_WINDOW_MS) / 1000;
|
|
|
|
// Holds the most recent application list; seeded with the list available at
// module load and replaced whenever the change listener below fires.
// eslint-disable-next-line rxjs/finnish
const runningApplications = new BehaviorSubject(shareableContent.applications());

// Subscription handle for the application-list change listener; each
// notification re-reads the list and publishes it on `runningApplications`.
const applicationListChangedSubscriber =
  ShareableContent.onApplicationListChanged(
    function publishApplicationList() {
      runningApplications.next(shareableContent.applications());
    }
  );
|
|
|
|
/**
 * Splits buffered audio chunks into as many complete windows as possible.
 *
 * @param chunks - Buffered chunks in arrival order; not modified.
 * @param windowSize - Number of samples per window; must be a positive integer.
 * @returns `windows`, each holding exactly `windowSize` samples in arrival
 *   order, and `remainder`, the trailing samples that do not fill a window
 *   (empty when the chunks divide evenly).
 * @throws RangeError if `windowSize` is not a positive integer.
 */
function splitIntoWindows(
  chunks: readonly Float32Array[],
  windowSize: number
): { windows: Float32Array[]; remainder: Float32Array } {
  if (!Number.isInteger(windowSize) || windowSize <= 0) {
    throw new RangeError(
      `windowSize must be a positive integer, got ${windowSize}`
    );
  }

  const total = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
  const joined = new Float32Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    joined.set(chunk, offset);
    offset += chunk.length;
  }

  const windows: Float32Array[] = [];
  let start = 0;
  for (; total - start >= windowSize; start += windowSize) {
    // slice() copies, so each window owns its own backing buffer.
    windows.push(joined.slice(start, start + windowSize));
  }
  return { windows, remainder: joined.slice(start) };
}

/**
 * Emits `[app, app.isRunning]` each time the state-change callback registered
 * for `app` fires without an error, and errors the stream when the callback
 * reports one. The listener is removed when the observable is unsubscribed.
 */
function watchRunningState(
  app: Application
): Observable<[Application, boolean]> {
  return new Observable<[Application, boolean]>(subscriber => {
    const stateSubscriber = ShareableContent.onAppStateChanged(app, err => {
      if (err) {
        subscriber.error(err);
        return;
      }
      subscriber.next([app, app.isRunning]);
    });

    return () => {
      stateSubscriber.unsubscribe();
    };
  });
}

/**
 * Taps the audio of `app`, re-emits every received chunk, and logs a Whisper
 * transcription for each complete window of SAMPLES_PER_WINDOW samples.
 *
 * Each sample is transcribed exactly once: only full windows are handed to
 * Whisper and the samples past the last full window are carried over to the
 * next callback. The audio tap is stopped when the observable is unsubscribed.
 */
function transcribeAudio(app: Application): Observable<Float32Array> {
  return new Observable<Float32Array>(observer => {
    let pendingChunks: Float32Array[] = [];
    // Running total of samples in `pendingChunks`, so the common "not enough
    // audio yet" case does not have to walk the whole buffer.
    let pendingSamples = 0;

    const audioStream = app.tapAudio((err, samples) => {
      if (err) {
        observer.error(err);
        return;
      }
      if (!samples) {
        return;
      }

      pendingChunks.push(samples);
      pendingSamples += samples.length;
      observer.next(samples);

      if (pendingSamples < SAMPLES_PER_WINDOW) {
        return;
      }

      const { windows, remainder } = splitIntoWindows(
        pendingChunks,
        SAMPLES_PER_WINDOW
      );
      // Keep only the samples that did not fit into a full window.
      pendingChunks = remainder.length > 0 ? [remainder] : [];
      pendingSamples = remainder.length;

      for (const windowSamples of windows) {
        // Transcription runs synchronously inside the audio callback.
        console.info(whisper.full(whisperParams, windowSamples));
      }
    });

    return () => {
      audioStream.stop();
    };
  });
}

// For every listed application that shows up in the application list, watch
// its running state and transcribe its audio while it is running.
runningApplications
  .pipe(
    // Flatten each application list into the individual apps of interest.
    mergeMap(apps => apps.filter(app => appList.has(app.bundleIdentifier))),
    // One inner stream per bundle identifier.
    groupBy(app => app.bundleIdentifier),
    mergeMap(app$ =>
      app$.pipe(
        // While a state watcher for this bundle identifier is active, later
        // emissions of the same application are ignored.
        exhaustMap(app =>
          watchRunningState(app).pipe(
            distinctUntilChanged(
              ([, wasRunning], [, isRunning]) => wasRunning === isRunning
            ),
            // Start tapping when the app is reported running; switching to
            // EMPTY on "not running" tears down the previous audio tap.
            switchMap(([watchedApp, isRunning]) =>
              isRunning ? transcribeAudio(watchedApp) : EMPTY
            )
          )
        )
      )
    ),
    tap({
      // Stop listening for application-list changes once the pipeline ends.
      finalize: () => {
        applicationListChangedSubscriber.unsubscribe();
      },
    })
  )
  .subscribe();
|