feat(server): use native tokenizer impl (#6960)

### Benchmark

`yarn workspace @affine/server-native bench`

```
┌─────────┬────────────┬─────────┬────────────────────┬──────────┬─────────┐
│ (index) │ Task Name  │ ops/sec │ Average Time (ns)  │ Margin   │ Samples │
├─────────┼────────────┼─────────┼────────────────────┼──────────┼─────────┤
│ 0       │ 'tiktoken' │ '5'     │ 176932518.76000002 │ '±4.71%' │ 100     │
│ 1       │ 'native'   │ '16'    │ 61041597.51000003  │ '±0.60%' │ 100     │
└─────────┴────────────┴─────────┴────────────────────┴──────────┴─────────┘
```
This commit is contained in:
Brooooooklyn
2024-05-16 07:55:10 +00:00
parent 46140039d9
commit c7ddd679fd
17 changed files with 206 additions and 39 deletions

View File

@@ -1,5 +1,10 @@
/* auto-generated by NAPI-RS */
/* eslint-disable */
export class Tokenizer {
count(content: string, allowedSpecial?: Array<string> | undefined | null): number
}
export function fromModelName(modelName: string): Tokenizer | null
export function getMime(input: Uint8Array): string