feat(server): impl native reader for server (#14100)

This commit is contained in:
DarkSky
2025-12-14 00:28:43 +08:00
committed by GitHub
parent a0eeed0cdb
commit 844b9d9592
28 changed files with 1333 additions and 1153 deletions

View File

@@ -9,64 +9,43 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1
{
markdown: `AFFiNE is an open source all in one workspace, an operating system for all the building blocks of your team wiki, knowledge management and digital assets and a better alternative to Notion and Miro.␊
markdown: `AFFiNE is an open source all in one workspace, an operating system for all the building blocks of your team wiki, knowledge management and digital assets and a better alternative to Notion and Miro.
# You own your data, with no compromises␊
## Local-first & Real-time collaborative␊
We love the idea proposed by Ink & Switch in the famous article about you owning your data, despite the cloud. Furthermore, AFFiNE is the first all-in-one workspace that keeps your data ownership with no compromises on real-time collaboration and editing experience.␊
AFFiNE is a local-first application upon CRDTs with real-time collaboration support. Your data is always stored locally while multiple nodes remain synced in real-time.␊
### Blocks that assemble your next docs, tasks kanban or whiteboard␊
There is a large overlap of their atomic "building blocks" between these apps. They are neither open source nor have a plugin system like VS Code for contributors to customize. We want to have something that contains all the features we love and goes one step further.␊
There is a large overlap of their atomic "building blocks" between these apps. They are neither open source nor have a plugin system like VS Code for contributors to customize. We want to have something that contains all the features we love and goes one step further.
We are building AFFiNE to be a fundamental open source platform that contains all the building blocks for docs, task management and visual collaboration, hoping you can shape your next workflow with us that can make your life better and also connect others, too.␊
If you want to learn more about the product design of AFFiNE, here goes the concepts:␊
To Shape, not to adapt. AFFiNE is built for individuals & teams who care about their data, who refuse vendor lock-in, and who want to have control over their essential tools.␊
## A true canvas for blocks in any form␊
[Many editor apps](http://notion.so) claimed to be a canvas for productivity. Since _the Mother of All Demos,_ Douglas Engelbart, a creative and programable digital workspace has been a pursuit and an ultimate mission for generations of tool makers.␊
Many editor apps claimed to be a canvas for productivity. Since the Mother of All Demos, Douglas Engelbart, a creative and programable digital workspace has been a pursuit and an ultimate mission for generations of tool makers.
"We shape our tools and thereafter our tools shape us”. A lot of pioneers have inspired us a long the way, e.g.:␊
* Quip & Notion with their great concept of "everything is a block"
* Trello with their Kanban
* Airtable & Miro with their no-code programable datasheets
* Miro & Whimiscal with their edgeless visual whiteboard
* Remnote & Capacities with their object-based tag system
For more details, please refer to our [RoadMap](https://docs.affine.pro/docs/core-concepts/roadmap)␊
- Quip & Notion with their great concept of "everything is a block"
- Trello with their Kanban
- Airtable & Miro with their no-code programable datasheets
- Miro & Whimiscal with their edgeless visual whiteboard
- Remnote & Capacities with their object-based tag system
For more details, please refer to our RoadMap
## Self Host␊
Self host AFFiNE␊
### Learning From␊
||Title|Tag|␊
|---|---|---|␊
|Affine Development|Affine Development|<span data-affine-option data-value="AxSe-53xjX" data-option-color="var(--affine-tag-pink)">AFFiNE</span>|␊
|For developers or installations guides, please go to AFFiNE Doc|For developers or installations guides, please go to AFFiNE Doc|<span data-affine-option data-value="0jh9gNw4Yl" data-option-color="var(--affine-tag-orange)">Developers</span>|␊
|Quip & Notion with their great concept of "everything is a block"|Quip & Notion with their great concept of "everything is a block"|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Trello with their Kanban|Trello with their Kanban|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Airtable & Miro with their no-code programable datasheets|Airtable & Miro with their no-code programable datasheets|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Miro & Whimiscal with their edgeless visual whiteboard|Miro & Whimiscal with their edgeless visual whiteboard|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Affine Development|Affine Development||␊
|For developers or installations guides, please go to AFFiNE Doc|For developers or installations guides, please go to AFFiNE Doc||␊
|Quip & Notion with their great concept of "everything is a block"|Quip & Notion with their great concept of "everything is a block"||␊
|Trello with their Kanban|Trello with their Kanban||␊
|Airtable & Miro with their no-code programable datasheets|Airtable & Miro with their no-code programable datasheets||␊
|Miro & Whimiscal with their edgeless visual whiteboard|Miro & Whimiscal with their edgeless visual whiteboard||␊
|Remnote & Capacities with their object-based tag system|Remnote & Capacities with their object-based tag system||␊
## Affine Development␊
For developer or installation guides, please go to [AFFiNE Development](https://docs.affine.pro/docs/development/quick-start)␊
For developer or installation guides, please go to AFFiNE Development
`,
title: 'Write, Draw, Plan all at Once.',

View File

@@ -9,64 +9,43 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1
{
markdown: `AFFiNE is an open source all in one workspace, an operating system for all the building blocks of your team wiki, knowledge management and digital assets and a better alternative to Notion and Miro.␊
markdown: `AFFiNE is an open source all in one workspace, an operating system for all the building blocks of your team wiki, knowledge management and digital assets and a better alternative to Notion and Miro.
# You own your data, with no compromises␊
## Local-first & Real-time collaborative␊
We love the idea proposed by Ink & Switch in the famous article about you owning your data, despite the cloud. Furthermore, AFFiNE is the first all-in-one workspace that keeps your data ownership with no compromises on real-time collaboration and editing experience.␊
AFFiNE is a local-first application upon CRDTs with real-time collaboration support. Your data is always stored locally while multiple nodes remain synced in real-time.␊
### Blocks that assemble your next docs, tasks kanban or whiteboard␊
There is a large overlap of their atomic "building blocks" between these apps. They are neither open source nor have a plugin system like VS Code for contributors to customize. We want to have something that contains all the features we love and goes one step further.␊
There is a large overlap of their atomic "building blocks" between these apps. They are neither open source nor have a plugin system like VS Code for contributors to customize. We want to have something that contains all the features we love and goes one step further.
We are building AFFiNE to be a fundamental open source platform that contains all the building blocks for docs, task management and visual collaboration, hoping you can shape your next workflow with us that can make your life better and also connect others, too.␊
If you want to learn more about the product design of AFFiNE, here goes the concepts:␊
To Shape, not to adapt. AFFiNE is built for individuals & teams who care about their data, who refuse vendor lock-in, and who want to have control over their essential tools.␊
## A true canvas for blocks in any form␊
[Many editor apps](http://notion.so) claimed to be a canvas for productivity. Since _the Mother of All Demos,_ Douglas Engelbart, a creative and programable digital workspace has been a pursuit and an ultimate mission for generations of tool makers.␊
Many editor apps claimed to be a canvas for productivity. Since the Mother of All Demos, Douglas Engelbart, a creative and programable digital workspace has been a pursuit and an ultimate mission for generations of tool makers.
"We shape our tools and thereafter our tools shape us”. A lot of pioneers have inspired us a long the way, e.g.:␊
* Quip & Notion with their great concept of "everything is a block"
* Trello with their Kanban
* Airtable & Miro with their no-code programable datasheets
* Miro & Whimiscal with their edgeless visual whiteboard
* Remnote & Capacities with their object-based tag system
For more details, please refer to our [RoadMap](https://docs.affine.pro/docs/core-concepts/roadmap)␊
- Quip & Notion with their great concept of "everything is a block"
- Trello with their Kanban
- Airtable & Miro with their no-code programable datasheets
- Miro & Whimiscal with their edgeless visual whiteboard
- Remnote & Capacities with their object-based tag system
For more details, please refer to our RoadMap
## Self Host␊
Self host AFFiNE␊
### Learning From␊
||Title|Tag|␊
|---|---|---|␊
|Affine Development|Affine Development|<span data-affine-option data-value="AxSe-53xjX" data-option-color="var(--affine-tag-pink)">AFFiNE</span>|␊
|For developers or installations guides, please go to AFFiNE Doc|For developers or installations guides, please go to AFFiNE Doc|<span data-affine-option data-value="0jh9gNw4Yl" data-option-color="var(--affine-tag-orange)">Developers</span>|␊
|Quip & Notion with their great concept of "everything is a block"|Quip & Notion with their great concept of "everything is a block"|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Trello with their Kanban|Trello with their Kanban|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Airtable & Miro with their no-code programable datasheets|Airtable & Miro with their no-code programable datasheets|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Miro & Whimiscal with their edgeless visual whiteboard|Miro & Whimiscal with their edgeless visual whiteboard|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Affine Development|Affine Development||␊
|For developers or installations guides, please go to AFFiNE Doc|For developers or installations guides, please go to AFFiNE Doc||␊
|Quip & Notion with their great concept of "everything is a block"|Quip & Notion with their great concept of "everything is a block"||␊
|Trello with their Kanban|Trello with their Kanban||␊
|Airtable & Miro with their no-code programable datasheets|Airtable & Miro with their no-code programable datasheets||␊
|Miro & Whimiscal with their edgeless visual whiteboard|Miro & Whimiscal with their edgeless visual whiteboard||␊
|Remnote & Capacities with their object-based tag system|Remnote & Capacities with their object-based tag system||␊
## Affine Development␊
For developer or installation guides, please go to [AFFiNE Development](https://docs.affine.pro/docs/development/quick-start)␊
For developer or installation guides, please go to AFFiNE Development
`,
title: 'Write, Draw, Plan all at Once.',

View File

@@ -9,64 +9,43 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1
{
markdown: `AFFiNE is an open source all in one workspace, an operating system for all the building blocks of your team wiki, knowledge management and digital assets and a better alternative to Notion and Miro.␊
markdown: `AFFiNE is an open source all in one workspace, an operating system for all the building blocks of your team wiki, knowledge management and digital assets and a better alternative to Notion and Miro.
# You own your data, with no compromises␊
## Local-first & Real-time collaborative␊
We love the idea proposed by Ink & Switch in the famous article about you owning your data, despite the cloud. Furthermore, AFFiNE is the first all-in-one workspace that keeps your data ownership with no compromises on real-time collaboration and editing experience.␊
AFFiNE is a local-first application upon CRDTs with real-time collaboration support. Your data is always stored locally while multiple nodes remain synced in real-time.␊
### Blocks that assemble your next docs, tasks kanban or whiteboard␊
There is a large overlap of their atomic "building blocks" between these apps. They are neither open source nor have a plugin system like VS Code for contributors to customize. We want to have something that contains all the features we love and goes one step further.␊
There is a large overlap of their atomic "building blocks" between these apps. They are neither open source nor have a plugin system like VS Code for contributors to customize. We want to have something that contains all the features we love and goes one step further.
We are building AFFiNE to be a fundamental open source platform that contains all the building blocks for docs, task management and visual collaboration, hoping you can shape your next workflow with us that can make your life better and also connect others, too.␊
If you want to learn more about the product design of AFFiNE, here goes the concepts:␊
To Shape, not to adapt. AFFiNE is built for individuals & teams who care about their data, who refuse vendor lock-in, and who want to have control over their essential tools.␊
## A true canvas for blocks in any form␊
[Many editor apps](http://notion.so) claimed to be a canvas for productivity. Since _the Mother of All Demos,_ Douglas Engelbart, a creative and programable digital workspace has been a pursuit and an ultimate mission for generations of tool makers.␊
Many editor apps claimed to be a canvas for productivity. Since the Mother of All Demos, Douglas Engelbart, a creative and programable digital workspace has been a pursuit and an ultimate mission for generations of tool makers.
"We shape our tools and thereafter our tools shape us”. A lot of pioneers have inspired us a long the way, e.g.:␊
* Quip & Notion with their great concept of "everything is a block"
* Trello with their Kanban
* Airtable & Miro with their no-code programable datasheets
* Miro & Whimiscal with their edgeless visual whiteboard
* Remnote & Capacities with their object-based tag system
For more details, please refer to our [RoadMap](https://docs.affine.pro/docs/core-concepts/roadmap)␊
- Quip & Notion with their great concept of "everything is a block"
- Trello with their Kanban
- Airtable & Miro with their no-code programable datasheets
- Miro & Whimiscal with their edgeless visual whiteboard
- Remnote & Capacities with their object-based tag system
For more details, please refer to our RoadMap
## Self Host␊
Self host AFFiNE␊
### Learning From␊
||Title|Tag|␊
|---|---|---|␊
|Affine Development|Affine Development|<span data-affine-option data-value="AxSe-53xjX" data-option-color="var(--affine-tag-pink)">AFFiNE</span>|␊
|For developers or installations guides, please go to AFFiNE Doc|For developers or installations guides, please go to AFFiNE Doc|<span data-affine-option data-value="0jh9gNw4Yl" data-option-color="var(--affine-tag-orange)">Developers</span>|␊
|Quip & Notion with their great concept of "everything is a block"|Quip & Notion with their great concept of "everything is a block"|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Trello with their Kanban|Trello with their Kanban|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Airtable & Miro with their no-code programable datasheets|Airtable & Miro with their no-code programable datasheets|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Miro & Whimiscal with their edgeless visual whiteboard|Miro & Whimiscal with their edgeless visual whiteboard|<span data-affine-option data-value="HgHsKOUINZ" data-option-color="var(--affine-tag-blue)">Reference</span>|␊
|Affine Development|Affine Development||␊
|For developers or installations guides, please go to AFFiNE Doc|For developers or installations guides, please go to AFFiNE Doc||␊
|Quip & Notion with their great concept of "everything is a block"|Quip & Notion with their great concept of "everything is a block"||␊
|Trello with their Kanban|Trello with their Kanban||␊
|Airtable & Miro with their no-code programable datasheets|Airtable & Miro with their no-code programable datasheets||␊
|Miro & Whimiscal with their edgeless visual whiteboard|Miro & Whimiscal with their edgeless visual whiteboard||␊
|Remnote & Capacities with their object-based tag system|Remnote & Capacities with their object-based tag system||␊
## Affine Development␊
For developer or installation guides, please go to [AFFiNE Development](https://docs.affine.pro/docs/development/quick-start)␊
For developer or installation guides, please go to AFFiNE Development
`,
title: 'Write, Draw, Plan all at Once.',

View File

@@ -192,12 +192,7 @@ export class DatabaseDocReader extends DocReader {
if (!doc) {
return null;
}
return parseDocToMarkdownFromDocSnapshot(
workspaceId,
docId,
doc.bin,
aiEditable
);
return parseDocToMarkdownFromDocSnapshot(docId, doc.bin, aiEditable);
}
async getDocDiff(

View File

@@ -44,12 +44,7 @@ test('can read all blocks from doc snapshot', async t => {
const doc = await models.doc.get(workspace.id, docSnapshot.id);
t.truthy(doc);
const result = await readAllBlocksFromDocSnapshot(
workspace.id,
'doc-0',
docSnapshot.blob,
rootDoc!.blob
);
const result = await readAllBlocksFromDocSnapshot('doc-0', docSnapshot.blob);
t.snapshot({
...result,
@@ -64,11 +59,7 @@ test('can read blob filename from doc snapshot', async t => {
snapshotFile: 'test-doc-with-blob.snapshot.bin',
});
const result = await readAllBlocksFromDocSnapshot(
workspace.id,
'doc-0',
docSnapshot.blob
);
const result = await readAllBlocksFromDocSnapshot('doc-0', docSnapshot.blob);
// NOTE: avoid snapshot result directly, because it will cause hanging
t.snapshot(JSON.parse(JSON.stringify(result)));
@@ -78,11 +69,7 @@ test('can read all blocks from doc snapshot without workspace snapshot', async t
const doc = await models.doc.get(workspace.id, docSnapshot.id);
t.truthy(doc);
const result = await readAllBlocksFromDocSnapshot(
workspace.id,
'doc-0',
docSnapshot.blob
);
const result = await readAllBlocksFromDocSnapshot('doc-0', docSnapshot.blob);
t.snapshot({
...result,
@@ -92,7 +79,6 @@ test('can read all blocks from doc snapshot without workspace snapshot', async t
test('can parse doc to markdown from doc snapshot', async t => {
const result = parseDocToMarkdownFromDocSnapshot(
workspace.id,
docSnapshot.id,
docSnapshot.blob
);
@@ -102,7 +88,6 @@ test('can parse doc to markdown from doc snapshot', async t => {
test('can parse doc to markdown from doc snapshot with ai editable', async t => {
const result = parseDocToMarkdownFromDocSnapshot(
workspace.id,
docSnapshot.id,
docSnapshot.blob,
true

View File

@@ -1,18 +1,10 @@
// TODO(@forehalo):
// Because of the `@affine/server` package can't import directly from workspace packages,
// this is a temporary solution to get the block suite data(title, description) from given yjs binary or yjs doc.
// The logic is mainly copied from
// - packages/frontend/core/src/modules/docs-search/worker/in-worker.ts
// - packages/frontend/core/src/components/page-list/use-block-suite-page-preview.ts
// and it's better to be provided by blocksuite
import { Array as YArray, Doc as YDoc, Map as YMap } from 'yjs';
// eslint-disable-next-line @typescript-eslint/no-restricted-imports -- import from bundle
import {
parsePageDoc as parseDocToMarkdown,
readAllBlocksFromDoc,
parseYDocFromBinary,
parseYDocToMarkdown,
readAllDocIdsFromRootDoc,
} from '@affine/reader/dist';
import { applyUpdate, Array as YArray, Doc as YDoc, Map as YMap } from 'yjs';
} from '../../native';
export interface PageDocContent {
title: string;
@@ -165,64 +157,49 @@ export function parsePageDoc(
}
export function readAllDocIdsFromWorkspaceSnapshot(snapshot: Uint8Array) {
const rootDoc = new YDoc();
applyUpdate(rootDoc, snapshot);
return readAllDocIdsFromRootDoc(rootDoc, {
includeTrash: false,
});
return readAllDocIdsFromRootDoc(Buffer.from(snapshot), false);
}
/**
 * Best-effort JSON decoding.
 *
 * Runs `JSON.parse` on `str` and hands the result back typed as `T`.
 * The cast is unchecked: the parsed value is not validated against `T`.
 *
 * @param str - Text to decode as JSON.
 * @returns The decoded value, or `undefined` when `JSON.parse` throws
 *          (the error itself is discarded).
 */
function safeParseJson<T>(str: string): T | undefined {
  let decoded: T | undefined;
  try {
    decoded = JSON.parse(str) as T;
  } catch {
    // Malformed input is not an error for callers; report it as "no value".
    decoded = undefined;
  }
  return decoded;
}
export async function readAllBlocksFromDocSnapshot(
workspaceId: string,
docId: string,
docSnapshot: Uint8Array,
workspaceSnapshot?: Uint8Array,
maxSummaryLength?: number
docSnapshot: Uint8Array
) {
let rootYDoc: YDoc | undefined;
if (workspaceSnapshot) {
rootYDoc = new YDoc({
guid: workspaceId,
});
applyUpdate(rootYDoc, workspaceSnapshot);
}
const ydoc = new YDoc({
guid: docId,
});
applyUpdate(ydoc, docSnapshot);
return await readAllBlocksFromDoc({
ydoc,
rootYDoc,
spaceId: workspaceId,
maxSummaryLength,
});
const result = parseYDocFromBinary(Buffer.from(docSnapshot), docId);
return {
...result,
blocks: result.blocks.map(block => ({
...block,
docId,
ref: block.refInfo,
additional: block.additional
? safeParseJson(block.additional)
: undefined,
})),
};
}
export function parseDocToMarkdownFromDocSnapshot(
workspaceId: string,
docId: string,
docSnapshot: Uint8Array,
aiEditable = false
) {
const ydoc = new YDoc({
guid: docId,
});
applyUpdate(ydoc, docSnapshot);
const parsed = parseDocToMarkdown({
workspaceId,
doc: ydoc,
buildBlobUrl: (blobId: string) => {
return `/${workspaceId}/blobs/${blobId}`;
},
buildDocUrl: (docId: string) => {
return `/workspace/${workspaceId}/${docId}`;
},
aiEditable,
});
const parsed = parseYDocToMarkdown(
Buffer.from(docSnapshot),
docId,
aiEditable
);
return {
title: parsed.title,
markdown: parsed.md,
markdown: parsed.markdown,
};
}

View File

@@ -40,6 +40,10 @@ export function getTokenEncoder(model?: string | null): Tokenizer | null {
// Re-exports of members of `serverNativeModule`, so callers can import them
// from this module instead of reaching into the native binding directly.
export const getMime = serverNativeModule.getMime;
export const parseDoc = serverNativeModule.parseDoc;
export const htmlSanitize = serverNativeModule.htmlSanitize;
// These two are exposed under `YDoc`-prefixed names; the underlying native
// members are `parseDocFromBinary` and `parseDocToMarkdown`.
export const parseYDocFromBinary = serverNativeModule.parseDocFromBinary;
export const parseYDocToMarkdown = serverNativeModule.parseDocToMarkdown;
export const readAllDocIdsFromRootDoc =
serverNativeModule.readAllDocIdsFromRootDoc;
// Constants read from the native module.
export const AFFINE_PRO_PUBLIC_KEY = serverNativeModule.AFFINE_PRO_PUBLIC_KEY;
export const AFFINE_PRO_LICENSE_AES_KEY =
serverNativeModule.AFFINE_PRO_LICENSE_AES_KEY;

View File

@@ -227,15 +227,7 @@ export class IndexerService {
this.logger.debug(`doc ${workspaceId}/${docId} is empty, skip indexing`);
return;
}
const MAX_WORKSPACE_SNAPSHOT_SIZE = 1024 * 1024 * 10; // 10MB
const result = await readAllBlocksFromDocSnapshot(
workspaceId,
docId,
docSnapshot.blob,
workspaceSnapshot.blob.length < MAX_WORKSPACE_SNAPSHOT_SIZE
? workspaceSnapshot.blob
: undefined
);
const result = await readAllBlocksFromDocSnapshot(docId, docSnapshot.blob);
if (!result) {
this.logger.warn(
`parse doc ${workspaceId}/${docId} failed, workspaceSnapshot size: ${workspaceSnapshot.blob.length}, docSnapshot size: ${docSnapshot.blob.length}`
@@ -277,7 +269,7 @@ export class IndexerService {
additional: block.additional
? JSON.stringify(block.additional)
: undefined,
markdownPreview: block.markdownPreview,
markdownPreview: undefined,
createdByUserId: docSnapshot.createdBy ?? '',
updatedByUserId: docSnapshot.updatedBy ?? '',
createdAt: docSnapshot.createdAt,