mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-19 23:37:15 +08:00
feat(server): get full content for doc embedding (#11138)
This commit is contained in:
@@ -42,10 +42,10 @@ export abstract class DocReader {
|
||||
protected readonly blobStorage: WorkspaceBlobStorage
|
||||
) {}
|
||||
|
||||
parseDocContent(bin: Uint8Array) {
|
||||
parseDocContent(bin: Uint8Array, maxSummaryLength = 150) {
|
||||
const doc = new YDoc();
|
||||
applyUpdate(doc, bin);
|
||||
return parsePageDoc(doc);
|
||||
return parsePageDoc(doc, { maxSummaryLength });
|
||||
}
|
||||
|
||||
parseWorkspaceContent(bin: Uint8Array) {
|
||||
@@ -85,6 +85,13 @@ export abstract class DocReader {
|
||||
return content;
|
||||
}
|
||||
|
||||
async getFullDocContent(
|
||||
workspaceId: string,
|
||||
docId: string
|
||||
): Promise<PageDocContent | null> {
|
||||
return await this.getDocContentWithoutCache(workspaceId, docId, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get workspace content, try to read from database first.
|
||||
* If not exists, read from `getWorkspaceContentWithoutCache()` and save it back to database.
|
||||
@@ -131,7 +138,8 @@ export abstract class DocReader {
|
||||
|
||||
protected abstract getDocContentWithoutCache(
|
||||
workspaceId: string,
|
||||
guid: string
|
||||
guid: string,
|
||||
fullContent?: boolean
|
||||
): Promise<PageDocContent | null>;
|
||||
|
||||
protected abstract getWorkspaceContentWithoutCache(
|
||||
@@ -180,13 +188,14 @@ export class DatabaseDocReader extends DocReader {
|
||||
|
||||
protected override async getDocContentWithoutCache(
|
||||
workspaceId: string,
|
||||
guid: string
|
||||
guid: string,
|
||||
fullContent?: boolean
|
||||
): Promise<PageDocContent | null> {
|
||||
const docRecord = await this.workspace.getDoc(workspaceId, guid);
|
||||
if (!docRecord) {
|
||||
return null;
|
||||
}
|
||||
return this.parseDocContent(docRecord.bin);
|
||||
return this.parseDocContent(docRecord.bin, fullContent ? -1 : 150);
|
||||
}
|
||||
|
||||
protected override async getWorkspaceContentWithoutCache(
|
||||
|
||||
@@ -115,7 +115,9 @@ export function parsePageDoc(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (summaryLenNeeded > 0) {
|
||||
if (summaryLenNeeded === -1) {
|
||||
content.summary += text.toString();
|
||||
} else if (summaryLenNeeded > 0) {
|
||||
content.summary += text.toString();
|
||||
summaryLenNeeded -= text.length;
|
||||
} else {
|
||||
|
||||
@@ -159,7 +159,7 @@ export class CopilotContextDocJob implements OnModuleInit {
|
||||
if (!this.supportEmbedding) return;
|
||||
|
||||
try {
|
||||
const content = await this.doc.getDocContent(workspaceId, docId);
|
||||
const content = await this.doc.getFullDocContent(workspaceId, docId);
|
||||
if (content) {
|
||||
// no need to check if embeddings is empty, will throw internally
|
||||
const embeddings = await this.embeddingClient.getFileEmbeddings(
|
||||
|
||||
Reference in New Issue
Block a user