feat(server): auto fix doc summary (#13448)

close AF-2787

<img width="2424" height="412" alt="image"
src="https://github.com/user-attachments/assets/d6dedff5-1904-48b1-8a36-c3189104e45b"
/>



#### PR Dependency Tree


* **PR #13448** 👈

This tree was auto-generated by
[Charcoal](https://github.com/danerwilliams/charcoal)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **New Features**
* Introduced an automated system that regularly detects and repairs
documents with missing summaries in all workspaces.
* Added background processing to ensure document summaries are kept
up-to-date without manual intervention.

* **Tests**
* Added new tests to verify detection of documents with empty or
non-empty summaries.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
fengmk2
2025-08-08 21:40:02 +08:00
committed by GitHub
parent 3cfb0a43af
commit c7afc880e6
3 changed files with 142 additions and 2 deletions

View File

@@ -1,10 +1,10 @@
import { Injectable } from '@nestjs/common';
import { Injectable, Logger } from '@nestjs/common';
import { Cron, CronExpression } from '@nestjs/schedule';
import { PrismaClient } from '@prisma/client';
import { JOB_SIGNAL, JobQueue, metrics, OnJob } from '../../base';
import { Models } from '../../models';
import { PgWorkspaceDocStorageAdapter } from '../doc';
import { DatabaseDocReader, PgWorkspaceDocStorageAdapter } from '../doc';
declare global {
interface Jobs {
@@ -13,13 +13,23 @@ declare global {
docId: string;
};
'doc.recordPendingDocUpdatesCount': {};
'doc.findEmptySummaryDocs': {
lastFixedWorkspaceSid?: number;
};
'doc.autoFixedDocSummary': {
workspaceId: string;
docId: string;
};
}
}
@Injectable()
export class DocServiceCronJob {
private readonly logger = new Logger(DocServiceCronJob.name);
constructor(
private readonly workspace: PgWorkspaceDocStorageAdapter,
private readonly docReader: DatabaseDocReader,
private readonly prisma: PrismaClient,
private readonly job: JobQueue,
private readonly models: Models
@@ -86,4 +96,74 @@ export class DocServiceCronJob {
}
);
}
/**
 * Cron tick (every 30 seconds) that enqueues a `doc.findEmptySummaryDocs` job
 * with an empty payload, a 30 second delay and a fixed job id.
 */
@Cron(CronExpression.EVERY_30_SECONDS)
async scheduleFindEmptySummaryDocs() {
  const scanJobOptions = {
    // make sure only one job is running at a time
    delay: 30 * 1000,
    jobId: 'findEmptySummaryDocs',
  };
  await this.job.add('doc.findEmptySummaryDocs', {}, scanJobOptions);
}
/**
 * Job handler that scans one batch of up to 100 workspaces whose `sid` is
 * greater than `payload.lastFixedWorkspaceSid` (0 when unset) and enqueues a
 * `doc.autoFixedDocSummary` job for every doc id returned by
 * `findEmptySummaryDocIds`, skipping the workspace root doc.
 *
 * After a non-empty batch the cursor in `payload` is advanced to the `sid` of
 * the last workspace in the batch.
 * NOTE(review): this presumes `workspace.list` returns rows in ascending `sid`
 * order and that the job queue keeps the mutated payload for the repeated
 * run — confirm against both implementations.
 *
 * @param payload job payload carrying the optional scan cursor; mutated in place
 * @returns always `JOB_SIGNAL.Repeat`
 */
@OnJob('doc.findEmptySummaryDocs')
async findEmptySummaryDocs(payload: Jobs['doc.findEmptySummaryDocs']) {
  const cursorSid = payload.lastFixedWorkspaceSid ?? 0;
  const batch = await this.models.workspace.list(
    { sid: { gt: cursorSid } },
    { id: true, sid: true },
    100
  );

  if (batch.length === 0) {
    // NOTE(review): the cursor is left untouched here, so later runs only see
    // workspaces with a larger sid — confirm this is intended.
    return JOB_SIGNAL.Repeat;
  }

  let enqueuedCount = 0;
  for (const { id: workspaceId } of batch) {
    const emptyDocIds =
      await this.models.doc.findEmptySummaryDocIds(workspaceId);
    // ignore root doc
    const fixableDocIds = emptyDocIds.filter(docId => docId !== workspaceId);
    for (const docId of fixableDocIds) {
      await this.job.add(
        'doc.autoFixedDocSummary',
        { workspaceId, docId },
        { jobId: `autoFixedDocSummary/${workspaceId}/${docId}` }
      );
      enqueuedCount++;
    }
  }

  const { sid: nextSid } = batch[batch.length - 1];
  this.logger.log(
    `Auto added ${enqueuedCount} docs to queue, lastFixedWorkspaceSid: ${cursorSid} -> ${nextSid}`
  );

  // update the lastFixedWorkspaceSid in the payload and repeat the job after 30 seconds
  payload.lastFixedWorkspaceSid = nextSid;
  return JOB_SIGNAL.Repeat;
}
/**
 * Job handler that reads the content of a single doc through the doc reader
 * and, when content is available, passes it to `models.doc.upsertMeta`.
 * Logs a warning and does nothing else when no content is returned.
 *
 * @param payload identifies the doc by `workspaceId` and `docId`
 */
@OnJob('doc.autoFixedDocSummary')
async autoFixedDocSummary(payload: Jobs['doc.autoFixedDocSummary']) {
  const docContent = await this.docReader.getDocContent(
    payload.workspaceId,
    payload.docId
  );

  if (docContent) {
    await this.models.doc.upsertMeta(
      payload.workspaceId,
      payload.docId,
      docContent
    );
    return;
  }

  // nothing to write back when the reader yields no content
  this.logger.warn(
    `Summary for doc ${payload.docId} in workspace ${payload.workspaceId} not found`
  );
}
}

View File

@@ -0,0 +1,47 @@
import { randomUUID } from 'node:crypto';
import test from 'ava';
import { createModule } from '../../__tests__/create-module';
import { Mockers } from '../../__tests__/mocks';
import { Models } from '..';
// Shared fixtures for every test in this file: one test module, the Models
// provider resolved from it, and a single mocked user used as workspace owner.
const module = await createModule({});
const models = module.get(Models);
const owner = await module.create(Mockers.User);
// Close the shared module once all tests have finished, even if some failed.
test.after.always(async () => {
await module.close();
});
// A doc meta mocked without an explicit summary is expected to be reported
// (presumably the mocker leaves `summary` null — confirm against Mockers.DocMeta).
test('should find null summary doc ids', async t => {
  const { id: workspaceId } = await module.create(Mockers.Workspace, {
    owner,
  });
  const expectedDocId = randomUUID();
  await module.create(Mockers.DocMeta, {
    workspaceId,
    docId: expectedDocId,
  });

  const foundDocIds = await models.doc.findEmptySummaryDocIds(workspaceId);

  t.deepEqual(foundDocIds, [expectedDocId]);
});
// A doc meta that already carries a summary must not be reported.
test('should ignore summary is not null', async t => {
  const { id: workspaceId } = await module.create(Mockers.Workspace, {
    owner,
  });
  await module.create(Mockers.DocMeta, {
    workspaceId,
    docId: randomUUID(),
    summary: 'test',
  });

  const foundDocIds = await models.doc.findEmptySummaryDocIds(workspaceId);

  t.is(foundDocIds.length, 0);
});

View File

@@ -696,5 +696,18 @@ export class DocModel extends BaseModel {
return [count, rows] as const;
}
/**
 * Returns the `docId` of every `workspaceDoc` row in the given workspace whose
 * `summary` is `null`.
 *
 * @param workspaceId workspace to search
 * @returns doc ids of the matching rows, in the order the query returns them
 */
async findEmptySummaryDocIds(workspaceId: string) {
  const docsWithoutSummary = await this.db.workspaceDoc.findMany({
    select: { docId: true },
    where: { workspaceId, summary: null },
  });
  return docsWithoutSummary.map(({ docId }) => docId);
}
// #endregion
}