fix(server): get pending embedding docs in event handler (#13095)

maybe fix AI-309
This commit is contained in:
DarkSky
2025-07-08 17:38:16 +08:00
committed by GitHub
parent 839706cf65
commit d2f016c628

View File

@@ -2,7 +2,7 @@ import { randomUUID } from 'node:crypto';
import { Injectable } from '@nestjs/common';
import { Transactional } from '@nestjs-cls/transactional';
import { Prisma } from '@prisma/client';
import { Prisma, PrismaClient } from '@prisma/client';
import { PaginationInput } from '../base';
import { BaseModel } from './base';
@@ -16,6 +16,10 @@ import type {
@Injectable()
export class CopilotWorkspaceConfigModel extends BaseModel {
constructor(private readonly database: PrismaClient) {
super();
}
@Transactional()
private async listIgnoredDocIds(
workspaceId: string,
@@ -41,28 +45,25 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
* @param workspaceId id of the workspace
* @returns docIds
*/
@Transactional()
async findDocsToEmbed(workspaceId: string): Promise<string[]> {
const ignoredDocIds = (await this.listIgnoredDocIds(workspaceId)).map(
d => d.docId
);
// NOTE: for unknown reason, the transaction will timeout if call from event handler
// so we use an independent client here
const docIds = await this.database.$queryRaw<{ id: string }[]>`
SELECT s.guid as id
FROM snapshots AS s
LEFT JOIN ai_workspace_embeddings e
ON e.workspace_id = s.workspace_id
AND e.doc_id = s.guid
LEFT JOIN ai_workspace_ignored_docs id
ON id.workspace_id = s.workspace_id
AND id.doc_id = s.guid
WHERE s.workspace_id = ${workspaceId}
AND s.guid != s.workspace_id
AND s.guid NOT LIKE '%$%'
AND e.doc_id IS NULL
AND id.doc_id IS NULL;`;
const docIds = await this.db.snapshot
.findMany({
where: {
workspaceId,
AND: [
{ id: { notIn: ignoredDocIds } },
{ id: { not: workspaceId } },
{ id: { not: { contains: '$' } } },
],
embedding: { none: {} },
},
select: { id: true },
})
.then(r => r.map(doc => doc.id));
return docIds;
return docIds.map(r => r.id);
}
@Transactional()