mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-13 12:55:00 +00:00
feat(server): clear semantic search metadata (#13197)
fix AI-360

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

* **New Features**
  * Search results now display document metadata enriched with author information.
* **Improvements**
  * Search result content is cleaner, with leading metadata lines (such as titles and creation dates) removed from document excerpts.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import { tool } from 'ai';
|
||||
import { omit } from 'lodash-es';
|
||||
import { z } from 'zod';
|
||||
|
||||
import type { AccessController } from '../../../core/permission';
|
||||
@@ -8,6 +9,32 @@ import type { ContextSession } from '../context/session';
|
||||
import type { CopilotChatOptions } from '../providers';
|
||||
import { toolError } from './error';
|
||||
|
||||
/**
 * Line prefixes that identify a metadata row at the top of a chunk's content.
 * A row is treated as metadata when it starts with any of these strings.
 */
const FILTER_PREFIX = [
  'Title: ',
  'Created at: ',
  'Updated at: ',
  'Created by: ',
  'Updated by: ',
];
|
||||
|
||||
/**
 * Removes leading metadata rows from a chunk's content.
 *
 * The content is split on '\n' and rows are dropped from the top for as long
 * as each one starts with an entry of FILTER_PREFIX, up to a maximum of five
 * rows. Scanning stops at the first row that does not match, so metadata-like
 * rows appearing later in the content are kept.
 *
 * @param chunk - The chunk whose `content` should be cleaned.
 * @returns A shallow copy of `chunk` with the cleaned content, or `chunk`
 *   itself when its `content` is falsy.
 */
function clearEmbeddingChunk(chunk: ChunkSimilarity): ChunkSimilarity {
  if (!chunk.content) {
    return chunk;
  }

  const rows = chunk.content.split('\n');
  // Never strip more than five rows, and never look past the end of the content.
  const limit = Math.min(5, rows.length);

  let skip = 0;
  while (skip < limit) {
    const row = rows[skip];
    // only consecutive metadata rows at the top are removed
    if (!FILTER_PREFIX.some(prefix => row.startsWith(prefix))) {
      break;
    }
    skip += 1;
  }

  return { ...chunk, content: rows.slice(skip).join('\n') };
}
|
||||
|
||||
export const buildDocSearchGetter = (
|
||||
ac: AccessController,
|
||||
context: CopilotContextService,
|
||||
@@ -47,18 +74,37 @@ export const buildDocSearchGetter = (
|
||||
if (!docChunks.length && !fileChunks.length)
|
||||
return `No results found for "${query}".`;
|
||||
|
||||
const docIds = docChunks.map(c => ({
|
||||
// oxlint-disable-next-line no-non-null-assertion
|
||||
workspaceId: options.workspace!,
|
||||
docId: c.docId,
|
||||
}));
|
||||
const docAuthors = await models.doc
|
||||
.findAuthors(docIds)
|
||||
.then(
|
||||
docs =>
|
||||
new Map(
|
||||
docs
|
||||
.filter(d => !!d)
|
||||
.map(doc => [doc.id, omit(doc, ['id', 'workspaceId'])])
|
||||
)
|
||||
);
|
||||
const docMetas = await models.doc
|
||||
.findAuthors(
|
||||
docChunks.map(c => ({
|
||||
// oxlint-disable-next-line no-non-null-assertion
|
||||
workspaceId: options.workspace!,
|
||||
docId: c.docId,
|
||||
}))
|
||||
)
|
||||
.then(docs => new Map(docs.filter(d => !!d).map(doc => [doc.id, doc])));
|
||||
.findMetas(docIds, { select: { title: true } })
|
||||
.then(
|
||||
docs =>
|
||||
new Map(
|
||||
docs
|
||||
.filter(d => !!d)
|
||||
.map(doc => [
|
||||
doc.docId,
|
||||
Object.assign({}, doc, docAuthors.get(doc.docId)),
|
||||
])
|
||||
)
|
||||
);
|
||||
|
||||
return [
|
||||
...fileChunks,
|
||||
...fileChunks.map(clearEmbeddingChunk),
|
||||
...docChunks.map(c => ({
|
||||
...c,
|
||||
...docMetas.get(c.docId),
|
||||
|
||||
Reference in New Issue
Block a user