diff --git a/packages/backend/server/src/base/storage/providers/s3.ts b/packages/backend/server/src/base/storage/providers/s3.ts
index c6a693c883..4531fe47d2 100644
--- a/packages/backend/server/src/base/storage/providers/s3.ts
+++ b/packages/backend/server/src/base/storage/providers/s3.ts
@@ -434,7 +434,11 @@ export class S3StorageProvider implements StorageProvider {
 
       this.logger.verbose(`Deleted object \`${key}\``);
     } catch (e) {
-      this.logger.error(`Failed to delete object \`${key}\``);
+      this.logger.error(`Failed to delete object \`${key}\``, {
+        bucket: this.bucket,
+        key,
+        cause: e,
+      });
       throw e;
     }
   }
diff --git a/packages/backend/server/src/models/copilot-session.ts b/packages/backend/server/src/models/copilot-session.ts
index 76e895bd33..4ba6a21403 100644
--- a/packages/backend/server/src/models/copilot-session.ts
+++ b/packages/backend/server/src/models/copilot-session.ts
@@ -113,6 +113,107 @@ export type CleanupSessionOptions = Pick<
 
 @Injectable()
 export class CopilotSessionModel extends BaseModel {
+  /** Removes `\u0000` from a string; non-string values pass through. */
+  private sanitizeString<T>(value: T): T {
+    if (typeof value !== 'string') {
+      return value;
+    }
+    return value.replace(/\u0000/g, '') as T;
+  }
+
+  /** Recursively removes `\u0000` from arrays and plain objects (keys too). */
+  private sanitizeJsonValue<T>(value: T): T {
+    if (typeof value === 'string') {
+      return this.sanitizeString(value) as T;
+    }
+    if (Array.isArray(value)) {
+      return value.map(v => this.sanitizeJsonValue(v)) as T;
+    }
+    if (
+      value &&
+      typeof value === 'object' &&
+      Object.getPrototypeOf(value) === Object.prototype
+    ) {
+      return Object.fromEntries(
+        Object.entries(value).map(([k, v]) => [
+          this.sanitizeString(k),
+          this.sanitizeJsonValue(v),
+        ])
+      ) as T;
+    }
+    return value;
+  }
+
+  /** Removes `\u0000` from the text fields of a stream object, by type. */
+  private sanitizeStreamObject(stream: ChatStreamObject): ChatStreamObject {
+    switch (stream.type) {
+      case 'text-delta':
+      case 'reasoning':
+        return {
+          ...stream,
+          textDelta: this.sanitizeString(stream.textDelta),
+        };
+      case 'tool-call':
+        return {
+          ...stream,
+          toolCallId: this.sanitizeString(stream.toolCallId) ?? '',
+          toolName: this.sanitizeString(stream.toolName) ?? '',
+          args: this.sanitizeJsonValue(stream.args),
+        };
+      case 'tool-result':
+        return {
+          ...stream,
+          toolCallId: this.sanitizeString(stream.toolCallId) ?? '',
+          toolName: this.sanitizeString(stream.toolName) ?? '',
+          args: this.sanitizeJsonValue(stream.args),
+          result: this.sanitizeJsonValue(stream.result),
+        };
+    }
+  }
+
+  /** Sanitizes attachments and drops entries left with empty fields. */
+  private sanitizeAttachments(
+    attachments?: ChatAttachment[] | null
+  ): ChatAttachment[] | undefined {
+    if (!attachments?.length) {
+      return undefined;
+    }
+
+    return attachments
+      .map(attachment =>
+        typeof attachment === 'string'
+          ? (this.sanitizeString(attachment) ?? '')
+          : {
+              attachment:
+                this.sanitizeString(attachment.attachment) ??
+                attachment.attachment,
+              mimeType:
+                this.sanitizeString(attachment.mimeType) ?? attachment.mimeType,
+            }
+      )
+      .filter(attachment => {
+        if (typeof attachment === 'string') {
+          return !!attachment;
+        }
+        return !!attachment.attachment && !!attachment.mimeType;
+      });
+  }
+
+  /** Sanitizes a message for storage; `docs` is omitted from `params`. */
+  private sanitizeMessage(message: ChatMessage): ChatMessage {
+    return {
+      ...message,
+      content: this.sanitizeString(message.content) ?? '',
+      attachments: this.sanitizeAttachments(message.attachments),
+      params: this.sanitizeJsonValue(
+        omit(message.params, ['docs']) || undefined
+      ),
+      streamObjects: message.streamObjects?.map(o =>
+        this.sanitizeStreamObject(o)
+      ),
+    };
+  }
+
   getSessionType(session: Pick): SessionType {
     if (session.pinned) return SessionType.Pinned;
     if (!session.docId) return SessionType.Workspace;
@@ -401,6 +502,7 @@ export class CopilotSessionModel extends BaseModel {
     internalCall = false
   ): Promise {
     const { userId, sessionId, docId, promptName, pinned, title } = options;
+    const sanitizedTitle = this.sanitizeString(title);
     const session = await this.getExists(
       sessionId,
       {
@@ -448,7 +550,7 @@ export class CopilotSessionModel extends BaseModel {
 
     await this.db.aiSession.update({
       where: { id: sessionId },
-      data: { docId, promptName, pinned, title },
+      data: { docId, promptName, pinned, title: sanitizedTitle },
     });
 
     return sessionId;
@@ -509,19 +611,23 @@ export class CopilotSessionModel extends BaseModel {
     }
 
     if (messages.length) {
-      const tokenCost = this.calculateTokenSize(messages, state.prompt.model);
+      const sanitizedMessages = messages.map(m => this.sanitizeMessage(m));
+      const tokenCost = this.calculateTokenSize(
+        sanitizedMessages,
+        state.prompt.model
+      );
       await this.db.aiSessionMessage.createMany({
-        data: messages.map(m => ({
+        data: sanitizedMessages.map(m => ({
           ...m,
           attachments: m.attachments || undefined,
-          params: omit(m.params, ['docs']) || undefined,
+          params: m.params || undefined,
           streamObjects: m.streamObjects || undefined,
           sessionId,
         })),
       });
 
       // only count message generated by user
-      const userMessages = messages.filter(m => m.role === 'user');
+      const userMessages = sanitizedMessages.filter(m => m.role === 'user');
       await this.db.aiSession.update({
         where: { id: sessionId },
         data: {
diff --git a/packages/backend/server/src/plugins/copilot/embedding/job.ts b/packages/backend/server/src/plugins/copilot/embedding/job.ts
index b4880b22b6..5769939314 100644
--- a/packages/backend/server/src/plugins/copilot/embedding/job.ts
+++ b/packages/backend/server/src/plugins/copilot/embedding/job.ts
@@ -533,7 +533,15 @@ export class CopilotEmbeddingJob {
       workspaceId
     );
     if (!snapshot) {
-      this.logger.warn(`workspace snapshot ${workspaceId} not found`);
+      // maybe local workspace or empty workspace
+      this.logger.verbose(`workspace root snapshot ${workspaceId} not found`);
+      // mark last check time to avoid repeated checking
+      await this.models.workspace.update(
+        workspaceId,
+        { lastCheckEmbeddings: new Date() },
+        false
+      );
+
       return;
     } else if (
       // always check if never cleared
diff --git a/packages/backend/server/src/plugins/copilot/session.ts b/packages/backend/server/src/plugins/copilot/session.ts
index a2021516ee..ab8afefa0a 100644
--- a/packages/backend/server/src/plugins/copilot/session.ts
+++ b/packages/backend/server/src/plugins/copilot/session.ts
@@ -320,6 +320,22 @@ export class ChatSessionService {
     return messages.data;
   }
 
+  /** Removes `\u0000` from `value`; nullish or empty input yields ''. */
+  private stripNullBytes(value?: string | null): string {
+    if (!value) return '';
+    return value.replace(/\u0000/g, '');
+  }
+
+  /** True when the error message carries a known null-byte failure marker. */
+  private isNullByteError(error: unknown): boolean {
+    return (
+      error instanceof Error &&
+      (error.message.includes('\\u0000') ||
+        error.message.includes('unsupported Unicode escape sequence') ||
+        error.message.includes('22P05'))
+    );
+  }
+
   private async getHistory(session: Session): Promise {
     const prompt = await this.prompt.get(session.promptName);
     if (!prompt) throw new CopilotPromptNotFound({ name: session.promptName });
@@ -655,7 +671,13 @@ export class ChatSessionService {
         );
         return;
       }
-      const { userId, title, messages } = session;
+      const { userId, title } = session;
+      const messages =
+        session.messages?.map(m => ({
+          ...m,
+          content: this.stripNullBytes(m.content),
+        })) ?? [];
+
       if (
         title ||
         !messages.length ||
@@ -665,18 +687,41 @@ export class ChatSessionService {
         return;
       }
 
-      {
-        const title = await this.chatWithPrompt('Summary as title', {
-          content: session.messages
-            .map(m => `[${m.role}]: ${m.content}`)
-            .join('\n'),
-        });
-        await this.models.copilotSession.update({ userId, sessionId, title });
+      const promptContent = messages
+        .map(m => `[${m.role}]: ${m.content}`)
+        .join('\n');
+      const generatedTitle = this.stripNullBytes(
+        await this.chatWithPrompt('Summary as title', {
+          content: promptContent,
+        })
+      ).trim();
+
+      if (!generatedTitle) {
+        this.logger.warn(
+          `Generated empty title for session ${sessionId}, skip updating`
+        );
+        return;
       }
+      await this.models.copilotSession.update({
+        userId,
+        sessionId,
+        title: generatedTitle,
+      });
     } catch (error) {
-      console.error(
+      const context = {
+        sessionId,
+        cause: error,
+      };
+      if (this.isNullByteError(error)) {
+        this.logger.warn(
+          `Skip title generation for session ${sessionId} due to invalid null bytes in stored data`,
+          context
+        );
+        return;
+      }
+      this.logger.error(
         `Failed to generate title for session ${sessionId}:`,
-        error
+        context
       );
       throw error;
     }