mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-05-08 22:07:32 +08:00
fix(server): add embedding table repair (#14895)
fix #14894

#### PR Dependency Tree

* **PR #14895** 👈

This tree was auto-generated by [Charcoal](https://github.com/danerwilliams/charcoal)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Chores**
  * Improved database initialization for self-hosted deployments with automatic creation and repair of embedding tables and indexes, applied only when related base tables and extensions are present.
  * Updated pre-deploy process to run Prisma migrations, perform embedding-table maintenance, and execute additional data migrations as part of setup.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -0,0 +1,143 @@
|
||||
-- Idempotent repair of the copilot embedding tables, indexes and foreign keys.
--
-- Behaviour, in order:
--   1. Ensure the pgvector extension ("vector") is installed; if it cannot be
--      created, emit a NOTICE and leave the database untouched.
--   2. Detect whether the "hnsw" index access method is registered; when it is
--      not, tables are still created but the HNSW indexes are skipped.
--   3. For each embedding table, act only if its parent table exists in the
--      "public" schema, then create the table, its indexes and its foreign key
--      if they are missing.
--
-- Every object is schema-qualified with "public" on purpose: the existence
-- guards (to_regclass) and the constraint lookups (::regclass) are pinned to
-- "public", so the DDL must target the same schema regardless of the session's
-- search_path. Otherwise a table could be created in one schema while the
-- follow-up 'public.<table>'::regclass cast raises and aborts the whole block.
DO $$
DECLARE
  has_hnsw BOOLEAN;
BEGIN
  -- Step 1: make sure pgvector is present.
  IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
    BEGIN
      CREATE EXTENSION IF NOT EXISTS "vector";
    EXCEPTION
      WHEN OTHERS THEN
        -- Best effort: a missing extension must not fail the deployment, but
        -- the underlying error is surfaced as DETAIL so operators can tell
        -- "not installed" apart from e.g. "permission denied".
        RAISE NOTICE 'pgvector extension is not available. Skip repairing copilot embedding tables.'
          USING DETAIL = SQLERRM;
        RETURN;
    END;
  END IF;

  -- Step 2: the HNSW indexes below are only attempted when the access method
  -- is registered in pg_am.
  SELECT EXISTS (SELECT 1 FROM pg_am WHERE amname = 'hnsw') INTO has_hnsw;

  IF NOT has_hnsw THEN
    RAISE NOTICE 'pgvector HNSW index access method is not available. Skip repairing copilot embedding indexes.';
  END IF;

  -- Step 3a: embeddings of context files, children of "ai_contexts".
  IF to_regclass('public.ai_contexts') IS NOT NULL THEN
    CREATE TABLE IF NOT EXISTS "public"."ai_context_embeddings" (
      "id" VARCHAR NOT NULL,
      "context_id" VARCHAR NOT NULL,
      "file_id" VARCHAR NOT NULL,
      "chunk" INTEGER NOT NULL,
      "content" VARCHAR NOT NULL,
      "embedding" vector(1024) NOT NULL,
      "created_at" TIMESTAMPTZ(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
      "updated_at" TIMESTAMPTZ(3) NOT NULL,
      CONSTRAINT "ai_context_embeddings_pkey" PRIMARY KEY ("id")
    );

    IF has_hnsw THEN
      CREATE INDEX IF NOT EXISTS "ai_context_embeddings_idx"
        ON "public"."ai_context_embeddings" USING hnsw ("embedding" vector_cosine_ops);
    END IF;

    CREATE UNIQUE INDEX IF NOT EXISTS "ai_context_embeddings_context_id_file_id_chunk_key"
      ON "public"."ai_context_embeddings" ("context_id", "file_id", "chunk");

    -- ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form, so the catalog
    -- is consulted first.
    IF NOT EXISTS (
      SELECT 1
      FROM pg_constraint
      WHERE conname = 'ai_context_embeddings_context_id_fkey'
        AND conrelid = 'public.ai_context_embeddings'::regclass
    ) THEN
      ALTER TABLE "public"."ai_context_embeddings"
        ADD CONSTRAINT "ai_context_embeddings_context_id_fkey"
        FOREIGN KEY ("context_id") REFERENCES "public"."ai_contexts" ("id")
        ON DELETE CASCADE ON UPDATE CASCADE;
    END IF;
  END IF;

  -- Step 3b: embeddings of workspace docs, children of "snapshots".
  IF to_regclass('public.snapshots') IS NOT NULL THEN
    CREATE TABLE IF NOT EXISTS "public"."ai_workspace_embeddings" (
      "workspace_id" VARCHAR NOT NULL,
      "doc_id" VARCHAR NOT NULL,
      "chunk" INTEGER NOT NULL,
      "content" VARCHAR NOT NULL,
      "embedding" vector(1024) NOT NULL,
      "created_at" TIMESTAMPTZ(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
      "updated_at" TIMESTAMPTZ(3) NOT NULL,
      CONSTRAINT "ai_workspace_embeddings_pkey"
        PRIMARY KEY ("workspace_id", "doc_id", "chunk")
    );

    IF has_hnsw THEN
      CREATE INDEX IF NOT EXISTS "ai_workspace_embeddings_idx"
        ON "public"."ai_workspace_embeddings" USING hnsw ("embedding" vector_cosine_ops);
    END IF;

    IF NOT EXISTS (
      SELECT 1
      FROM pg_constraint
      WHERE conname = 'ai_workspace_embeddings_workspace_id_doc_id_fkey'
        AND conrelid = 'public.ai_workspace_embeddings'::regclass
    ) THEN
      ALTER TABLE "public"."ai_workspace_embeddings"
        ADD CONSTRAINT "ai_workspace_embeddings_workspace_id_doc_id_fkey"
        FOREIGN KEY ("workspace_id", "doc_id")
        REFERENCES "public"."snapshots" ("workspace_id", "guid")
        ON DELETE CASCADE ON UPDATE CASCADE;
    END IF;
  END IF;

  -- Step 3c: embeddings of workspace files, children of "ai_workspace_files".
  -- Note: this table has no "updated_at" column.
  IF to_regclass('public.ai_workspace_files') IS NOT NULL THEN
    CREATE TABLE IF NOT EXISTS "public"."ai_workspace_file_embeddings" (
      "workspace_id" VARCHAR NOT NULL,
      "file_id" VARCHAR NOT NULL,
      "chunk" INTEGER NOT NULL,
      "content" VARCHAR NOT NULL,
      "embedding" vector(1024) NOT NULL,
      "created_at" TIMESTAMPTZ(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
      CONSTRAINT "ai_workspace_file_embeddings_pkey"
        PRIMARY KEY ("workspace_id", "file_id", "chunk")
    );

    IF has_hnsw THEN
      CREATE INDEX IF NOT EXISTS "ai_workspace_file_embeddings_idx"
        ON "public"."ai_workspace_file_embeddings" USING hnsw ("embedding" vector_cosine_ops);
    END IF;

    IF NOT EXISTS (
      SELECT 1
      FROM pg_constraint
      WHERE conname = 'ai_workspace_file_embeddings_workspace_id_file_id_fkey'
        AND conrelid = 'public.ai_workspace_file_embeddings'::regclass
    ) THEN
      ALTER TABLE "public"."ai_workspace_file_embeddings"
        ADD CONSTRAINT "ai_workspace_file_embeddings_workspace_id_file_id_fkey"
        FOREIGN KEY ("workspace_id", "file_id")
        REFERENCES "public"."ai_workspace_files" ("workspace_id", "file_id")
        ON DELETE CASCADE ON UPDATE CASCADE;
    END IF;
  END IF;

  -- Step 3d: embeddings of workspace blobs, children of "blobs".
  -- Note: this table has no "updated_at" column.
  IF to_regclass('public.blobs') IS NOT NULL THEN
    CREATE TABLE IF NOT EXISTS "public"."ai_workspace_blob_embeddings" (
      "workspace_id" VARCHAR NOT NULL,
      "blob_id" VARCHAR NOT NULL,
      "chunk" INTEGER NOT NULL,
      "content" VARCHAR NOT NULL,
      "embedding" vector(1024) NOT NULL,
      "created_at" TIMESTAMPTZ(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
      CONSTRAINT "ai_workspace_blob_embeddings_pkey"
        PRIMARY KEY ("workspace_id", "blob_id", "chunk")
    );

    IF has_hnsw THEN
      CREATE INDEX IF NOT EXISTS "ai_workspace_blob_embeddings_idx"
        ON "public"."ai_workspace_blob_embeddings" USING hnsw ("embedding" vector_cosine_ops);
    END IF;

    IF NOT EXISTS (
      SELECT 1
      FROM pg_constraint
      WHERE conname = 'ai_workspace_blob_embeddings_workspace_id_blob_id_fkey'
        AND conrelid = 'public.ai_workspace_blob_embeddings'::regclass
    ) THEN
      ALTER TABLE "public"."ai_workspace_blob_embeddings"
        ADD CONSTRAINT "ai_workspace_blob_embeddings_workspace_id_blob_id_fkey"
        FOREIGN KEY ("workspace_id", "blob_id")
        REFERENCES "public"."blobs" ("workspace_id", "key")
        ON DELETE CASCADE ON UPDATE CASCADE;
    END IF;
  END IF;
END $$;
|
||||
@@ -38,9 +38,32 @@ function prepare() {
|
||||
}
|
||||
}
|
||||
|
||||
function runPredeployScript() {
|
||||
console.log('running predeploy script.');
|
||||
execSync('yarn predeploy', {
|
||||
/**
 * Applies pending Prisma migrations by shelling out to
 * `yarn prisma migrate deploy`.
 *
 * The child process receives the current environment and shares this
 * process's stdio, so its output appears directly in the deploy log.
 * `execSync` throws if the command exits with a non-zero status, which
 * propagates to the caller.
 */
function runPrismaMigrations() {
  const migrateCommand = 'yarn prisma migrate deploy';
  const spawnOptions = {
    encoding: 'utf-8',
    env: process.env,
    stdio: 'inherit',
  };

  console.log('running prisma migrations.');
  execSync(migrateCommand, spawnOptions);
}
|
||||
|
||||
/**
 * Runs the pgvector embedding-table repair script against the database.
 *
 * Reads `repair-pgvector-embedding-tables.sql` from the directory of this
 * module and feeds it to `yarn prisma db execute --stdin --schema schema.prisma`
 * on standard input. stdout and stderr of the child are inherited so its
 * output shows up in the deploy log; stdin is a pipe so the script text can be
 * passed via `input`. `execSync` throws on a non-zero exit status.
 */
function repairPgvectorEmbeddingTables() {
  console.log('repairing copilot pgvector embedding tables.');

  const scriptPath = path.join(
    import.meta.dirname,
    'repair-pgvector-embedding-tables.sql'
  );
  const repairScript = fs.readFileSync(scriptPath, 'utf-8');

  const executeCommand = 'yarn prisma db execute --stdin --schema schema.prisma';
  const spawnOptions = {
    encoding: 'utf-8',
    env: process.env,
    input: repairScript,
    stdio: ['pipe', 'inherit', 'inherit'],
  };

  execSync(executeCommand, spawnOptions);
}
|
||||
|
||||
function runDataMigrations() {
|
||||
console.log('running data migrations.');
|
||||
execSync('yarn cli run', {
|
||||
encoding: 'utf-8',
|
||||
env: process.env,
|
||||
stdio: 'inherit',
|
||||
@@ -85,4 +108,6 @@ function fixFailedMigrations() {
|
||||
|
||||
prepare();
|
||||
fixFailedMigrations();
|
||||
runPredeployScript();
|
||||
runPrismaMigrations();
|
||||
repairPgvectorEmbeddingTables();
|
||||
runDataMigrations();
|
||||
|
||||
Reference in New Issue
Block a user