feat(server): add cloud indexer with Elasticsearch and Manticoresearch providers (#11835)

close CLOUD-137

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Introduced advanced workspace-scoped search and aggregation capabilities with support for complex queries, highlights, and pagination.
  - Added pluggable search providers: Elasticsearch and Manticoresearch.
  - New GraphQL queries, schema types, and resolver support for search and aggregation.
  - Enhanced configuration options for search providers in self-hosted and cloud deployments.
  - Added Docker Compose services and environment variables for Elasticsearch and Manticoresearch.
  - Integrated indexer service into deployment and CI workflows.

- **Bug Fixes**
  - Improved error handling with new user-friendly error messages for search provider and indexer issues.

- **Documentation**
  - Updated configuration examples and environment variable references for indexer and search providers.

- **Tests**
  - Added extensive end-to-end and provider-specific tests covering indexing, searching, aggregation, deletion, and error cases.
  - Included snapshot tests and test fixtures for search providers.

- **Chores**
  - Updated deployment scripts, Helm charts, and Kubernetes manifests to include indexer-related environment variables and secrets.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
fengmk2
2025-05-14 14:52:40 +00:00
parent 7c22b3931f
commit a1bcf77447
66 changed files with 10139 additions and 10 deletions

View File

@@ -3,4 +3,13 @@ DB_VERSION=16
# database credentials
DB_PASSWORD=affine
DB_USERNAME=affine
DB_DATABASE_NAME=affine
DB_DATABASE_NAME=affine
# elasticsearch image version; while commented out, the elasticsearch compose service uses 9.0.1
# ELASTIC_VERSION=9.0.1
# uncomment both lines below on arm64 hosts, e.g. Apple Silicon (M1 or later) Macs
# ELASTIC_VERSION_ARM64=-arm64
# ELASTIC_PLATFORM=linux/arm64
# manticoresearch image version
MANTICORE_VERSION=9.2.14

View File

@@ -0,0 +1,65 @@
# Local development services: PostgreSQL (pgvector), Redis, MailHog and a
# single-node Elasticsearch.
name: affine_dev_services

services:
  postgres:
    env_file:
      - .env
    image: pgvector/pgvector:pg${DB_VERSION:-16}
    ports:
      - '5432:5432'
    environment:
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      POSTGRES_USER: ${DB_USERNAME}
      POSTGRES_DB: ${DB_DATABASE_NAME}
    volumes:
      - postgres_data:/var/lib/postgresql/data

  redis:
    image: redis:latest
    ports:
      - '6379:6379'

  mailhog:
    image: mailhog/mailhog:latest
    ports:
      - '1025:1025'
      - '8025:8025'

  elasticsearch:
    # ELASTIC_VERSION_ARM64 and ELASTIC_PLATFORM are optional (arm64 hosts
    # only). The explicit empty default (`:-`) keeps the value blank when they
    # are unset without Compose warning "variable is not set" on every command.
    image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION:-9.0.1}${ELASTIC_VERSION_ARM64:-}
    platform: '${ELASTIC_PLATFORM:-}'
    labels:
      co.elastic.logs/module: elasticsearch
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    ports:
      # Host port is overridable through ES_PORT; the container side stays 9200.
      - '${ES_PORT:-9200}:9200'
    environment:
      - node.name=es01
      - cluster.name=affine-dev
      - discovery.type=single-node
      - bootstrap.memory_lock=true
      # Security and TLS are switched off: this file is for local development.
      - xpack.security.enabled=false
      - xpack.security.http.ssl.enabled=false
      - xpack.security.transport.ssl.enabled=false
      - xpack.license.self_generated.type=basic
    # Defaults to 1 GiB (1073741824 bytes).
    mem_limit: ${ES_MEM_LIMIT:-1073741824}
    # Unlimited memlock accompanies bootstrap.memory_lock=true above.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      # Healthy once the root endpoint response contains the cluster name.
      test:
        - 'CMD-SHELL'
        - "curl -s http://localhost:9200 | grep -q 'affine-dev'"
      interval: 10s
      timeout: 10s
      retries: 120

# Declared for the project; no service above attaches to it explicitly.
networks:
  dev:

volumes:
  postgres_data:
  elasticsearch_data:

View File

@@ -24,8 +24,26 @@ services:
- 1025:1025
- 8025:8025
# Manticore Search for local development, published on HTTP port 9308.
# Docker reference: https://manual.manticoresearch.com/Starting_the_server/Docker
manticoresearch:
  image: manticoresearch/manticore:${MANTICORE_VERSION:-9.2.14}
  restart: always
  ports:
    - '9308:9308'
  volumes:
    # Named volume holding the Manticore data directory.
    - manticoresearch_data:/var/lib/manticore
  ulimits:
    nproc: 65535
    nofile:
      soft: 65535
      hard: 65535
    memlock:
      soft: -1
      hard: -1
# Named volumes used by the dev services.
volumes:
  postgres_data:
  manticoresearch_data:

# Project network.
networks:
  dev:

View File

@@ -20,4 +20,9 @@ CONFIG_LOCATION=~/.affine/self-host/config
# database credentials
DB_USERNAME=affine
DB_PASSWORD=
DB_DATABASE=affine
DB_DATABASE=affine
# version of the manticoresearch image used as the indexer search provider
MANTICORE_VERSION=9.2.14
# host path where the manticoresearch data is persisted
MANTICORE_DATA_LOCATION=~/.affine/self-host/manticore

View File

@@ -10,6 +10,8 @@ services:
condition: service_healthy
postgres:
condition: service_healthy
indexer:
condition: service_healthy
affine_migration:
condition: service_completed_successfully
volumes:
@@ -41,6 +43,8 @@ services:
condition: service_healthy
redis:
condition: service_healthy
indexer:
condition: service_healthy
redis:
image: redis
@@ -72,3 +76,24 @@ services:
timeout: 5s
retries: 5
restart: unless-stopped
# Manticore Search container that backs the indexer.
indexer:
  image: manticoresearch/manticore:${MANTICORE_VERSION:-9.2.14}
  container_name: affine_indexer
  volumes:
    # Fail fast with a readable message when the data path is missing: a blank
    # expansion would otherwise produce the invalid bind spec
    # ":/var/lib/manticore".
    - '${MANTICORE_DATA_LOCATION:?set MANTICORE_DATA_LOCATION in .env}:/var/lib/manticore'
  ulimits:
    nproc: 65535
    nofile:
      soft: 65535
      hard: 65535
    memlock:
      soft: -1
      hard: -1
  healthcheck:
    # Healthy once the HTTP endpoint on port 9308 answers.
    test: ['CMD', 'wget', '-O-', 'http://127.0.0.1:9308']
    interval: 10s
    timeout: 5s
    retries: 5
  restart: unless-stopped

View File

@@ -794,6 +794,37 @@
}
}
},
"indexer": {
"type": "object",
"description": "Configuration for indexer module",
"properties": {
"enabled": {
"type": "boolean",
"description": "Enable indexer plugin\n@default true",
"default": true
},
"provider.type": {
"type": "string",
"description": "Indexer search service provider name\n@default \"manticoresearch\"\n@environment `AFFINE_INDEXER_SEARCH_PROVIDER`",
"default": "manticoresearch"
},
"provider.endpoint": {
"type": "string",
"description": "Indexer search service endpoint\n@default \"http://localhost:9308\"\n@environment `AFFINE_INDEXER_SEARCH_ENDPOINT`",
"default": "http://localhost:9308"
},
"provider.username": {
"type": "string",
"description": "Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch\n@default \"\"\n@environment `AFFINE_INDEXER_SEARCH_USERNAME`\n@link https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html",
"default": ""
},
"provider.password": {
"type": "string",
"description": "Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch\n@default \"\"\n@environment `AFFINE_INDEXER_SEARCH_PASSWORD`",
"default": ""
}
}
},
"oauth": {
"type": "object",
"description": "Configuration for oauth module",