# ==============================================================================
# Heretek OpenClaw — Infrastructure Services v2.1 (OpenClaw Gateway)
# ==============================================================================
# Configuration: AMD GPU + MiniMax Primary + z.ai Failover
# Gateway: OpenClaw Gateway v2026.3.28 on port 18789
#
# Architecture:
#   ┌─────────────────────────────────────────────────────────────────────────┐
#   │                      Heretek OpenClaw Stack                             │
#   │  ┌─────────────────────────────────────────────────────────────────┐   │
#   │  │                     Core Services                                │   │
#   │  │  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────────────┐ │   │
#   │  │  │ LiteLLM  │  │PostgreSQL│  │  Redis   │  │     Ollama       │ │   │
#   │  │  │  :4000   │  │  :5432   │  │  :6379   │  │  :11434 (AMD)    │ │   │
#   │  │  │ Gateway  │  │ +pgvector│  │  Cache   │  │  Local LLM       │ │   │
#   │  │  └──────────┘  └──────────┘  └──────────┘  └──────────────────┘ │   │
#   │  └─────────────────────────────────────────────────────────────────┘   │
#   │  ┌─────────────────────────────────────────────────────────────────┐   │
#   │  │              OpenClaw Gateway (Port 18789)                       │   │
#   │  │  All 12 agents run as workspaces within Gateway process          │   │
#   │  │  Agent workspaces: ~/.openclaw/agents/{agent}/                   │   │
#   │  │                                                                  │   │
#   │  │  Agents: main, steward, alpha, beta, charlie, examiner,          │   │
#   │  │          explorer, sentinel, coder, dreamer, empath, historian   │   │
#   │  └─────────────────────────────────────────────────────────────────┘   │
#   │  ┌─────────────────────────────────────────────────────────────────┐   │
#   │  │                     Observability                                │   │
#   │  │  ┌────────────────────────────────────────────────────────────┐ │   │
#   │  │  │                 Langfuse Dashboard (:3000)                 │ │   │
#   │  │  │  Tracing • Monitoring • Analytics (self-hosted)            │ │   │
#   │  │  └────────────────────────────────────────────────────────────┘ │   │
#   │  └─────────────────────────────────────────────────────────────────┘   │
#   └─────────────────────────────────────────────────────────────────────────┘
#
# Usage:
#   docker compose up -d
#   docker compose logs -f litellm
#   docker compose down      (stops and removes containers and networks)
#   docker compose down -v   (same, and also removes named volumes — deletes data)
#
# Default Ports:
#   litellm:    4000 (Gateway + A2A)
#   postgres:   5432 (Database + Vector)
#   redis:      6379 (Cache + Rate Limiting)
#   ollama:     11434 (Local LLM - AMD GPU)
#   langfuse:   3000 (Langfuse dashboard; the legacy web interface was removed in v2.0.3)
#   gateway:    18789 (OpenClaw Gateway - all 12 agents)
# ==============================================================================

services:
  # ==============================================================================
  # Langfuse — LLM Observability Platform (Self-Hosted)
  # ==============================================================================
  # Langfuse provides tracing, monitoring, and analytics for OpenClaw agents
  # Access dashboard at: http://localhost:3000
  # Documentation: docs/operations/LANGFUSE_OBSERVABILITY.md
  # ==============================================================================
  langfuse:
    # Pinned to the v2 release line. This service is configured with
    # PostgreSQL only (DATABASE_URL / SALT / NEXTAUTH_*). The floating
    # `latest` tag tracks Langfuse v3, which additionally requires ClickHouse,
    # Redis and S3-compatible blob storage and does not start with this
    # configuration.
    image: langfuse/langfuse:2
    container_name: heretek-langfuse
    restart: unless-stopped
    ports:
      # Host port is configurable; the container always listens on 3000.
      - "${LANGFUSE_PORT:-3000}:3000"
    environment:
      # ─────────────────────────────────────────────────────────────────────────
      # Langfuse Core Settings
      # ─────────────────────────────────────────────────────────────────────────
      # LANGFUSE_POSTGRES_PASSWORD, LANGFUSE_SALT and LANGFUSE_NEXTAUTH_SECRET
      # have no defaults and must be provided (e.g. via .env).
      - DATABASE_URL=postgresql://langfuse:${LANGFUSE_POSTGRES_PASSWORD}@langfuse-postgres:5432/langfuse
      - SALT=${LANGFUSE_SALT}
      - NEXTAUTH_SECRET=${LANGFUSE_NEXTAUTH_SECRET}
      # Must match the URL the browser uses to reach the dashboard.
      - NEXTAUTH_URL=http://localhost:${LANGFUSE_PORT:-3000}
      - TELEMETRY_ENABLED=${LANGFUSE_TELEMETRY_ENABLED:-false}
      # NOTE(review): confirm Langfuse actually reads AUTH_OPTIONS and
      # SIGN_UP_ENABLED; its documented sign-up switch is AUTH_DISABLE_SIGNUP.
      - AUTH_OPTIONS=CREDENTIALS
      - SIGN_UP_ENABLED=${LANGFUSE_SIGN_UP_ENABLED:-true}
    depends_on:
      # Wait for the dedicated Langfuse database to accept connections.
      langfuse-postgres:
        condition: service_healthy
    volumes:
      - langfuse_blobs:/app/.blobs
    healthcheck:
      # Langfuse serves its health probe at /api/public/health (not
      # /api/health). wget is used instead of curl on the assumption that the
      # image is Alpine-based and ships BusyBox wget but not curl — TODO
      # confirm against the pinned image. 127.0.0.1 avoids `localhost`
      # resolving to ::1 inside the container.
      test:
        - CMD
        - wget
        - "-q"
        - "--spider"
        - "http://127.0.0.1:3000/api/public/health"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    networks:
      - heretek-network

  # ==============================================================================
  # Langfuse PostgreSQL Database
  # ==============================================================================
  langfuse-postgres:
    # Dedicated PostgreSQL instance for Langfuse, separate from the pgvector
    # `postgres` service. No host port is published; it is reachable only on
    # heretek-network.
    container_name: heretek-langfuse-db
    image: postgres:15-alpine
    restart: unless-stopped
    networks:
      - heretek-network
    environment:
      # User and database name are fixed to match the DATABASE_URL configured
      # on the `langfuse` service.
      POSTGRES_USER: "langfuse"
      POSTGRES_DB: "langfuse"
      POSTGRES_PASSWORD: "${LANGFUSE_POSTGRES_PASSWORD}"
    volumes:
      - langfuse_postgres_data:/var/lib/postgresql/data
    healthcheck:
      # Healthy once the server accepts connections for the langfuse database.
      test:
        - CMD-SHELL
        - "pg_isready -U langfuse -d langfuse"
      interval: 5s
      timeout: 5s
      retries: 5

  # ==============================================================================
  # LiteLLM Gateway — Unified LLM API with A2A Protocol
  # ==============================================================================
  # Passthrough Endpoints: Each agent has a virtual model (agent/steward, etc.)
  # Users can reassign models via WebUI without changing openclaw.json
  # ==============================================================================
  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    container_name: heretek-litellm
    restart: unless-stopped
    ports:
      # Host port is configurable; the proxy listens on 4000 in the container
      # (see `command` below).
      - "${LITELLM_PORT:-4000}:4000"
    volumes:
      # Proxy configuration, mounted read-only.
      - ./litellm_config.yaml:/app/config.yaml:ro
    environment:
      # ─────────────────────────────────────────────────────────────────────────
      # LiteLLM Core Settings
      # ─────────────────────────────────────────────────────────────────────────
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY}
      - LITELLM_SALT_KEY=${LITELLM_SALT_KEY}

      # ─────────────────────────────────────────────────────────────────────────
      # Database & Cache Connections
      # ─────────────────────────────────────────────────────────────────────────
      # Uses the same POSTGRES_* variables and defaults as the `postgres`
      # service so both sides agree on credentials.
      - DATABASE_URL=postgresql://${POSTGRES_USER:-heretek}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-heretek}
      - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      # LANGFUSE_HOST defaults to Langfuse Cloud. To send traces to the
      # self-hosted `langfuse` service defined in this file, set
      # LANGFUSE_HOST=http://langfuse:3000.
      - LANGFUSE_ENABLED=${LANGFUSE_ENABLED:-false}
      - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY:-}
      - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY:-}
      - LANGFUSE_HOST=${LANGFUSE_HOST:-https://cloud.langfuse.com}

      # ─────────────────────────────────────────────────────────────────────────
      # Provider API Keys
      # ─────────────────────────────────────────────────────────────────────────
      - MINIMAX_API_KEY=${MINIMAX_API_KEY}
      - MINIMAX_API_BASE=${MINIMAX_API_BASE:-https://api.minimaxi.chat/v1}
      - ZAI_API_KEY=${ZAI_API_KEY}
      - ZAI_API_BASE=${ZAI_API_BASE:-https://api.z.ai/api/coding/paas/v4}

      # ─────────────────────────────────────────────────────────────────────────
      # Ollama Connection
      # ─────────────────────────────────────────────────────────────────────────
      - OLLAMA_HOST=http://ollama:11434

      # ─────────────────────────────────────────────────────────────────────────
      # Model Settings
      # ─────────────────────────────────────────────────────────────────────────
      - STORE_MODEL_IN_DB=True
      - LITELLM_DROP_PARAMS=True

      # ─────────────────────────────────────────────────────────────────────────
      # A2A (Agent-to-Agent) Protocol Settings
      # ─────────────────────────────────────────────────────────────────────────
      - AGENT_MODE_ENABLED=true
      - AGENT_A2A_VERSION=1.0

      # ─────────────────────────────────────────────────────────────────────────
      # UI & Monitoring
      # ─────────────────────────────────────────────────────────────────────────
      - UI_USERNAME=${LITELLM_UI_USERNAME:-admin}
      - UI_PASSWORD=${LITELLM_UI_PASSWORD}

      # ─────────────────────────────────────────────────────────────────────────
      # Observability
      # ─────────────────────────────────────────────────────────────────────────
      - LITELLM_COST_TRACKING_ENABLED=${LITELLM_COST_TRACKING_ENABLED:-true}
      - LITELLM_METRICS_ENABLED=${LITELLM_METRICS_ENABLED:-true}
      # Defaults to INFO so request payloads are not written to the logs by
      # default; set LOG_LEVEL=DEBUG when troubleshooting.
      - LITELLM_LOG_LEVEL=${LOG_LEVEL:-INFO}

    command:
      - "--config"
      - "/app/config.yaml"
      - "--port"
      - "4000"
      - "--num_workers"
      - "4"
    depends_on:
      # Both dependencies define healthchecks, so wait until they actually
      # accept connections rather than merely until their containers start.
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    healthcheck:
      # Probe LiteLLM's unauthenticated liveness endpoint instead of only
      # checking that the TCP port is open. urlopen raises on connection
      # errors and non-2xx responses, which makes python3 exit non-zero.
      test:
        - CMD
        - python3
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://127.0.0.1:4000/health/liveliness', timeout=5)"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    networks:
      - heretek-network

  # ==============================================================================
  # PostgreSQL with pgvector — Vector Database for RAG
  # ==============================================================================
  postgres:
    # Main application database (PostgreSQL 17 image with pgvector).
    container_name: heretek-postgres
    image: pgvector/pgvector:pg17
    restart: unless-stopped
    networks:
      - heretek-network
    ports:
      # Published on the loopback interface only.
      - "127.0.0.1:5432:5432"
    environment:
      # Database and user default to "heretek"; POSTGRES_PASSWORD has no
      # default and must be supplied.
      - POSTGRES_DB=${POSTGRES_DB:-heretek}
      - POSTGRES_USER=${POSTGRES_USER:-heretek}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Init script mounted read-only into the image's initdb directory.
      - ./init/pgvector-init.sql:/docker-entrypoint-initdb.d/pgvector-init.sql:ro
    healthcheck:
      # User and database names are interpolated by Compose, so the probe
      # targets the same values the environment block configures.
      test:
        - CMD-SHELL
        - "pg_isready -U ${POSTGRES_USER:-heretek} -d ${POSTGRES_DB:-heretek}"
      interval: 10s
      timeout: 5s
      retries: 5

  # ==============================================================================
  # Redis — Caching & Rate Limiting
  # ==============================================================================
  redis:
    container_name: heretek-redis
    image: redis:7-alpine
    restart: unless-stopped
    networks:
      - heretek-network
    ports:
      # Published on the loopback interface only.
      - "127.0.0.1:6379:6379"
    volumes:
      - redis_data:/data
    # Explicit argv form. Values are quoted so YAML keeps them as strings
    # (an unquoted `yes` would be read as a boolean).
    command:
      - redis-server
      # Append-only-file persistence.
      - "--appendonly"
      - "yes"
      # Cap memory at 256 MB and evict with the allkeys-lru policy.
      - "--maxmemory"
      - "256mb"
      - "--maxmemory-policy"
      - "allkeys-lru"
      - "--tcp-keepalive"
      - "60"
    healthcheck:
      test:
        - CMD
        - redis-cli
        - ping
      interval: 10s
      timeout: 5s
      retries: 5

  # ==============================================================================
  # Ollama — Local LLM Runtime (AMD ROCm)
  # ==============================================================================
  # AMD GPU Support: Uses ROCm image with /dev/kfd and /dev/dri devices
  # Embedding Model: nomic-embed-text-v2-moe (768 dimensions)
  # ==============================================================================
  ollama:
    image: ollama/ollama:rocm
    container_name: heretek-ollama
    restart: unless-stopped
    devices:
      # AMD GPU access for ROCm: compute interface and render nodes.
      - /dev/kfd
      - /dev/dri
    environment:
      # Listen on all container interfaces so other services on
      # heretek-network (e.g. litellm) can reach the API.
      - OLLAMA_HOST=0.0.0.0
      # ROCm GFX target override. The default 10.3.0 is kept for backward
      # compatibility (presumably chosen for an RDNA2 / gfx1030 card — confirm
      # for your hardware); set HSA_OVERRIDE_GFX_VERSION in .env to change it.
      - HSA_OVERRIDE_GFX_VERSION=${HSA_OVERRIDE_GFX_VERSION:-10.3.0}
    volumes:
      # Model storage.
      - ollama_data:/root/.ollama
    ports:
      # Published on the loopback interface only.
      - "127.0.0.1:11434:11434"
    healthcheck:
      # The ollama image is not known to ship curl, so a curl-based probe
      # would always fail. Use the bundled CLI instead: `ollama list` exits
      # non-zero when the server is unreachable.
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    networks:
      - heretek-network

  # ==============================================================================
  # Redis-to-WebSocket Bridge — REMOVED (v2.0.4)
  # ==============================================================================
  # This service was removed because the Dockerfile.websocket-bridge was missing.
  # The functionality is no longer used in the current architecture.
  # If real-time WebSocket updates are needed, implement a new bridge service.
  # websocket-bridge:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile.websocket-bridge
  #   container_name: heretek-websocket-bridge
  #   restart: unless-stopped
  #   environment:
  #     - WS_PORT=3002
  #     - REDIS_URL=redis://redis:6379/0
  #     - REDIS_HOST=redis
  #     - REDIS_PORT=6379
  #   ports:
  #     - "127.0.0.1:3002:3002"
  #     - "127.0.0.1:3003:3003"
  #   depends_on:
  #     redis:
  #       condition: service_started
  #   networks:
  #     - heretek-network

  # ==============================================================================
  # Web Interface — DEPRECATED (Removed in v2.0.3)
  # ==============================================================================
  # The web-interface directory was removed during the v2.0.3 codebase consolidation.
  # This service is no longer available. If you need a web interface, you will need
  # to restore it from a previous version or implement a new one.
  #
  # web:
  #   build:
  #     context: ./web-interface  # REMOVED in v2.0.3
  #     dockerfile: Dockerfile
  #   container_name: heretek-web
  #   restart: unless-stopped
  #   environment:
  #     - NODE_ENV=production
  #     - DOCKER_ENV=true
  #     - LITELLM_HOST=http://litellm:4000
  #     - LITELLM_API_KEY=${LITELLM_MASTER_KEY}
  #     - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
  #     - REDIS_HOST=redis
  #     - REDIS_PORT=6379
  #     - LANGFUSE_ENABLED=${LANGFUSE_ENABLED:-false}
  #     - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY:-}
  #     - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY:-}
  #     - LANGFUSE_HOST=${LANGFUSE_HOST:-https://cloud.langfuse.com}
  #   ports:
  #     - "3000:3000"
  #   depends_on:
  #     litellm:
  #       condition: service_started
  #   healthcheck:
  #     test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/agents"]
  #     interval: 30s
  #     timeout: 10s
  #     retries: 3
  #   networks:
  #     - heretek-network

# ==============================================================================
# Volumes — Persistent Data Storage
# ==============================================================================
volumes:
  # Core services
  # Data directory of the `postgres` service (/var/lib/postgresql/data).
  postgres_data:
    driver: local
  # Persistence directory of the `redis` service (/data).
  redis_data:
    driver: local
  # Storage for the `ollama` service (/root/.ollama).
  ollama_data:
    driver: local

  # Langfuse observability
  # Data directory of the `langfuse-postgres` service.
  langfuse_postgres_data:
    driver: local
  # Mounted by the `langfuse` service at /app/.blobs.
  langfuse_blobs:
    driver: local

  # Collective memory (skills are bind-mounted, not a volume)
  # NOTE(review): no service in this file mounts this volume — confirm it is
  # still needed.
  collective_memory:
    driver: local

  # Monitoring Stack
  # Not mounted by any service in this file; presumably consumed by the
  # services in docker-compose.monitoring.yml — verify.
  prometheus_data:
    driver: local
  grafana_data:
    driver: local

# ==============================================================================
# Networks — Container Communication
# ==============================================================================
networks:
  # Single user-defined bridge network; every service in this file joins it
  # and reaches its peers by service name (e.g. postgres, redis, ollama).
  heretek-network:
    driver: bridge
    ipam:
      config:
        # Fixed address range for the bridge. Change it if it collides with
        # another network on the host.
        - subnet: 172.28.0.0/16

# ==============================================================================
# END OF DOCKER-COMPOSE.YML
# ==============================================================================
# Note: Monitoring Stack services (Prometheus, Grafana, exporters) have been
# moved to docker-compose.monitoring.yml for modular deployment.
#
# To deploy the monitoring stack:
#   docker compose -f docker-compose.yml -f docker-compose.monitoring.yml up -d
#
# Documentation: docs/operations/MONITORING_STACK.md
# ==============================================================================