# posthog/.github/workflows/ci-ai.yml

# Workflow that runs the LLM evals living under ee/hogai.
name: AI
on:
    # PR runs are also re-triggered by label changes, since the eval job is gated on a label
    pull_request:
        types:
            - opened
            - synchronize
            - reopened
            - labeled
            - unlabeled
    # On master, run only when the evaluated code or this workflow file changes
    push:
        branches: [master]
        paths:
            - 'ee/hogai/**'
            - '.github/workflows/ci-ai.yml'

# We only want one AI CI run per PR concurrently: a newer PR run cancels the one in progress.
# `github.head_ref` is only set for pull_request events, so push runs fall back to their
# unique run_id and are never cancelled.
concurrency:
    cancel-in-progress: ${{ github.event_name == 'pull_request' }}
    group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

jobs:
    # Runs the LLM eval suite against a locally started Docker Compose stack.
    eval:
        name: Run AI evals
        runs-on: ubuntu-latest
        timeout-minutes: 45
        # Skipping on forks as Braintrust credentials are not available there.
        # For `pull_request` events `github.repository` is the *base* repository, so PRs opened
        # from a fork have to be excluded explicitly by also comparing the head repository.
        # PRs additionally need the `evals-ready` label; pushes that trigger this workflow always run.
        if: |
            github.repository == 'PostHog/posthog' && (
                github.event_name == 'push' || (
                    github.event.pull_request.head.repo.full_name == github.repository &&
                    contains(github.event.pull_request.labels.*.name, 'evals-ready')
                )
            )

        steps:
            - uses: actions/checkout@v4
              with:
                  # Full history, and leave files already present in the workspace untouched
                  fetch-depth: 0
                  clean: false
                  # Use the PR's head branch rather than the merge commit with master, so that the
                  # Braintrust experiment gets accurate git metadata (on master this value is empty)
                  ref: ${{ github.event.pull_request.head.ref }}
            - name: Clean up data directories with container permissions
              continue-on-error: true
              run: |
                  # Files created by containers are removed from inside a container.
                  # Best effort: a missing ./data directory or a failing `docker run` is ignored.
                  if [ -d "data" ]; then
                      docker run --rm -v "$(pwd)/data:/data" alpine sh -c "rm -rf /data/seaweedfs /data/minio" || true
                  fi

            - name: Stop/Start stack with Docker Compose
              run: |
                  # Tear down whatever is left of the dev stack, then start it again in the background
                  compose_file=docker-compose.dev.yml
                  docker compose -f "$compose_file" down
                  docker compose -f "$compose_file" up -d

            - name: Set up Python
              uses: actions/setup-python@v5
              with:
                  # The Python version is read from the project metadata instead of being pinned here
                  python-version-file: pyproject.toml

            - name: Install uv
              # The action is pinned to a full commit SHA; the trailing comment records the release tag
              uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
              with:
                  version: '0.8.19'
                  enable-cache: true

            - name: Install python dependencies
              shell: bash
              # Point uv's project environment at the interpreter location exported as $pythonLocation,
              # and install exactly what the lockfile specifies (--frozen), dev dependencies included
              run: UV_PROJECT_ENVIRONMENT="$pythonLocation" uv sync --frozen --dev

            - name: Add Kafka and ClickHouse to /etc/hosts
              # Makes the `kafka` and `clickhouse` hostnames resolve to localhost on the runner.
              # Only `tee` needs root to append to /etc/hosts; `echo` runs fine unprivileged.
              run: echo "127.0.0.1 kafka clickhouse" | sudo tee -a /etc/hosts

            - name: Wait for Clickhouse & Kafka
              # NOTE(review): presumably blocks until both services accept connections - confirm in the script
              run: bin/check_kafka_clickhouse_up

            - name: Run LLM evals
              env:
                  # Eval run settings
                  EVAL_MODE: ci
                  EXPORT_EVAL_RESULTS: 'true'
                  # Credentials, all taken from repository secrets
                  BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
                  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
                  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
                  GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
                  INKEEP_API_KEY: ${{ secrets.INKEEP_API_KEY }}
                  AZURE_INFERENCE_CREDENTIAL: ${{ secrets.AZURE_INFERENCE_CREDENTIAL }}
                  AZURE_INFERENCE_ENDPOINT: ${{ secrets.AZURE_INFERENCE_ENDPOINT }}
              # Verbose pytest run over the CI eval suite
              run: pytest ee/hogai/eval/ci -vv

            - name: Post eval summary to PR
              # always() because we want to post even if `pytest` exited with an error (likely just one eval suite errored)
              if: always() && github.event_name == 'pull_request'
              uses: actions/github-script@v8
              with:
                  # Token of the bot account, passed to the `github` client handed to the script
                  github-token: ${{ secrets.POSTHOG_BOT_PAT }}
                  script: |
                      const fs = require("fs")
                      const script = require('.github/scripts/post-eval-summary.js')
                      // Await the result so that, if the summary script is async, this step only
                      // completes once it has finished and any failure is reported by this step
                      await script({ github, context, fs })