Files
posthog/.github/workflows/ci-ai.yml
Michael Matloka 3201af2c5d chore(ai): In CI evals summary, hide unchanged evals (#40486)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-13 10:26:22 +00:00

90 lines
3.7 KiB
YAML

# Workflow that runs the AI evals (see the `eval` job).
name: AI

on:
  # `labeled`/`unlabeled` are included so that adding or removing a label re-triggers
  # the workflow (the eval job is gated on the `evals-ready` label).
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - labeled
      - unlabeled
  # On master, run only when the AI code or this workflow file itself changes.
  push:
    branches:
      - master
    paths:
      - 'ee/hogai/**'
      - '.github/workflows/ci-ai.yml'
# We only want one AI CI run per PR concurrently: runs of the same PR share a group keyed
# by the head ref, and a newer run cancels the one in progress. When `github.head_ref` is
# empty the group falls back to the unique run ID, and cancellation is disabled for
# anything that is not a pull_request event.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
  # Brings up the dev Docker Compose stack, runs the CI eval suite with pytest and,
  # on pull requests, posts a summary of the results.
  eval:
    name: Run AI evals
    runs-on: ubuntu-latest
    timeout-minutes: 45
    # Skipping on forks as Braintrust credentials are not available there. This covers both
    # runs inside a forked repository and pull requests opened from a fork: for the latter
    # `github.repository` is still the base repository, so the head repository is checked
    # explicitly. Pull requests additionally need the `evals-ready` label.
    if: |
      github.repository == 'PostHog/posthog' && (
        github.event_name == 'push' || (
          github.event.pull_request.head.repo.full_name == github.repository &&
          contains(github.event.pull_request.labels.*.name, 'evals-ready')
        )
      )
    steps:
      - uses: actions/checkout@v4
        with:
          # Check out the actual branch instead of merge commit with master,
          # because we want the Braintrust experiment to have accurate git metadata (on master it's empty)
          ref: ${{ github.event.pull_request.head.ref }}
          fetch-depth: 0
          clean: false

      - name: Clean up data directories with container permissions
        # Best-effort cleanup: a failure here must not fail the job
        continue-on-error: true
        run: |
          # Use docker to clean up files created by containers
          [ -d "data" ] && docker run --rm -v "$(pwd)/data:/data" alpine sh -c "rm -rf /data/seaweedfs /data/minio" || true

      - name: Stop/Start stack with Docker Compose
        run: |
          docker compose -f docker-compose.dev.yml down
          docker compose -f docker-compose.dev.yml up -d

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: 'pyproject.toml'

      - name: Install uv
        uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
        with:
          enable-cache: true
          # Quoted so the version is always read as a string
          version: '0.8.19'

      - name: Install python dependencies
        shell: bash
        # Points uv's project environment at the location exported as $pythonLocation
        run: UV_PROJECT_ENVIRONMENT=$pythonLocation uv sync --frozen --dev

      - name: Add Kafka and ClickHouse to /etc/hosts
        # Only `tee` needs root to append to /etc/hosts; `echo` does not
        run: echo "127.0.0.1 kafka clickhouse" | sudo tee -a /etc/hosts

      - name: Wait for Clickhouse & Kafka
        run: bin/check_kafka_clickhouse_up

      - name: Run LLM evals
        run: pytest ee/hogai/eval/ci -vv
        env:
          EVAL_MODE: ci
          # Environment variables are always strings, so spell the value as one
          EXPORT_EVAL_RESULTS: 'true'
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          INKEEP_API_KEY: ${{ secrets.INKEEP_API_KEY }}
          AZURE_INFERENCE_CREDENTIAL: ${{ secrets.AZURE_INFERENCE_CREDENTIAL }}
          AZURE_INFERENCE_ENDPOINT: ${{ secrets.AZURE_INFERENCE_ENDPOINT }}

      - name: Post eval summary to PR
        # always() because we want to post even if `pytest` exited with an error (likely just one eval suite errored)
        if: always() && github.event_name == 'pull_request'
        uses: actions/github-script@v8
        with:
          github-token: ${{ secrets.POSTHOG_BOT_PAT }}
          script: |
            const fs = require("fs")
            const script = require('.github/scripts/post-eval-summary.js')
            // Await the call so that, if it returns a promise, the step only finishes once
            // posting is done and a rejection is reported as a step failure
            await script({ github, context, fs })