Merge pull request #62 from mjun0812/dev/mjun

feat: improvements to the self-hosted runner setup
2026-07-01 01:37:53 -04:00 · 2025-12-04 16:44:19 +09:00
parent e3fe4ba075 3e30ef2270
commit f71a618ff9
6 changed files with 80 additions and 54 deletions
@@ -23,7 +23,7 @@ jobs:
      fail-fast: false
      matrix:
        flash-attn-version: ["2.8.0"]
-        python-version: ["3.11"]
+        python-version: ["3.11", "3.12"]
        torch-version: ["2.7.1"]
        # https://developer.nvidia.com/cuda-toolkit-archive
        cuda-version: ["12.8.1"]
@@ -92,6 +92,7 @@ jobs:
      - name: Build wheels
        timeout-minutes: 1200
        id: build_wheels
        shell: bash
        env:
          MAX_JOBS: 4
@@ -105,7 +106,8 @@ jobs:
      - name: Install Test
        shell: bash
        run: |
-          pip install --no-cache-dir --force-reinstall flash-attention/dist/${{ env.wheel_name }}
+          pip uninstall -y flash-attn > /dev/null 2>&1
          pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}
          python -c "import flash_attn; print(flash_attn.__version__)"
      - name: Clean up
@@ -113,4 +115,3 @@ jobs:
        if: always()
        run: |
          rm -rf /opt/hostedtoolcache/Python
          rm -rf ~/.cache/pip
@@ -13,7 +13,6 @@ The built packages are available on the [release page](https://github.com/mjun08
 **This repository uses a self-hosted runner and AWS CodeBuild for building the wheels. If you find this project helpful, please consider sponsoring to help maintain the infrastructure!**
 [![github-sponsor](https://img.shields.io/badge/sponsor-30363D?style=for-the-badge&logo=GitHub-Sponsors&logoColor=#white)](https://github.com/sponsors/mjun0812)
 [![buy-me-a-coffee](https://img.shields.io/badge/Buy_Me_A_Coffee-FFDD00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=black)](https://buymeacoffee.com/mjun0812)
 ## Install
@@ -57,8 +56,8 @@ History of this repository is available [here](./docs/release_history.md).
 If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions.
 1. Fork this repository
-2. Edit workflow file [`.github/workflows/build.yml`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/.github/workflows/build.yml) to set the version you want to build.
+2. Edit the Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build.
-3. Add tag `v*.*.*` to trigger the build workflow.
+3. Add a tag matching `v*.*.*` to trigger the build workflow, e.g. `git tag v1.0.0 && git push --tags`.
 Please note that depending on the combination of versions, it may not be possible to build.
@@ -76,8 +75,13 @@ cp env.template env
 Edit `env` file to set the environment variables.
 ```bash
-# Edit env
+# GitHub Personal Access Token
 PERSONAL_ACCESS_TOKEN=[GitHub Personal Access Token]
 # or Registry Token for GitHub Actions Runner
 REGISTRY_TOKEN=[Runner Registry Token]
 # Optional
 RUNNER_LABELS=Linux,self-hosted
 ```
 Edit the compose.yml file if you use a repository forked from this repository.
@@ -86,16 +90,19 @@ Edit compose.yml file if you use repository folked from this repository.
 services:
  runner:
    privileged: true
    restart: always
    env_file:
      - .env
    environment:
      REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
      RUNNER_NAME: self-hosted-runner
      RUNNER_GROUP: default
      TARGET_ARCH: x64
    build:
      context: .
      dockerfile: Dockerfile
      args:
-        REPOSITORY_URL: [Target Repository URL]
+        GH_RUNNER_VERSION: 2.329.0
        PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN
        GH_RUNNER_VERSION: 2.324.0
        RUNNER_NAME: self-hosted-runner
        RUNNER_GROUP: default
        RUNNER_LABELS: self-hosted
        TARGET_ARCH: x64
 ```
@@ -107,21 +114,28 @@ docker compose build
 docker compose up -d
 ```
 ### Getting One-Time Registry Token for GitHub Actions Runner
 ```bash
 gh api \
  -X POST \
  /repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
 ```
 ## Citation
 If you use this repository in your research and find it helpful, please cite it as follows!
 ```bibtex
@misc{flash-attention-prebuild-wheels,
 author = {Morioka, Junya},
 year = {2025},
 title = {mjun0812/flash-attention-prebuild-wheels},
 url = {https://github.com/mjun0812/flash-attention-prebuild-wheels},
 howpublished = {https://github.com/mjun0812/flash-attention-prebuild-wheels},
 }
 ```
 ## Original Repository
 [repo](https://github.com/Dao-AILab/flash-attention)
 ```bibtex
@inproceedings{dao2022flashattention,
  title={Flash{A}ttention: Fast and Memory-Efficient Exact Attention with {IO}-Awareness},
  author={Dao, Tri and Fu, Daniel Y. and Ermon, Stefano and Rudra, Atri and R{\'e}, Christopher},
  booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
  year={2022}
 }
@inproceedings{dao2023flashattention2,
  title={Flash{A}ttention-2: Faster Attention with Better Parallelism and Work Partitioning},
  author={Dao, Tri},
  booktitle={International Conference on Learning Representations (ICLR)},
  year={2024}
 }
 ```
@@ -1,11 +1,6 @@
 FROM ubuntu:24.04
-ARG REPOSITORY_URL
+ARG GH_RUNNER_VERSION="2.329.0"
 ARG PERSONAL_ACCESS_TOKEN
 ARG GH_RUNNER_VERSION="2.324.0"
 ARG RUNNER_NAME="self-hosted-github-actions-runner"
 ARG RUNNER_GROUP="default"
 ARG RUNNER_LABELS="self-hosted,Linux"
 ARG TARGET_ARCH="x64"
 ENV DEBIAN_FRONTEND=noninteractive \
@@ -64,15 +59,4 @@ WORKDIR /home/ubuntu
 RUN curl -fsSL -o actions-runner.tar.gz -L "https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${TARGET_ARCH}-${GH_RUNNER_VERSION}.tar.gz" \
    && tar xf actions-runner.tar.gz \
    && rm actions-runner.tar.gz \
-    && sudo ./bin/installdependencies.sh \
+    && sudo ./bin/installdependencies.sh
    && ./config.sh \
    --unattended \
    --url $REPOSITORY_URL \
    --pat $PERSONAL_ACCESS_TOKEN \
    --name $RUNNER_NAME \
    --runnergroup $RUNNER_GROUP \
    --labels "${RUNNER_LABELS},${TARGET_ARCH}" \
    --work /home/ubuntu/actions-runner \
    --replace
 CMD ["./run.sh"]
@@ -2,14 +2,16 @@ services:
  runner:
    privileged: true
    restart: always
    env_file:
      - .env
    environment:
      REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
      RUNNER_NAME: self-hosted-runner
      RUNNER_GROUP: default
      TARGET_ARCH: x64
    build:
      context: .
      dockerfile: Dockerfile
      args:
-        REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
+        GH_RUNNER_VERSION: 2.329.0
        PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN
        GH_RUNNER_VERSION: 2.327.1
        RUNNER_NAME: self-hosted-runner
        RUNNER_GROUP: default
        RUNNER_LABELS: self-hosted
        TARGET_ARCH: x64
@@ -1,7 +1,30 @@
 #!/bin/bash
 id
 # Start docker daemon
 sudo service docker start
-exec "$@"
+if [ -n "$PERSONAL_ACCESS_TOKEN" ]; then
    echo "Using personal access token";
    ./config.sh \
        --unattended \
        --url $REPOSITORY_URL \
        --pat "$PERSONAL_ACCESS_TOKEN" \
        --name $RUNNER_NAME \
        --runnergroup $RUNNER_GROUP \
        --labels "${RUNNER_LABELS},${TARGET_ARCH}" \
        --work /home/ubuntu/actions-runner \
        --replace;
 else
    echo "Using registry token";
    ./config.sh \
        --unattended \
        --url $REPOSITORY_URL \
        --token "$REGISTRY_TOKEN" \
        --name $RUNNER_NAME \
        --runnergroup $RUNNER_GROUP \
        --labels "${RUNNER_LABELS},${TARGET_ARCH}" \
        --work /home/ubuntu/actions-runner \
        --replace;
 fi
 exec "./run.sh"
@@ -1 +1,3 @@
 PERSONAL_ACCESS_TOKEN=
 REGISTRY_TOKEN=
 RUNNER_LABELS=Linux,self-hosted