Merge pull request #62 from mjun0812/dev/mjun

feat: improvements to the self-hosted runner setup
This commit is contained in:
Junya Morioka
2025-12-04 16:44:19 +09:00
committed by GitHub
6 changed files with 80 additions and 54 deletions
+4 -3
View File
@@ -23,7 +23,7 @@ jobs:
fail-fast: false
matrix:
flash-attn-version: ["2.8.0"]
python-version: ["3.11"]
python-version: ["3.11", "3.12"]
torch-version: ["2.7.1"]
# https://developer.nvidia.com/cuda-toolkit-archive
cuda-version: ["12.8.1"]
@@ -92,6 +92,7 @@ jobs:
- name: Build wheels
timeout-minutes: 1200
id: build_wheels
shell: bash
env:
MAX_JOBS: 4
@@ -105,7 +106,8 @@ jobs:
- name: Install Test
shell: bash
run: |
pip install --no-cache-dir --force-reinstall flash-attention/dist/${{ env.wheel_name }}
pip uninstall -y flash-attn > /dev/null 2>&1
pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}
python -c "import flash_attn; print(flash_attn.__version__)"
- name: Clean up
@@ -113,4 +115,3 @@ jobs:
if: always()
run: |
rm -rf /opt/hostedtoolcache/Python
rm -rf ~/.cache/pip
+39 -25
View File
@@ -13,7 +13,6 @@ The built packages are available on the [release page](https://github.com/mjun08
**This repository uses a self-hosted runner and AWS CodeBuild for building the wheels. If you find this project helpful, please consider sponsoring to help maintain the infrastructure!**
[![github-sponsor](https://img.shields.io/badge/sponsor-30363D?style=for-the-badge&logo=GitHub-Sponsors&logoColor=white)](https://github.com/sponsors/mjun0812)
[![buy-me-a-coffee](https://img.shields.io/badge/Buy_Me_A_Coffee-FFDD00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=black)](https://buymeacoffee.com/mjun0812)
## Install
@@ -57,8 +56,8 @@ History of this repository is available [here](./docs/release_history.md).
If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions.
1. Fork this repository
2. Edit workflow file [`.github/workflows/build.yml`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/.github/workflows/build.yml) to set the version you want to build.
3. Add tag `v*.*.*` to trigger the build workflow.
2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build.
3. Add a tag matching `v*.*.*` to trigger the build workflow, e.g. `git tag v1.2.3 && git push --tags`.
Please note that depending on the combination of versions, it may not be possible to build.
@@ -76,8 +75,13 @@ cp env.template env
Edit `env` file to set the environment variables.
```bash
# Edit env
# GitHub Personal Access Token used to register the runner
PERSONAL_ACCESS_TOKEN=[GitHub Personal Access Token]
# or a one-time registration token for the GitHub Actions runner
REGISTRY_TOKEN=[Runner Registry Token]
# Optional
RUNNER_LABELS=Linux,self-hosted
```
Edit the compose.yml file if you are using a repository forked from this repository.
@@ -86,16 +90,19 @@ Edit compose.yml file if you use repository folked from this repository.
services:
runner:
privileged: true
restart: always
env_file:
- .env
environment:
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
RUNNER_NAME: self-hosted-runner
RUNNER_GROUP: default
TARGET_ARCH: x64
build:
context: .
dockerfile: Dockerfile
args:
REPOSITORY_URL: [Target Repository URL]
PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN
GH_RUNNER_VERSION: 2.324.0
RUNNER_NAME: self-hosted-runner
RUNNER_GROUP: default
RUNNER_LABELS: self-hosted
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: x64
```
@@ -107,21 +114,28 @@ docker compose build
docker compose up -d
```
### Getting One-Time Registry Token for GitHub Actions Runner
```bash
gh api \
-X POST \
/repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
```
## Citation
If you use this repository in your research and find it helpful, please cite it as follows!
```bibtex
@misc{flash-attention-prebuild-wheels,
author = {Morioka, Junya},
year = {2025},
title = {mjun0812/flash-attention-prebuild-wheels},
url = {https://github.com/mjun0812/flash-attention-prebuild-wheels},
howpublished = {https://github.com/mjun0812/flash-attention-prebuild-wheels},
}
```
## Original Repository
[repo](https://github.com/Dao-AILab/flash-attention)
```bibtex
@inproceedings{dao2022flashattention,
title={Flash{A}ttention: Fast and Memory-Efficient Exact Attention with {IO}-Awareness},
author={Dao, Tri and Fu, Daniel Y. and Ermon, Stefano and Rudra, Atri and R{\'e}, Christopher},
booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
year={2022}
}
@inproceedings{dao2023flashattention2,
title={Flash{A}ttention-2: Faster Attention with Better Parallelism and Work Partitioning},
author={Dao, Tri},
booktitle={International Conference on Learning Representations (ICLR)},
year={2024}
}
```
+2 -18
View File
@@ -1,11 +1,6 @@
FROM ubuntu:24.04
ARG REPOSITORY_URL
ARG PERSONAL_ACCESS_TOKEN
ARG GH_RUNNER_VERSION="2.324.0"
ARG RUNNER_NAME="self-hosted-github-actions-runner"
ARG RUNNER_GROUP="default"
ARG RUNNER_LABELS="self-hosted,Linux"
ARG GH_RUNNER_VERSION="2.329.0"
ARG TARGET_ARCH="x64"
ENV DEBIAN_FRONTEND=noninteractive \
@@ -64,15 +59,4 @@ WORKDIR /home/ubuntu
RUN curl -fsSL -o actions-runner.tar.gz -L "https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${TARGET_ARCH}-${GH_RUNNER_VERSION}.tar.gz" \
&& tar xf actions-runner.tar.gz \
&& rm actions-runner.tar.gz \
&& sudo ./bin/installdependencies.sh \
&& ./config.sh \
--unattended \
--url $REPOSITORY_URL \
--pat $PERSONAL_ACCESS_TOKEN \
--name $RUNNER_NAME \
--runnergroup $RUNNER_GROUP \
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
--work /home/ubuntu/actions-runner \
--replace
CMD ["./run.sh"]
&& sudo ./bin/installdependencies.sh
+8 -6
View File
@@ -2,14 +2,16 @@ services:
runner:
privileged: true
restart: always
env_file:
- .env
environment:
REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
RUNNER_NAME: self-hosted-runner
RUNNER_GROUP: default
TARGET_ARCH: x64
build:
context: .
dockerfile: Dockerfile
args:
REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN
GH_RUNNER_VERSION: 2.327.1
RUNNER_NAME: self-hosted-runner
RUNNER_GROUP: default
RUNNER_LABELS: self-hosted
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: x64
+25 -2
View File
@@ -1,7 +1,30 @@
#!/bin/bash
id
# Start docker daemon
sudo service docker start
exec "$@"
if [ -n "$PERSONAL_ACCESS_TOKEN" ]; then
echo "Using personal access token";
./config.sh \
--unattended \
--url $REPOSITORY_URL \
--pat "$PERSONAL_ACCESS_TOKEN" \
--name $RUNNER_NAME \
--runnergroup $RUNNER_GROUP \
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
--work /home/ubuntu/actions-runner \
--replace;
else
echo "Using registry token";
./config.sh \
--unattended \
--url $REPOSITORY_URL \
--token "$REGISTRY_TOKEN" \
--name $RUNNER_NAME \
--runnergroup $RUNNER_GROUP \
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
--work /home/ubuntu/actions-runner \
--replace;
fi
exec "./run.sh"
+2
View File
@@ -1 +1,3 @@
PERSONAL_ACCESS_TOKEN=
REGISTRY_TOKEN=
RUNNER_LABELS=Linux,self-hosted