mirror of
https://github.com/BillyOutlast/flash-attention-prebuild-wheels-rocm.git
synced 2026-06-30 23:57:53 -04:00
Merge pull request #62 from mjun0812/dev/mjun
feat: improvements to the self-hosted runner setup
This commit is contained in:
@@ -23,7 +23,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
flash-attn-version: ["2.8.0"]
|
||||
python-version: ["3.11"]
|
||||
python-version: ["3.11", "3.12"]
|
||||
torch-version: ["2.7.1"]
|
||||
# https://developer.nvidia.com/cuda-toolkit-archive
|
||||
cuda-version: ["12.8.1"]
|
||||
@@ -92,6 +92,7 @@ jobs:
|
||||
|
||||
- name: Build wheels
|
||||
timeout-minutes: 1200
|
||||
id: build_wheels
|
||||
shell: bash
|
||||
env:
|
||||
MAX_JOBS: 4
|
||||
@@ -105,7 +106,8 @@ jobs:
|
||||
- name: Install Test
|
||||
shell: bash
|
||||
run: |
|
||||
pip install --no-cache-dir --force-reinstall flash-attention/dist/${{ env.wheel_name }}
|
||||
pip uninstall -y flash-attn > /dev/null 2>&1
|
||||
pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}
|
||||
python -c "import flash_attn; print(flash_attn.__version__)"
|
||||
|
||||
- name: Clean up
|
||||
@@ -113,4 +115,3 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
rm -rf /opt/hostedtoolcache/Python
|
||||
rm -rf ~/.cache/pip
|
||||
|
||||
@@ -13,7 +13,6 @@ The built packages are available on the [release page](https://github.com/mjun08
|
||||
**This repository uses a self-hosted runner and AWS CodeBuild for building the wheels. If you find this project helpful, please consider sponsoring to help maintain the infrastructure!**
|
||||
|
||||
[Sponsor on GitHub](https://github.com/sponsors/mjun0812)
|
||||
|
||||
[Buy Me a Coffee](https://buymeacoffee.com/mjun0812)
|
||||
|
||||
## Install
|
||||
@@ -57,8 +56,8 @@ History of this repository is available [here](./docs/release_history.md).
|
||||
If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions.
|
||||
|
||||
1. Fork this repository
|
||||
2. Edit workflow file [`.github/workflows/build.yml`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/.github/workflows/build.yml) to set the version you want to build.
|
||||
3. Add tag `v*.*.*` to trigger the build workflow.
|
||||
2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build.
|
||||
3. Add tag `v*.*.*` to trigger the build workflow. `git tag v*.*.* && git push --tags`
|
||||
|
||||
Please note that depending on the combination of versions, it may not be possible to build.
|
||||
|
||||
@@ -76,8 +75,13 @@ cp env.template env
|
||||
Edit `env` file to set the environment variables.
|
||||
|
||||
```bash
|
||||
# Edit env
|
||||
# GitHub Personal Access Token (used to register the runner)
|
||||
PERSONAL_ACCESS_TOKEN=[Github Personal Access Token]
|
||||
# or a one-time registration token for the GitHub Actions runner
|
||||
REGISTRY_TOKEN=[Runner Registry Token]
|
||||
|
||||
# Optional
|
||||
RUNNER_LABELS=Linux,self-hosted
|
||||
```
|
||||
|
||||
Edit compose.yml file if you use a repository forked from this repository.
|
||||
@@ -86,16 +90,19 @@ Edit compose.yml file if you use repository forked from this repository.
|
||||
services:
|
||||
runner:
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
|
||||
RUNNER_NAME: self-hosted-runner
|
||||
RUNNER_GROUP: default
|
||||
TARGET_ARCH: x64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
REPOSITORY_URL: [Target Repository URL]
|
||||
PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN
|
||||
GH_RUNNER_VERSION: 2.324.0
|
||||
RUNNER_NAME: self-hosted-runner
|
||||
RUNNER_GROUP: default
|
||||
RUNNER_LABELS: self-hosted
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: x64
|
||||
```
|
||||
|
||||
@@ -107,21 +114,28 @@ docker compose build
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Getting a One-Time Registration Token for the GitHub Actions Runner
|
||||
|
||||
```bash
|
||||
gh api \
|
||||
-X POST \
|
||||
/repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
|
||||
```
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this repository in your research and find it helpful, please cite it as follows!
|
||||
|
||||
```bibtex
|
||||
@misc{flash-attention-prebuild-wheels,
|
||||
author = {Morioka, Junya},
|
||||
year = {2025},
|
||||
title = {mjun0812/flash-attention-prebuild-wheels},
|
||||
url = {https://github.com/mjun0812/flash-attention-prebuild-wheels},
|
||||
howpublished = {https://github.com/mjun0812/flash-attention-prebuild-wheels},
|
||||
}
|
||||
```
|
||||
|
||||
## Original Repository
|
||||
|
||||
[repo](https://github.com/Dao-AILab/flash-attention)
|
||||
|
||||
```bibtex
|
||||
@inproceedings{dao2022flashattention,
|
||||
title={Flash{A}ttention: Fast and Memory-Efficient Exact Attention with {IO}-Awareness},
|
||||
author={Dao, Tri and Fu, Daniel Y. and Ermon, Stefano and Rudra, Atri and R{\'e}, Christopher},
|
||||
booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
|
||||
year={2022}
|
||||
}
|
||||
@inproceedings{dao2023flashattention2,
|
||||
title={Flash{A}ttention-2: Faster Attention with Better Parallelism and Work Partitioning},
|
||||
author={Dao, Tri},
|
||||
booktitle={International Conference on Learning Representations (ICLR)},
|
||||
year={2024}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1,11 +1,6 @@
|
||||
FROM ubuntu:24.04
|
||||
|
||||
ARG REPOSITORY_URL
|
||||
ARG PERSONAL_ACCESS_TOKEN
|
||||
ARG GH_RUNNER_VERSION="2.324.0"
|
||||
ARG RUNNER_NAME="self-hosted-github-actions-runner"
|
||||
ARG RUNNER_GROUP="default"
|
||||
ARG RUNNER_LABELS="self-hosted,Linux"
|
||||
ARG GH_RUNNER_VERSION="2.329.0"
|
||||
ARG TARGET_ARCH="x64"
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
@@ -64,15 +59,4 @@ WORKDIR /home/ubuntu
|
||||
RUN curl -fsSL -o actions-runner.tar.gz -L "https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${TARGET_ARCH}-${GH_RUNNER_VERSION}.tar.gz" \
|
||||
&& tar xf actions-runner.tar.gz \
|
||||
&& rm actions-runner.tar.gz \
|
||||
&& sudo ./bin/installdependencies.sh \
|
||||
&& ./config.sh \
|
||||
--unattended \
|
||||
--url $REPOSITORY_URL \
|
||||
--pat $PERSONAL_ACCESS_TOKEN \
|
||||
--name $RUNNER_NAME \
|
||||
--runnergroup $RUNNER_GROUP \
|
||||
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
|
||||
--work /home/ubuntu/actions-runner \
|
||||
--replace
|
||||
|
||||
CMD ["./run.sh"]
|
||||
&& sudo ./bin/installdependencies.sh
|
||||
|
||||
@@ -2,14 +2,16 @@ services:
|
||||
runner:
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
|
||||
RUNNER_NAME: self-hosted-runner
|
||||
RUNNER_GROUP: default
|
||||
TARGET_ARCH: x64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
|
||||
PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN
|
||||
GH_RUNNER_VERSION: 2.327.1
|
||||
RUNNER_NAME: self-hosted-runner
|
||||
RUNNER_GROUP: default
|
||||
RUNNER_LABELS: self-hosted
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: x64
|
||||
|
||||
@@ -1,7 +1,30 @@
|
||||
#!/bin/bash
|
||||
id
|
||||
|
||||
# Start docker daemon
|
||||
sudo service docker start
|
||||
|
||||
exec "$@"
|
||||
if [ -n "$PERSONAL_ACCESS_TOKEN" ]; then
|
||||
echo "Using personal access token";
|
||||
./config.sh \
|
||||
--unattended \
|
||||
--url $REPOSITORY_URL \
|
||||
--pat "$PERSONAL_ACCESS_TOKEN" \
|
||||
--name $RUNNER_NAME \
|
||||
--runnergroup $RUNNER_GROUP \
|
||||
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
|
||||
--work /home/ubuntu/actions-runner \
|
||||
--replace;
|
||||
else
|
||||
echo "Using registry token";
|
||||
./config.sh \
|
||||
--unattended \
|
||||
--url $REPOSITORY_URL \
|
||||
--token "$REGISTRY_TOKEN" \
|
||||
--name $RUNNER_NAME \
|
||||
--runnergroup $RUNNER_GROUP \
|
||||
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
|
||||
--work /home/ubuntu/actions-runner \
|
||||
--replace;
|
||||
fi
|
||||
|
||||
exec "./run.sh"
|
||||
|
||||
@@ -1 +1,3 @@
|
||||
PERSONAL_ACCESS_TOKEN=
|
||||
REGISTRY_TOKEN=
|
||||
RUNNER_LABELS=Linux,self-hosted
|
||||
Reference in New Issue
Block a user