mirror of
https://github.com/BillyOutlast/flash-attention-prebuild-wheels-rocm.git
synced 2026-07-01 01:37:53 -04:00
Merge pull request #62 from mjun0812/dev/mjun
feat: improvements to the self-hosted runner setup
This commit is contained in:
@@ -23,7 +23,7 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
flash-attn-version: ["2.8.0"]
|
flash-attn-version: ["2.8.0"]
|
||||||
python-version: ["3.11"]
|
python-version: ["3.11", "3.12"]
|
||||||
torch-version: ["2.7.1"]
|
torch-version: ["2.7.1"]
|
||||||
# https://developer.nvidia.com/cuda-toolkit-archive
|
# https://developer.nvidia.com/cuda-toolkit-archive
|
||||||
cuda-version: ["12.8.1"]
|
cuda-version: ["12.8.1"]
|
||||||
@@ -92,6 +92,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Build wheels
|
- name: Build wheels
|
||||||
timeout-minutes: 1200
|
timeout-minutes: 1200
|
||||||
|
id: build_wheels
|
||||||
shell: bash
|
shell: bash
|
||||||
env:
|
env:
|
||||||
MAX_JOBS: 4
|
MAX_JOBS: 4
|
||||||
@@ -105,7 +106,8 @@ jobs:
|
|||||||
- name: Install Test
|
- name: Install Test
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
pip install --no-cache-dir --force-reinstall flash-attention/dist/${{ env.wheel_name }}
|
pip uninstall -y flash-attn > /dev/null 2>&1
|
||||||
|
pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}
|
||||||
python -c "import flash_attn; print(flash_attn.__version__)"
|
python -c "import flash_attn; print(flash_attn.__version__)"
|
||||||
|
|
||||||
- name: Clean up
|
- name: Clean up
|
||||||
@@ -113,4 +115,3 @@ jobs:
|
|||||||
if: always()
|
if: always()
|
||||||
run: |
|
run: |
|
||||||
rm -rf /opt/hostedtoolcache/Python
|
rm -rf /opt/hostedtoolcache/Python
|
||||||
rm -rf ~/.cache/pip
|
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ The built packages are available on the [release page](https://github.com/mjun08
|
|||||||
**This repository uses a self-hosted runner and AWS CodeBuild for building the wheels. If you find this project helpful, please consider sponsoring to help maintain the infrastructure!**
|
**This repository uses a self-hosted runner and AWS CodeBuild for building the wheels. If you find this project helpful, please consider sponsoring to help maintain the infrastructure!**
|
||||||
|
|
||||||
[](https://github.com/sponsors/mjun0812)
|
[](https://github.com/sponsors/mjun0812)
|
||||||
|
|
||||||
[](https://buymeacoffee.com/mjun0812)
|
[](https://buymeacoffee.com/mjun0812)
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
@@ -57,8 +56,8 @@ History of this repository is available [here](./docs/release_history.md).
|
|||||||
If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions.
|
If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions.
|
||||||
|
|
||||||
1. Fork this repository
|
1. Fork this repository
|
||||||
2. Edit workflow file [`.github/workflows/build.yml`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/.github/workflows/build.yml) to set the version you want to build.
|
2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build.
|
||||||
3. Add tag `v*.*.*` to trigger the build workflow.
|
3. Add tag `v*.*.*` to trigger the build workflow. `git tag v*.*.* && git push --tags`
|
||||||
|
|
||||||
Please note that depending on the combination of versions, it may not be possible to build.
|
Please note that depending on the combination of versions, it may not be possible to build.
|
||||||
|
|
||||||
@@ -76,8 +75,13 @@ cp env.template env
|
|||||||
Edit `env` file to set the environment variables.
|
Edit `env` file to set the environment variables.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Edit env
|
# GitHub Personal Access Token
|
||||||
PERSONAL_ACCESS_TOKEN=[Github Personal Access Token]
|
PERSONAL_ACCESS_TOKEN=[Github Personal Access Token]
|
||||||
|
# or Registry Token for GitHub Actions Runner
|
||||||
|
REGISTRY_TOKEN=[Runner Registry Token]
|
||||||
|
|
||||||
|
# Optional
|
||||||
|
RUNNER_LABELS=Linux,self-hosted
|
||||||
```
|
```
|
||||||
|
|
||||||
Edit the compose.yml file if you are using a repository forked from this repository.
|
Edit the compose.yml file if you are using a repository forked from this repository.
|
||||||
@@ -86,16 +90,19 @@ Edit compose.yml file if you use repository folked from this repository.
|
|||||||
services:
|
services:
|
||||||
runner:
|
runner:
|
||||||
privileged: true
|
privileged: true
|
||||||
|
restart: always
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
|
||||||
|
RUNNER_NAME: self-hosted-runner
|
||||||
|
RUNNER_GROUP: default
|
||||||
|
TARGET_ARCH: x64
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
args:
|
args:
|
||||||
REPOSITORY_URL: [Target Repository URL]
|
GH_RUNNER_VERSION: 2.329.0
|
||||||
PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN
|
|
||||||
GH_RUNNER_VERSION: 2.324.0
|
|
||||||
RUNNER_NAME: self-hosted-runner
|
|
||||||
RUNNER_GROUP: default
|
|
||||||
RUNNER_LABELS: self-hosted
|
|
||||||
TARGET_ARCH: x64
|
TARGET_ARCH: x64
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -107,21 +114,28 @@ docker compose build
|
|||||||
docker compose up -d
|
docker compose up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Getting One-Time Registry Token for GitHub Actions Runner
|
||||||
|
|
||||||
|
```bash
|
||||||
|
gh api \
|
||||||
|
-X POST \
|
||||||
|
/repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
|
||||||
|
```
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
|
||||||
|
If you use this repository in your research and find it helpful, please cite it as follows:
|
||||||
|
|
||||||
|
```bibtex
|
||||||
|
@misc{flash-attention-prebuild-wheels,
|
||||||
|
author = {Morioka, Junya},
|
||||||
|
year = {2025},
|
||||||
|
title = {mjun0812/flash-attention-prebuild-wheels},
|
||||||
|
url = {https://github.com/mjun0812/flash-attention-prebuild-wheels},
|
||||||
|
howpublished = {https://github.com/mjun0812/flash-attention-prebuild-wheels},
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## Original Repository
|
## Original Repository
|
||||||
|
|
||||||
[repo](https://github.com/Dao-AILab/flash-attention)
|
[repo](https://github.com/Dao-AILab/flash-attention)
|
||||||
|
|
||||||
```bibtex
|
|
||||||
@inproceedings{dao2022flashattention,
|
|
||||||
title={Flash{A}ttention: Fast and Memory-Efficient Exact Attention with {IO}-Awareness},
|
|
||||||
author={Dao, Tri and Fu, Daniel Y. and Ermon, Stefano and Rudra, Atri and R{\'e}, Christopher},
|
|
||||||
booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
|
|
||||||
year={2022}
|
|
||||||
}
|
|
||||||
@inproceedings{dao2023flashattention2,
|
|
||||||
title={Flash{A}ttention-2: Faster Attention with Better Parallelism and Work Partitioning},
|
|
||||||
author={Dao, Tri},
|
|
||||||
booktitle={International Conference on Learning Representations (ICLR)},
|
|
||||||
year={2024}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|||||||
@@ -1,11 +1,6 @@
|
|||||||
FROM ubuntu:24.04
|
FROM ubuntu:24.04
|
||||||
|
|
||||||
ARG REPOSITORY_URL
|
ARG GH_RUNNER_VERSION="2.329.0"
|
||||||
ARG PERSONAL_ACCESS_TOKEN
|
|
||||||
ARG GH_RUNNER_VERSION="2.324.0"
|
|
||||||
ARG RUNNER_NAME="self-hosted-github-actions-runner"
|
|
||||||
ARG RUNNER_GROUP="default"
|
|
||||||
ARG RUNNER_LABELS="self-hosted,Linux"
|
|
||||||
ARG TARGET_ARCH="x64"
|
ARG TARGET_ARCH="x64"
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive \
|
ENV DEBIAN_FRONTEND=noninteractive \
|
||||||
@@ -64,15 +59,4 @@ WORKDIR /home/ubuntu
|
|||||||
RUN curl -fsSL -o actions-runner.tar.gz -L "https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${TARGET_ARCH}-${GH_RUNNER_VERSION}.tar.gz" \
|
RUN curl -fsSL -o actions-runner.tar.gz -L "https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${TARGET_ARCH}-${GH_RUNNER_VERSION}.tar.gz" \
|
||||||
&& tar xf actions-runner.tar.gz \
|
&& tar xf actions-runner.tar.gz \
|
||||||
&& rm actions-runner.tar.gz \
|
&& rm actions-runner.tar.gz \
|
||||||
&& sudo ./bin/installdependencies.sh \
|
&& sudo ./bin/installdependencies.sh
|
||||||
&& ./config.sh \
|
|
||||||
--unattended \
|
|
||||||
--url $REPOSITORY_URL \
|
|
||||||
--pat $PERSONAL_ACCESS_TOKEN \
|
|
||||||
--name $RUNNER_NAME \
|
|
||||||
--runnergroup $RUNNER_GROUP \
|
|
||||||
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
|
|
||||||
--work /home/ubuntu/actions-runner \
|
|
||||||
--replace
|
|
||||||
|
|
||||||
CMD ["./run.sh"]
|
|
||||||
|
|||||||
@@ -2,14 +2,16 @@ services:
|
|||||||
runner:
|
runner:
|
||||||
privileged: true
|
privileged: true
|
||||||
restart: always
|
restart: always
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
|
||||||
|
RUNNER_NAME: self-hosted-runner
|
||||||
|
RUNNER_GROUP: default
|
||||||
|
TARGET_ARCH: x64
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
args:
|
args:
|
||||||
REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
|
GH_RUNNER_VERSION: 2.329.0
|
||||||
PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN
|
|
||||||
GH_RUNNER_VERSION: 2.327.1
|
|
||||||
RUNNER_NAME: self-hosted-runner
|
|
||||||
RUNNER_GROUP: default
|
|
||||||
RUNNER_LABELS: self-hosted
|
|
||||||
TARGET_ARCH: x64
|
TARGET_ARCH: x64
|
||||||
|
|||||||
@@ -1,7 +1,30 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
id
|
|
||||||
|
|
||||||
# Start docker daemon
|
# Start docker daemon
|
||||||
sudo service docker start
|
sudo service docker start
|
||||||
|
|
||||||
exec "$@"
|
if [ -n "$PERSONAL_ACCESS_TOKEN" ]; then
|
||||||
|
echo "Using personal access token";
|
||||||
|
./config.sh \
|
||||||
|
--unattended \
|
||||||
|
--url $REPOSITORY_URL \
|
||||||
|
--pat "$PERSONAL_ACCESS_TOKEN" \
|
||||||
|
--name $RUNNER_NAME \
|
||||||
|
--runnergroup $RUNNER_GROUP \
|
||||||
|
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
|
||||||
|
--work /home/ubuntu/actions-runner \
|
||||||
|
--replace;
|
||||||
|
else
|
||||||
|
echo "Using registry token";
|
||||||
|
./config.sh \
|
||||||
|
--unattended \
|
||||||
|
--url $REPOSITORY_URL \
|
||||||
|
--token "$REGISTRY_TOKEN" \
|
||||||
|
--name $RUNNER_NAME \
|
||||||
|
--runnergroup $RUNNER_GROUP \
|
||||||
|
--labels "${RUNNER_LABELS},${TARGET_ARCH}" \
|
||||||
|
--work /home/ubuntu/actions-runner \
|
||||||
|
--replace;
|
||||||
|
fi
|
||||||
|
|
||||||
|
exec "./run.sh"
|
||||||
|
|||||||
@@ -1 +1,3 @@
|
|||||||
PERSONAL_ACCESS_TOKEN=
|
PERSONAL_ACCESS_TOKEN=
|
||||||
|
REGISTRY_TOKEN=
|
||||||
|
RUNNER_LABELS=Linux,self-hosted
|
||||||
Reference in New Issue
Block a user