From b7dfa77278295b735c0c2e836ef69a27ae0d88e9 Mon Sep 17 00:00:00 2001 From: Junya Morioka Date: Thu, 4 Dec 2025 15:00:55 +0900 Subject: [PATCH 1/4] docs: Update README.md --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index febb2ca..e1a1d06 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,6 @@ The built packages are available on the [release page](https://github.com/mjun08 **This repository uses a self-hosted runner and AWS CodeBuild for building the wheels. If you find this project helpful, please consider sponsoring to help maintain the infrastructure!** [![github-sponsor](https://img.shields.io/badge/sponsor-30363D?style=for-the-badge&logo=GitHub-Sponsors&logoColor=#white)](https://github.com/sponsors/mjun0812) - [![buy-me-a-coffee](https://img.shields.io/badge/Buy_Me_A_Coffee-FFDD00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=black)](https://buymeacoffee.com/mjun0812) ## Install @@ -57,8 +56,8 @@ History of this repository is available [here](./docs/release_history.md). If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions. 1. Fork this repository -2. Edit workflow file [`.github/workflows/build.yml`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/.github/workflows/build.yml) to set the version you want to build. -3. Add tag `v*.*.*` to trigger the build workflow. +2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build. +3. Add tag `v*.*.*` to trigger the build workflow. `git tag v*.*.* && git push --tags` Please note that depending on the combination of versions, it may not be possible to build. 
From e538e76c04242c7ab312f4bf816e6220d08b5b02 Mon Sep 17 00:00:00 2001 From: Junya Morioka Date: Thu, 4 Dec 2025 15:39:36 +0900 Subject: [PATCH 2/4] feat: add authorization for self-hosted runner --- .github/workflows/test-self-hosted-linux.yml | 7 +++--- self-hosted-runner/Dockerfile | 25 +++++++++++++++----- self-hosted-runner/compose.yml | 6 +++-- self-hosted-runner/env.template | 2 ++ 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test-self-hosted-linux.yml b/.github/workflows/test-self-hosted-linux.yml index d3a8d65..6ad3410 100644 --- a/.github/workflows/test-self-hosted-linux.yml +++ b/.github/workflows/test-self-hosted-linux.yml @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: flash-attn-version: ["2.8.0"] - python-version: ["3.11"] + python-version: ["3.11", "3.12"] torch-version: ["2.7.1"] # https://developer.nvidia.com/cuda-toolkit-archive cuda-version: ["12.8.1"] @@ -92,6 +92,7 @@ jobs: - name: Build wheels timeout-minutes: 1200 + id: build_wheels shell: bash env: MAX_JOBS: 4 @@ -105,7 +106,8 @@ jobs: - name: Install Test shell: bash run: | - pip install --no-cache-dir --force-reinstall flash-attention/dist/${{ env.wheel_name }} + pip uninstall -y flash-attn > /dev/null 2>&1 + pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }} python -c "import flash_attn; print(flash_attn.__version__)" - name: Clean up @@ -113,4 +115,3 @@ jobs: if: always() run: | rm -rf /opt/hostedtoolcache/Python - rm -rf ~/.cache/pip diff --git a/self-hosted-runner/Dockerfile b/self-hosted-runner/Dockerfile index f231690..1e69b5a 100644 --- a/self-hosted-runner/Dockerfile +++ b/self-hosted-runner/Dockerfile @@ -1,12 +1,13 @@ FROM ubuntu:24.04 -ARG REPOSITORY_URL -ARG PERSONAL_ACCESS_TOKEN -ARG GH_RUNNER_VERSION="2.324.0" +ARG REPOSITORY_URL="https://github.com/user/repo" +ARG GH_RUNNER_VERSION="2.329.0" ARG RUNNER_NAME="self-hosted-github-actions-runner" ARG RUNNER_GROUP="default" ARG 
RUNNER_LABELS="self-hosted,Linux" ARG TARGET_ARCH="x64" +ARG PERSONAL_ACCESS_TOKEN="" +ARG REGISTRY_TOKEN="" ENV DEBIAN_FRONTEND=noninteractive \ AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache @@ -65,14 +66,26 @@ RUN curl -fsSL -o actions-runner.tar.gz -L "https://github.com/actions/runner/re && tar xf actions-runner.tar.gz \ && rm actions-runner.tar.gz \ && sudo ./bin/installdependencies.sh \ - && ./config.sh \ + && if [ -n "$PERSONAL_ACCESS_TOKEN" ]; then \ + ./config.sh \ --unattended \ --url $REPOSITORY_URL \ - --pat $PERSONAL_ACCESS_TOKEN \ + --pat "$PERSONAL_ACCESS_TOKEN" \ --name $RUNNER_NAME \ --runnergroup $RUNNER_GROUP \ --labels "${RUNNER_LABELS},${TARGET_ARCH}" \ --work /home/ubuntu/actions-runner \ - --replace + --replace; \ + else \ + ./config.sh \ + --unattended \ + --url $REPOSITORY_URL \ + --token "$REGISTRY_TOKEN" \ + --name $RUNNER_NAME \ + --runnergroup $RUNNER_GROUP \ + --labels "${RUNNER_LABELS},${TARGET_ARCH}" \ + --work /home/ubuntu/actions-runner \ + --replace; \ + fi CMD ["./run.sh"] diff --git a/self-hosted-runner/compose.yml b/self-hosted-runner/compose.yml index c4e92ab..b7c82b6 100644 --- a/self-hosted-runner/compose.yml +++ b/self-hosted-runner/compose.yml @@ -2,14 +2,16 @@ services: runner: privileged: true restart: always + env_file: + - .env build: context: . 
dockerfile: Dockerfile args: REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN - GH_RUNNER_VERSION: 2.327.1 + GH_RUNNER_VERSION: 2.329.0 RUNNER_NAME: self-hosted-runner RUNNER_GROUP: default - RUNNER_LABELS: self-hosted + RUNNER_LABELS: self-hosted,Linux TARGET_ARCH: x64 diff --git a/self-hosted-runner/env.template b/self-hosted-runner/env.template index cf1aca7..923ae88 100644 --- a/self-hosted-runner/env.template +++ b/self-hosted-runner/env.template @@ -1 +1,3 @@ PERSONAL_ACCESS_TOKEN= +REGISTRY_TOKEN= +RUNNER_LABELS=Linux,self-hosted \ No newline at end of file From 2bad59efd6c7084119c2671b06c986180b9f6995 Mon Sep 17 00:00:00 2001 From: Junya Morioka Date: Thu, 4 Dec 2025 16:12:29 +0900 Subject: [PATCH 3/4] update README.md --- README.md | 48 ++++++++++++++++++++-------------- self-hosted-runner/compose.yml | 2 -- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index e1a1d06..a959185 100644 --- a/README.md +++ b/README.md @@ -75,8 +75,13 @@ cp env.template env Edit `env` file to set the environment variables. ```bash -# Edit env +# GitHub Personal Access Token used to register the runner PERSONAL_ACCESS_TOKEN=[Github Personal Access Token] +# or a one-time registration token for the GitHub Actions runner +REGISTRY_TOKEN=[Runner Registration Token] + +# Optional +RUNNER_LABELS=Linux,self-hosted ``` Edit compose.yml file if you use repository folked from this repository.
@@ -90,11 +95,9 @@ services: dockerfile: Dockerfile args: REPOSITORY_URL: [Target Repository URL] - PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN - GH_RUNNER_VERSION: 2.324.0 + GH_RUNNER_VERSION: 2.329.0 RUNNER_NAME: self-hosted-runner RUNNER_GROUP: default - RUNNER_LABELS: self-hosted TARGET_ARCH: x64 ``` @@ -106,21 +109,28 @@ docker compose build docker compose up -d ``` +### Getting a One-Time Registration Token for the GitHub Actions Runner + +```bash +gh api \ + -X POST \ + /repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token +``` + +## Citation + +If you use this repository in your research and find it helpful, please cite it as follows: + +```bibtex +@misc{flash-attention-prebuild-wheels, + author = {Morioka, Junya}, + year = {2025}, + title = {mjun0812/flash-attention-prebuild-wheels}, + url = {https://github.com/mjun0812/flash-attention-prebuild-wheels}, + howpublished = {https://github.com/mjun0812/flash-attention-prebuild-wheels}, +} +``` + ## Original Repository [repo](https://github.com/Dao-AILab/flash-attention) - -```bibtex -@inproceedings{dao2022flashattention, - title={Flash{A}ttention: Fast and Memory-Efficient Exact Attention with {IO}-Awareness}, - author={Dao, Tri and Fu, Daniel Y.
and Ermon, Stefano and Rudra, Atri and R{\'e}, Christopher}, - booktitle={Advances in Neural Information Processing Systems (NeurIPS)}, - year={2022} -} -@inproceedings{dao2023flashattention2, - title={Flash{A}ttention-2: Faster Attention with Better Parallelism and Work Partitioning}, - author={Dao, Tri}, - booktitle={International Conference on Learning Representations (ICLR)}, - year={2024} -} -``` diff --git a/self-hosted-runner/compose.yml b/self-hosted-runner/compose.yml index b7c82b6..bebcd1e 100644 --- a/self-hosted-runner/compose.yml +++ b/self-hosted-runner/compose.yml @@ -9,9 +9,7 @@ services: dockerfile: Dockerfile args: REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels - PERSONAL_ACCESS_TOKEN: $PERSONAL_ACCESS_TOKEN GH_RUNNER_VERSION: 2.329.0 RUNNER_NAME: self-hosted-runner RUNNER_GROUP: default - RUNNER_LABELS: self-hosted,Linux TARGET_ARCH: x64 From 3e30ef2270492718e91150ec3d0521ca127d93e3 Mon Sep 17 00:00:00 2001 From: Junya Morioka Date: Thu, 4 Dec 2025 16:42:36 +0900 Subject: [PATCH 4/4] feat: improve registration for self-hosted runner --- README.md | 11 ++++++++--- self-hosted-runner/Dockerfile | 31 +------------------------------ self-hosted-runner/compose.yml | 8 +++++--- self-hosted-runner/entrypoint.sh | 27 +++++++++++++++++++++++++-- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index a959185..fa865e1 100644 --- a/README.md +++ b/README.md @@ -90,14 +90,19 @@ Edit compose.yml file if you use repository folked from this repository. services: runner: privileged: true + restart: always + env_file: + - .env + environment: + REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY] + RUNNER_NAME: self-hosted-runner + RUNNER_GROUP: default + TARGET_ARCH: x64 build: context: .
dockerfile: Dockerfile args: - REPOSITORY_URL: [Target Repository URL] GH_RUNNER_VERSION: 2.329.0 - RUNNER_NAME: self-hosted-runner - RUNNER_GROUP: default TARGET_ARCH: x64 ``` diff --git a/self-hosted-runner/Dockerfile b/self-hosted-runner/Dockerfile index 1e69b5a..f46da23 100644 --- a/self-hosted-runner/Dockerfile +++ b/self-hosted-runner/Dockerfile @@ -1,13 +1,7 @@ FROM ubuntu:24.04 -ARG REPOSITORY_URL="https://github.com/user/repo" ARG GH_RUNNER_VERSION="2.329.0" -ARG RUNNER_NAME="self-hosted-github-actions-runner" -ARG RUNNER_GROUP="default" -ARG RUNNER_LABELS="self-hosted,Linux" ARG TARGET_ARCH="x64" -ARG PERSONAL_ACCESS_TOKEN="" -ARG REGISTRY_TOKEN="" ENV DEBIAN_FRONTEND=noninteractive \ AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache @@ -65,27 +59,4 @@ WORKDIR /home/ubuntu RUN curl -fsSL -o actions-runner.tar.gz -L "https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${TARGET_ARCH}-${GH_RUNNER_VERSION}.tar.gz" \ && tar xf actions-runner.tar.gz \ && rm actions-runner.tar.gz \ - && sudo ./bin/installdependencies.sh \ - && if [ -n "$PERSONAL_ACCESS_TOKEN" ]; then \ - ./config.sh \ - --unattended \ - --url $REPOSITORY_URL \ - --pat "$PERSONAL_ACCESS_TOKEN" \ - --name $RUNNER_NAME \ - --runnergroup $RUNNER_GROUP \ - --labels "${RUNNER_LABELS},${TARGET_ARCH}" \ - --work /home/ubuntu/actions-runner \ - --replace; \ - else \ - ./config.sh \ - --unattended \ - --url $REPOSITORY_URL \ - --token "$REGISTRY_TOKEN" \ - --name $RUNNER_NAME \ - --runnergroup $RUNNER_GROUP \ - --labels "${RUNNER_LABELS},${TARGET_ARCH}" \ - --work /home/ubuntu/actions-runner \ - --replace; \ - fi - -CMD ["./run.sh"] + && sudo ./bin/installdependencies.sh diff --git a/self-hosted-runner/compose.yml b/self-hosted-runner/compose.yml index bebcd1e..0e03fd4 100644 --- a/self-hosted-runner/compose.yml +++ b/self-hosted-runner/compose.yml @@ -4,12 +4,14 @@ services: restart: always env_file: - .env + environment: + REPOSITORY_URL: 
https://github.com/mjun0812/flash-attention-prebuild-wheels + RUNNER_NAME: self-hosted-runner + RUNNER_GROUP: default + TARGET_ARCH: x64 build: context: . dockerfile: Dockerfile args: - REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels GH_RUNNER_VERSION: 2.329.0 - RUNNER_NAME: self-hosted-runner - RUNNER_GROUP: default TARGET_ARCH: x64 diff --git a/self-hosted-runner/entrypoint.sh b/self-hosted-runner/entrypoint.sh index d56b933..1a04ffe 100644 --- a/self-hosted-runner/entrypoint.sh +++ b/self-hosted-runner/entrypoint.sh @@ -1,7 +1,30 @@ #!/bin/bash -id # Start docker daemon sudo service docker start -exec "$@" \ No newline at end of file +if [ -n "$PERSONAL_ACCESS_TOKEN" ]; then + echo "Using personal access token"; + ./config.sh \ + --unattended \ + --url $REPOSITORY_URL \ + --pat "$PERSONAL_ACCESS_TOKEN" \ + --name $RUNNER_NAME \ + --runnergroup $RUNNER_GROUP \ + --labels "${RUNNER_LABELS},${TARGET_ARCH}" \ + --work /home/ubuntu/actions-runner \ + --replace; +else + echo "Using registry token"; + ./config.sh \ + --unattended \ + --url $REPOSITORY_URL \ + --token "$REGISTRY_TOKEN" \ + --name $RUNNER_NAME \ + --runnergroup $RUNNER_GROUP \ + --labels "${RUNNER_LABELS},${TARGET_ARCH}" \ + --work /home/ubuntu/actions-runner \ + --replace; +fi + +exec "./run.sh"