Add a self-hosted Linux ARM64 build job (workflows and README).

* _build_linux_arm_self_host.yml: the default of the "Runner type" input
  changes from "self-hosted" to "self-hosted-arm".
* build.yml: adds the build_wheels_linux_arm64_self_hosted job, which calls
  _build_linux_arm_self_host.yml for every combination in the
  linux_arm64_self_hosted entry of the create_matrix output, and appends the
  job to two job lists further down (presumably `needs:` lists; the key is
  outside the hunks).
* README.md: documents the ARM64 runner setup (qemu-user-static, a second env
  file, and a runner-arm compose service).

Review notes:
* The README now creates `.env.arm`, because the runner-arm service added in
  this patch reads `env_file: .env.arm`. Whether the x64 service reads `env`
  or `.env` is not visible in these hunks; confirm separately.
* Indentation and blank context lines were reconstructed from the hunk
  counts; verify them against the target files before applying.

diff --git a/.github/workflows/_build_linux_arm_self_host.yml b/.github/workflows/_build_linux_arm_self_host.yml
index 6aa55f5..0460596 100644
--- a/.github/workflows/_build_linux_arm_self_host.yml
+++ b/.github/workflows/_build_linux_arm_self_host.yml
@@ -27,7 +27,7 @@ on:
         description: "Runner type"
         required: false
         type: string
-        default: "self-hosted"
+        default: "self-hosted-arm"
       is-upload:
         description: "Whether to upload the release asset"
         required: false
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 35883e8..7a1050d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -96,6 +96,26 @@ jobs:
       cuda-version: ${{ matrix.cuda-version }}
     secrets: inherit
 
+  build_wheels_linux_arm64_self_hosted:
+    name: Build Linux ARM64 (self-hosted)
+    needs: [create_releases, create_matrix]
+    if: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted }}
+    strategy:
+      fail-fast: false
+      matrix:
+        flash-attn-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.flash-attn-version }}
+        python-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.python-version }}
+        torch-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.torch-version }}
+        cuda-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.cuda-version }}
+        exclude: ${{ fromjson(needs.create_matrix.outputs.matrix).exclude }}
+    uses: ./.github/workflows/_build_linux_arm_self_host.yml
+    with:
+      flash-attn-version: ${{ matrix.flash-attn-version }}
+      python-version: ${{ matrix.python-version }}
+      torch-version: ${{ matrix.torch-version }}
+      cuda-version: ${{ matrix.cuda-version }}
+    secrets: inherit
+
   # #########################################################
   # Windows
   # #########################################################
@@ -145,6 +165,7 @@ jobs:
       - build_wheels_linux
       - build_wheels_linux_arm64
       - build_wheels_linux_self_hosted
+      - build_wheels_linux_arm64_self_hosted
       - build_wheels_windows
       - build_wheels_windows_code_build
     if: always()
@@ -173,6 +194,7 @@ jobs:
       - build_wheels_linux
       - build_wheels_linux_arm64
       - build_wheels_linux_self_hosted
+      - build_wheels_linux_arm64_self_hosted
       - build_wheels_windows
       - build_wheels_windows_code_build
     permissions:
diff --git a/README.md b/README.md
index 8fa27b3..81c09bf 100644
--- a/README.md
+++ b/README.md
@@ -76,7 +76,6 @@ If you use this repository in your research and find it helpful, please cite thi
 - [@kun432](https://github.com/kun432) : Buy me a coffee!
 - [@wodeyuzhou](https://github.com/wodeyuzhou) : Sponsored me!
 
-
 ## Star History and Download Statistics
 
 
@@ -125,11 +124,21 @@ To build the wheels for these versions, you can use self-hosted runners.
 
 ```bash
 git clone https://github.com/mjun0812/flash-attention-prebuild-wheels.git
-cd self-hosted-runner
-cp env.template env
+cd flash-attention-prebuild-wheels/self-hosted-runner
 ```
 
-Edit `env` file to set the environment variables.
+Install qemu-user-static for ARM64 support.
+
+```bash
+sudo apt install qemu-user-static
+```
+
+Create the `env` and `.env.arm` files from the template, then edit them to set the environment variables.
+
+```bash
+cp env.template env
+cp env.template .env.arm
+```
 
 ```bash
 # Registry Token for GitHub Personal Access Token
@@ -161,6 +170,24 @@ services:
       args:
         GH_RUNNER_VERSION: 2.329.0
         TARGET_ARCH: x64
+
+  runner-arm:
+    privileged: true
+    restart: always
+    env_file:
+      - .env.arm
+    environment:
+      REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
+      RUNNER_NAME: self-hosted-runner
+      RUNNER_GROUP: default
+      TARGET_ARCH: arm64
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        GH_RUNNER_VERSION: 2.329.0
+        TARGET_ARCH: arm64
+        PLATFORM: linux/arm64
 ```
 
 Then, build and run the docker container.
Review notes for the create_matrix.py and self-hosted-runner patches below:

* create_matrix.py: adds LINUX_ARM64_SELF_HOSTED_MATRIX and wires it to the
  "linux_arm64_self_hosted" key in main(); "linux_arm64" is switched to
  False, so among the entries visible in this hunk only the new key carries
  a matrix.
* self-hosted-runner/Dockerfile: introduces a PLATFORM build argument
  (default "linux/amd64") and passes it to FROM --platform.
* self-hosted-runner/compose.yml: passes PLATFORM to the existing build args
  and adds a runner-arm service (platform linux/arm64) with its own
  fa-self-arm volume.
* NOTE(review): runner-arm sets RUNNER_NAME to "self-hosted-runner". The
  existing service's RUNNER_NAME is outside this hunk; confirm the two names
  do not collide when both runners register.
* Indentation and blank context lines were reconstructed from the hunk
  counts; verify them against the target files before applying.

diff --git a/create_matrix.py b/create_matrix.py
index 43ed1a6..b8922f6 100644
--- a/create_matrix.py
+++ b/create_matrix.py
@@ -90,6 +90,34 @@ LINUX_SELF_HOSTED_MATRIX = {
     "cuda-version": ["12.8", "13.0"],
 }
 
+LINUX_ARM64_SELF_HOSTED_MATRIX = {
+    "flash-attn-version": [
+        # "2.6.3",
+        "2.7.4",
+        # "2.8.3",
+    ],
+    "python-version": [
+        "3.10",
+        "3.11",
+        "3.12",
+        # "3.13",
+    ],
+    "torch-version": [
+        "2.5.1",
+        "2.6.0",
+        "2.7.1",
+        # "2.8.0",
+        "2.9.1",
+    ],
+    "cuda-version": [
+        "12.4",
+        # "12.6",
+        "12.8",
+        # "12.9",
+        "13.0",
+    ],
+}
+
 WINDOWS_MATRIX = {
     "flash-attn-version": [
         "2.6.3",
@@ -133,12 +161,15 @@ def main():
         "linux": False,
         # "linux": LINUX_MATRIX,
         #
-        # "linux_arm64": False,
-        "linux_arm64": LINUX_ARM64_MATRIX,
+        "linux_arm64": False,
+        # "linux_arm64": LINUX_ARM64_MATRIX,
         #
         "linux_self_hosted": False,
         # "linux_self_hosted": LINUX_SELF_HOSTED_MATRIX,
         #
+        # "linux_arm64_self_hosted": False,
+        "linux_arm64_self_hosted": LINUX_ARM64_SELF_HOSTED_MATRIX,
+        #
         "windows": False,
         # "windows": WINDOWS_MATRIX,
         #
diff --git a/self-hosted-runner/Dockerfile b/self-hosted-runner/Dockerfile
index 3c15e5a..db8aa51 100644
--- a/self-hosted-runner/Dockerfile
+++ b/self-hosted-runner/Dockerfile
@@ -1,4 +1,6 @@
-FROM ubuntu:24.04
+ARG PLATFORM="linux/amd64"
+
+FROM --platform=${PLATFORM} ubuntu:24.04
 
 ARG GH_RUNNER_VERSION="2.329.0"
 ARG TARGET_ARCH="x64"
diff --git a/self-hosted-runner/compose.yml b/self-hosted-runner/compose.yml
index 6d0d463..02794ab 100644
--- a/self-hosted-runner/compose.yml
+++ b/self-hosted-runner/compose.yml
@@ -15,8 +15,31 @@ services:
       args:
         GH_RUNNER_VERSION: 2.329.0
         TARGET_ARCH: x64
+        PLATFORM: linux/amd64
     volumes:
       - fa-self:/var/lib/docker
 
+  runner-arm:
+    platform: linux/arm64
+    privileged: true
+    restart: always
+    env_file:
+      - .env.arm
+    environment:
+      REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
+      RUNNER_NAME: self-hosted-runner
+      RUNNER_GROUP: default
+      TARGET_ARCH: arm64
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        GH_RUNNER_VERSION: 2.329.0
+        TARGET_ARCH: arm64
+        PLATFORM: linux/arm64
+    volumes:
+      - fa-self-arm:/var/lib/docker
+
 volumes:
   fa-self:
+  fa-self-arm: