feat: add ARM64 self-hosted runner build support

- Update build workflow to include ARM64 self-hosted runner job.
- Modify Dockerfile and compose.yml for ARM64 runner setup.
- Add ARM64 self-hosted matrix configuration.
- Update README with ARM64 self-hosted runner instructions.
2026-07-01 01:37:53 -04:00 · 2025-12-11 01:10:52 +09:00
parent 67e64ccae1
commit 521f9dcbaf
6 changed files with 113 additions and 8 deletions
@@ -27,7 +27,7 @@ on:
        description: "Runner type"
        required: false
        type: string
-        default: "self-hosted"
+        default: "self-hosted-arm"
      is-upload:
        description: "Whether to upload the release asset"
        required: false
@@ -96,6 +96,26 @@ jobs:
      cuda-version: ${{ matrix.cuda-version }}
    secrets: inherit

+  build_wheels_linux_arm64_self_hosted:  # Linux ARM64 wheel builds on the self-hosted runner
+    name: Build Linux ARM64 (self-hosted)
+    needs: [create_releases, create_matrix]
+    if: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted }}  # job is skipped when this matrix entry is falsy (e.g. false)
+    strategy:
+      fail-fast: false  # keep the other matrix combinations running if one fails
+      matrix:  # each axis is read from the create_matrix job's JSON output
+        flash-attn-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.flash-attn-version }}
+        python-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.python-version }}
+        torch-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.torch-version }}
+        cuda-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.cuda-version }}
+        exclude: ${{ fromjson(needs.create_matrix.outputs.matrix).exclude }}  # exclusions come from the matrix output's top-level `exclude` key
+    uses: ./.github/workflows/_build_linux_arm_self_host.yml  # reusable workflow, called once per matrix combination
+    with:
+      flash-attn-version: ${{ matrix.flash-attn-version }}
+      python-version: ${{ matrix.python-version }}
+      torch-version: ${{ matrix.torch-version }}
+      cuda-version: ${{ matrix.cuda-version }}
+    secrets: inherit  # forward the calling workflow's secrets to the reusable workflow
+
  # #########################################################
  # Windows
  # #########################################################
@@ -145,6 +165,7 @@ jobs:
      - build_wheels_linux
      - build_wheels_linux_arm64
      - build_wheels_linux_self_hosted
+      - build_wheels_linux_arm64_self_hosted
      - build_wheels_windows
      - build_wheels_windows_code_build
    if: always()
@@ -173,6 +194,7 @@ jobs:
      - build_wheels_linux
      - build_wheels_linux_arm64
      - build_wheels_linux_self_hosted
+      - build_wheels_linux_arm64_self_hosted
      - build_wheels_windows
      - build_wheels_windows_code_build
    permissions:
@@ -76,7 +76,6 @@ If you use this repository in your research and find it helpful, please cite thi
 - [@kun432](https://github.com/kun432) : Buy me a coffee!
 - [@wodeyuzhou](https://github.com/wodeyuzhou) : Sponsored me!

-
 ## Star History and Download Statistics

 <a href="https://www.star-history.com/#mjun0812/flash-attention-prebuild-wheels&type=date&legend=top-left">
@@ -125,11 +124,21 @@ To build the wheels for these versions, you can use self-hosted runners.

 ```bash
 git clone https://github.com/mjun0812/flash-attention-prebuild-wheels.git
-cd self-hosted-runner
-cp env.template env
+cd flash-attention-prebuild-wheels/self-hosted-runner
 ```

-Edit `env` file to set the environment variables.
+Install qemu-user-static for ARM64 support.
+
+```bash
+sudo apt install qemu-user-static
+```
+
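+Create the `env` and `.env.arm` files from the template, then edit them to set the environment variables. The ARM64 service reads `.env.arm` (see `env_file` in the compose file below).
+
+```bash
+cp env.template env
+cp env.template .env.arm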
+```

 ```bash
 # Registry Token for GitHub Personal Access Token
@@ -161,6 +170,24 @@ services:
      args:
        GH_RUNNER_VERSION: 2.329.0
        TARGET_ARCH: x64
+
+  runner-arm:
+    privileged: true
+    restart: always
+    env_file:
+      - .env.arm
+    environment:
+      REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
+      RUNNER_NAME: self-hosted-runner
+      RUNNER_GROUP: default
+      TARGET_ARCH: arm64
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        GH_RUNNER_VERSION: 2.329.0
+        TARGET_ARCH: arm64
+        PLATFORM: linux/arm64
 ```

 Then, build and run the docker container.
@@ -90,6 +90,34 @@ LINUX_SELF_HOSTED_MATRIX = {
    "cuda-version": ["12.8", "13.0"],
 }

+LINUX_ARM64_SELF_HOSTED_MATRIX = {  # version axes used for the "linux_arm64_self_hosted" matrix entry
+    "flash-attn-version": [  # commented-out entries in these lists are not part of the matrix
+        # "2.6.3",
+        "2.7.4",
+        # "2.8.3",
+    ],
+    "python-version": [
+        "3.10",
+        "3.11",
+        "3.12",
+        # "3.13",
+    ],
+    "torch-version": [
+        "2.5.1",
+        "2.6.0",
+        "2.7.1",
+        # "2.8.0",
+        "2.9.1",
+    ],
+    "cuda-version": [
+        "12.4",
+        # "12.6",
+        "12.8",
+        # "12.9",
+        "13.0",
+    ],
+}
+
 WINDOWS_MATRIX = {
    "flash-attn-version": [
        "2.6.3",
@@ -133,12 +161,15 @@ def main():
                "linux": False,
                # "linux": LINUX_MATRIX,
                #
-                # "linux_arm64": False,
-                "linux_arm64": LINUX_ARM64_MATRIX,
+                "linux_arm64": False,
+                # "linux_arm64": LINUX_ARM64_MATRIX,
                #
                "linux_self_hosted": False,
                # "linux_self_hosted": LINUX_SELF_HOSTED_MATRIX,
                #
+                # "linux_arm64_self_hosted": False,
+                "linux_arm64_self_hosted": LINUX_ARM64_SELF_HOSTED_MATRIX,
+                #
                "windows": False,
                # "windows": WINDOWS_MATRIX,
                #
@@ -1,4 +1,6 @@
-FROM ubuntu:24.04
+ARG PLATFORM="linux/amd64"
+
+FROM --platform=${PLATFORM} ubuntu:24.04

 ARG GH_RUNNER_VERSION="2.329.0"
 ARG TARGET_ARCH="x64"
@@ -15,8 +15,31 @@ services:
      args:
        GH_RUNNER_VERSION: 2.329.0
        TARGET_ARCH: x64
+        PLATFORM: linux/amd64
    volumes:
      - fa-self:/var/lib/docker

+  runner-arm:  # ARM64 counterpart of the x64 runner service
+    platform: linux/arm64
+    privileged: true  # NOTE(review): presumably needed to run Docker inside the container (see the /var/lib/docker volume) — confirm
+    restart: always
+    env_file:
+      - .env.arm  # environment file dedicated to this service
+    environment:
+      REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
+      RUNNER_NAME: self-hosted-runner  # NOTE(review): confirm this does not collide with the x64 runner's name
+      RUNNER_GROUP: default
+      TARGET_ARCH: arm64
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        GH_RUNNER_VERSION: 2.329.0
+        TARGET_ARCH: arm64
+        PLATFORM: linux/arm64  # feeds the Dockerfile's `ARG PLATFORM`, which selects the base image platform
+    volumes:
+      - fa-self-arm:/var/lib/docker  # separate named volume from the x64 service's fa-self
+
 volumes:
  fa-self:
+  fa-self-arm: