feat: add ARM64 self-hosted runner build support

- Update build workflow to include ARM64 self-hosted runner job.
- Modify Dockerfile and compose.yml for ARM64 runner setup.
- Add ARM64 self-hosted matrix configuration.
- Update README with ARM64 self-hosted runner instructions.
This commit is contained in:
Junya Morioka
2025-12-11 01:10:52 +09:00
parent 67e64ccae1
commit 521f9dcbaf
6 changed files with 113 additions and 8 deletions
@@ -27,7 +27,7 @@ on:
description: "Runner type"
required: false
type: string
default: "self-hosted"
default: "self-hosted-arm"
is-upload:
description: "Whether to upload the release asset"
required: false
+22
View File
@@ -96,6 +96,26 @@ jobs:
cuda-version: ${{ matrix.cuda-version }}
secrets: inherit
# Linux ARM64 wheel builds on self-hosted runners.
# The job is skipped unless the matrix JSON produced by `create_matrix`
# contains a truthy `linux_arm64_self_hosted` entry.
build_wheels_linux_arm64_self_hosted:
  name: Build Linux ARM64 (self-hosted)
  needs: [create_releases, create_matrix]
  if: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted }}
  strategy:
    # Keep the remaining combinations running when one of them fails.
    fail-fast: false
    matrix:
      # Each axis is read from the `linux_arm64_self_hosted` object of the
      # matrix JSON; `exclude` comes from the top level of that same JSON.
      flash-attn-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.flash-attn-version }}
      python-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.python-version }}
      torch-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.torch-version }}
      cuda-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.cuda-version }}
      exclude: ${{ fromjson(needs.create_matrix.outputs.matrix).exclude }}
  # Reusable workflow invoked once per matrix combination.
  uses: ./.github/workflows/_build_linux_arm_self_host.yml
  with:
    flash-attn-version: ${{ matrix.flash-attn-version }}
    python-version: ${{ matrix.python-version }}
    torch-version: ${{ matrix.torch-version }}
    cuda-version: ${{ matrix.cuda-version }}
  # Forward the calling workflow's secrets to the reusable workflow.
  secrets: inherit
# #########################################################
# Windows
# #########################################################
@@ -145,6 +165,7 @@ jobs:
- build_wheels_linux
- build_wheels_linux_arm64
- build_wheels_linux_self_hosted
- build_wheels_linux_arm64_self_hosted
- build_wheels_windows
- build_wheels_windows_code_build
if: always()
@@ -173,6 +194,7 @@ jobs:
- build_wheels_linux
- build_wheels_linux_arm64
- build_wheels_linux_self_hosted
- build_wheels_linux_arm64_self_hosted
- build_wheels_windows
- build_wheels_windows_code_build
permissions:
+31 -4
View File
@@ -76,7 +76,6 @@ If you use this repository in your research and find it helpful, please cite thi
- [@kun432](https://github.com/kun432) : Buy me a coffee!
- [@wodeyuzhou](https://github.com/wodeyuzhou) : Sponsored me!
## Star History and Download Statistics
<a href="https://www.star-history.com/#mjun0812/flash-attention-prebuild-wheels&type=date&legend=top-left">
@@ -125,11 +124,21 @@ To build the wheels for these versions, you can use self-hosted runners.
```bash
git clone https://github.com/mjun0812/flash-attention-prebuild-wheels.git
cd self-hosted-runner
cp env.template env
cd flash-attention-prebuild-wheels/self-hosted-runner
```
Edit `env` file to set the environment variables.
Install `qemu-user-static` so that the ARM64 runner image can be built and run under emulation when the host itself is not ARM64.
```bash
sudo apt install qemu-user-static
```
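Create the `env` and `.env.arm` files from the template, then edit them to set the environment variables. The `runner-arm` service loads `.env.arm` through its `env_file` setting, so the file name must match exactly.
```bash
cp env.template env
cp env.template .env.arm
```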
```bash
# Registry Token for GitHub Personal Access Token
@@ -161,6 +170,24 @@ services:
args:
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: x64
runner-arm:
privileged: true
restart: always
env_file:
- .env.arm
environment:
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
RUNNER_NAME: self-hosted-runner
RUNNER_GROUP: default
TARGET_ARCH: arm64
build:
context: .
dockerfile: Dockerfile
args:
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: arm64
PLATFORM: linux/arm64
```
Then, build and run the docker container.
+33 -2
View File
@@ -90,6 +90,34 @@ LINUX_SELF_HOSTED_MATRIX = {
"cuda-version": ["12.8", "13.0"],
}
# Build matrix for Linux ARM64 wheels built on self-hosted runners.
# Each key is one matrix axis and each list holds the versions to build.
# Commented-out entries are currently disabled; uncomment one to
# include it in the matrix again.
LINUX_ARM64_SELF_HOSTED_MATRIX = {
    "flash-attn-version": [
        # "2.6.3",
        "2.7.4",
        # "2.8.3",
    ],
    "python-version": [
        "3.10",
        "3.11",
        "3.12",
        # "3.13",
    ],
    "torch-version": [
        "2.5.1",
        "2.6.0",
        "2.7.1",
        # "2.8.0",
        "2.9.1",
    ],
    "cuda-version": [
        "12.4",
        # "12.6",
        "12.8",
        # "12.9",
        "13.0",
    ],
}
WINDOWS_MATRIX = {
"flash-attn-version": [
"2.6.3",
@@ -133,12 +161,15 @@ def main():
"linux": False,
# "linux": LINUX_MATRIX,
#
# "linux_arm64": False,
"linux_arm64": LINUX_ARM64_MATRIX,
"linux_arm64": False,
# "linux_arm64": LINUX_ARM64_MATRIX,
#
"linux_self_hosted": False,
# "linux_self_hosted": LINUX_SELF_HOSTED_MATRIX,
#
# "linux_arm64_self_hosted": False,
"linux_arm64_self_hosted": LINUX_ARM64_SELF_HOSTED_MATRIX,
#
"windows": False,
# "windows": WINDOWS_MATRIX,
#
+3 -1
View File
@@ -1,4 +1,6 @@
FROM ubuntu:24.04
ARG PLATFORM="linux/amd64"
FROM --platform=${PLATFORM} ubuntu:24.04
ARG GH_RUNNER_VERSION="2.329.0"
ARG TARGET_ARCH="x64"
+23
View File
@@ -15,8 +15,31 @@ services:
args:
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: x64
PLATFORM: linux/amd64
volumes:
- fa-self:/var/lib/docker
# Self-hosted runner container targeting linux/arm64.
runner-arm:
  platform: linux/arm64
  privileged: true
  restart: always
  # Environment file specific to this service (separate from the other runner).
  env_file:
    - .env.arm
  environment:
    REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
    # NOTE(review): confirm this name does not collide with the name used by
    # the x64 runner service, whose definition is outside this hunk.
    RUNNER_NAME: self-hosted-runner
    RUNNER_GROUP: default
    TARGET_ARCH: arm64
  build:
    context: .
    dockerfile: Dockerfile
    args:
      # Quoted so the version is always read as a string.
      GH_RUNNER_VERSION: "2.329.0"
      TARGET_ARCH: arm64
      # Consumed by the Dockerfile's `FROM --platform=${PLATFORM}` line.
      PLATFORM: linux/arm64
  volumes:
    # Dedicated named volume mounted at /var/lib/docker for this runner.
    - fa-self-arm:/var/lib/docker
# Named volumes mounted at /var/lib/docker by the runner services.
# Empty values mean each volume uses the default settings.
volumes:
  # Mounted by the service defined above `runner-arm`.
  fa-self:
  # Mounted by the `runner-arm` service.
  fa-self-arm: