mirror of
https://github.com/BillyOutlast/flash-attention-prebuild-wheels-rocm.git
synced 2026-07-01 01:37:53 -04:00
feat: add ARM64 self-hosted runner build support
- Update build workflow to include ARM64 self-hosted runner job.
- Modify Dockerfile and compose.yml for ARM64 runner setup.
- Add ARM64 self-hosted matrix configuration.
- Update README with ARM64 self-hosted runner instructions.
This commit is contained in:
@@ -27,7 +27,7 @@ on:
|
||||
description: "Runner type"
|
||||
required: false
|
||||
type: string
|
||||
default: "self-hosted"
|
||||
default: "self-hosted-arm"
|
||||
is-upload:
|
||||
description: "Whether to upload the release asset"
|
||||
required: false
|
||||
|
||||
@@ -96,6 +96,26 @@ jobs:
|
||||
cuda-version: ${{ matrix.cuda-version }}
|
||||
secrets: inherit
|
||||
|
||||
build_wheels_linux_arm64_self_hosted:
|
||||
name: Build Linux ARM64 (self-hosted)
|
||||
needs: [create_releases, create_matrix]
|
||||
if: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
flash-attn-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.flash-attn-version }}
|
||||
python-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.python-version }}
|
||||
torch-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.torch-version }}
|
||||
cuda-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64_self_hosted.cuda-version }}
|
||||
exclude: ${{ fromjson(needs.create_matrix.outputs.matrix).exclude }}
|
||||
uses: ./.github/workflows/_build_linux_arm_self_host.yml
|
||||
with:
|
||||
flash-attn-version: ${{ matrix.flash-attn-version }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
torch-version: ${{ matrix.torch-version }}
|
||||
cuda-version: ${{ matrix.cuda-version }}
|
||||
secrets: inherit
|
||||
|
||||
# #########################################################
|
||||
# Windows
|
||||
# #########################################################
|
||||
@@ -145,6 +165,7 @@ jobs:
|
||||
- build_wheels_linux
|
||||
- build_wheels_linux_arm64
|
||||
- build_wheels_linux_self_hosted
|
||||
- build_wheels_linux_arm64_self_hosted
|
||||
- build_wheels_windows
|
||||
- build_wheels_windows_code_build
|
||||
if: always()
|
||||
@@ -173,6 +194,7 @@ jobs:
|
||||
- build_wheels_linux
|
||||
- build_wheels_linux_arm64
|
||||
- build_wheels_linux_self_hosted
|
||||
- build_wheels_linux_arm64_self_hosted
|
||||
- build_wheels_windows
|
||||
- build_wheels_windows_code_build
|
||||
permissions:
|
||||
|
||||
@@ -76,7 +76,6 @@ If you use this repository in your research and find it helpful, please cite thi
|
||||
- [@kun432](https://github.com/kun432) : Buy me a coffee!
|
||||
- [@wodeyuzhou](https://github.com/wodeyuzhou) : Sponsored me!
|
||||
|
||||
|
||||
## Star History and Download Statistics
|
||||
|
||||
<a href="https://www.star-history.com/#mjun0812/flash-attention-prebuild-wheels&type=date&legend=top-left">
|
||||
@@ -125,11 +124,21 @@ To build the wheels for these versions, you can use self-hosted runners.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mjun0812/flash-attention-prebuild-wheels.git
|
||||
cd self-hosted-runner
|
||||
cp env.template env
|
||||
cd flash-attention-prebuild-wheels/self-hosted-runner
|
||||
```
|
||||
|
||||
Edit `env` file to set the environment variables.
|
||||
Install `qemu-user-static` for ARM64 support (it is needed to build and run the `linux/arm64` runner image on a non-ARM host).
|
||||
|
||||
```bash
|
||||
sudo apt install qemu-user-static
|
||||
```
|
||||
|
||||
Create the `env` and `env.arm` files from the template, then edit them to set the environment variables.
|
||||
|
||||
```bash
|
||||
cp env.template env
|
||||
cp env.template env.arm
|
||||
```
|
||||
|
||||
```bash
|
||||
# Registry Token for GitHub Personal Access Token
|
||||
@@ -161,6 +170,24 @@ services:
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: x64
|
||||
|
||||
runner-arm:
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env.arm
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
|
||||
RUNNER_NAME: self-hosted-runner
|
||||
RUNNER_GROUP: default
|
||||
TARGET_ARCH: arm64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: arm64
|
||||
PLATFORM: linux/arm64
|
||||
```
|
||||
|
||||
Then, build and run the docker container.
|
||||
|
||||
+33
-2
@@ -90,6 +90,34 @@ LINUX_SELF_HOSTED_MATRIX = {
|
||||
"cuda-version": ["12.8", "13.0"],
|
||||
}
|
||||
|
||||
LINUX_ARM64_SELF_HOSTED_MATRIX = {
|
||||
"flash-attn-version": [
|
||||
# "2.6.3",
|
||||
"2.7.4",
|
||||
# "2.8.3",
|
||||
],
|
||||
"python-version": [
|
||||
"3.10",
|
||||
"3.11",
|
||||
"3.12",
|
||||
# "3.13",
|
||||
],
|
||||
"torch-version": [
|
||||
"2.5.1",
|
||||
"2.6.0",
|
||||
"2.7.1",
|
||||
# "2.8.0",
|
||||
"2.9.1",
|
||||
],
|
||||
"cuda-version": [
|
||||
"12.4",
|
||||
# "12.6",
|
||||
"12.8",
|
||||
# "12.9",
|
||||
"13.0",
|
||||
],
|
||||
}
|
||||
|
||||
WINDOWS_MATRIX = {
|
||||
"flash-attn-version": [
|
||||
"2.6.3",
|
||||
@@ -133,12 +161,15 @@ def main():
|
||||
"linux": False,
|
||||
# "linux": LINUX_MATRIX,
|
||||
#
|
||||
# "linux_arm64": False,
|
||||
"linux_arm64": LINUX_ARM64_MATRIX,
|
||||
"linux_arm64": False,
|
||||
# "linux_arm64": LINUX_ARM64_MATRIX,
|
||||
#
|
||||
"linux_self_hosted": False,
|
||||
# "linux_self_hosted": LINUX_SELF_HOSTED_MATRIX,
|
||||
#
|
||||
# "linux_arm64_self_hosted": False,
|
||||
"linux_arm64_self_hosted": LINUX_ARM64_SELF_HOSTED_MATRIX,
|
||||
#
|
||||
"windows": False,
|
||||
# "windows": WINDOWS_MATRIX,
|
||||
#
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
FROM ubuntu:24.04
|
||||
ARG PLATFORM="linux/amd64"
|
||||
|
||||
FROM --platform=${PLATFORM} ubuntu:24.04
|
||||
|
||||
ARG GH_RUNNER_VERSION="2.329.0"
|
||||
ARG TARGET_ARCH="x64"
|
||||
|
||||
@@ -15,8 +15,31 @@ services:
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: x64
|
||||
PLATFORM: linux/amd64
|
||||
volumes:
|
||||
- fa-self:/var/lib/docker
|
||||
|
||||
runner-arm:
|
||||
platform: linux/arm64
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env.arm
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
|
||||
RUNNER_NAME: self-hosted-runner
|
||||
RUNNER_GROUP: default
|
||||
TARGET_ARCH: arm64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: arm64
|
||||
PLATFORM: linux/arm64
|
||||
volumes:
|
||||
- fa-self-arm:/var/lib/docker
|
||||
|
||||
volumes:
|
||||
fa-self:
|
||||
fa-self-arm:
|
||||
|
||||
Reference in New Issue
Block a user