feat: add Windows self-hosted runner support and update documentation

- Add .github/workflows/test-windows-self-hosted.yml for Windows self-hosted runner testing.
- Update README.md with comprehensive self-hosted runner setup guides for Linux, ARM64, and Windows.
- Update self-hosted-runner/compose.yml to enable both x86_64 and ARM64 runner services.
- Add a note about manylinux2_28 and update the sponsor list in README.md.
This commit is contained in:
Junya Morioka
2026-01-03 00:20:57 +09:00
parent 5f8e6bc102
commit 991becbb7f
3 changed files with 251 additions and 100 deletions
@@ -0,0 +1,166 @@
# #########################################################
# Test build wheels with self-hosted runner on Windows x86_64
#
# Prerequisites (must be pre-installed on the runner):
# - Git
# - Chocolatey
# - Visual Studio BuildTools 2022 with:
#   - Microsoft.VisualStudio.Component.VC.Tools.x86.x64
#   - Microsoft.VisualStudio.Component.VC.CMake.Project
#   - Microsoft.VisualStudio.Component.Windows11SDK.22621
# - CMake
# - Ninja
# - Make (optional)
# #########################################################
name: Test Windows build (self-hosted)

# Manually triggered only. Each input is a required string; the defaults are
# quoted so that version-like values (e.g. "3.13") stay strings.
on:
  workflow_dispatch:
    inputs:
      # Version passed to build_windows.ps1 as -FlashAttnVersion.
      flash-attn-version:
        type: string
        description: "Flash-Attention version"
        required: true
        default: "2.8.3"
      # Interpreter version used for `uv venv -p` and for the build script.
      python-version:
        type: string
        description: "Python version"
        required: true
        default: "3.13"
      # Version passed to build_windows.ps1 as -TorchVersion.
      torch-version:
        type: string
        description: "PyTorch version"
        required: true
        default: "2.9.1"
      # Version handed to the setup-cuda action and to the build script.
      cuda-version:
        type: string
        description: "CUDA version"
        required: true
        default: "12.8.1"
jobs:
  # Builds a flash-attention wheel on a Windows x64 self-hosted runner,
  # smoke-tests it by importing it, then cleans the runner up again.
  build_windows_wheels_self_hosted:
    name: Build wheels and Test (Windows x86_64, self-hosted runner)
    runs-on: ["self-hosted", "windows", "x64"]
    timeout-minutes: 360
    env:
      # Exported to every step of this job.
      MAX_JOBS: 2
      NVCC_THREADS: 2
    steps:
      - uses: actions/checkout@v4

      - name: Enable Git long paths
        shell: pwsh
        run: git config --system core.longpaths true

      # Install Python using uv
      - name: Install uv
        uses: astral-sh/setup-uv@v7

      - name: Install Python
        shell: pwsh
        env:
          # Inputs are passed through the environment instead of being
          # interpolated into the script text, so their content is never
          # parsed as PowerShell code.
          INPUT_PYTHON_VERSION: ${{ inputs.python-version }}
        run: |
          uv venv -p $env:INPUT_PYTHON_VERSION
          uv pip install -U pip setuptools==75.8.0 wheel packaging psutil numpy ninja
          # Put the venv's Scripts directory on PATH for all following steps.
          $current_dir = (Get-Location).Path
          echo "$current_dir\.venv\Scripts" >> $env:GITHUB_PATH

      - uses: mjun0812/setup-cuda@v1
        with:
          version: ${{ inputs.cuda-version }}

      # Visual Studio BuildTools is pre-installed on the runner
      - name: Setup MSVC Developer Command Prompt
        uses: TheMrMilchmann/setup-msvc-dev@v3
        with:
          arch: x64

      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v2

      - name: Build wheels
        shell: pwsh
        env:
          # See "Install Python": keep workflow inputs out of the script text.
          INPUT_FLASH_ATTN_VERSION: ${{ inputs.flash-attn-version }}
          INPUT_PYTHON_VERSION: ${{ inputs.python-version }}
          INPUT_TORCH_VERSION: ${{ inputs.torch-version }}
          INPUT_CUDA_VERSION: ${{ inputs.cuda-version }}
        run: |
          .\build_windows.ps1 -FlashAttnVersion "$env:INPUT_FLASH_ATTN_VERSION" -PythonVersion "$env:INPUT_PYTHON_VERSION" -TorchVersion "$env:INPUT_TORCH_VERSION" -CudaVersion "$env:INPUT_CUDA_VERSION"
          $wheelName = Get-ChildItem -Path "flash-attention\dist\*.whl" | Select-Object -First 1 | ForEach-Object { $_.Name }
          # Fail here, with a clear message, rather than exporting an empty
          # wheel_name that only breaks later inside `pip install`.
          if (-not $wheelName) {
            throw "No wheel (*.whl) was found in flash-attention\dist after the build."
          }
          # Export the wheel file name for the "Install Test" step.
          echo "wheel_name=$wheelName" >> $env:GITHUB_ENV

      - name: Install Test
        shell: pwsh
        run: |
          pip install --no-cache-dir flash-attention/dist/$env:wheel_name
          python -c "import flash_attn; print(flash_attn.__version__)"

      # Cleanup step - always runs even if previous steps fail
      # Only cleans up Python and CUDA installations (VS BuildTools is pre-installed)
      # Every removal is best-effort: errors are suppressed with
      # -ErrorAction SilentlyContinue so one locked file does not abort the rest.
      - name: Cleanup (always run)
        if: always()
        shell: pwsh
        run: |
          Write-Host "=========================================="
          Write-Host "Starting cleanup for self-hosted runner..."
          Write-Host "=========================================="

          # 1. Remove flash-attention directory (source and build artifacts)
          $flashAttnDir = Join-Path (Get-Location) "flash-attention"
          if (Test-Path $flashAttnDir) {
            Write-Host "[1/6] Removing flash-attention directory: $flashAttnDir"
            Remove-Item -Path $flashAttnDir -Recurse -Force -ErrorAction SilentlyContinue
          } else {
            Write-Host "[1/6] flash-attention directory not found, skipping"
          }

          # 2. Remove Python virtual environment (.venv)
          $venvDir = Join-Path (Get-Location) ".venv"
          if (Test-Path $venvDir) {
            Write-Host "[2/6] Removing Python virtual environment: $venvDir"
            Remove-Item -Path $venvDir -Recurse -Force -ErrorAction SilentlyContinue
          } else {
            Write-Host "[2/6] .venv directory not found, skipping"
          }

          # 3. Remove pip cache
          $pipCacheDir = Join-Path $env:LOCALAPPDATA "pip\cache"
          if (Test-Path $pipCacheDir) {
            Write-Host "[3/6] Removing pip cache: $pipCacheDir"
            Remove-Item -Path $pipCacheDir -Recurse -Force -ErrorAction SilentlyContinue
          } else {
            Write-Host "[3/6] pip cache not found, skipping"
          }

          # 4. Remove uv cache
          $uvCacheDir = Join-Path $env:LOCALAPPDATA "uv"
          if (Test-Path $uvCacheDir) {
            Write-Host "[4/6] Removing uv cache: $uvCacheDir"
            Remove-Item -Path $uvCacheDir -Recurse -Force -ErrorAction SilentlyContinue
          } else {
            Write-Host "[4/6] uv cache not found, skipping"
          }

          # 5. Remove CUDA installation
          # Deletes every sub-directory found under the CUDA base directory.
          $cudaBaseDir = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
          if (Test-Path $cudaBaseDir) {
            Write-Host "[5/6] Removing CUDA installations: $cudaBaseDir"
            Get-ChildItem -Path $cudaBaseDir -Directory -ErrorAction SilentlyContinue | ForEach-Object {
              Write-Host "  Removing: $($_.FullName)"
              Remove-Item -Path $_.FullName -Recurse -Force -ErrorAction SilentlyContinue
            }
          } else {
            Write-Host "[5/6] CUDA directory not found, skipping"
          }

          # 6. Remove temp files
          # Each pattern is matched against entries directly inside $env:TEMP.
          Write-Host "[6/6] Removing temporary files"
          $tempPatterns = @("pip-*", "torch*", "cuda*", "flash*", "uv-*")
          foreach ($pattern in $tempPatterns) {
            $tempPath = Join-Path $env:TEMP $pattern
            Get-ChildItem -Path $tempPath -ErrorAction SilentlyContinue | ForEach-Object {
              Write-Host "  Removing temp: $($_.FullName)"
              Remove-Item -Path $_.FullName -Recurse -Force -ErrorAction SilentlyContinue
            }
          }

          Write-Host "=========================================="
          Write-Host "Cleanup completed."
          Write-Host "=========================================="
+63 -79
View File
@@ -43,6 +43,9 @@ pip install ./flash_attn-2.6.3+cu124torch2.5-cp312-cp312-linux_x86_64.whl
## Packages
> [!NOTE]
> Since v0.7.0, wheels are built for the `manylinux_2_28` platform.
> [!NOTE]
> Since v0.5.0, wheels are built with a local version label indicating the CUDA and PyTorch versions.
> Example: in `pip list`, `flash_attn==2.8.3` is now shown as `flash_attn==2.8.3+cu130torch2.9`.
@@ -75,6 +78,7 @@ If you use this repository in your research and find it helpful, please cite thi
- [@KiralyCraft](https://github.com/KiralyCraft) : Provided with computing resource!
- [@kun432](https://github.com/kun432) : Buy me a coffee!
- [@wodeyuzhou](https://github.com/wodeyuzhou) : Sponsored me!
- Gabr1e1 : Buy me a coffee!
## Star History and Download Statistics
@@ -112,7 +116,7 @@ If you use this repository in your research and find it helpful, please cite thi
If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions.
1. Fork this repository
2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build.
2. Edit the Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build. You can use GitHub-hosted runners or self-hosted runners with the settings below.
3. Add tag `v*.*.*` to trigger the build workflow. `git tag v*.*.* && git push --tags`
Please note that depending on the combination of versions, it may not be possible to build.
@@ -122,7 +126,15 @@ Please note that depending on the combination of versions, it may not be possibl
In some version combinations, you cannot build wheels on GitHub-hosted runners due to job time limitations.
To build the wheels for these versions, you can use self-hosted runners.
#### Setup x86_64 Runner
#### Getting a One-Time Registration Token for the GitHub Actions Runner
```bash
gh api \
-X POST \
/repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
```
#### Setup Linux Self-Hosted Runner
Clone the repository and navigate to the self-hosted-runner directory.
@@ -131,13 +143,17 @@ git clone https://github.com/mjun0812/flash-attention-prebuild-wheels.git
cd flash-attention-prebuild-wheels/self-hosted-runner
```
Create the environment file from the template.
Create environment files from the template. Create one file per architecture you want to build.
```bash
# For x86_64 (compose.yml loads this file as `.env`)
cp env.template .env
# For ARM64 (compose.yml loads this file as `.env.arm`)
cp env.template .env.arm
```
Edit the `env` file to set the environment variables.
Edit the environment file(s) to set the required variables.
```bash
# Registry Token for GitHub Personal Access Token
@@ -152,93 +168,61 @@ RUNNER_LABELS=Linux,self-hosted
Edit the `compose.yml` file if you use a repository forked from this repository.
```yaml
services:
runner:
privileged: true
restart: always
env_file:
- .env
environment:
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
RUNNER_NAME: self-hosted-runner
RUNNER_GROUP: default
runner:
platform: linux/amd64
privileged: true
restart: always
env_file:
- .env
environment:
REPOSITORY_URL: https://github.com/[YOUR_USERNAME]/flash-attention-prebuild-wheels
RUNNER_NAME: self-hosted-runner
RUNNER_GROUP: default
TARGET_ARCH: x64
build:
context: .
dockerfile: Dockerfile
args:
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: x64
build:
context: .
dockerfile: Dockerfile
args:
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: x64
PLATFORM: linux/amd64
volumes:
- fa-self:/var/lib/docker
runner-arm:
platform: linux/arm64
privileged: true
restart: always
env_file:
- .env.arm
environment:
REPOSITORY_URL: https://github.com/[YOUR_USERNAME]/flash-attention-prebuild-wheels
RUNNER_NAME: self-hosted-runner-arm
RUNNER_GROUP: default
TARGET_ARCH: arm64
build:
context: .
dockerfile: Dockerfile
args:
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: arm64
PLATFORM: linux/arm64
volumes:
- fa-self-arm:/var/lib/docker
```
Build and run the docker container.
Build and run the docker container(s).
```bash
# Build and run
# x86_64 runner
docker compose build runner
docker compose up -d runner
```
#### (Optional) Setup ARM64 Runner
If you also want to build wheels for ARM64 architecture, follow these additional steps.
Install qemu-user-static for ARM64 support.
```bash
sudo apt install qemu-user-static
```
Create the environment file for ARM64 runner.
```bash
cp env.template env.arm
```
Edit the `env.arm` file with the same configuration as the `env` file.
Add the ARM64 runner service to your `compose.yml` file.
```yaml
services:
runner:
# ... (existing x86_64 runner configuration)
runner-arm:
privileged: true
restart: always
env_file:
- .env.arm
environment:
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
RUNNER_NAME: self-hosted-runner-arm
RUNNER_GROUP: default
TARGET_ARCH: arm64
build:
context: .
dockerfile: Dockerfile
args:
GH_RUNNER_VERSION: 2.329.0
TARGET_ARCH: arm64
PLATFORM: linux/arm64
```
Build and run the ARM64 runner container.
```bash
# Build and run both x86_64 and ARM64 runners
# ARM64 runner (optional)
docker compose build runner-arm
docker compose up -d runner-arm
```
### Getting One-Time Registry Token for GitHub Actions Runner
```bash
gh api \
-X POST \
/repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
```
## Build Environments
This repository builds wheels across multiple platforms and environments:
+22 -21
View File
@@ -1,5 +1,6 @@
services:
runner:
platform: linux/amd64
privileged: true
restart: always
env_file:
@@ -19,27 +20,27 @@ services:
volumes:
- fa-self:/var/lib/docker
# runner-arm:
# platform: linux/arm64
# privileged: true
# restart: always
# env_file:
# - .env.arm
# environment:
# REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
# RUNNER_NAME: self-hosted-runner-arm
# RUNNER_GROUP: default
# TARGET_ARCH: arm64
# build:
# context: .
# dockerfile: Dockerfile
# args:
# GH_RUNNER_VERSION: 2.329.0
# TARGET_ARCH: arm64
# PLATFORM: linux/arm64
# volumes:
# - fa-self-arm:/var/lib/docker
# ARM64 runner service.
runner-arm:
  # Build the image from ./Dockerfile with ARM64-specific build arguments.
  build:
    context: .
    dockerfile: Dockerfile
    args:
      GH_RUNNER_VERSION: 2.329.0
      TARGET_ARCH: arm64
      PLATFORM: linux/arm64
  # Run-time settings for the container.
  platform: linux/arm64
  privileged: true
  restart: always
  # Variables are read from .env.arm, plus the fixed values listed below.
  env_file:
    - .env.arm
  environment:
    REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
    RUNNER_NAME: self-hosted-runner-arm
    RUNNER_GROUP: default
    TARGET_ARCH: arm64
  # Named volume mounted at the container's /var/lib/docker.
  volumes:
    - fa-self-arm:/var/lib/docker
volumes:
fa-self:
# fa-self-arm:
fa-self-arm: