mirror of
https://github.com/BillyOutlast/flash-attention-prebuild-wheels-rocm.git
synced 2026-07-01 01:37:53 -04:00
feat: add Windows self-hosted runner support and update documentation
- Add .github/workflows/test-windows-self-hosted.yml for Windows self-hosted runner testing. - Update README.md with comprehensive self-hosted runner setup guides for Linux, ARM64, and Windows. - Update self-hosted-runner/compose.yml to enable both x86_64 and ARM64 runner services. - Add a note about manylinux2_28 and update the sponsor list in README.md.
This commit is contained in:
@@ -0,0 +1,166 @@
|
||||
# #########################################################
# Manually triggered test build of wheels on a self-hosted
# Windows x86_64 runner.
#
# The runner must already have the following installed:
#   - Git
#   - Chocolatey
#   - Visual Studio BuildTools 2022, including the components:
#       - Microsoft.VisualStudio.Component.VC.Tools.x86.x64
#       - Microsoft.VisualStudio.Component.VC.CMake.Project
#       - Microsoft.VisualStudio.Component.Windows11SDK.22621
#   - CMake
#   - Ninja
#   - Make (optional)
# #########################################################

name: Test Windows build (self-hosted)

# Only started by hand; every input is a required string with a default.
on:
  workflow_dispatch:
    inputs:
      flash-attn-version:
        type: string
        required: true
        default: '2.8.3'
        description: 'Flash-Attention version'
      python-version:
        type: string
        required: true
        default: '3.13'
        description: 'Python version'
      torch-version:
        type: string
        required: true
        default: '2.9.1'
        description: 'PyTorch version'
      cuda-version:
        type: string
        required: true
        default: '12.8.1'
        description: 'CUDA version'
|
||||
|
||||
jobs:
  # Builds a flash-attention wheel on a self-hosted Windows x64 runner,
  # smoke-tests it by importing the installed package, then cleans up what
  # the run installed or created so the persistent runner is not left dirty.
  build_windows_wheels_self_hosted:
    name: Build wheels and Test (Windows x86_64, self-hosted runner)
    runs-on: ["self-hosted", "windows", "x64"]
    timeout-minutes: 360
    env:
      # NOTE(review): presumably read by the flash-attention build to limit
      # parallel compile jobs / nvcc threads - confirm in build_windows.ps1.
      MAX_JOBS: "2"
      NVCC_THREADS: "2"
    steps:
      - uses: actions/checkout@v4

      - name: Enable Git long paths
        shell: pwsh
        run: git config --system core.longpaths true

      # Install Python using uv
      - name: Install uv
        uses: astral-sh/setup-uv@v7

      - name: Install Python
        shell: pwsh
        env:
          # Workflow inputs are passed through the environment instead of
          # being expanded into the script text, so a crafted input value
          # cannot inject PowerShell code.
          INPUT_PYTHON_VERSION: ${{ inputs.python-version }}
        run: |
          uv venv -p $env:INPUT_PYTHON_VERSION
          # Native commands do not stop the script on failure; check explicitly.
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          uv pip install -U pip setuptools==75.8.0 wheel packaging psutil numpy ninja
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          # Put the venv's Scripts directory on PATH for all later steps.
          $current_dir = (Get-Location).Path
          echo "$current_dir\.venv\Scripts" >> $env:GITHUB_PATH

      - uses: mjun0812/setup-cuda@v1
        with:
          version: ${{ inputs.cuda-version }}

      # Visual Studio BuildTools is pre-installed on the runner
      - name: Setup MSVC Developer Command Prompt
        uses: TheMrMilchmann/setup-msvc-dev@v3
        with:
          arch: x64

      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v2

      - name: Build wheels
        shell: pwsh
        env:
          # See "Install Python": inputs reach the script only as env vars.
          INPUT_FLASH_ATTN_VERSION: ${{ inputs.flash-attn-version }}
          INPUT_PYTHON_VERSION: ${{ inputs.python-version }}
          INPUT_TORCH_VERSION: ${{ inputs.torch-version }}
          INPUT_CUDA_VERSION: ${{ inputs.cuda-version }}
        run: |
          $buildArgs = @{
              FlashAttnVersion = $env:INPUT_FLASH_ATTN_VERSION
              PythonVersion    = $env:INPUT_PYTHON_VERSION
              TorchVersion     = $env:INPUT_TORCH_VERSION
              CudaVersion      = $env:INPUT_CUDA_VERSION
          }
          .\build_windows.ps1 @buildArgs
          $wheelName = Get-ChildItem -Path "flash-attention\dist\*.whl" | Select-Object -First 1 | ForEach-Object { $_.Name }
          # Fail here with a clear message rather than exporting an empty
          # wheel_name and letting the install step fail confusingly.
          if (-not $wheelName) {
              throw "No wheel found in flash-attention\dist; the build did not produce an artifact."
          }
          # Export the wheel file name for the "Install Test" step.
          echo "wheel_name=$wheelName" >> $env:GITHUB_ENV

      - name: Install Test
        shell: pwsh
        run: |
          pip install --no-cache-dir "flash-attention/dist/$env:wheel_name"
          # Do not run the import check against a failed installation.
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          python -c "import flash_attn; print(flash_attn.__version__)"

      # Cleanup step - always runs even if previous steps fail
      # Only cleans up Python and CUDA installations (VS BuildTools is pre-installed)
      # Every removal is best-effort: errors are silenced so that one locked
      # file does not prevent the remaining cleanup stages from running.
      - name: Cleanup (always run)
        if: always()
        shell: pwsh
        run: |
          Write-Host "=========================================="
          Write-Host "Starting cleanup for self-hosted runner..."
          Write-Host "=========================================="

          # 1. Remove flash-attention directory (source and build artifacts)
          $flashAttnDir = Join-Path (Get-Location) "flash-attention"
          if (Test-Path $flashAttnDir) {
              Write-Host "[1/6] Removing flash-attention directory: $flashAttnDir"
              Remove-Item -Path $flashAttnDir -Recurse -Force -ErrorAction SilentlyContinue
          } else {
              Write-Host "[1/6] flash-attention directory not found, skipping"
          }

          # 2. Remove Python virtual environment (.venv)
          $venvDir = Join-Path (Get-Location) ".venv"
          if (Test-Path $venvDir) {
              Write-Host "[2/6] Removing Python virtual environment: $venvDir"
              Remove-Item -Path $venvDir -Recurse -Force -ErrorAction SilentlyContinue
          } else {
              Write-Host "[2/6] .venv directory not found, skipping"
          }

          # 3. Remove pip cache
          $pipCacheDir = Join-Path $env:LOCALAPPDATA "pip\cache"
          if (Test-Path $pipCacheDir) {
              Write-Host "[3/6] Removing pip cache: $pipCacheDir"
              Remove-Item -Path $pipCacheDir -Recurse -Force -ErrorAction SilentlyContinue
          } else {
              Write-Host "[3/6] pip cache not found, skipping"
          }

          # 4. Remove uv cache
          $uvCacheDir = Join-Path $env:LOCALAPPDATA "uv"
          if (Test-Path $uvCacheDir) {
              Write-Host "[4/6] Removing uv cache: $uvCacheDir"
              Remove-Item -Path $uvCacheDir -Recurse -Force -ErrorAction SilentlyContinue
          } else {
              Write-Host "[4/6] uv cache not found, skipping"
          }

          # 5. Remove CUDA installation
          # Deletes every version directory under the CUDA base directory.
          $cudaBaseDir = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
          if (Test-Path $cudaBaseDir) {
              Write-Host "[5/6] Removing CUDA installations: $cudaBaseDir"
              Get-ChildItem -Path $cudaBaseDir -Directory -ErrorAction SilentlyContinue | ForEach-Object {
                  Write-Host "  Removing: $($_.FullName)"
                  Remove-Item -Path $_.FullName -Recurse -Force -ErrorAction SilentlyContinue
              }
          } else {
              Write-Host "[5/6] CUDA directory not found, skipping"
          }

          # 6. Remove temp files
          # Matches entries directly under %TEMP% by name prefix.
          Write-Host "[6/6] Removing temporary files"
          $tempPatterns = @("pip-*", "torch*", "cuda*", "flash*", "uv-*")
          foreach ($pattern in $tempPatterns) {
              $tempPath = Join-Path $env:TEMP $pattern
              Get-ChildItem -Path $tempPath -ErrorAction SilentlyContinue | ForEach-Object {
                  Write-Host "  Removing temp: $($_.FullName)"
                  Remove-Item -Path $_.FullName -Recurse -Force -ErrorAction SilentlyContinue
              }
          }

          Write-Host "=========================================="
          Write-Host "Cleanup completed."
          Write-Host "=========================================="
|
||||
@@ -43,6 +43,9 @@ pip install ./flash_attn-2.6.3+cu124torch2.5-cp312-cp312-linux_x86_64.whl
|
||||
|
||||
## Packages
|
||||
|
||||
> [!NOTE]
|
||||
> Since v0.7.0, wheels are built for the manylinux_2_28 platform.
|
||||
|
||||
> [!NOTE]
|
||||
> Since v0.5.0, wheels are built with a local version label indicating the CUDA and PyTorch versions.
|
||||
> Example (`pip list`): `flash_attn==2.8.3` -> `flash_attn==2.8.3+cu130torch2.9`
|
||||
@@ -75,6 +78,7 @@ If you use this repository in your research and find it helpful, please cite thi
|
||||
- [@KiralyCraft](https://github.com/KiralyCraft) : Provided computing resources!
|
||||
- [@kun432](https://github.com/kun432) : Buy me a coffee!
|
||||
- [@wodeyuzhou](https://github.com/wodeyuzhou) : Sponsored me!
|
||||
- Gabr1e1 : Buy me a coffee!
|
||||
|
||||
## Star History and Download Statistics
|
||||
|
||||
@@ -112,7 +116,7 @@ If you use this repository in your research and find it helpful, please cite thi
|
||||
If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions.
|
||||
|
||||
1. Fork this repository
|
||||
2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build.
|
||||
2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build. You can use GitHub-hosted runners or self-hosted runners with the settings below.
|
||||
3. Add tag `v*.*.*` to trigger the build workflow. `git tag v*.*.* && git push --tags`
|
||||
|
||||
Please note that depending on the combination of versions, it may not be possible to build.
|
||||
@@ -122,7 +126,15 @@ Please note that depending on the combination of versions, it may not be possibl
|
||||
In some version combinations, you cannot build wheels on GitHub-hosted runners due to job time limitations.
|
||||
To build the wheels for these versions, you can use self-hosted runners.
|
||||
|
||||
#### Setup x86_64 Runner
|
||||
#### Getting One-Time Registry Token for GitHub Actions Runner
|
||||
|
||||
```bash
|
||||
gh api \
|
||||
-X POST \
|
||||
/repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
|
||||
```
|
||||
|
||||
#### Setup Linux Self-Hosted Runner
|
||||
|
||||
Clone the repository and navigate to the self-hosted-runner directory.
|
||||
|
||||
@@ -131,13 +143,17 @@ git clone https://github.com/mjun0812/flash-attention-prebuild-wheels.git
|
||||
cd flash-attention-prebuild-wheels/self-hosted-runner
|
||||
```
|
||||
|
||||
Create the environment file from the template.
|
||||
Create environment files from the template. Create one file per architecture you want to build.
|
||||
|
||||
```bash
|
||||
# For x86_64
|
||||
cp env.template env
|
||||
|
||||
# For ARM64
|
||||
cp env.template env.arm
|
||||
```
|
||||
|
||||
Edit the `env` file to set the environment variables.
|
||||
Edit the environment file(s) to set the required variables.
|
||||
|
||||
```bash
|
||||
# Registry Token for GitHub Personal Access Token
|
||||
@@ -152,93 +168,61 @@ RUNNER_LABELS=Linux,self-hosted
|
||||
Edit the `compose.yml` file if you use a repository forked from this repository.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
runner:
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
|
||||
RUNNER_NAME: self-hosted-runner
|
||||
RUNNER_GROUP: default
|
||||
runner:
|
||||
platform: linux/amd64
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/[YOUR_USERNAME]/flash-attention-prebuild-wheels
|
||||
RUNNER_NAME: self-hosted-runner
|
||||
RUNNER_GROUP: default
|
||||
TARGET_ARCH: x64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: x64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: x64
|
||||
PLATFORM: linux/amd64
|
||||
volumes:
|
||||
- fa-self:/var/lib/docker
|
||||
|
||||
runner-arm:
|
||||
platform: linux/arm64
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env.arm
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/[YOUR_USERNAME]/flash-attention-prebuild-wheels
|
||||
RUNNER_NAME: self-hosted-runner-arm
|
||||
RUNNER_GROUP: default
|
||||
TARGET_ARCH: arm64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: arm64
|
||||
PLATFORM: linux/arm64
|
||||
volumes:
|
||||
- fa-self-arm:/var/lib/docker
|
||||
```
|
||||
|
||||
Build and run the docker container.
|
||||
Build and run the docker container(s).
|
||||
|
||||
```bash
|
||||
# Build and run
|
||||
# x86_64 runner
|
||||
docker compose build runner
|
||||
docker compose up -d runner
|
||||
```
|
||||
|
||||
#### (Optional) Setup ARM64 Runner
|
||||
|
||||
If you also want to build wheels for ARM64 architecture, follow these additional steps.
|
||||
|
||||
Install qemu-user-static for ARM64 support.
|
||||
|
||||
```bash
|
||||
sudo apt install qemu-user-static
|
||||
```
|
||||
|
||||
Create the environment file for ARM64 runner.
|
||||
|
||||
```bash
|
||||
cp env.template env.arm
|
||||
```
|
||||
|
||||
Edit the `env.arm` file with the same configuration as the `env` file.
|
||||
|
||||
Add the ARM64 runner service to your `compose.yml` file.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
runner:
|
||||
# ... (existing x86_64 runner configuration)
|
||||
|
||||
runner-arm:
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env.arm
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
|
||||
RUNNER_NAME: self-hosted-runner-arm
|
||||
RUNNER_GROUP: default
|
||||
TARGET_ARCH: arm64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: arm64
|
||||
PLATFORM: linux/arm64
|
||||
```
|
||||
|
||||
Build and run the ARM64 runner container.
|
||||
|
||||
```bash
|
||||
# Build and run both x86_64 and ARM64 runners
|
||||
# ARM64 runner (optional)
|
||||
docker compose build runner-arm
|
||||
docker compose up -d runner-arm
|
||||
```
|
||||
|
||||
### Getting One-Time Registry Token for GitHub Actions Runner
|
||||
|
||||
```bash
|
||||
gh api \
|
||||
-X POST \
|
||||
/repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
|
||||
```
|
||||
|
||||
## Build Environments
|
||||
|
||||
This repository builds wheels across multiple platforms and environments:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
services:
|
||||
runner:
|
||||
platform: linux/amd64
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
@@ -19,27 +20,27 @@ services:
|
||||
volumes:
|
||||
- fa-self:/var/lib/docker
|
||||
|
||||
# runner-arm:
|
||||
# platform: linux/arm64
|
||||
# privileged: true
|
||||
# restart: always
|
||||
# env_file:
|
||||
# - .env.arm
|
||||
# environment:
|
||||
# REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
|
||||
# RUNNER_NAME: self-hosted-runner-arm
|
||||
# RUNNER_GROUP: default
|
||||
# TARGET_ARCH: arm64
|
||||
# build:
|
||||
# context: .
|
||||
# dockerfile: Dockerfile
|
||||
# args:
|
||||
# GH_RUNNER_VERSION: 2.329.0
|
||||
# TARGET_ARCH: arm64
|
||||
# PLATFORM: linux/arm64
|
||||
# volumes:
|
||||
# - fa-self-arm:/var/lib/docker
|
||||
runner-arm:
|
||||
platform: linux/arm64
|
||||
privileged: true
|
||||
restart: always
|
||||
env_file:
|
||||
- .env.arm
|
||||
environment:
|
||||
REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
|
||||
RUNNER_NAME: self-hosted-runner-arm
|
||||
RUNNER_GROUP: default
|
||||
TARGET_ARCH: arm64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
GH_RUNNER_VERSION: 2.329.0
|
||||
TARGET_ARCH: arm64
|
||||
PLATFORM: linux/arm64
|
||||
volumes:
|
||||
- fa-self-arm:/var/lib/docker
|
||||
|
||||
volumes:
|
||||
fa-self:
|
||||
# fa-self-arm:
|
||||
fa-self-arm:
|
||||
|
||||
Reference in New Issue
Block a user