feat: add Windows self-hosted runner support and update documentation

- Add .github/workflows/test-windows-self-hosted.yml for Windows self-hosted runner testing.
- Update README.md with comprehensive self-hosted runner setup guides for Linux, ARM64, and Windows.
- Update self-hosted-runner/compose.yml to enable both x86_64 and ARM64 runner services.
- Add a note about manylinux2_28 and update the sponsor list in README.md.
2026-07-01 01:37:53 -04:00 · 2026-01-03 00:20:57 +09:00
parent 5f8e6bc102
commit 991becbb7f
3 changed files with 251 additions and 100 deletions
@@ -0,0 +1,166 @@
+# #########################################################
+# Test build wheels with self-hosted runner on Windows x86_64
+#
+# Prerequisites (must be pre-installed on the runner):
+#   - Git
+#   - Chocolatey
+#   - Visual Studio BuildTools 2022 with:
+#     - Microsoft.VisualStudio.Component.VC.Tools.x86.x64
+#     - Microsoft.VisualStudio.Component.VC.CMake.Project
+#     - Microsoft.VisualStudio.Component.Windows11SDK.22621
+#   - CMake
+#   - Ninja
+#   - Make (optional)
+# #########################################################
+
+name: Test Windows build (self-hosted)
+
+# Manual trigger only. Each input is a required string that carries a default.
+on:
+  workflow_dispatch:
+    inputs:
+      # Passed to build_windows.ps1 as -FlashAttnVersion.
+      flash-attn-version:
+        description: 'Flash-Attention version'
+        type: string
+        required: true
+        default: '2.8.3'
+      # Used for `uv venv -p` and passed to build_windows.ps1 as -PythonVersion.
+      python-version:
+        description: 'Python version'
+        type: string
+        required: true
+        default: '3.13'
+      # Passed to build_windows.ps1 as -TorchVersion.
+      torch-version:
+        description: 'PyTorch version'
+        type: string
+        required: true
+        default: '2.9.1'
+      # Given to the setup-cuda step and passed to build_windows.ps1 as -CudaVersion.
+      cuda-version:
+        description: 'CUDA version'
+        type: string
+        required: true
+        default: '12.8.1'
+
+jobs:
+  build_windows_wheels_self_hosted:
+    name: Build wheels and Test (Windows x86_64, self-hosted runner)
+    # The job is routed to a runner that carries all of these labels.
+    runs-on:
+      - self-hosted
+      - windows
+      - x64
+    # 360 minutes = 6 hours.
+    timeout-minutes: 360
+    # Job-level environment, visible to every step below.
+    env:
+      MAX_JOBS: 2
+      NVCC_THREADS: 2
+    steps:
+      # Check out this repository (it provides build_windows.ps1 used below).
+      - uses: actions/checkout@v4
+
+      # Set core.longpaths=true in the system-level Git config.
+      # NOTE(review): --system writes machine-wide config, so this presumably
+      # needs a runner account with sufficient privileges - confirm.
+      - name: Enable Git long paths
+        shell: pwsh
+        run: git config --system core.longpaths true
+
+      # Install Python using uv
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+
+      # Create the .venv virtual environment with uv, install the build
+      # requirements into it, and append its Scripts directory to GITHUB_PATH
+      # so later steps resolve `python` and `pip` from the venv.
+      - name: Install Python
+        shell: pwsh
+        env:
+          # The input is handed over as an environment variable instead of being
+          # expanded into the script text, so its value is never parsed as
+          # PowerShell code on the self-hosted machine.
+          WHEEL_PYTHON_VERSION: ${{ inputs.python-version }}
+        run: |
+          uv venv -p $env:WHEEL_PYTHON_VERSION
+          uv pip install -U pip setuptools==75.8.0 wheel packaging psutil numpy ninja
+          $current_dir = (Get-Location).Path
+          echo "$current_dir\.venv\Scripts" >> $env:GITHUB_PATH
+
+      # Run mjun0812/setup-cuda with the CUDA version requested via the
+      # cuda-version input.
+      - uses: mjun0812/setup-cuda@v1
+        with:
+          version: ${{ inputs.cuda-version }}
+
+      # Visual Studio BuildTools is pre-installed on the runner
+      - name: Setup MSVC Developer Command Prompt
+        uses: TheMrMilchmann/setup-msvc-dev@v3
+        with:
+          arch: x64
+
+      - name: Add msbuild to PATH
+        uses: microsoft/setup-msbuild@v2
+
+      # Run build_windows.ps1 for the requested versions, then export the file
+      # name of the produced wheel as the wheel_name environment variable for
+      # the following steps.
+      - name: Build wheels
+        shell: pwsh
+        env:
+          # Inputs reach the script as environment variables rather than being
+          # expanded into its text, so their values are never parsed as
+          # PowerShell code on the self-hosted machine.
+          WHEEL_FLASH_ATTN_VERSION: ${{ inputs.flash-attn-version }}
+          WHEEL_PYTHON_VERSION: ${{ inputs.python-version }}
+          WHEEL_TORCH_VERSION: ${{ inputs.torch-version }}
+          WHEEL_CUDA_VERSION: ${{ inputs.cuda-version }}
+        run: |
+          .\build_windows.ps1 -FlashAttnVersion $env:WHEEL_FLASH_ATTN_VERSION -PythonVersion $env:WHEEL_PYTHON_VERSION -TorchVersion $env:WHEEL_TORCH_VERSION -CudaVersion $env:WHEEL_CUDA_VERSION
+          $wheel = Get-ChildItem -Path "flash-attention\dist\*.whl" | Select-Object -First 1
+          # Fail in this step when nothing was built; otherwise wheel_name would
+          # be exported empty and the error would only surface in "Install Test".
+          if (-not $wheel) { throw "No wheel (*.whl) found in flash-attention\dist after build_windows.ps1" }
+          echo "wheel_name=$($wheel.Name)" >> $env:GITHUB_ENV
+
+      # Smoke test: install the wheel whose name is stored in wheel_name, then
+      # import flash_attn and print its version.
+      - name: Install Test
+        shell: pwsh
+        run: |
+          $wheelPath = "flash-attention/dist/$env:wheel_name"
+          pip install --no-cache-dir $wheelPath
+          python -c "import flash_attn; print(flash_attn.__version__)"
+
+      # Cleanup step - `if: always()` makes it run even when earlier steps fail.
+      # Removes what this job created (sources, venv, caches, CUDA, temp files);
+      # the pre-installed VS BuildTools is left untouched.
+      - name: Cleanup (always run)
+        if: always()
+        shell: pwsh
+        run: |
+          # Best-effort recursive delete of a single path. Logs either the removal
+          # or the skip under the given "[n/6]" tag; deletion errors are ignored.
+          function Remove-TreeIfPresent($Tag, $Target, $RemovingLabel, $MissingLabel) {
+            if (-not (Test-Path $Target)) {
+              Write-Host "$Tag $MissingLabel not found, skipping"
+              return
+            }
+            Write-Host "$Tag Removing ${RemovingLabel}: $Target"
+            Remove-Item -Path $Target -Recurse -Force -ErrorAction SilentlyContinue
+          }
+
+          $banner = "=========================================="
+          Write-Host $banner
+          Write-Host "Starting cleanup for self-hosted runner..."
+          Write-Host $banner
+
+          # 1-2. Workspace leftovers: the flash-attention tree and the .venv
+          Remove-TreeIfPresent "[1/6]" (Join-Path (Get-Location) "flash-attention") "flash-attention directory" "flash-attention directory"
+          Remove-TreeIfPresent "[2/6]" (Join-Path (Get-Location) ".venv") "Python virtual environment" ".venv directory"
+
+          # 3-4. pip and uv directories under LOCALAPPDATA
+          Remove-TreeIfPresent "[3/6]" (Join-Path $env:LOCALAPPDATA "pip\cache") "pip cache" "pip cache"
+          Remove-TreeIfPresent "[4/6]" (Join-Path $env:LOCALAPPDATA "uv") "uv cache" "uv cache"
+
+          # 5. CUDA: delete every subdirectory of the toolkit folder, keep the folder itself
+          $cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
+          if (-not (Test-Path $cudaRoot)) {
+            Write-Host "[5/6] CUDA directory not found, skipping"
+          } else {
+            Write-Host "[5/6] Removing CUDA installations: $cudaRoot"
+            foreach ($cudaDir in (Get-ChildItem -Path $cudaRoot -Directory -ErrorAction SilentlyContinue)) {
+              Write-Host "  Removing: $($cudaDir.FullName)"
+              Remove-Item -Path $cudaDir.FullName -Recurse -Force -ErrorAction SilentlyContinue
+            }
+          }
+
+          # 6. Entries in TEMP whose names match these wildcards
+          Write-Host "[6/6] Removing temporary files"
+          foreach ($glob in "pip-*", "torch*", "cuda*", "flash*", "uv-*") {
+            Get-ChildItem -Path (Join-Path $env:TEMP $glob) -ErrorAction SilentlyContinue | ForEach-Object {
+              Write-Host "  Removing temp: $($_.FullName)"
+              Remove-Item -Path $_.FullName -Recurse -Force -ErrorAction SilentlyContinue
+            }
+          }
+
+          Write-Host $banner
+          Write-Host "Cleanup completed."
+          Write-Host $banner
@@ -43,6 +43,9 @@ pip install ./flash_attn-2.6.3+cu124torch2.5-cp312-cp312-linux_x86_64.whl

 ## Packages

+> [!NOTE]
+> Since v0.7.0, wheels are built for the `manylinux_2_28` platform.
+
 > [!NOTE]
 > Since v0.5.0, wheels are built with a local version label indicating the CUDA and PyTorch versions.  
 > Example: `pip list` -> `flash_attn==2.8.3 -> flash_attn==2.8.3+cu130torch2.9`
@@ -75,6 +78,7 @@ If you use this repository in your research and find it helpful, please cite thi
 - [@KiralyCraft](https://github.com/KiralyCraft) : Provided with computing resource!
 - [@kun432](https://github.com/kun432) : Buy me a coffee!
 - [@wodeyuzhou](https://github.com/wodeyuzhou) : Sponsored me!
+- Gabr1e1 : Buy me a coffee!

 ## Star History and Download Statistics

@@ -112,7 +116,7 @@ If you use this repository in your research and find it helpful, please cite thi
 If you cannot find the version you are looking for, you can fork this repository and create a wheel on GitHub Actions.

 1. Fork this repository
-2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build.
+2. Edit Python script [`create_matrix.py`](https://github.com/mjun0812/flash-attention-prebuild-wheels/blob/main/create_matrix.py) to set the version you want to build. You can use GitHub-hosted runners or self-hosted runners with the settings below.
 3. Add tag `v*.*.*` to trigger the build workflow. `git tag v*.*.* && git push --tags`

 Please note that depending on the combination of versions, it may not be possible to build.
@@ -122,7 +126,15 @@ Please note that depending on the combination of versions, it may not be possibl
 In some version combinations, you cannot build wheels on GitHub-hosted runners due to job time limitations.
 To build the wheels for these versions, you can use self-hosted runners.

-#### Setup x86_64 Runner
+#### Getting One-Time Registry Token for GitHub Actions Runner
+
+```bash
+gh api \
+  -X POST \
+  /repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
+```
+
+#### Setup Linux Self-Hosted Runner

 Clone the repository and navigate to the self-hosted-runner directory.

@@ -131,13 +143,17 @@ git clone https://github.com/mjun0812/flash-attention-prebuild-wheels.git
 cd flash-attention-prebuild-wheels/self-hosted-runner
 ```

-Create the environment file from the template.
+Create environment files from the template. Create one file per architecture you want to build.

 ```bash
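+# For x86_64 (compose.yml reads this file via env_file: .env)
 cp env.template .env
+
+# For ARM64 (compose.yml reads this file via env_file: .env.arm)
+cp env.template .env.arm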
 ```

-Edit the `env` file to set the environment variables.
+Edit the environment file(s) to set the required variables.

 ```bash
 # Registry Token for GitHub Personal Access Token
@@ -152,93 +168,61 @@ RUNNER_LABELS=Linux,self-hosted
 Edit the `compose.yml` file if you use a repository forked from this repository.

 ```yaml
-services:
-  runner:
-    privileged: true
-    restart: always
-    env_file:
-      - .env
-    environment:
-      REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
-      RUNNER_NAME: self-hosted-runner
-      RUNNER_GROUP: default
+runner:
+  platform: linux/amd64
+  privileged: true
+  restart: always
+  env_file:
+    - .env
+  environment:
+    REPOSITORY_URL: https://github.com/[YOUR_USERNAME]/flash-attention-prebuild-wheels
+    RUNNER_NAME: self-hosted-runner
+    RUNNER_GROUP: default
+    TARGET_ARCH: x64
+  build:
+    context: .
+    dockerfile: Dockerfile
+    args:
+      GH_RUNNER_VERSION: 2.329.0
      TARGET_ARCH: x64
-    build:
-      context: .
-      dockerfile: Dockerfile
-      args:
-        GH_RUNNER_VERSION: 2.329.0
-        TARGET_ARCH: x64
+      PLATFORM: linux/amd64
+  volumes:
+    - fa-self:/var/lib/docker
+
+runner-arm:
+  platform: linux/arm64
+  privileged: true
+  restart: always
+  env_file:
+    - .env.arm
+  environment:
+    REPOSITORY_URL: https://github.com/[YOUR_USERNAME]/flash-attention-prebuild-wheels
+    RUNNER_NAME: self-hosted-runner-arm
+    RUNNER_GROUP: default
+    TARGET_ARCH: arm64
+  build:
+    context: .
+    dockerfile: Dockerfile
+    args:
+      GH_RUNNER_VERSION: 2.329.0
+      TARGET_ARCH: arm64
+      PLATFORM: linux/arm64
+  volumes:
+    - fa-self-arm:/var/lib/docker
 ```

-Build and run the docker container.
+Build and run the docker container(s).

 ```bash
-# Build and run
+# x86_64 runner
 docker compose build runner
 docker compose up -d runner
-```

-#### (Optional) Setup ARM64 Runner
-
-If you also want to build wheels for ARM64 architecture, follow these additional steps.
-
-Install qemu-user-static for ARM64 support.
-
-```bash
-sudo apt install qemu-user-static
-```
-
-Create the environment file for ARM64 runner.
-
-```bash
-cp env.template env.arm
-```
-
-Edit the `env.arm` file with the same configuration as the `env` file.
-
-Add the ARM64 runner service to your `compose.yml` file.
-
-```yaml
-services:
-  runner:
-    # ... (existing x86_64 runner configuration)
-
-  runner-arm:
-    privileged: true
-    restart: always
-    env_file:
-      - .env.arm
-    environment:
-      REPOSITORY_URL: https://github.com/[OWNER]/[REPOSITORY]
-      RUNNER_NAME: self-hosted-runner-arm
-      RUNNER_GROUP: default
-      TARGET_ARCH: arm64
-    build:
-      context: .
-      dockerfile: Dockerfile
-      args:
-        GH_RUNNER_VERSION: 2.329.0
-        TARGET_ARCH: arm64
-        PLATFORM: linux/arm64
-```
-
-Build and run the ARM64 runner container.
-
-```bash
-# Build and run both x86_64 and ARM64 runners
+# ARM64 runner (optional)
 docker compose build runner-arm
 docker compose up -d runner-arm
 ```

-### Getting One-Time Registry Token for GitHub Actions Runner
-
-```bash
-gh api \
-  -X POST \
-  /repos/[OWNER]/[REPOSITORY]/actions/runners/registration-token
-```
-
 ## Build Environments

 This repository builds wheels across multiple platforms and environments:
@@ -1,5 +1,6 @@
 services:
  runner:
+    platform: linux/amd64
    privileged: true
    restart: always
    env_file:
@@ -19,27 +20,27 @@ services:
    volumes:
      - fa-self:/var/lib/docker

-  # runner-arm:
-  #   platform: linux/arm64
-  #   privileged: true
-  #   restart: always
-  #   env_file:
-  #     - .env.arm
-  #   environment:
-  #     REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
-  #     RUNNER_NAME: self-hosted-runner-arm
-  #     RUNNER_GROUP: default
-  #     TARGET_ARCH: arm64
-  #   build:
-  #     context: .
-  #     dockerfile: Dockerfile
-  #     args:
-  #       GH_RUNNER_VERSION: 2.329.0
-  #       TARGET_ARCH: arm64
-  #       PLATFORM: linux/arm64
-  #   volumes:
-  #     - fa-self-arm:/var/lib/docker
+  runner-arm:
+    platform: linux/arm64
+    privileged: true
+    restart: always
+    env_file:
+      - .env.arm
+    environment:
+      REPOSITORY_URL: https://github.com/mjun0812/flash-attention-prebuild-wheels
+      RUNNER_NAME: self-hosted-runner-arm
+      RUNNER_GROUP: default
+      TARGET_ARCH: arm64
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        GH_RUNNER_VERSION: 2.329.0
+        TARGET_ARCH: arm64
+        PLATFORM: linux/arm64
+    volumes:
+      - fa-self-arm:/var/lib/docker

 volumes:
  fa-self:
-  # fa-self-arm:
+  fa-self-arm: