From d5245a6c989f4775678ff046fe11bab70b9ed068 Mon Sep 17 00:00:00 2001
From: Junya Morioka <mjun@mjunya.com>
Date: Sat, 24 May 2025 04:47:16 +0900
Subject: [PATCH] add support for torch nightly

---
 .github/workflows/build.yml | 51 +++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6f86357..f61cfdc 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,7 +1,7 @@
 name: Build wheels and upload to GitHub Releases
 
 on:
-  create:
+  push:
     tags:
       - "v*"
 
@@ -25,19 +25,25 @@ jobs:
           body: |
             | Flash-Attention | Python | PyTorch | CUDA |
             |-----------------|--------|---------|------|
-            | 2.4.3, 2.5.9, 2.6.3, 2.7.4 | 3.10, 3.11, 3.12 | 2.7.0 | 12.8.1 |
+            | 2.4.3, 2.5.9, 2.6.3, 2.7.4 | 3.10, 3.11, 3.12 | 2.8.0.dev20250523 | 12.8.1 |
 
+  # #########################################################
+  # Build wheels with GitHub hosted runner
+  # #########################################################
   build_wheels:
     name: Build wheels and Upload
     needs: create_releases
     runs-on: ubuntu-22.04
+    env:
+      DEBIAN_FRONTEND: noninteractive
+      TERM: xterm-256color
     timeout-minutes: 1000
     strategy:
       fail-fast: false
       matrix:
         flash-attn-version: ["2.4.3", "2.5.9", "2.6.3"]
         python-version: ["3.10", "3.11", "3.12"]
-        torch-version: ["2.7.0"]
+        torch-version: ["2.8.0.dev20250523"]
         # https://developer.nvidia.com/cuda-toolkit-archive
         cuda-version: ["12.8.1"]
         exclude:
@@ -58,9 +64,9 @@ jobs:
           # torch 2.7.0 does not support CUDA 12.4
           - torch-version: "2.7.0"
             cuda-version: "12.4.1"
-
     steps:
       - uses: actions/checkout@v4
+
       - name: Maximize build space
         run: |
           df -h
@@ -70,6 +76,7 @@ jobs:
           sudo rm -rf /opt/ghc
           sudo rm -rf /opt/hostedtoolcache/CodeQL
           df -h
+
       - name: Set Swap Space
         uses: pierotofy/set-swap-space@master
         with:
@@ -78,6 +85,7 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+
       - uses: Jimver/cuda-toolkit@master
         with:
           cuda: ${{ matrix.cuda-version }}
@@ -88,7 +96,6 @@ jobs:
         run: |
           echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
           echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
-          echo "CACHE_KEY=cuda-ext-${{ matrix.flash-attn-version }}-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-cuda${{ matrix.cuda-version }}" >> $GITHUB_ENV
 
       - name: Install build dependencies
         run: |
@@ -107,13 +114,20 @@ jobs:
                   '2.5': [118, 121, 124], \
                   '2.6': [118, 124, 126], \
                   '2.7': [118, 126, 128], \
+                  '2.8': [128], \
               }; \
               target_cuda_versions = support_cuda_versions[env['MATRIX_TORCH_VERSION']]; \
               cuda_version = int(env['MATRIX_CUDA_VERSION']); \
               closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \
               print(closest_version) \
           ")
-          pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+
+          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then  # "dev" in the version: install from the nightly wheel index
+            pip install --pre --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+          else  # any other version: install from the regular wheel index
+            pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+          fi
+
           nvcc --version
           python -V
           python -c "import torch; print('PyTorch:', torch.__version__)"
@@ -123,7 +137,6 @@ jobs:
       - name: Checkout flash-attn
         run: |
           git clone https://github.com/Dao-AILab/flash-attention.git -b "v${{ matrix.flash-attn-version }}"
-          cd flash-attention
 
       - name: Build wheels
         timeout-minutes: 800
@@ -132,7 +145,8 @@ jobs:
           export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
           export MAX_JOBS=2
           export NVCC_THREADS=2
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
+          export FLASH_ATTENTION_FORCE_BUILD=TRUE
+
           cd flash-attention
           python setup.py bdist_wheel --dist-dir=dist
           base_wheel_name=$(basename $(ls dist/*.whl | head -n 1))
@@ -167,6 +181,9 @@ jobs:
           asset_name: ${{ env.wheel_name }}
           asset_content_type: application/*
 
+  # #########################################################
+  # Build wheels with self-hosted runner
+  # #########################################################
   build_wheels_self_hosted:
     name: Build wheels and Upload
     needs: create_releases
@@ -182,7 +199,7 @@ jobs:
       matrix:
         flash-attn-version: ["2.7.4"]
         python-version: ["3.10", "3.11", "3.12"]
-        torch-version: ["2.7.0"]
+        torch-version: ["2.8.0.dev20250523"]
         # https://developer.nvidia.com/cuda-toolkit-archive
         cuda-version: ["12.8.1"]
         exclude:
@@ -224,9 +241,11 @@ jobs:
             keyboard-configuration
 
       - uses: actions/checkout@v4
+
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+
       - uses: Jimver/cuda-toolkit@master
         env:
           DEBIAN_FRONTEND: noninteractive
@@ -239,7 +258,6 @@ jobs:
         run: |
           echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
           echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
-          echo "CACHE_KEY=cuda-ext-${{ matrix.flash-attn-version }}-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-cuda${{ matrix.cuda-version }}" >> $GITHUB_ENV
 
       - name: Install build dependencies
         run: |
@@ -258,13 +276,20 @@ jobs:
                   '2.5': [118, 121, 124], \
                   '2.6': [118, 124, 126], \
                   '2.7': [118, 126, 128], \
+                  '2.8': [128], \
               }; \
               target_cuda_versions = support_cuda_versions[env['MATRIX_TORCH_VERSION']]; \
               cuda_version = int(env['MATRIX_CUDA_VERSION']); \
               closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \
               print(closest_version) \
           ")
-          pip install --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+
+          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then  # "dev" in the version: install from the nightly wheel index
+            pip install --pre --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+          else  # any other version: install from the regular wheel index
+            pip install --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+          fi
+
           nvcc --version
           python -V
           python -c "import torch; print('PyTorch:', torch.__version__)"
@@ -274,7 +299,6 @@ jobs:
       - name: Checkout flash-attn
         run: |
           git clone https://github.com/Dao-AILab/flash-attention.git -b "v${{ matrix.flash-attn-version }}"
-          cd flash-attention
 
       - name: Build wheels
         timeout-minutes: 800
@@ -283,7 +307,8 @@ jobs:
           export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
           export MAX_JOBS=2
           export NVCC_THREADS=4
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
+          export FLASH_ATTENTION_FORCE_BUILD=TRUE
+
           cd flash-attention
           python setup.py bdist_wheel --dist-dir=dist
           base_wheel_name=$(basename $(ls dist/*.whl | head -n 1))