Merge pull request #17 from mjun0812/feature/build-width-torch-nightly

Add support for torch nightly builds
2026-07-01 01:37:53 -04:00 · 2025-05-24 04:49:13 +09:00
parent b4d8949d68 d5245a6c98
commit f8c933ee9d
1 changed files with 38 additions and 13 deletions
@@ -1,7 +1,7 @@
 name: Build wheels and upload to GitHub Releases

 on:
-  create:
+  push:
    tags:
      - "v*"

@@ -25,19 +25,25 @@ jobs:
          body: |
            | Flash-Attention | Python | PyTorch | CUDA |
            |-----------------|--------|---------|------|
-            | 2.4.3, 2.5.9, 2.6.3, 2.7.4 | 3.10, 3.11, 3.12 | 2.7.0 | 12.8.1 |
+            | 2.4.3, 2.5.9, 2.6.3, 2.7.4 | 3.10, 3.11, 3.12 | 2.8.0.dev20250523 | 12.8.1 |

+  # #########################################################
+  # Build wheels with GitHub hosted runner
+  # #########################################################
  build_wheels:
    name: Build wheels and Upload
    needs: create_releases
    runs-on: ubuntu-22.04
+    env:
+      DEBIAN_FRONTEND: noninteractive
+      TERM: xterm-256color
    timeout-minutes: 1000
    strategy:
      fail-fast: false
      matrix:
        flash-attn-version: ["2.4.3", "2.5.9", "2.6.3"]
        python-version: ["3.10", "3.11", "3.12"]
-        torch-version: ["2.7.0"]
+        torch-version: ["2.8.0.dev20250523"]
        # https://developer.nvidia.com/cuda-toolkit-archive
        cuda-version: ["12.8.1"]
        exclude:
@@ -58,9 +64,9 @@ jobs:
          # torch 2.7.0 does not support CUDA 12.4
          - torch-version: "2.7.0"
            cuda-version: "12.4.1"
-
    steps:
      - uses: actions/checkout@v4
+
      - name: Maximize build space
        run: |
          df -h
@@ -70,6 +76,7 @@ jobs:
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          df -h
+
      - name: Set Swap Space
        uses: pierotofy/set-swap-space@master
        with:
@@ -78,6 +85,7 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
+
      - uses: Jimver/cuda-toolkit@master
        with:
          cuda: ${{ matrix.cuda-version }}
@@ -88,7 +96,6 @@ jobs:
        run: |
          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
-          echo "CACHE_KEY=cuda-ext-${{ matrix.flash-attn-version }}-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-cuda${{ matrix.cuda-version }}" >> $GITHUB_ENV

      - name: Install build dependencies
        run: |
@@ -107,13 +114,20 @@ jobs:
                  '2.5': [118, 121, 124], \
                  '2.6': [118, 124, 126], \
                  '2.7': [118, 126, 128], \
+                  '2.8': [128], \
              }; \
              target_cuda_versions = support_cuda_versions[env['MATRIX_TORCH_VERSION']]; \
              cuda_version = int(env['MATRIX_CUDA_VERSION']); \
              closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \
              print(closest_version) \
          ")
-          pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+
+          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
+            pip install --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+          else
+            pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+          fi
+
          nvcc --version
          python -V
          python -c "import torch; print('PyTorch:', torch.__version__)"
@@ -123,7 +137,6 @@ jobs:
      - name: Checkout flash-attn
        run: |
          git clone https://github.com/Dao-AILab/flash-attention.git -b "v${{ matrix.flash-attn-version }}"
-          cd flash-attention

      - name: Build wheels
        timeout-minutes: 800
@@ -132,7 +145,8 @@ jobs:
          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
          export MAX_JOBS=2
          export NVCC_THREADS=2
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
+          export FLASH_ATTENTION_FORCE_BUILD=TRUE
+
          cd flash-attention
          python setup.py bdist_wheel --dist-dir=dist
          base_wheel_name=$(basename $(ls dist/*.whl | head -n 1))
@@ -167,6 +181,9 @@ jobs:
          asset_name: ${{ env.wheel_name }}
          asset_content_type: application/*

+  # #########################################################
+  # Build wheels with self-hosted runner
+  # #########################################################
  build_wheels_self_hosted:
    name: Build wheels and Upload
    needs: create_releases
@@ -182,7 +199,7 @@ jobs:
      matrix:
        flash-attn-version: ["2.7.4"]
        python-version: ["3.10", "3.11", "3.12"]
-        torch-version: ["2.7.0"]
+        torch-version: ["2.8.0.dev20250523"]
        # https://developer.nvidia.com/cuda-toolkit-archive
        cuda-version: ["12.8.1"]
        exclude:
@@ -224,9 +241,11 @@ jobs:
            keyboard-configuration

      - uses: actions/checkout@v4
+
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
+
      - uses: Jimver/cuda-toolkit@master
        env:
          DEBIAN_FRONTEND: noninteractive
@@ -239,7 +258,6 @@ jobs:
        run: |
          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
-          echo "CACHE_KEY=cuda-ext-${{ matrix.flash-attn-version }}-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-cuda${{ matrix.cuda-version }}" >> $GITHUB_ENV

      - name: Install build dependencies
        run: |
@@ -258,13 +276,20 @@ jobs:
                  '2.5': [118, 121, 124], \
                  '2.6': [118, 124, 126], \
                  '2.7': [118, 126, 128], \
+                  '2.8': [128], \
              }; \
              target_cuda_versions = support_cuda_versions[env['MATRIX_TORCH_VERSION']]; \
              cuda_version = int(env['MATRIX_CUDA_VERSION']); \
              closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \
              print(closest_version) \
          ")
-          pip install --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+
+          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
+            pip install --pre --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+          else
+            pip install --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+          fi
+
          nvcc --version
          python -V
          python -c "import torch; print('PyTorch:', torch.__version__)"
@@ -274,7 +299,6 @@ jobs:
      - name: Checkout flash-attn
        run: |
          git clone https://github.com/Dao-AILab/flash-attention.git -b "v${{ matrix.flash-attn-version }}"
-          cd flash-attention

      - name: Build wheels
        timeout-minutes: 800
@@ -283,7 +307,8 @@ jobs:
          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
          export MAX_JOBS=2
          export NVCC_THREADS=4
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
+          export FLASH_ATTENTION_FORCE_BUILD=TRUE
+
          cd flash-attention
          python setup.py bdist_wheel --dist-dir=dist
          base_wheel_name=$(basename $(ls dist/*.whl | head -n 1))