update workflow

2026-07-01 01:37:53 -04:00 · 2025-06-15 21:19:23 +09:00
parent dedf5eb8d5
commit e15e8237d4
5 changed files with 143 additions and 373 deletions
@@ -39,16 +39,14 @@ jobs:
    env:
      DEBIAN_FRONTEND: noninteractive
      TERM: xterm-256color
-      MAX_JOBS: 2
-      NVCC_THREADS: 2
    strategy:
      fail-fast: false
      matrix:
        flash-attn-version: ["2.8.0"]
        python-version: ["3.10", "3.11", "3.12"]
-        torch-version: ["2.4.1","2.5.1","2.6.0"]
+        torch-version: ["2.4.1", "2.5.1", "2.6.0"]
        # # https://developer.nvidia.com/cuda-toolkit-archive
-        cuda-version: ["12.1.1","12.4.1"]
+        cuda-version: ["12.1.1", "12.4.1"]
        exclude:
          # torch < 2.2 does not support Python 3.12
          - python-version: "3.12"
@@ -111,6 +109,9 @@ jobs:
          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH

      - name: Build wheels
+        env:
+          MAX_JOBS: 2
+          NVCC_THREADS: 2
        run: |
          chmod +x build_linux.sh
          ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
@@ -119,7 +120,7 @@ jobs:

      - name: Install Test
        run: |
-          pip install flash-attention/dist/${{ env.wheel_name }}
+          pip install --no-cache-dir --force-reinstall flash-attention/dist/${{ env.wheel_name }}
          python -c "import flash_attn; print(flash_attn.__version__)"

      - name: Upload Release Asset
@@ -155,15 +156,13 @@ jobs:
    env:
      DEBIAN_FRONTEND: noninteractive
      TERM: xterm-256color
-      MAX_JOBS: 2
-      NVCC_THREADS: 2
-    timeout-minutes: 1000
+    timeout-minutes: 2000
    strategy:
      fail-fast: false
      matrix:
        flash-attn-version: ["2.8.0"]
        python-version: ["3.10", "3.11", "3.12"]
-        torch-version: ["2.4.1","2.5.1","2.6.0","2.7.1"]
+        torch-version: ["2.4.1", "2.5.1", "2.6.0", "2.7.1"]
        # https://developer.nvidia.com/cuda-toolkit-archive
        cuda-version: ["12.8.1"]
        exclude:
@@ -251,8 +250,11 @@ jobs:
          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH

      - name: Build wheels
-        timeout-minutes: 800
+        timeout-minutes: 1200
        shell: bash
+        env:
+          MAX_JOBS: 4
+          NVCC_THREADS: 3
        run: |
          chmod +x build_linux.sh
          ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
@@ -262,7 +264,7 @@ jobs:
      - name: Install Test
        shell: bash
        run: |
-          pip install flash-attention/dist/${{ env.wheel_name }}
+          pip install --no-cache-dir --force-reinstall flash-attention/dist/${{ env.wheel_name }}
          python -c "import flash_attn; print(flash_attn.__version__)"

      - name: Upload Release Asset
@@ -288,7 +290,8 @@ jobs:
        shell: bash
        if: always()
        run: |
-          sudo rm -rf /opt/hostedtoolcache/Python/${{ matrix.python-version }}*
+          rm -rf /opt/hostedtoolcache/Python
+          rm -rf ~/.cache/pip

  # build_windows_wheels:
  #   name: Build wheels and Upload (Windows x86_64, GitHub hosted runner)
@@ -365,7 +368,7 @@ jobs:
  #     - name: Install Test
  #       shell: pwsh
  #       run: |
-  #         pip install flash-attention/dist/$env:wheel_name
+  #         pip install --no-cache-dir --force-reinstall flash-attention/dist/$env:wheel_name
  #         python -c "import flash_attn; print(flash_attn.__version__)"

  #     - name: Upload Release Asset
@@ -0,0 +1,118 @@
+name: Build wheels and upload to GitHub Releases
+
+# Runs whenever a tag whose name starts with "v" is pushed.
+on:
+  push:
+    tags:
+      - "v*"
+
+jobs:
+  # #########################################################
+  # Build wheels with self-hosted runner
+  # #########################################################
+  # NOTE(review): the workflow and job names mention uploading, but no step in
+  # this workflow uploads the built wheel; confirm whether an upload step is
+  # missing.
+  build_wheels_self_hosted:
+    name: Build wheels and Upload (Linux x86_64, self-hosted runner)
+    runs-on: self-hosted
+    container:
+      image: ubuntu:22.04
+    defaults:
+      run:
+        shell: bash
+    env:
+      DEBIAN_FRONTEND: noninteractive
+      TERM: xterm-256color
+    # Job-level limit; the "Build wheels" step carries its own 1200-minute limit.
+    timeout-minutes: 2000
+    strategy:
+      fail-fast: false
+      # A single build combination; all four values are passed as arguments
+      # to build_linux.sh in the "Build wheels" step.
+      matrix:
+        flash-attn-version: ["2.8.0"]
+        python-version: ["3.11"]
+        torch-version: ["2.7.1"]
+        # https://developer.nvidia.com/cuda-toolkit-archive
+        cuda-version: ["12.8.1"]
+    steps:
+      # Installs the base tooling inside the container, including sudo, which
+      # later steps call.
+      - name: Install tools
+        shell: bash
+        run: |
+          apt-get update && apt-get install -y --no-install-recommends \
+            curl \
+            ca-certificates \
+            sudo \
+            software-properties-common \
+            wget \
+            unzip \
+            zip \
+            git \
+            build-essential \
+            gcc \
+            g++ \
+            clang \
+            ninja-build \
+            keyboard-configuration
+
+      # Registers the GitHub CLI apt repository with its signing key, then
+      # installs the gh package.
+      # NOTE(review): no later step in this workflow invokes gh; confirm it is
+      # still needed.
+      - name: Install gh
+        shell: bash
+        run: |
+          sudo mkdir -p -m 755 /etc/apt/keyrings
+          out=$(mktemp)
+          wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg
+          cat $out | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null
+          sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg
+          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
+          sudo apt update
+          sudo apt install gh -y
+
+      - uses: actions/checkout@v4
+
+      # Marks the current working directory as a git safe.directory in the
+      # global git config. Presumably needed because the container user does not
+      # own the checked-out workspace; confirm.
+      - name: Configure Git safe directory
+        shell: bash
+        run: |
+          git config --global --add safe.directory $(pwd)
+
+      # Python interpreter for the version selected in the matrix.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      # CUDA toolkit (nvcc + toolkit sub-packages) via the network installer,
+      # with both of the action's cache options turned off.
+      # NOTE(review): the action is referenced at the moving `master` branch
+      # rather than a release tag or commit SHA, so its behavior can change
+      # between runs.
+      - uses: Jimver/cuda-toolkit@master
+        with:
+          cuda: ${{ matrix.cuda-version }}
+          sub-packages: '["nvcc", "toolkit"]'
+          method: "network"
+          use-github-cache: false
+          use-local-cache: false
+
+      # Installs the OS-level and Python-level packages the wheel build needs.
+      - name: Install build dependencies
+        shell: bash
+        run: |
+          # ninja-build and clang are also installed by the "Install tools" step;
+          # apt-get is used because the `apt` front end is not meant for scripts.
+          sudo apt-get install -y ninja-build clang
+          # setuptools is pinned to 75.8.0; it is listed once so the pin is the
+          # only requirement pip sees for that package.
+          pip install -U pip setuptools==75.8.0 wheel packaging psutil
+
+      # Makes the NVIDIA/CUDA directories visible to every later step.
+      - name: Set environment variables
+        shell: bash
+        run: |
+          # Each step runs in its own shell process, so a plain `export` is lost
+          # when this step ends. Appending to the runner-provided GITHUB_PATH and
+          # GITHUB_ENV files is what carries the values into subsequent steps.
+          echo "/usr/local/nvidia/bin" >> "$GITHUB_PATH"
+          echo "/usr/local/nvidia/lib64" >> "$GITHUB_PATH"
+          echo "LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> "$GITHUB_ENV"
+
+      # Runs build_linux.sh for the matrix combination and records the name of
+      # the resulting wheel for later steps.
+      - name: Build wheels
+        timeout-minutes: 1200
+        shell: bash
+        # Presumably these cap build parallelism (compile jobs / nvcc threads);
+        # confirm how build_linux.sh and the flash-attention build consume them.
+        env:
+          MAX_JOBS: 4
+          NVCC_THREADS: 3
+        run: |
+          chmod +x build_linux.sh
+          ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
+          # Take the first .whl found in flash-attention/dist and expose its file
+          # name to later steps as env.wheel_name.
+          wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1))
+          echo "wheel_name=$wheel_name" >> $GITHUB_ENV
+
+      # Smoke test: installs the wheel recorded in env.wheel_name (skipping pip's
+      # cache and reinstalling even if already present), then imports flash_attn
+      # and prints its version.
+      - name: Install Test
+        shell: bash
+        run: |
+          pip install --no-cache-dir --force-reinstall flash-attention/dist/${{ env.wheel_name }}
+          python -c "import flash_attn; print(flash_attn.__version__)"
+
+      # Runs even when an earlier step failed (if: always()) and deletes the
+      # Python tool-cache directory and pip's cache directory.
+      - name: Clean up
+        shell: bash
+        if: always()
+        run: |
+          rm -rf /opt/hostedtoolcache/Python
+          rm -rf ~/.cache/pip
@@ -1,130 +0,0 @@
-name: "Test self-hosted runner"
-on:
-  workflow_dispatch:
-
-jobs:
-  docker:
-    runs-on:
-      - self-hosted
-    timeout-minutes: 10
-    steps:
-      - run: ls
-      - run: pwd
-
-  containers-test:
-    runs-on:
-      - self-hosted
-    container:
-      image: ubuntu:22.04
-    timeout-minutes: 10
-    steps:
-      - run: ls
-      - run: pwd
-
-  test-build:
-    runs-on:
-      - self-hosted
-    timeout-minutes: 1000
-    container:
-      image: ubuntu:22.04
-    env:
-      DEBIAN_FRONTEND: noninteractive
-      TERM: xterm-256color
-    strategy:
-      fail-fast: false
-      matrix:
-        flash-attn-version: ["2.4.3"]
-        python-version: ["3.12"]
-        torch-version: ["2.7.0"]
-        # https://developer.nvidia.com/cuda-toolkit-archive
-        cuda-version: ["11.8.0", "12.8.1"]
-    steps:
-      - name: Install tools
-        run: |
-          apt-get update && apt-get install -y --no-install-recommends \
-            curl \
-            ca-certificates \
-            sudo \
-            software-properties-common \
-            wget \
-            unzip \
-            zip \
-            git \
-            build-essential \
-            gcc \
-            g++ \
-            clang \
-            ninja-build \
-            keyboard-configuration
-
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-      - uses: Jimver/cuda-toolkit@master
-        with:
-          cuda: ${{ matrix.cuda-version }}
-          linux-local-args: '["--toolkit"]'
-          method: "network"
-        env:
-          DEBIAN_FRONTEND: noninteractive
-
-      - name: Set CUDA and PyTorch versions
-        run: |
-          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
-          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
-          echo "CACHE_KEY=cuda-ext-${{ matrix.flash-attn-version }}-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-cuda${{ matrix.cuda-version }}" >> $GITHUB_ENV
-
-      - name: Install build dependencies
-        run: |
-          pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil
-
-      - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
-        run: |
-          export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
-              support_cuda_versions = { \
-                  '2.0': [117, 118], \
-                  '2.1': [118, 121], \
-                  '2.2': [118, 121], \
-                  '2.3': [118, 121], \
-                  '2.4': [118, 121, 124], \
-                  '2.5': [118, 121, 124], \
-                  '2.6': [118, 124, 126], \
-                  '2.7': [118, 126, 128], \
-              }; \
-              target_cuda_versions = support_cuda_versions[env['MATRIX_TORCH_VERSION']]; \
-              cuda_version = int(env['MATRIX_CUDA_VERSION']); \
-              closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \
-              print(closest_version) \
-          ")
-          pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
-          nvcc --version
-          python -V
-          python -c "import torch; print('PyTorch:', torch.__version__)"
-          python -c "import torch; print('CUDA:', torch.version.cuda)"
-          python -c "from torch.utils import cpp_extension; print(cpp_extension.CUDA_HOME)"
-
-      - name: Checkout flash-attn
-        run: |
-          git clone https://github.com/Dao-AILab/flash-attention.git -b "v${{ matrix.flash-attn-version }}"
-          cd flash-attention
-
-      - name: Build wheels
-        timeout-minutes: 800
-        run: |
-          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          export MAX_JOBS=2
-          export NVCC_THREADS=2
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
-          cd flash-attention
-          python setup.py bdist_wheel --dist-dir=dist
-          base_wheel_name=$(basename $(ls dist/*.whl | head -n 1))
-          wheel_name=$(echo $base_wheel_name | sed "s/${{ matrix.flash-attn-version }}/${{ matrix.flash-attn-version }}+cu${{ env.MATRIX_CUDA_VERSION }}torch${{ env.MATRIX_TORCH_VERSION }}/")
-          mv dist/$base_wheel_name dist/$wheel_name
-          echo "wheel_name=$wheel_name" >> $GITHUB_ENV
-
-      - name: Install Test
-        run: |
-          pip install flash-attention/dist/${{ env.wheel_name }}
-          python -c "import flash_attn; print(flash_attn.__version__)"
@@ -1,224 +0,0 @@
-name: Test workflow
-
-on:
-  workflow_dispatch:
-
-jobs:
-  # #########################################################
-  # Build wheels with GitHub hosted runner
-  # #########################################################
-  build_wheels:
-    name: Build wheels
-    runs-on: ubuntu-22.04
-    env:
-      DEBIAN_FRONTEND: noninteractive
-      TERM: xterm-256color
-    timeout-minutes: 1000
-    strategy:
-      fail-fast: false
-      matrix:
-        flash-attn-version: ["2.4.3", "2.5.9", "2.6.3"]
-        python-version: ["3.10", "3.11", "3.12"]
-        torch-version: ["2.8.0.dev20250523"]
-        # # https://developer.nvidia.com/cuda-toolkit-archive
-        cuda-version: ["12.8.1"]
-        exclude:
-          # torch < 2.2 does not support Python 3.12
-          - python-version: "3.12"
-            torch-version: "2.0.1"
-          - python-version: "3.12"
-            torch-version: "2.1.2"
-          # torch 2.0.1 does not support CUDA 12.x
-          - torch-version: "2.0.1"
-            cuda-version: "12.1.1"
-          - torch-version: "2.0.1"
-            cuda-version: "12.4.1"
-          - torch-version: "2.0.1"
-            cuda-version: "12.6.3"
-          - torch-version: "2.0.1"
-            cuda-version: "12.8.1"
-          # torch 2.7.0 does not support CUDA 12.4
-          - torch-version: "2.7.0"
-            cuda-version: "12.4.1"
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Maximize build space
-        run: |
-          df -h
-          echo "-----------------------------"
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache/CodeQL
-          df -h
-
-      - name: Set Swap Space
-        uses: pierotofy/set-swap-space@master
-        with:
-          swap-size-gb: 48
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - uses: Jimver/cuda-toolkit@master
-        with:
-          cuda: ${{ matrix.cuda-version }}
-          sub-packages: '["nvcc", "toolkit"]'
-          method: "network"
-
-      - name: Install build dependencies
-        run: |
-          sudo apt install -y ninja-build clang
-          pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil
-
-      - name: Set environment variables
-        run: |
-          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          export MAX_JOBS=2
-          export NVCC_THREADS=2
-          export FLASH_ATTENTION_FORCE_BUILD=TRUE
-
-      - name: Build wheels
-        timeout-minutes: 800
-        run: |
-          chmod +x build_linux.sh
-          ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
-          wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1))
-          echo "wheel_name=$wheel_name" >> $GITHUB_ENV
-
-      - name: Install Test
-        run: |
-          pip install flash-attention/dist/${{ env.wheel_name }}
-          python -c "import flash_attn; print(flash_attn.__version__)"
-
-  # #########################################################
-  # Build wheels with self-hosted runner
-  # #########################################################
-  # build_wheels_self_hosted:
-  #   name: Build wheels
-  #   runs-on: self-hosted
-  #   container:
-  #     image: ubuntu:22.04
-  #   defaults:
-  #     run:
-  #       shell: bash
-  #   env:
-  #     DEBIAN_FRONTEND: noninteractive
-  #     TERM: xterm-256color
-  #   timeout-minutes: 1000
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #       flash-attn-version: ["2.7.4"]
-  #       python-version: ["3.10", "3.11", "3.12"]
-  #       torch-version: ["2.8.0.dev20250523"]
-  #       # https://developer.nvidia.com/cuda-toolkit-archive
-  #       cuda-version: ["12.8.1"]
-  #       exclude:
-  #         # torch < 2.2 does not support Python 3.12
-  #         - python-version: "3.12"
-  #           torch-version: "2.0.1"
-  #         - python-version: "3.12"
-  #           torch-version: "2.1.2"
-  #         # torch 2.0.1 does not support CUDA 12.x
-  #         - torch-version: "2.0.1"
-  #           cuda-version: "12.1.1"
-  #         - torch-version: "2.0.1"
-  #           cuda-version: "12.4.1"
-  #         - torch-version: "2.0.1"
-  #           cuda-version: "12.6.3"
-  #         - torch-version: "2.0.1"
-  #           cuda-version: "12.8.1"
-  #         # torch 2.7.0 does not support CUDA 12.4
-  #         - torch-version: "2.7.0"
-  #           cuda-version: "12.4.1"
-
-  #   steps:
-  #     - name: Install tools
-  #       shell: bash
-  #       run: |
-  #         apt-get update && apt-get install -y --no-install-recommends \
-  #           curl \
-  #           ca-certificates \
-  #           sudo \
-  #           software-properties-common \
-  #           wget \
-  #           unzip \
-  #           zip \
-  #           git \
-  #           build-essential \
-  #           gcc \
-  #           g++ \
-  #           clang \
-  #           ninja-build \
-  #           keyboard-configuration
-
-  #     - uses: actions/checkout@v4
-
-  #     - uses: actions/setup-python@v5
-  #       with:
-  #         python-version: ${{ matrix.python-version }}
-
-  #     - uses: Jimver/cuda-toolkit@master
-  #       env:
-  #         DEBIAN_FRONTEND: noninteractive
-  #       with:
-  #         cuda: ${{ matrix.cuda-version }}
-  #         sub-packages: '["nvcc", "toolkit"]'
-  #         method: "network"
-
-  #     - name: Install build dependencies
-  #       run: |
-  #         pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil
-
-  #     - name: Set environment variables
-  #       run: |
-  #         export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-  #         export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-  #         export MAX_JOBS=2
-  #         export NVCC_THREADS=2
-  #         export FLASH_ATTENTION_FORCE_BUILD=TRUE
-
-  #     - name: Build wheels
-  #       timeout-minutes: 800
-  #       run: |
-  #         chmod +x build_linux.sh
-  #         ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
-  #         wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1))
-  #         echo "wheel_name=$wheel_name" >> $GITHUB_ENV
-
-  #     - name: Install Test
-  #       run: |
-  #         pip install flash-attention/dist/${{ env.wheel_name }}
-  #         python -c "import flash_attn; print(flash_attn.__version__)"
-
-  #     - name: Get the tag version
-  #       id: extract_branch
-  #       shell: bash
-  #       run: echo "branch=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
-
-  #     - name: Get Release with Tag
-  #       id: get_release
-  #       uses: joutvhu/get-release@v1
-  #       env:
-  #         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #       with:
-  #         tag_name: ${{ steps.extract_branch.outputs.branch }}
-
-  #     - name: Upload Release Asset
-  #       uses: actions/upload-release-asset@v1
-  #       env:
-  #         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #       with:
-  #         upload_url: ${{ steps.get_release.outputs.upload_url }}
-  #         asset_path: flash-attention/dist/${{ env.wheel_name }}
-  #         asset_name: ${{ env.wheel_name }}
-  #         asset_content_type: application/*
-
-  #     - name: Clean up
-  #       if: always()
-  #       run: |
-  #         sudo rm -rf /opt/hostedtoolcache/Python/${{ matrix.python-version }}*
@@ -24,7 +24,7 @@ echo "  Torch Matrix: $MATRIX_TORCH_VERSION"

 # Install PyTorch
 echo "Installing PyTorch $TORCH_VERSION+cu$CUDA_VERSION..."
-TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
+# Select the PyTorch wheel CUDA tag (e.g. 128) for the requested torch/CUDA pair:
+# the closest supported tag that shares the requested tag's first two digits,
+# or the last supported tag for that torch version when none does.
+#
+# Every line of the snippet ends in backslash-newline, which bash removes inside
+# double quotes, so Python receives ONE physical line. Two rules follow:
+#   * the code must start right after the opening quote, because leading spaces
+#     raise IndentationError on Python versions before 3.14;
+#   * only simple statements joined by ';' are allowed, because an if/else block
+#     after ';' is a SyntaxError. A conditional expression is used instead.
+TORCH_CUDA_VERSION=$(python -c "support_cuda_versions = { \
-    support_cuda_versions = { \
        '2.0': [117, 118], \
        '2.1': [118, 121], \
@@ -36,18 +36,21 @@ TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
        '2.7': [118, 126, 128], \
        '2.8': [128], \
    }; \
-    matrix_cuda_version = '$MATRIX_CUDA_VERSION'; \
+    cuda_version = int('$MATRIX_CUDA_VERSION'); \
    matrix_torch_version = '$MATRIX_TORCH_VERSION'; \
    target_cuda_versions = support_cuda_versions[matrix_torch_version]; \
-    cuda_version = int(matrix_cuda_version); \
-    closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \
+    same_major = [v for v in target_cuda_versions if str(v)[:2] == str(cuda_version)[:2]]; \
+    closest_version = min(same_major, key=lambda x: abs(x - cuda_version)) if same_major else target_cuda_versions[-1]; \
    print(closest_version) \
 ")

 if [[ $TORCH_VERSION == *"dev"* ]]; then
-  pip install --pre torch==$TORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+  pip install --force-reinstall --no-cache-dir --pre torch==$TORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
 else
-  pip install --no-cache-dir torch==$TORCH_VERSION --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+  pip install --force-reinstall --no-cache-dir torch==$TORCH_VERSION --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
 fi

 # Verify installation