From d5245a6c989f4775678ff046fe11bab70b9ed068 Mon Sep 17 00:00:00 2001 From: Junya Morioka Date: Sat, 24 May 2025 04:47:16 +0900 Subject: [PATCH] add support torch nightly --- .github/workflows/build.yml | 51 +++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6f86357..f61cfdc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,7 +1,7 @@ name: Build wheels and upload to GitHub Releases on: - create: + push: tags: - "v*" @@ -25,19 +25,25 @@ jobs: body: | | Flash-Attention | Python | PyTorch | CUDA | |-----------------|--------|---------|------| - | 2.4.3, 2.5.9, 2.6.3, 2.7.4 | 3.10, 3.11, 3.12 | 2.7.0 | 12.8.1 | + | 2.4.3, 2.5.9, 2.6.3, 2.7.4 | 3.10, 3.11, 3.12 | 2.8.0.dev20250523 | 12.8.1 | + # ######################################################### + # Build wheels with GitHub hosted runner + # ######################################################### build_wheels: name: Build wheels and Upload needs: create_releases runs-on: ubuntu-22.04 + env: + DEBIAN_FRONTEND: noninteractive + TERM: xterm-256color timeout-minutes: 1000 strategy: fail-fast: false matrix: flash-attn-version: ["2.4.3", "2.5.9", "2.6.3"] python-version: ["3.10", "3.11", "3.12"] - torch-version: ["2.7.0"] + torch-version: ["2.8.0.dev20250523"] # https://developer.nvidia.com/cuda-toolkit-archive cuda-version: ["12.8.1"] exclude: @@ -58,9 +64,9 @@ jobs: # torch 2.7.0 does not support CUDA 12.4 - torch-version: "2.7.0" cuda-version: "12.4.1" - steps: - uses: actions/checkout@v4 + - name: Maximize build space run: | df -h @@ -70,6 +76,7 @@ jobs: sudo rm -rf /opt/ghc sudo rm -rf /opt/hostedtoolcache/CodeQL df -h + - name: Set Swap Space uses: pierotofy/set-swap-space@master with: @@ -78,6 +85,7 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - uses: Jimver/cuda-toolkit@master with: cuda: ${{ matrix.cuda-version }} @@ -88,7 +96,6 @@ jobs: run: | echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV - echo "CACHE_KEY=cuda-ext-${{ matrix.flash-attn-version }}-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-cuda${{ matrix.cuda-version }}" >> $GITHUB_ENV - name: Install build dependencies run: | @@ -107,13 +114,20 @@ jobs: '2.5': [118, 121, 124], \ '2.6': [118, 124, 126], \ '2.7': [118, 126, 128], \ + '2.8': [128], \ }; \ target_cuda_versions = support_cuda_versions[env['MATRIX_TORCH_VERSION']]; \ cuda_version = int(env['MATRIX_CUDA_VERSION']); \ closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \ print(closest_version) \ ") - pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION} + + if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then + pip install --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION} + else + pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION} + fi + nvcc --version python -V python -c "import torch; print('PyTorch:', torch.__version__)" @@ -123,7 +137,6 @@ jobs: - name: Checkout flash-attn run: | git clone https://github.com/Dao-AILab/flash-attention.git -b "v${{ matrix.flash-attn-version }}" - cd flash-attention - name: Build wheels timeout-minutes: 800 @@ -132,7 +145,8 @@ jobs: export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH export MAX_JOBS=2 export NVCC_THREADS=2 - export FLASH_ATTENTION_FORCE_BUILD="TRUE" + export FLASH_ATTENTION_FORCE_BUILD=TRUE + cd flash-attention python setup.py bdist_wheel --dist-dir=dist base_wheel_name=$(basename $(ls dist/*.whl | head -n 1)) @@ -167,6 +181,9 @@ jobs: asset_name: ${{ env.wheel_name }} asset_content_type: application/* + # ######################################################### + # Build wheels with self-hosted runner + # ######################################################### build_wheels_self_hosted: name: Build wheels and Upload needs: create_releases @@ -182,7 +199,7 @@ jobs: matrix: flash-attn-version: ["2.7.4"] python-version: ["3.10", "3.11", "3.12"] - torch-version: ["2.7.0"] + torch-version: ["2.8.0.dev20250523"] # https://developer.nvidia.com/cuda-toolkit-archive cuda-version: ["12.8.1"] exclude: @@ -224,9 +241,11 @@ jobs: keyboard-configuration - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - uses: Jimver/cuda-toolkit@master env: DEBIAN_FRONTEND: noninteractive @@ -239,7 +258,6 @@ jobs: run: | echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV - echo "CACHE_KEY=cuda-ext-${{ matrix.flash-attn-version }}-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-cuda${{ matrix.cuda-version }}" >> $GITHUB_ENV - name: Install build dependencies run: | @@ -258,13 +276,20 @@ jobs: '2.5': [118, 121, 124], \ '2.6': [118, 124, 126], \ '2.7': [118, 126, 128], \ + '2.8': [128], \ }; \ target_cuda_versions = support_cuda_versions[env['MATRIX_TORCH_VERSION']]; \ cuda_version = int(env['MATRIX_CUDA_VERSION']); \ closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \ print(closest_version) \ ") - pip install --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION} + + if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then + pip install --pre --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION} + else + pip install --force-reinstall --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION} + fi + nvcc --version python -V python -c "import torch; print('PyTorch:', torch.__version__)" @@ -274,7 +299,6 @@ jobs: - name: Checkout flash-attn run: | git clone https://github.com/Dao-AILab/flash-attention.git -b "v${{ matrix.flash-attn-version }}" - cd flash-attention - name: Build wheels timeout-minutes: 800 @@ -283,7 +307,8 @@ jobs: export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH export MAX_JOBS=2 export NVCC_THREADS=4 - export FLASH_ATTENTION_FORCE_BUILD="TRUE" + export FLASH_ATTENTION_FORCE_BUILD=TRUE + cd flash-attention python setup.py bdist_wheel --dist-dir=dist base_wheel_name=$(basename $(ls dist/*.whl | head -n 1))