diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7f3ec85..041093f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,121 +33,119 @@ jobs: # ######################################################### # Build wheels with GitHub hosted runner # ######################################################### - # build_wheels: - # name: Build wheels and Upload (Linux x86_64, GitHub hosted runner) - # needs: create_releases - # runs-on: ubuntu-22.04 - # env: - # DEBIAN_FRONTEND: noninteractive - # TERM: xterm-256color - # strategy: - # fail-fast: false - # matrix: - # flash-attn-version: ["2.6.3", "2.7.4", "2.8.2"] - # python-version: ["3.10", "3.11", "3.12"] - # torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"] - # # https://developer.nvidia.com/cuda-toolkit-archive - # cuda-version: ["12.4.1", "12.8.1", "12.9.1"] - # exclude: - # # torch < 2.2 does not support Python 3.12 - # - python-version: "3.12" - # torch-version: "2.0.1" - # - python-version: "3.12" - # torch-version: "2.1.2" - # # torch 2.0.1 does not support CUDA 12.x - # - torch-version: "2.0.1" - # cuda-version: "12.1.1" - # - torch-version: "2.0.1" - # cuda-version: "12.4.1" - # - torch-version: "2.0.1" - # cuda-version: "12.6.3" - # - torch-version: "2.0.1" - # cuda-version: "12.8.1" - # # torch 2.6.0 does not support CUDA 12.1 - # - torch-version: "2.6.0" - # cuda-version: "12.1.1" - # # torch 2.7.0 does not support CUDA 12.4 - # - torch-version: "2.7.0" - # cuda-version: "12.4.1" - # # torch < 2.8 does not support CUDA 12.9 - # - torch-version: "2.5.1" - # cuda-version: "12.9.1" - # - torch-version: "2.6.3" - # cuda-version: "12.9.1" - # - torch-version: "2.7.1" - # cuda-version: "12.9.1" - # steps: - # - uses: actions/checkout@v4 + build_wheels: + name: Build wheels and Upload (Linux x86_64, GitHub hosted runner) + needs: create_releases + runs-on: ubuntu-22.04 + env: + DEBIAN_FRONTEND: noninteractive + TERM: xterm-256color + strategy: + fail-fast: false + matrix: + flash-attn-version: ["2.6.3", "2.7.4", "2.8.2"] + python-version: ["3.10", "3.11", "3.12"] + torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"] + # https://developer.nvidia.com/cuda-toolkit-archive + cuda-version: ["12.4.1", "12.8.1", "12.9.1"] + exclude: + # torch < 2.2 does not support Python 3.12 + - python-version: "3.12" + torch-version: "2.0.1" + - python-version: "3.12" + torch-version: "2.1.2" + # torch 2.0.1 does not support CUDA 12.x + - torch-version: "2.0.1" + cuda-version: "12.1.1" + - torch-version: "2.0.1" + cuda-version: "12.4.1" + - torch-version: "2.0.1" + cuda-version: "12.6.3" + - torch-version: "2.0.1" + cuda-version: "12.8.1" + # torch 2.6.0 does not support CUDA 12.1 + - torch-version: "2.6.0" + cuda-version: "12.1.1" + # torch 2.7.0 does not support CUDA 12.4 + - torch-version: "2.7.0" + cuda-version: "12.4.1" + # torch < 2.8 does not support CUDA 12.9 + - torch-version: "2.5.1" + cuda-version: "12.9.1" + - torch-version: "2.6.3" + cuda-version: "12.9.1" + - torch-version: "2.7.1" + cuda-version: "12.9.1" + steps: + - uses: actions/checkout@v4 - # - name: Maximize build space - # run: | - # df -h - # echo "-----------------------------" - # sudo rm -rf /usr/share/dotnet - # sudo rm -rf /usr/local/lib/android - # sudo rm -rf /opt/ghc - # sudo rm -rf /opt/hostedtoolcache/CodeQL - # df -h + - name: Maximize build space + run: | + df -h + echo "-----------------------------" + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + df -h - # - name: Set Swap Space - # uses: pierotofy/set-swap-space@master - # with: - # swap-size-gb: 48 + - name: Set Swap Space + uses: pierotofy/set-swap-space@master + with: + swap-size-gb: 48 - # - uses: actions/setup-python@v5 - # with: - # python-version: ${{ matrix.python-version }} + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} - # - uses: Jimver/cuda-toolkit@master - # with: - # cuda: ${{ matrix.cuda-version }} - # sub-packages: '["nvcc", "toolkit"]' - # method: "network" - # use-github-cache: false - # use-local-cache: false + - uses: Jimver/cuda-toolkit@master + with: + cuda: ${{ matrix.cuda-version }} + sub-packages: '["nvcc", "toolkit"]' + method: "network" + use-github-cache: false + use-local-cache: false - # - name: Install build dependencies - # run: | - # sudo apt install -y ninja-build clang - # pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil + - name: Install build dependencies + run: | + sudo apt install -y ninja-build clang + pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil - # - name: Set environment variables - # run: | - # export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH - # export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH + - name: Set environment variables + run: | + export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH + export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH - # - name: Build wheels - # env: - # MAX_JOBS: 2 - # NVCC_THREADS: 2 - # run: | - # chmod +x build_linux.sh - # ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }} - # wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1)) - # echo "wheel_name=$wheel_name" >> $GITHUB_ENV + - name: Build wheels + env: + MAX_JOBS: 2 + NVCC_THREADS: 2 + run: | + chmod +x build_linux.sh + ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }} + wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1)) + echo "wheel_name=$wheel_name" >> $GITHUB_ENV - # - name: Install Test - # run: | - # pip install --no-cache-dir flash-attention/dist/${{ env.wheel_name }} - # python -c "import flash_attn; print(flash_attn.__version__)" + - name: Install Test + run: | + pip install --no-cache-dir flash-attention/dist/${{ env.wheel_name }} + python -c "import flash_attn; print(flash_attn.__version__)" - # - name: Upload Release Asset - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # run: | - # tag_name=${{ github.ref_name }} - # wheel_path="flash-attention/dist/${{ env.wheel_name }}" + - name: Upload Release Asset + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + tag_name=${{ github.ref_name }} + wheel_path="flash-attention/dist/${{ env.wheel_name }}" - # # Check if the file exists - # if [ ! -f "$wheel_path" ]; then - # echo "Error: Wheel file not found at $wheel_path" - # exit 1 - # fi + # Check if the file exists + if [ ! -f "$wheel_path" ]; then + echo "Error: Wheel file not found at $wheel_path" + exit 1 + fi - # # Upload the release asset using GitHub CLI - # gh release upload "$tag_name" "$wheel_path" --clobber - - # echo "Successfully uploaded $wheel_name to release $tag_name" + # Upload the release asset using GitHub CLI + gh release upload "$tag_name" "$wheel_path" --clobber # ######################################################### # Build wheels with self-hosted runner @@ -303,8 +301,6 @@ jobs: # Upload the release asset using GitHub CLI gh release upload "$tag_name" "$wheel_path" --clobber - echo "Successfully uploaded $wheel_name to release $tag_name" - - name: Clean up shell: bash if: always() @@ -422,5 +418,3 @@ jobs: # Upload the release asset using GitHub CLI gh release upload "$tag_name" "$wheel_path" --clobber - - Write-Host "Successfully uploaded $env:wheel_name to release $tag_name" diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml new file mode 100644 index 0000000..1c47e33 --- /dev/null +++ b/.github/workflows/build_linux.yml @@ -0,0 +1,107 @@ +# ######################################################### +# Build wheels with GitHub hosted runner +# ######################################################### + +name: Build wheels and upload to GitHub Releases (Linux x86_64) + +on: + workflow_call: + inputs: + flash-attn-version: + description: "Flash-Attention version" + required: true + type: string + python-version: + description: "Python version" + required: true + type: string + torch-version: + description: "PyTorch version" + required: true + type: string + cuda-version: + description: "CUDA version" + required: true + type: string + secrets: + GITHUB_TOKEN: + required: true + +jobs: + build_wheels: + name: Build wheels and Upload (Linux x86_64) + runs-on: ubuntu-22.04 + env: + DEBIAN_FRONTEND: noninteractive + TERM: xterm-256color + steps: + - uses: actions/checkout@v4 + + - name: Maximize build space + run: | + df -h + echo "-----------------------------" + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + df -h + + - name: Set Swap Space + uses: pierotofy/set-swap-space@master + with: + swap-size-gb: 48 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - uses: Jimver/cuda-toolkit@master + with: + cuda: ${{ inputs.cuda-version }} + sub-packages: '["nvcc", "toolkit"]' + method: "network" + use-github-cache: false + use-local-cache: false + + - name: Install build dependencies + run: | + sudo apt install -y ninja-build clang + pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil + + - name: Set environment variables + run: | + export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH + export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH + + - name: Build wheels + id: build_wheels + env: + MAX_JOBS: 2 + NVCC_THREADS: 2 + run: | + chmod +x build_linux.sh + ./build_linux.sh ${{ inputs.flash-attn-version }} ${{ inputs.python-version }} ${{ inputs.torch-version }} ${{ inputs.cuda-version }} + wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1)) + echo "WHEEL_NAME=$wheel_name" >> $GITHUB_OUTPUT + + - name: Install Test + run: | + pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }} + python -c "import flash_attn; print(flash_attn.__version__)" + + - name: Upload Release Asset + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + tag_name=${{ github.ref_name }} + wheel_path="flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}" + + # Check if the file exists + if [ ! -f "$wheel_path" ]; then + echo "Error: Wheel file not found at $wheel_path" + exit 1 + fi + + # Upload the release asset using GitHub CLI + gh release upload "$tag_name" "$wheel_path" --clobber diff --git a/.github/workflows/build_linux_self_host.yml b/.github/workflows/build_linux_self_host.yml new file mode 100644 index 0000000..c204b19 --- /dev/null +++ b/.github/workflows/build_linux_self_host.yml @@ -0,0 +1,147 @@ +# ######################################################### +# Build wheels with self-hosted runner +# ######################################################### + +name: Build wheels and upload to GitHub Releases (Linux x86_64, self-hosted runner) + +on: + workflow_call: + inputs: + flash-attn-version: + description: "Flash-Attention version" + required: true + type: string + python-version: + description: "Python version" + required: true + type: string + torch-version: + description: "PyTorch version" + required: true + type: string + cuda-version: + description: "CUDA version" + required: true + type: string + secrets: + GITHUB_TOKEN: + required: true + +jobs: + build_wheels_self_hosted: + name: Build wheels and Upload (Linux x86_64, self-hosted runner) + runs-on: self-hosted + container: + image: ubuntu:22.04 + defaults: + run: + shell: bash + env: + DEBIAN_FRONTEND: noninteractive + TERM: xterm-256color + timeout-minutes: 2000 + steps: + - name: Install tools + shell: bash + run: | + apt-get update && apt-get install -y --no-install-recommends \ + curl \ + ca-certificates \ + sudo \ + software-properties-common \ + wget \ + unzip \ + zip \ + git \ + build-essential \ + gcc \ + g++ \ + clang \ + ninja-build \ + keyboard-configuration + + - name: Install gh + shell: bash + run: | + sudo mkdir -p -m 755 /etc/apt/keyrings + out=$(mktemp) + wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg + cat $out | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null + sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null + sudo apt update + sudo apt install gh -y + + - uses: actions/checkout@v4 + + - name: Configure Git safe directory + shell: bash + run: | + git config --global --add safe.directory $(pwd) + + - uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - uses: Jimver/cuda-toolkit@master + with: + cuda: ${{ inputs.cuda-version }} + sub-packages: '["nvcc", "toolkit"]' + method: "network" + use-github-cache: false + use-local-cache: false + + - name: Install build dependencies + shell: bash + run: | + sudo apt install -y ninja-build clang + pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil + + - name: Set environment variables + shell: bash + run: | + export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH + export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH + + - name: Build wheels + timeout-minutes: 1200 + id: build_wheels + shell: bash + env: + MAX_JOBS: 2 + NVCC_THREADS: 2 + run: | + chmod +x build_linux.sh + ./build_linux.sh ${{ inputs.flash-attn-version }} ${{ inputs.python-version }} ${{ inputs.torch-version }} ${{ inputs.cuda-version }} + wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1)) + echo "WHEEL_NAME=$wheel_name" >> $GITHUB_OUTPUT + + - name: Install Test + shell: bash + run: | + pip uninstall -y flash-attn > /dev/null 2>&1 + pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }} + python -c "import flash_attn; print(flash_attn.__version__)" + + - name: Upload Release Asset + shell: bash + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + tag_name=${{ github.ref_name }} + wheel_path="flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}" + + # Check if the file exists + if [ ! -f "$wheel_path" ]; then + echo "Error: Wheel file not found at $wheel_path" + exit 1 + fi + + # Upload the release asset using GitHub CLI + gh release upload "$tag_name" "$wheel_path" --clobber + + - name: Clean up + shell: bash + if: always() + run: | + rm -rf /opt/hostedtoolcache/Python diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml new file mode 100644 index 0000000..66d0b5a --- /dev/null +++ b/.github/workflows/build_test.yml @@ -0,0 +1,49 @@ +name: Build wheels and upload to GitHub Releases + +on: + workflow_dispatch: + +jobs: + # ######################################################### + # Build wheels with GitHub hosted runner + # ######################################################### + build_wheels: + name: Build Root + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + flash-attn-version: ["2.6.3"] + python-version: ["3.10"] + torch-version: ["2.5.1"] + # https://developer.nvidia.com/cuda-toolkit-archive + cuda-version: ["12.4.1"] + steps: + - uses: actions/checkout@v4 + + - uses: ./.github/workflows/build_linux.yml + with: + flash-attn-version: ${{ matrix.flash-attn-version }} + python-version: ${{ matrix.python-version }} + torch-version: ${{ matrix.torch-version }} + cuda-version: ${{ matrix.cuda-version }} + build_wheels_windows: + name: Build Root + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + flash-attn-version: ["2.6.3"] + python-version: ["3.10"] + torch-version: ["2.5.1"] + # https://developer.nvidia.com/cuda-toolkit-archive + cuda-version: ["12.4.1"] + steps: + - uses: actions/checkout@v4 + + - uses: ./.github/workflows/build_windows.yml + with: + flash-attn-version: ${{ matrix.flash-attn-version }} + python-version: ${{ matrix.python-version }} + torch-version: ${{ matrix.torch-version }} + cuda-version: ${{ matrix.cuda-version }} diff --git a/.github/workflows/build_windows.yml b/.github/workflows/build_windows.yml new file mode 100644 index 0000000..dcd6c1d --- /dev/null +++ b/.github/workflows/build_windows.yml @@ -0,0 +1,101 @@ +# ######################################################### +# Build wheels with GitHub hosted runner on Windows x86_64 +# ######################################################### + +name: Build wheels and upload to GitHub Releases (Windows x86_64) + +on: + workflow_call: + inputs: + flash-attn-version: + description: "Flash-Attention version" + required: true + type: string + python-version: + description: "Python version" + required: true + type: string + torch-version: + description: "PyTorch version" + required: true + type: string + cuda-version: + description: "CUDA version" + required: true + type: string + secrets: + GITHUB_TOKEN: + required: true + +jobs: + build_windows_wheels: + name: Build wheels and Upload (Windows x86_64, GitHub hosted runner) + runs-on: windows-2022 + timeout-minutes: 1000 + env: + MAX_JOBS: 2 + NVCC_THREADS: 2 + steps: + - uses: actions/checkout@v4 + + - name: Enable Git long paths + shell: pwsh + run: git config --system core.longpaths true + + - uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - uses: Jimver/cuda-toolkit@v0.2.24 + with: + cuda: ${{ inputs.cuda-version }} + method: "network" + use-github-cache: false + use-local-cache: false + + - name: Install VS2022 BuildTools + shell: pwsh + run: | + choco install -y visualstudio2022buildtools ` + --version=117.14.1 ` + --params "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64" + + - name: Install build dependencies + shell: pwsh + run: | + python -m pip install -U setuptools==75.8.0 wheel packaging psutil ninja + + - name: Build wheels + shell: pwsh + run: | + .\build_windows.ps1 -FlashAttnVersion "${{ inputs.flash-attn-version }}" -PythonVersion "${{ inputs.python-version }}" -TorchVersion "${{ inputs.torch-version }}" -CudaVersion "${{ inputs.cuda-version }}" + $wheelName = Get-ChildItem -Path "flash-attention\dist\*.whl" | Select-Object -First 1 | ForEach-Object { $_.Name } + echo "wheel_name=$wheelName" >> $env:GITHUB_ENV + + - name: Install Test + shell: pwsh + run: | + pip install --no-cache-dir flash-attention/dist/$env:wheel_name + python -c "import flash_attn; print(flash_attn.__version__)" + + - name: Upload Release Asset + shell: pwsh + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + $tag_name = "${env:GITHUB_REF}".Replace("refs/tags/", "") + $wheel_path = "flash-attention/dist/$env:wheel_name" + + # Check if the file exists + if (-not (Test-Path $wheel_path)) { + $tag_name = "${env:GITHUB_REF}".Replace("refs/tags/", "") + $wheel_path = "flash-attention/dist/$env:wheel_name" + + # Check if the file exists + if (-not (Test-Path $wheel_path)) { + Write-Host "Error: Wheel file not found at $wheel_path" + exit 1 + } + + # Upload the release asset using GitHub CLI + gh release upload "$tag_name" "$wheel_path" --clobber