name: Build wheels and upload to GitHub Releases on: create: tags: - "v*" jobs: create_releases: name: Create Releases runs-on: ubuntu-latest steps: - name: Get the tag version id: extract_branch run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/} shell: bash - name: Create Release id: create_release uses: actions/create-release@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ steps.extract_branch.outputs.branch }} release_name: ${{ steps.extract_branch.outputs.branch }} body: | - Flash-Attention - 2.4.3 - 2.5.6 - 2.5.9 - 2.6.3 - Python - 3.10 - 3.11 - 3.12 - PyTorch - 2.0.1 - 2.1.2 - 2.2.2 - 2.3.1 - 2.4.1 - 2.5.0 - CUDA - 11.8.0 - 12.1.1 - 12.4.1 build_wheels: name: Build wheels and Upload needs: create_releases runs-on: ubuntu-latest strategy: fails-fast: false matrix: flash-attn-version: [2.4.3, 2.5.6, 2.5.9, 2.6.3] python-version: [3.10, 3.11, 3.12] torch-version: [2.0.1, 2.1.2, 2.2.2, 2.3.1, 2.4.1, 2.5.0] cuda-version: [11.8.0, 12.1.1, 12.4.1] exclude: # torch < 2.2 does not support Python 3.12 - python-version: 3.12 torch-version: 2.0.1 - python-version: 3.12 torch-version: 2.1.2 # torch 2.0.1 does not support CUDA 12.x - torch-version: 2.0.1 cuda-version: 12.1.1 - torch-version: 2.0.1 cuda-version: 12.4.1 steps: - uses: actions/checkout@v4 - name: Maximize build space run: | df -h echo "-----------------------------" sudo rm -rf /usr/share/dotnet sudo rm -rf /usr/local/lib/android sudo rm -rf /opt/ghc sudo rm -rf /opt/hostedtoolcache/CodeQL df -h - name: Set Swap Space uses: pierotofy/set-swap-space@master with: swap-size-gb: 10 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - uses: Jimver/cuda-toolkit@master with: cuda: ${{ matrix.cuda-version }} linux-local-args: '["--toolkit"]' method: "network" - run: sudo apt install -y ninja-build - name: Set CUDA and PyTorch versions run: | echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }} run: | pip install -U pip pip install wheel setuptools packaging export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \ support_cuda_versions = { \ '2.0': [117, 118], \ '2.1': [118, 121], \ '2.2': [118, 121], \ '2.3': [118, 121], \ '2.4': [118, 121, 124], \ '2.5': [118, 121, 124], \ }; \ target_cuda_versions = support_cuda_versions[env['MATRIX_TORCH_VERSION']]; \ cuda_version = int(env['MATRIX_CUDA_VERSION']); \ closest_version = min(target_cuda_versions, key=lambda x: abs(x - cuda_version)); \ print(closest_version) \ ") pip install --no-cache-dir torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION} nvcc --version python -V python -c import torch; print("'PyTorch:', torch.__version__)" python -c "import torch; print('CUDA:', torch.version.cuda)" python -c "from torch.utils import cpp_extension; print(cpp_extension.CUDA_HOME)") - name: Checkout flash-attn run: | git clone https://github.com/Dao-AILab/flash-attention.git cd flash-attention git checkout v${{ matrix.flash-attn-version }} - name: Build wheels run: | pip install setuptools==68.0.0 ninja packaging wheel export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH export MAX_JOBS=1 FLASH_ATTENTION_FORCE_BUILD="TRUE" python setup.py bdist_wheel --dist-dir=dist wheel_name=$(basename $(ls dist/*.whl | head -n 1)) echo "wheel_name=$wheel_name" >> $GITHUB_ENV - name: Install Test run: | pip install flash_attention/dist/${{ env.wheel_name }} python -c "import flash_atten; print(flash_atten.__version__)" - name: Get the tag version id: extract_branch run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/} - name: Get Release with Tag id: get_release uses: joutvhu/get-release@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ steps.extract_branch.outputs.branch }} - name: Upload Release Asset uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.get_release.outputs.upload_url }} asset_path: flash-attention/dist/${{ env.wheel_name }} asset_name: ${{ env.wheel_name }} asset_content_type: application/*