# #########################################################
# Build wheels with GitHub-hosted runner
# #########################################################
# Reusable workflow (workflow_call): builds a flash-attn wheel for one
# Python / PyTorch / CUDA combination, smoke-tests it by importing the
# package, and (optionally) uploads it to the GitHub Release whose tag is
# the ref this workflow runs on. A second, best-effort stage repairs the
# wheel with auditwheel and uploads the repaired wheel as well.
name: "[Linux] Build wheels and upload to GitHub Releases"

on:
  workflow_call:
    inputs:
      flash-attn-version:
        description: "Flash-Attention version"
        required: true
        type: string
      python-version:
        description: "Python version"
        required: true
        type: string
      torch-version:
        description: "PyTorch version"
        required: true
        type: string
      cuda-version:
        description: "CUDA version"
        required: true
        type: string
      runner:
        description: "Runner type"
        required: false
        type: string
        default: "ubuntu-22.04"
      is-upload:
        description: "Whether to upload the release asset"
        required: false
        type: boolean
        default: true

jobs:
  build_wheels:
    name: Build wheels and Upload (Linux x86_64, GitHub hosted runner)
    runs-on: ${{ inputs.runner }}
    env:
      DEBIAN_FRONTEND: noninteractive
      TERM: xterm-256color
    steps:
      - uses: actions/checkout@v4

      # Delete large directories this job does not use; `df -h` before and
      # after shows how much space was reclaimed.
      - name: Maximize build space
        run: |
          df -h
          echo "-----------------------------"
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          df -h

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python-version }}

      - uses: mjun0812/setup-cuda@v1
        with:
          version: ${{ inputs.cuda-version }}

      - name: Install build dependencies
        run: |
          # Refresh the package index first so the install does not hit
          # stale mirror entries; apt-get is the script-stable front end.
          sudo apt-get update
          sudo apt-get install -y ninja-build clang time
          pip install -U pip setuptools==75.8.0 wheel packaging psutil

      # Runs the repository's build script and exposes the file name of the
      # first wheel found in flash-attention/dist as output WHEEL_NAME.
      - name: Build wheels
        id: build_wheels
        run: |
          chmod +x build_linux.sh
          ./build_linux.sh ${{ inputs.flash-attn-version }} ${{ inputs.python-version }} ${{ inputs.torch-version }} ${{ inputs.cuda-version }}
          wheel_name=$(basename "$(ls flash-attention/dist/*.whl | head -n 1)")
          echo "WHEEL_NAME=$wheel_name" >> "$GITHUB_OUTPUT"

      # Smoke test: install the freshly built wheel and import the package.
      - name: Install Test
        run: |
          pip uninstall -y flash-attn > /dev/null 2>&1
          pip install --no-cache-dir "flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}"
          python -c "import flash_attn; print(flash_attn.__version__)"

      - name: Upload Release Asset
        if: ${{ inputs.is-upload }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Passed through the environment instead of being interpolated
          # into the script, so the ref name is never parsed as shell code.
          TAG_NAME: ${{ github.ref_name }}
        run: |
          wheel_path="flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}"

          # Check if the file exists
          if [ ! -f "$wheel_path" ]; then
            echo "Error: Wheel file not found at $wheel_path"
            exit 1
          fi

          # Upload the release asset using GitHub CLI
          gh release upload "$TAG_NAME" "$wheel_path" --clobber

      # Best effort: a failure here must not fail the job, hence
      # continue-on-error. The step id is required so that the following
      # step can read the WHEEL_NAME_MANYLINUX output.
      - name: Apply auditwheel repair
        id: auditwheel_repair
        continue-on-error: true
        run: |
          pip install auditwheel
          auditwheel show "flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}"
          auditwheel repair "flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}" -w flash-attention/dist_manylinux
          wheel_name=$(basename "$(ls flash-attention/dist_manylinux/*.whl | head -n 1)")
          echo "WHEEL_NAME_MANYLINUX=$wheel_name" >> "$GITHUB_OUTPUT"

      # Only meaningful when the repair step actually produced a wheel.
      # `outcome` is the step result before continue-on-error is applied.
      - name: Install test and upload manylinux wheel
        if: ${{ inputs.is-upload && steps.auditwheel_repair.outcome == 'success' }}
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Each `run` step gets its own shell, so the release tag has to be
          # provided here again; a shell variable set in an earlier step is
          # not visible in this one.
          TAG_NAME: ${{ github.ref_name }}
        run: |
          wheel_path_manylinux="flash-attention/dist_manylinux/${{ steps.auditwheel_repair.outputs.WHEEL_NAME_MANYLINUX }}"

          # Check if the file exists before trying to install or upload it
          if [ ! -f "$wheel_path_manylinux" ]; then
            echo "Error: Wheel file not found at $wheel_path_manylinux"
            exit 1
          fi

          # Smoke test the repaired wheel
          pip uninstall -y flash-attn > /dev/null 2>&1
          pip install --no-cache-dir "$wheel_path_manylinux"
          python -c "import flash_attn; print(flash_attn.__version__)"

          # Upload the release asset using GitHub CLI
          gh release upload "$TAG_NAME" "$wheel_path_manylinux" --clobber