ci: refactor wheel handling and enable manylinux artifact generation

- Change the build step's wheel output from the basename (WHEEL_NAME) to the full path (WHEEL_PATH) for better flexibility, so later steps no longer hard-code the dist directory
- Add patchelf to build dependencies for wheel repair operations
- Enable the auditwheel repair step, excluding the CUDA/torch libraries (libc10*, libtorch*, libcu*, libnv*)
- Add a separate WHEEL_PATH_MANYLINUX output so the standard and manylinux wheels are tracked independently
- Rename the manylinux workflow file (_build_linux_self_host_manylinux.yml -> _build_manylinux_self_host.yml) to reflect its dedicated purpose
- Update workflow references to use renamed manylinux workflow
- Parse the runner input with fromjson() in the ARM build workflow's runs-on, so a JSON array of runner labels is accepted
- Update the test workflows to use the container environment consistently (use-container: true)
- Support both manylinux and standard wheel uploads to releases
This commit is contained in:
Junya Morioka
2025-12-13 18:18:50 +09:00
parent a18d6ac478
commit 92464a9b3b
6 changed files with 43 additions and 40 deletions
@@ -42,7 +42,7 @@ on:
jobs: jobs:
build_wheels_self_hosted: build_wheels_self_hosted:
name: Build wheels and Upload (Linux x86_64, self-hosted runner) name: Build wheels and Upload (Linux x86_64, self-hosted runner)
runs-on: ${{ inputs.runner }} runs-on: ${{ fromjson(inputs.runner) }}
container: container:
image: ${{ inputs.container-image }} image: ${{ inputs.container-image }}
options: --platform linux/arm64 options: --platform linux/arm64
+33 -30
View File
@@ -78,7 +78,8 @@ jobs:
clang \ clang \
ninja-build \ ninja-build \
keyboard-configuration \ keyboard-configuration \
time time \
patchelf
- name: Install gh - name: Install gh
shell: bash shell: bash
@@ -121,14 +122,14 @@ jobs:
run: | run: |
chmod +x build_linux.sh chmod +x build_linux.sh
./build_linux.sh ${{ inputs.flash-attn-version }} ${{ inputs.python-version }} ${{ inputs.torch-version }} ${{ inputs.cuda-version }} ./build_linux.sh ${{ inputs.flash-attn-version }} ${{ inputs.python-version }} ${{ inputs.torch-version }} ${{ inputs.cuda-version }}
wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1)) wheel_path=$(ls flash-attention/dist/*.whl | head -n 1)
echo "WHEEL_NAME=$wheel_name" >> $GITHUB_OUTPUT echo "WHEEL_PATH=$wheel_path" >> $GITHUB_OUTPUT
- name: Install Test - name: Install Test
shell: bash shell: bash
run: | run: |
pip uninstall -y flash-attn > /dev/null 2>&1 pip uninstall -y flash-attn > /dev/null 2>&1
pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }} pip install --no-cache-dir ${{ steps.build_wheels.outputs.WHEEL_PATH }}
python -c "import flash_attn; print(flash_attn.__version__)" python -c "import flash_attn; print(flash_attn.__version__)"
- name: Upload Release Asset - name: Upload Release Asset
@@ -138,7 +139,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: | run: |
tag_name=${{ github.ref_name }} tag_name=${{ github.ref_name }}
wheel_path="flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}" wheel_path="${{ steps.build_wheels.outputs.WHEEL_PATH }}"
# Check if the file exists # Check if the file exists
if [ ! -f "$wheel_path" ]; then if [ ! -f "$wheel_path" ]; then
@@ -149,33 +150,35 @@ jobs:
# Upload the release asset using GitHub CLI # Upload the release asset using GitHub CLI
gh release upload "$tag_name" "$wheel_path" --clobber gh release upload "$tag_name" "$wheel_path" --clobber
# - name: Apply auditwheel repair - name: Apply auditwheel repair
# continue-on-error: true continue-on-error: true
# run: | run: |
# pip install auditwheel auditwheel show ${{ steps.build_wheels.outputs.WHEEL_PATH }}
# auditwheel show flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }} auditwheel repair \
# auditwheel repair flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }} -w flash-attention/dist_manylinux --exclude libc10* --exclude libtorch* --exclude libcu* --exclude libnv* --exclude 'libtorch*' \
# wheel_name=$(basename $(ls flash-attention/dist_manylinux/*.whl | head -n 1)) ${{ steps.build_wheels.outputs.WHEEL_PATH }} -w flash-attention/dist_manylinux
# echo "WHEEL_NAME_MANYLINUX=$wheel_name" >> $GITHUB_OUTPUT wheel_path_manylinux=$(ls flash-attention/dist_manylinux/*manylinux*.whl | head -n 1)
echo "WHEEL_PATH_MANYLINUX=$wheel_path_manylinux" >> $GITHUB_OUTPUT
# - name: Apply auditwheel repair - name: Test manylinux wheel
# if: ${{ inputs.is-upload }} continue-on-error: true
# continue-on-error: true run: |
# env: pip uninstall -y flash-attn > /dev/null 2>&1
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} pip install --no-cache-dir ${{ steps.build_wheels.outputs.WHEEL_PATH_MANYLINUX }}
# run: | python -c "import flash_attn; print(flash_attn.__version__)"
# pip uninstall -y flash-attn > /dev/null 2>&1
# pip install --no-cache-dir flash-attention/dist_manylinux/${{ steps.build_wheels.outputs.WHEEL_NAME_MANYLINUX }}
# python -c "import flash_attn; print(flash_attn.__version__)"
# wheel_path_manylinux="flash-attention/dist_manylinux/${{ steps.build_wheels.outputs.WHEEL_NAME_MANYLINUX }}" - name: Upload manylinux wheel
# if [ ! -f "$wheel_path_manylinux" ]; then continue-on-error: true
# echo "Error: Wheel file not found at $wheel_path_manylinux" env:
# exit 1 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# fi run: |
wheel_path_manylinux="${{ steps.build_wheels.outputs.WHEEL_PATH_MANYLINUX }}"
# # Upload the release asset using GitHub CLI if [ ! -f "$wheel_path_manylinux" ]; then
# gh release upload "$tag_name" "$wheel_path_manylinux" --clobber echo "Error: Wheel file not found at $wheel_path_manylinux"
exit 1
fi
# Upload the release asset using GitHub CLI
gh release upload "$tag_name" "$wheel_path_manylinux" --clobber
- name: Clean up - name: Clean up
shell: bash shell: bash
@@ -2,7 +2,7 @@
# Build wheels with self-hosted runner # Build wheels with self-hosted runner
# ######################################################### # #########################################################
name: "[Linux x86_64, self-hosted] Build wheels and upload to GitHub Releases" name: "[manylinux x86_64, self-hosted] Build wheels and upload to GitHub Releases"
on: on:
workflow_call: workflow_call:
@@ -41,7 +41,7 @@ on:
jobs: jobs:
build_wheels_self_hosted: build_wheels_self_hosted:
name: Build wheels and Upload (Linux x86_64, self-hosted runner) name: Build wheels and Upload (manylinux x86_64, self-hosted runner)
runs-on: ${{ fromjson(inputs.runner) }} runs-on: ${{ fromjson(inputs.runner) }}
container: container:
image: ${{ inputs.container-image }} image: ${{ inputs.container-image }}
+1 -1
View File
@@ -88,7 +88,7 @@ jobs:
torch-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_self_hosted.torch-version }} torch-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_self_hosted.torch-version }}
cuda-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_self_hosted.cuda-version }} cuda-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_self_hosted.cuda-version }}
exclude: ${{ fromjson(needs.create_matrix.outputs.matrix).exclude }} exclude: ${{ fromjson(needs.create_matrix.outputs.matrix).exclude }}
uses: ./.github/workflows/_build_linux_self_host.yml uses: ./.github/workflows/_build_manylinux_self_host.yml
with: with:
flash-attn-version: ${{ matrix.flash-attn-version }} flash-attn-version: ${{ matrix.flash-attn-version }}
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
+4 -4
View File
@@ -14,12 +14,12 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
flash-attn-version: ["2.8.3"] flash-attn-version: ["2.8.3"]
python-version: ["3.13", "3.13"] python-version: ["3.13"]
torch-version: ["2.9.1"] torch-version: ["2.9.1"]
# https://developer.nvidia.com/cuda-toolkit-archive # https://developer.nvidia.com/cuda-toolkit-archive
cuda-version: ["13.0.1"] cuda-version: ["13.0.1"]
runner: ['["openci-runner-beta"]'] # runner: ['["openci-runner-beta"]']
use-container: [false] runner: ['["self-hosted", "x64"]']
with: with:
flash-attn-version: ${{ matrix.flash-attn-version }} flash-attn-version: ${{ matrix.flash-attn-version }}
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
@@ -27,4 +27,4 @@ jobs:
cuda-version: ${{ matrix.cuda-version }} cuda-version: ${{ matrix.cuda-version }}
is-upload: false is-upload: false
runner: ${{ matrix.runner}} runner: ${{ matrix.runner}}
use-container: ${{ matrix.use-container}} use-container: true
@@ -9,7 +9,7 @@ jobs:
# ######################################################### # #########################################################
build_wheels_self_hosted: build_wheels_self_hosted:
name: Build wheels and Upload (Linux x86_64, self-hosted runner, Manylinux 2_28) name: Build wheels and Upload (Linux x86_64, self-hosted runner, Manylinux 2_28)
uses: ./.github/workflows/_build_linux_self_host_manylinux.yml uses: ./.github/workflows/_build_manylinux_self_host.yml
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
@@ -19,7 +19,7 @@ jobs:
# https://developer.nvidia.com/cuda-toolkit-archive # https://developer.nvidia.com/cuda-toolkit-archive
cuda-version: ["13.0.1"] cuda-version: ["13.0.1"]
runner: ['["self-hosted", "x64"]'] runner: ['["self-hosted", "x64"]']
use-container: [false] use-container: [true]
with: with:
flash-attn-version: ${{ matrix.flash-attn-version }} flash-attn-version: ${{ matrix.flash-attn-version }}
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}