ci: refactor wheel handling and enable manylinux artifact generation

- Change wheel output to use full path instead of basename for better flexibility
- Add patchelf to build dependencies for wheel repair operations
- Enable auditwheel repair step with proper exclusions for CUDA/torch libraries
- Use separate wheel path outputs (WHEEL_PATH and WHEEL_PATH_MANYLINUX) to track the standard and manylinux wheels
- Rename manylinux workflow file to reflect its dedicated purpose
- Update workflow references to use renamed manylinux workflow
- Use fromjson() for runner parameter parsing in ARM build workflow
- Update test workflows to use container environment consistently
- Support both manylinux and standard wheel uploads to releases
This commit is contained in:
Junya Morioka
2025-12-13 18:18:50 +09:00
parent a18d6ac478
commit 92464a9b3b
6 changed files with 43 additions and 40 deletions
@@ -42,7 +42,7 @@ on:
jobs:
build_wheels_self_hosted:
name: Build wheels and Upload (Linux x86_64, self-hosted runner)
runs-on: ${{ inputs.runner }}
runs-on: ${{ fromjson(inputs.runner) }}
container:
image: ${{ inputs.container-image }}
options: --platform linux/arm64
+33 -30
View File
@@ -78,7 +78,8 @@ jobs:
clang \
ninja-build \
keyboard-configuration \
time
time \
patchelf
- name: Install gh
shell: bash
@@ -121,14 +122,14 @@ jobs:
run: |
chmod +x build_linux.sh
./build_linux.sh ${{ inputs.flash-attn-version }} ${{ inputs.python-version }} ${{ inputs.torch-version }} ${{ inputs.cuda-version }}
wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1))
echo "WHEEL_NAME=$wheel_name" >> $GITHUB_OUTPUT
wheel_path=$(ls flash-attention/dist/*.whl | head -n 1)
echo "WHEEL_PATH=$wheel_path" >> $GITHUB_OUTPUT
- name: Install Test
shell: bash
run: |
pip uninstall -y flash-attn > /dev/null 2>&1
pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}
pip install --no-cache-dir ${{ steps.build_wheels.outputs.WHEEL_PATH }}
python -c "import flash_attn; print(flash_attn.__version__)"
- name: Upload Release Asset
@@ -138,7 +139,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
tag_name=${{ github.ref_name }}
wheel_path="flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}"
wheel_path="${{ steps.build_wheels.outputs.WHEEL_PATH }}"
# Check if the file exists
if [ ! -f "$wheel_path" ]; then
@@ -149,33 +150,35 @@ jobs:
# Upload the release asset using GitHub CLI
gh release upload "$tag_name" "$wheel_path" --clobber
# - name: Apply auditwheel repair
# continue-on-error: true
# run: |
# pip install auditwheel
# auditwheel show flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}
# auditwheel repair flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }} -w flash-attention/dist_manylinux
# wheel_name=$(basename $(ls flash-attention/dist_manylinux/*.whl | head -n 1))
# echo "WHEEL_NAME_MANYLINUX=$wheel_name" >> $GITHUB_OUTPUT
- name: Apply auditwheel repair
  # Repairs the freshly built wheel into a manylinux wheel under
  # flash-attention/dist_manylinux and publishes its path as the step output
  # WHEEL_PATH_MANYLINUX. The step needs its own `id`: values written to
  # $GITHUB_OUTPUT are only addressable as steps.<id>.outputs.<name>, so
  # without one the output below cannot be read by any later step.
  id: auditwheel_repair
  # Best effort: a failed repair must not fail the job, because the standard
  # wheel has already been built by the build_wheels step.
  continue-on-error: true
  shell: bash
  run: |
    wheel_path="${{ steps.build_wheels.outputs.WHEEL_PATH }}"
    auditwheel show "$wheel_path"
    # Quote every pattern so the shell hands it to auditwheel verbatim instead
    # of expanding it against files in the working directory. The matched
    # libraries are excluded from grafting into the repaired wheel.
    auditwheel repair \
      --exclude 'libc10*' \
      --exclude 'libtorch*' \
      --exclude 'libcu*' \
      --exclude 'libnv*' \
      "$wheel_path" -w flash-attention/dist_manylinux
    wheel_path_manylinux=$(ls flash-attention/dist_manylinux/*manylinux*.whl | head -n 1)
    echo "WHEEL_PATH_MANYLINUX=$wheel_path_manylinux" >> "$GITHUB_OUTPUT"
# - name: Apply auditwheel repair
# if: ${{ inputs.is-upload }}
# continue-on-error: true
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# run: |
# pip uninstall -y flash-attn > /dev/null 2>&1
# pip install --no-cache-dir flash-attention/dist_manylinux/${{ steps.build_wheels.outputs.WHEEL_NAME_MANYLINUX }}
# python -c "import flash_attn; print(flash_attn.__version__)"
- name: Test manylinux wheel
  # Smoke-tests the repaired wheel: install it and import flash_attn.
  # Best effort, so a missing or broken manylinux wheel does not fail the job.
  continue-on-error: true
  shell: bash
  run: |
    # WHEEL_PATH_MANYLINUX is not an output of the build_wheels step, so
    # steps.build_wheels.outputs.WHEEL_PATH_MANYLINUX always expands to an
    # empty string. Locate the wheel in the repair output directory instead.
    wheel_path_manylinux=$(ls flash-attention/dist_manylinux/*manylinux*.whl 2>/dev/null | head -n 1 || true)
    # Bail out before uninstalling anything when there is nothing to test.
    if [ ! -f "$wheel_path_manylinux" ]; then
      echo "Error: no manylinux wheel found in flash-attention/dist_manylinux"
      exit 1
    fi
    pip uninstall -y flash-attn > /dev/null 2>&1
    pip install --no-cache-dir "$wheel_path_manylinux"
    python -c "import flash_attn; print(flash_attn.__version__)"
# wheel_path_manylinux="flash-attention/dist_manylinux/${{ steps.build_wheels.outputs.WHEEL_NAME_MANYLINUX }}"
# if [ ! -f "$wheel_path_manylinux" ]; then
# echo "Error: Wheel file not found at $wheel_path_manylinux"
# exit 1
# fi
# # Upload the release asset using GitHub CLI
# gh release upload "$tag_name" "$wheel_path_manylinux" --clobber
- name: Upload manylinux wheel
  # Uploads the repaired manylinux wheel to the GitHub release for the current
  # ref. Only runs when the caller asked for an upload (is-upload input).
  if: ${{ inputs.is-upload }}
  # Best effort: the standard wheel is uploaded by its own step, so a failure
  # here must not fail the job.
  continue-on-error: true
  shell: bash
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Passed through the environment rather than interpolated into the script,
    # so the ref name is never parsed as shell code.
    TAG_NAME: ${{ github.ref_name }}
  run: |
    # Shell variables do not carry over between steps, so the release tag has
    # to be defined here.
    tag_name="$TAG_NAME"
    # WHEEL_PATH_MANYLINUX is not an output of the build_wheels step; locate
    # the wheel in the repair output directory instead.
    wheel_path_manylinux=$(ls flash-attention/dist_manylinux/*manylinux*.whl 2>/dev/null | head -n 1 || true)
    if [ ! -f "$wheel_path_manylinux" ]; then
      echo "Error: Wheel file not found at $wheel_path_manylinux"
      exit 1
    fi
    # Upload the release asset using GitHub CLI
    gh release upload "$tag_name" "$wheel_path_manylinux" --clobber
- name: Clean up
shell: bash
@@ -2,7 +2,7 @@
# Build wheels with self-hosted runner
# #########################################################
name: "[Linux x86_64, self-hosted] Build wheels and upload to GitHub Releases"
name: "[manylinux x86_64, self-hosted] Build wheels and upload to GitHub Releases"
on:
workflow_call:
@@ -41,7 +41,7 @@ on:
jobs:
build_wheels_self_hosted:
name: Build wheels and Upload (Linux x86_64, self-hosted runner)
name: Build wheels and Upload (manylinux x86_64, self-hosted runner)
runs-on: ${{ fromjson(inputs.runner) }}
container:
image: ${{ inputs.container-image }}
+1 -1
View File
@@ -88,7 +88,7 @@ jobs:
torch-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_self_hosted.torch-version }}
cuda-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_self_hosted.cuda-version }}
exclude: ${{ fromjson(needs.create_matrix.outputs.matrix).exclude }}
uses: ./.github/workflows/_build_linux_self_host.yml
uses: ./.github/workflows/_build_manylinux_self_host.yml
with:
flash-attn-version: ${{ matrix.flash-attn-version }}
python-version: ${{ matrix.python-version }}
+4 -4
View File
@@ -14,12 +14,12 @@ jobs:
fail-fast: false
matrix:
flash-attn-version: ["2.8.3"]
python-version: ["3.13", "3.13"]
python-version: ["3.13"]
torch-version: ["2.9.1"]
# https://developer.nvidia.com/cuda-toolkit-archive
cuda-version: ["13.0.1"]
runner: ['["openci-runner-beta"]']
use-container: [false]
# runner: ['["openci-runner-beta"]']
runner: ['["self-hosted", "x64"]']
with:
flash-attn-version: ${{ matrix.flash-attn-version }}
python-version: ${{ matrix.python-version }}
@@ -27,4 +27,4 @@ jobs:
cuda-version: ${{ matrix.cuda-version }}
is-upload: false
runner: ${{ matrix.runner}}
use-container: ${{ matrix.use-container}}
use-container: true
@@ -9,7 +9,7 @@ jobs:
# #########################################################
build_wheels_self_hosted:
name: Build wheels and Upload (Linux x86_64, self-hosted runner, Manylinux 2_28)
uses: ./.github/workflows/_build_linux_self_host_manylinux.yml
uses: ./.github/workflows/_build_manylinux_self_host.yml
strategy:
fail-fast: false
matrix:
@@ -19,7 +19,7 @@ jobs:
# https://developer.nvidia.com/cuda-toolkit-archive
cuda-version: ["13.0.1"]
runner: ['["self-hosted", "x64"]']
use-container: [false]
use-container: [true]
with:
flash-attn-version: ${{ matrix.flash-attn-version }}
python-version: ${{ matrix.python-version }}