Files
flash-attention-prebuild-wh…/.github/workflows/test-arm.yml
T
2025-12-04 19:27:27 +09:00

69 lines
2.1 KiB
YAML

# Manually triggered smoke test: builds a single flash-attn wheel on a Linux
# arm64 runner, installs the resulting wheel and imports it.
name: Test ARM Linux build

on:
  workflow_dispatch:

jobs:
  # #########################################################
  # Build wheels on a Linux arm64 runner
  # #########################################################
  # NOTE(review): the job id says "self_hosted", but `runs-on` uses the
  # `ubuntu-22.04-arm` label, which is a GitHub-hosted arm64 runner label.
  # The id is kept unchanged so anything referring to it keeps working.
  # NOTE(review): the display name mentions "Upload", but this workflow has no
  # upload step - confirm whether that is intentional.
  build_wheels_self_hosted:
    name: Build wheels and Upload (Linux arm64)
    runs-on: ubuntu-22.04-arm
    env:
      # Keep apt/dpkg from prompting during package installation.
      DEBIAN_FRONTEND: noninteractive
      TERM: xterm-256color
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        # Versions are quoted so YAML keeps them as strings ("3.10" != 3.1).
        flash-attn-version: ["2.8.0"]
        python-version: ["3.11"]
        torch-version: ["2.7.1"]
        # https://developer.nvidia.com/cuda-toolkit-archive
        cuda-version: ["12.8.1"]
    steps:
      - uses: actions/checkout@v4

      # Remove preinstalled toolchains this build does not use; `df -h` is
      # printed before and after so the reclaimed space shows up in the log.
      - name: Maximize build space
        run: |
          df -h
          echo "-----------------------------"
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          df -h

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # NOTE(review): third-party action; presumably installs the requested
      # CUDA toolkit version - confirm against the action's README.
      - uses: mjun0812/setup-cuda@v1
        with:
          version: "${{ matrix.cuda-version }}"

      - name: Install build dependencies
        shell: bash
        run: |
          # Refresh the package index first so the install does not fail on
          # stale package lists baked into the runner image.
          sudo apt-get update
          sudo apt-get install -y ninja-build clang time
          # setuptools is pinned; the extra unpinned "setuptools" entry was
          # redundant and has been dropped.
          pip install -U pip setuptools==75.8.0 wheel packaging psutil

      - name: Build wheels
        id: build_wheels
        shell: bash
        env:
          MAX_JOBS: "2"
          NVCC_THREADS: "1"
        run: |
          chmod +x build_linux.sh
          ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
          # The wheel is looked up under flash-attention/dist. The lookup is
          # split into separate assignments so a failing `ls` fails the step
          # instead of being hidden inside a nested command substitution.
          wheel_path=$(ls flash-attention/dist/*.whl | head -n 1)
          if [ -z "$wheel_path" ]; then
            echo "::error::No wheel found in flash-attention/dist"
            exit 1
          fi
          wheel_name=$(basename "$wheel_path")
          # Kept for backward compatibility: exposes $wheel_name to later steps.
          echo "wheel_name=$wheel_name" >> "$GITHUB_ENV"
          # Step outputs are only populated through $GITHUB_OUTPUT; without
          # this line `steps.build_wheels.outputs.wheel_name` is empty.
          echo "wheel_name=$wheel_name" >> "$GITHUB_OUTPUT"

      - name: Install Test
        shell: bash
        run: |
          # Drop any existing flash-attn install so the import below exercises
          # the wheel that was just built.
          pip uninstall -y flash-attn > /dev/null 2>&1
          pip install --no-cache-dir "flash-attention/dist/${{ steps.build_wheels.outputs.wheel_name }}"
          python -c "import flash_attn; print(flash_attn.__version__)"