From d1715633db3e092f7a2ef1a11f1d13a80347800e Mon Sep 17 00:00:00 2001
From: Junya Morioka
Date: Fri, 5 Dec 2025 01:15:34 +0900
Subject: [PATCH] feat: add building for arm64 and openci runner test

Reusable build workflows:
- _build_linux.yml, _build_linux_self_host.yml and _build_windows.yml
  take an optional `runner` input that is used for `runs-on`. The
  defaults keep the previous hard-coded labels (ubuntu-22.04,
  self-hosted, windows-2022).
- All four reusable build workflows take an optional boolean
  `is-upload` input (default: true) that gates the
  "Upload Release Asset" step.

Callers:
- build.yml gains a `build_wheels_linux_arm64` job that calls
  _build_linux.yml with the ARM64 runner label.
- test-arm.yml and test-self-hosted-linux.yml call the reusable
  workflows with `is-upload: false` instead of duplicating the build
  steps, and forward `matrix.runner` as the `runner` input.

create_matrix.py:
- Excludes CUDA 13.0.1 for torch versions older than 2.9.
- Enables Python 3.13 and comments out CUDA 12.6.3 in LINUX_MATRIX.
- Adds LINUX_ARM64_MATRIX (same object as LINUX_MATRIX) and emits it
  under the `linux_arm64` key; `linux` and `windows` are set to False.

---
NOTE(review): the new EXCLUDE entries reference CUDA "13.0.1", while the
LINUX_MATRIX context visible in this diff lists "13.0.2". Confirm which
CUDA 13.0 patch release the exclusions are meant to cover.

 .github/workflows/_build_linux.yml            |  17 ++-
 .github/workflows/_build_linux_self_host.yml  |  17 ++-
 .github/workflows/_build_windows.yml          |  15 ++-
 .../workflows/_build_windows_code_build.yml   |   8 +-
 .github/workflows/build.yml                   |  23 +++-
 .github/workflows/test-arm.yml                |  68 +++---------
 .github/workflows/test-self-hosted-linux.yml  | 103 +++---------------
 create_matrix.py                              |  31 ++++--
 8 files changed, 120 insertions(+), 162 deletions(-)

diff --git a/.github/workflows/_build_linux.yml b/.github/workflows/_build_linux.yml
index 8059283..4243a93 100644
--- a/.github/workflows/_build_linux.yml
+++ b/.github/workflows/_build_linux.yml
@@ -2,7 +2,7 @@
 # Build wheels with GitHub-hosted runner
 # #########################################################
 
-name: "[Linux x86_64] Build wheels and upload to GitHub Releases"
+name: "[Linux] Build wheels and upload to GitHub Releases"
 
 on:
   workflow_call:
@@ -23,11 +23,21 @@ on:
         description: "CUDA version"
         required: true
         type: string
+      runner:
+        description: "Runner type"
+        required: false
+        type: string
+        default: "ubuntu-22.04"
+      is-upload:
+        description: "Whether to upload the release asset"
+        required: false
+        type: boolean
+        default: true
 
 jobs:
   build_wheels:
-    name: Build wheels and Upload (Linux x86_64, ${{ inputs.flash-attn-version }}, ${{ inputs.python-version }}, ${{ inputs.torch-version }}, ${{ inputs.cuda-version }})
-    runs-on: ubuntu-22.04
+    name: Build wheels and Upload (Linux, ${{ inputs.runner }})
+    runs-on: ${{ inputs.runner }}
     env:
       DEBIAN_FRONTEND: noninteractive
       TERM: xterm-256color
@@ -72,6 +82,7 @@ jobs:
           python -c "import flash_attn; print(flash_attn.__version__)"
 
       - name: Upload Release Asset
+        if: ${{ inputs.is-upload }}
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
diff --git a/.github/workflows/_build_linux_self_host.yml b/.github/workflows/_build_linux_self_host.yml
index 07f2460..cd06429 100644
--- a/.github/workflows/_build_linux_self_host.yml
+++ b/.github/workflows/_build_linux_self_host.yml
@@ -2,7 +2,7 @@
 # Build wheels with self-hosted runner
 # #########################################################
 
-name: "[Linux x86_64, self-hosted runner] Build wheels and upload to GitHub Releases"
+name: "[Linux x86_64, self-hosted] Build wheels and upload to GitHub Releases"
 
 on:
   workflow_call:
@@ -23,11 +23,21 @@ on:
         description: "CUDA version"
         required: true
         type: string
+      runner:
+        description: "Runner type"
+        required: false
+        type: string
+        default: "self-hosted"
+      is-upload:
+        description: "Whether to upload the release asset"
+        required: false
+        type: boolean
+        default: true
 
 jobs:
   build_wheels_self_hosted:
-    name: Build wheels and Upload (Linux x86_64, self-hosted runner, ${{ inputs.flash-attn-version }}, ${{ inputs.python-version }}, ${{ inputs.torch-version }}, ${{ inputs.cuda-version }})
-    runs-on: self-hosted
+    name: Build wheels and Upload (Linux x86_64, self-hosted runner)
+    runs-on: ${{ inputs.runner }}
     container:
       image: ubuntu:22.04
     defaults:
@@ -109,6 +119,7 @@ jobs:
           python -c "import flash_attn; print(flash_attn.__version__)"
 
       - name: Upload Release Asset
+        if: ${{ inputs.is-upload }}
         shell: bash
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/_build_windows.yml b/.github/workflows/_build_windows.yml
index 533b852..1513df0 100644
--- a/.github/workflows/_build_windows.yml
+++ b/.github/workflows/_build_windows.yml
@@ -23,11 +23,21 @@ on:
         description: "CUDA version"
         required: true
         type: string
+      runner:
+        description: "Runner type"
+        required: false
+        type: string
+        default: "windows-2022"
+      is-upload:
+        description: "Whether to upload the release asset"
+        required: false
+        type: boolean
+        default: true
 
 jobs:
   build_windows_wheels:
-    name: Build wheels and Upload (Windows x86_64, GitHub hosted runner, ${{ inputs.flash-attn-version }}, ${{ inputs.python-version }}, ${{ inputs.torch-version }}, ${{ inputs.cuda-version }})
-    runs-on: windows-2022
+    name: Build wheels and Upload (Windows x86_64, GitHub hosted runner)
+    runs-on: ${{ inputs.runner }}
     env:
       MAX_JOBS: 2
       NVCC_THREADS: 2
@@ -82,6 +92,7 @@ jobs:
           python -c "import flash_attn; print(flash_attn.__version__)"
 
       - name: Upload Release Asset
+        if: ${{ inputs.is-upload }}
         shell: pwsh
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/_build_windows_code_build.yml b/.github/workflows/_build_windows_code_build.yml
index c5f9bbf..976f7f8 100644
--- a/.github/workflows/_build_windows_code_build.yml
+++ b/.github/workflows/_build_windows_code_build.yml
@@ -22,10 +22,15 @@ on:
         description: "CUDA version"
         required: true
         type: string
+      is-upload:
+        description: "Whether to upload the release asset"
+        required: false
+        type: boolean
+        default: true
 
 jobs:
   build_wheels:
-    name: Build wheels and Upload (Windows x86_64, AWS CodeBuild, ${{ inputs.flash-attn-version }}, ${{ inputs.python-version }}, ${{ inputs.torch-version }}, ${{ inputs.cuda-version }})
+    name: Build wheels and Upload (Windows x86_64, AWS CodeBuild)
     timeout-minutes: 2160
     runs-on: codebuild-flash-attention-pre-build-wheel-windows-${{ github.run_id }}-${{ github.run_attempt }}
     # Large Instance
@@ -84,6 +89,7 @@ jobs:
           python -c "import flash_attn; print(flash_attn.__version__)"
 
       - name: Upload Release Asset
+        if: ${{ inputs.is-upload }}
         shell: pwsh
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 4e1f0ce..8ac36a4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -36,7 +36,7 @@ jobs:
   # Linux
   # #########################################################
   build_wheels_linux:
-    name: Build Linux
+    name: Build Linux x86_64
     needs: [create_releases, create_matrix]
     if: ${{ fromjson(needs.create_matrix.outputs.matrix).linux }}
     strategy:
@@ -55,6 +55,27 @@ jobs:
       cuda-version: ${{ matrix.cuda-version }}
     secrets: inherit
 
+  build_wheels_linux_arm64:
+    name: Build Linux ARM64
+    needs: [create_releases, create_matrix]
+    if: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64 }}
+    strategy:
+      fail-fast: false
+      matrix:
+        flash-attn-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64.flash-attn-version }}
+        python-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64.python-version }}
+        torch-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64.torch-version }}
+        cuda-version: ${{ fromjson(needs.create_matrix.outputs.matrix).linux_arm64.cuda-version }}
+        exclude: ${{ fromjson(needs.create_matrix.outputs.matrix).exclude }}
+    uses: ./.github/workflows/_build_linux.yml
+    with:
+      flash-attn-version: ${{ matrix.flash-attn-version }}
+      python-version: ${{ matrix.python-version }}
+      torch-version: ${{ matrix.torch-version }}
+      cuda-version: ${{ matrix.cuda-version }}
+      runner: "ubuntu-22.04-arm"
+    secrets: inherit
+
   build_wheels_linux_self_hosted:
     name: Build Linux (self-hosted)
     needs: [create_releases, create_matrix]
diff --git a/.github/workflows/test-arm.yml b/.github/workflows/test-arm.yml
index fae2e22..0ca50b1 100644
--- a/.github/workflows/test-arm.yml
+++ b/.github/workflows/test-arm.yml
@@ -8,61 +8,21 @@ jobs:
   # Build wheels with self-hosted runner
   # #########################################################
   build_wheels_self_hosted:
-    name: Build wheels and Upload (Linux arm64)
-    runs-on: ubuntu-22.04-arm
-    env:
-      DEBIAN_FRONTEND: noninteractive
-      TERM: xterm-256color
+    name: Build wheels and Upload (Linux ARM64, self-hosted runner)
+    uses: ./.github/workflows/_build_linux.yml
     strategy:
       fail-fast: false
       matrix:
-        flash-attn-version: ["2.8.0"]
-        python-version: ["3.11"]
-        torch-version: ["2.7.1"]
+        flash-attn-version: ["2.8.3"]
+        python-version: ["3.13"]
+        torch-version: ["2.9.1"]
         # https://developer.nvidia.com/cuda-toolkit-archive
-        cuda-version: ["12.8.1"]
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Maximize build space
-        run: |
-          df -h
-          echo "-----------------------------"
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache/CodeQL
-          df -h
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - uses: mjun0812/setup-cuda@v1
-        with:
-          version: "${{ matrix.cuda-version }}"
-
-      - name: Install build dependencies
-        shell: bash
-        run: |
-          sudo apt install -y ninja-build clang time
-          pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil
-
-      - name: Build wheels
-        id: build_wheels
-        shell: bash
-        env:
-          MAX_JOBS: 2
-          NVCC_THREADS: 1
-        run: |
-          chmod +x build_linux.sh
-          ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
-          wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1))
-          echo "wheel_name=$wheel_name" >> $GITHUB_ENV
-
-      - name: Install Test
-        shell: bash
-        run: |
-          pip uninstall -y flash-attn > /dev/null 2>&1
-          pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}
-          python -c "import flash_attn; print(flash_attn.__version__)"
+        cuda-version: ["13.0.1"]
+        runner: ["ubuntu-22.04-arm"]
+    with:
+      flash-attn-version: ${{ matrix.flash-attn-version }}
+      python-version: ${{ matrix.python-version }}
+      torch-version: ${{ matrix.torch-version }}
+      cuda-version: ${{ matrix.cuda-version }}
+      is-upload: false
+      runner: ${{ matrix.runner }}
diff --git a/.github/workflows/test-self-hosted-linux.yml b/.github/workflows/test-self-hosted-linux.yml
index b3f8e1c..dd0ff4f 100644
--- a/.github/workflows/test-self-hosted-linux.yml
+++ b/.github/workflows/test-self-hosted-linux.yml
@@ -9,97 +9,20 @@ jobs:
   # #########################################################
   build_wheels_self_hosted:
     name: Build wheels and Upload (Linux x86_64, self-hosted runner)
-    runs-on: self-hosted
-    container:
-      image: ubuntu:22.04
-    defaults:
-      run:
-        shell: bash
-    env:
-      DEBIAN_FRONTEND: noninteractive
-      TERM: xterm-256color
-    timeout-minutes: 2000
+    uses: ./.github/workflows/_build_linux_self_host.yml
     strategy:
       fail-fast: false
       matrix:
-        flash-attn-version: ["2.8.0"]
-        python-version: ["3.11", "3.11"]
-        torch-version: ["2.7.1"]
+        flash-attn-version: ["2.8.3"]
+        python-version: ["3.13", "3.13"]
+        torch-version: ["2.9.1"]
         # https://developer.nvidia.com/cuda-toolkit-archive
-        cuda-version: ["12.8.1"]
-    steps:
-      - name: Install tools
-        shell: bash
-        run: |
-          apt-get update && apt-get install -y --no-install-recommends \
-            curl \
-            ca-certificates \
-            sudo \
-            software-properties-common \
-            wget \
-            unzip \
-            zip \
-            git \
-            build-essential \
-            gcc \
-            g++ \
-            clang \
-            ninja-build \
-            keyboard-configuration \
-            time
-
-      - name: Install gh
-        shell: bash
-        run: |
-          sudo mkdir -p -m 755 /etc/apt/keyrings
-          out=$(mktemp)
-          wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg
-          cat $out | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null
-          sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg
-          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
-          sudo apt update
-          sudo apt install gh -y
-
-      - uses: actions/checkout@v4
-
-      - name: Configure Git safe directory
-        shell: bash
-        run: |
-          git config --global --add safe.directory $(pwd)
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - uses: mjun0812/setup-cuda@v1
-        with:
-          version: "${{ matrix.cuda-version }}"
-
-      - name: Install build dependencies
-        shell: bash
-        run: |
-          sudo apt install -y ninja-build clang
-          pip install -U pip setuptools==75.8.0 wheel setuptools packaging psutil
-
-      - name: Build wheels
-        timeout-minutes: 1200
-        id: build_wheels
-        shell: bash
-        run: |
-          chmod +x build_linux.sh
-          ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
-          wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1))
-          echo "WHEEL_NAME=$wheel_name" >> $GITHUB_OUTPUT
-
-      - name: Install Test
-        shell: bash
-        run: |
-          pip uninstall -y flash-attn > /dev/null 2>&1
-          pip install --no-cache-dir flash-attention/dist/${{ steps.build_wheels.outputs.WHEEL_NAME }}
-          python -c "import flash_attn; print(flash_attn.__version__)"
-
-      - name: Clean up
-        shell: bash
-        if: always()
-        run: |
-          rm -rf /opt/hostedtoolcache/Python
+        cuda-version: ["13.0.1"]
+        runner: ["self-hosted", "openci-runner-beta"]
+    with:
+      flash-attn-version: ${{ matrix.flash-attn-version }}
+      python-version: ${{ matrix.python-version }}
+      torch-version: ${{ matrix.torch-version }}
+      cuda-version: ${{ matrix.cuda-version }}
+      is-upload: false
+      runner: ${{ matrix.runner }}
diff --git a/create_matrix.py b/create_matrix.py
index 9c691d7..421c645 100644
--- a/create_matrix.py
+++ b/create_matrix.py
@@ -19,6 +19,12 @@ EXCLUDE = [
     {"torch-version": "2.7.1", "cuda-version": "12.9.1"},
     # torch >= 2.9 does not support Python 3.9
     {"torch-version": "2.9.1", "python-version": "3.9"},
+    # torch < 2.9 does not support CUDA 13.0
+    {"torch-version": "2.5.1", "cuda-version": "13.0.1"},
+    {"torch-version": "2.6.0", "cuda-version": "13.0.1"},
+    {"torch-version": "2.7.1", "cuda-version": "13.0.1"},
+    {"torch-version": "2.8.1", "cuda-version": "13.0.1"},
+    {"torch-version": "2.8.0", "cuda-version": "13.0.1"},
 ]
 
 LINUX_MATRIX = {
@@ -31,7 +37,7 @@ LINUX_MATRIX = {
         "3.10",
         "3.11",
         "3.12",
-        # "3.13"
+        "3.13",
     ],
     "torch-version": [
         "2.5.1",
@@ -42,13 +48,15 @@ LINUX_MATRIX = {
     ],
     "cuda-version": [
         "12.4.1",
-        "12.6.3",
+        # "12.6.3",
         "12.8.1",
         # "12.9.1",
         "13.0.2",
     ],
 }
 
+LINUX_ARM64_MATRIX = LINUX_MATRIX
+
 LINUX_SELF_HOSTED_MATRIX = {
     "flash-attn-version": ["2.7.4"],
     "python-version": ["3.10", "3.11", "3.12", "3.13"],
@@ -96,14 +104,21 @@ def main():
     print(
         json.dumps(
             {
-                "linux": LINUX_MATRIX,
-                # "linux": False,
-                # "linux_self_hosted": LINUX_SELF_HOSTED_MATRIX,
+                "linux": False,
+                # "linux": LINUX_MATRIX,
+                #
+                # "linux_arm64": False,
+                "linux_arm64": LINUX_ARM64_MATRIX,
+                #
                 "linux_self_hosted": False,
-                "windows": WINDOWS_MATRIX,
-                # "windows": False,
-                # "windows_code_build": WINDOWS_CODEBUILD_MATRIX,
+                # "linux_self_hosted": LINUX_SELF_HOSTED_MATRIX,
+                #
+                "windows": False,
+                # "windows": WINDOWS_MATRIX,
+                #
                 "windows_code_build": False,
+                # "windows_code_build": WINDOWS_CODEBUILD_MATRIX,
+                #
                 "exclude": EXCLUDE,
             }
         )