mirror of
https://github.com/BillyOutlast/flash-attention-prebuild-wheels-rocm.git
synced 2026-07-01 01:27:54 -04:00
ci: refactor self-hosted runner configuration to use JSON format
- Convert runs-on array syntax to single runner value with fromjson()
- Update runner parameter defaults to JSON string format
- Add runner labels to build workflow calls
- Update test workflow runner configurations
- Remove unused test-utils.yml workflow
This commit is contained in:
@@ -42,9 +42,7 @@ on:
|
||||
jobs:
|
||||
build_wheels_self_hosted:
|
||||
name: Build wheels and Upload (Linux x86_64, self-hosted runner)
|
||||
runs-on:
|
||||
- ${{ inputs.runner }}
|
||||
- arm64
|
||||
runs-on: ${{ inputs.runner }}
|
||||
container:
|
||||
image: ${{ inputs.container-image }}
|
||||
options: --platform linux/arm64
|
||||
|
||||
@@ -27,7 +27,7 @@ on:
|
||||
description: "Runner type"
|
||||
required: false
|
||||
type: string
|
||||
default: "self-hosted"
|
||||
default: '["self-hosted"]'
|
||||
is-upload:
|
||||
description: "Whether to upload the release asset"
|
||||
required: false
|
||||
@@ -48,9 +48,7 @@ jobs:
|
||||
build_wheels_self_hosted:
|
||||
if: ${{ inputs.use-container }}
|
||||
name: Build wheels and Upload (Linux x86_64, self-hosted runner)
|
||||
runs-on:
|
||||
- ${{ inputs.runner }}
|
||||
- x64
|
||||
runs-on: ${{ fromjson(inputs.runner) }}
|
||||
container:
|
||||
image: ${{ inputs.container-image }}
|
||||
defaults:
|
||||
@@ -187,9 +185,7 @@ jobs:
|
||||
build_wheels_self_hosted_no_container:
|
||||
if: ${{ !inputs.use-container }}
|
||||
name: Build wheels and Upload (Linux x86_64, self-hosted runner)
|
||||
runs-on:
|
||||
- ${{ inputs.runner }}
|
||||
- x64
|
||||
runs-on: ${{ fromjson(inputs.runner) }}
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
@@ -94,6 +94,7 @@ jobs:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
torch-version: ${{ matrix.torch-version }}
|
||||
cuda-version: ${{ matrix.cuda-version }}
|
||||
runner: '["self-hosted", "x64"]'
|
||||
secrets: inherit
|
||||
|
||||
build_wheels_linux_arm64_self_hosted:
|
||||
@@ -114,6 +115,7 @@ jobs:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
torch-version: ${{ matrix.torch-version }}
|
||||
cuda-version: ${{ matrix.cuda-version }}
|
||||
runner: '["self-hosted-arm", "arm64"]'
|
||||
secrets: inherit
|
||||
|
||||
# #########################################################
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
torch-version: ["2.9.1"]
|
||||
# https://developer.nvidia.com/cuda-toolkit-archive
|
||||
cuda-version: ["13.0.1"]
|
||||
runner: ["self-hosted"]
|
||||
runner: ['["self-hosted-arm"]']
|
||||
with:
|
||||
flash-attn-version: ${{ matrix.flash-attn-version }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
torch-version: ["2.9.1"]
|
||||
# https://developer.nvidia.com/cuda-toolkit-archive
|
||||
cuda-version: ["13.0.1"]
|
||||
runner: ["openci-runner-beta"]
|
||||
runner: ['["openci-runner-beta"]']
|
||||
use-container: [false]
|
||||
with:
|
||||
flash-attn-version: ${{ matrix.flash-attn-version }}
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
name: Test runner environment
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
create_releases:
|
||||
name: Create Releases
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.13", "3.12"]
|
||||
runner: ["ubuntu-22.04-arm", "ubuntu-22.04", "self-hosted"]
|
||||
runs-on: ${{ matrix.runner }}
|
||||
steps:
|
||||
- name: Check environment
|
||||
shell: bash
|
||||
run: |
|
||||
cat /etc/os-release || true
|
||||
echo "-----------------------------"
|
||||
cat /etc/lsb-release || true
|
||||
echo "-----------------------------"
|
||||
lscpu || true
|
||||
echo "-----------------------------"
|
||||
df -h || true
|
||||
echo "-----------------------------"
|
||||
free -h || true
|
||||
|
||||
- name: Determine MAX_JOBS and NVCC_THREADS
|
||||
shell: bash
|
||||
run: |
|
||||
# Determine MAX_JOBS and NVCC_THREADS based on system resources
|
||||
NUM_THREADS=$(nproc)
|
||||
RAM_GB=$(free -g | awk '/^Mem:/{print $2}')
|
||||
echo "System resources:"
|
||||
echo " CPU threads: $NUM_THREADS"
|
||||
echo " RAM: ${RAM_GB}GB"
|
||||
|
||||
# Auto-detect only when neither MAX_JOBS nor NVCC_THREADS is already set
|
||||
if [[ -z "${MAX_JOBS:-}" && -z "${NVCC_THREADS:-}" ]]; then
|
||||
# Calculate max product based on following constraints:
|
||||
# - MAX_JOBS x NVCC_THREADS(<= 4) <= NUM_THREADS
|
||||
# - 2.5GB x MAX_JOBS x NVCC_THREADS(<= 4) <= RAM_GB
|
||||
|
||||
# Set MAX_PRODUCT to the smaller of the CPU-based and RAM-based limits
|
||||
MAX_PRODUCT_CPU=$NUM_THREADS
|
||||
MAX_PRODUCT_RAM=$(awk -v ram="$RAM_GB" 'BEGIN {print int(ram / 2.5)}')
|
||||
MAX_PRODUCT=$((MAX_PRODUCT_CPU < MAX_PRODUCT_RAM ? MAX_PRODUCT_CPU : MAX_PRODUCT_RAM))
|
||||
|
||||
# Set MAX_JOBS and NVCC_THREADS so that MAX_JOBS x NVCC_THREADS ≈ MAX_PRODUCT with NVCC_THREADS <= 4
|
||||
BASE_THREADS=$(awk -v max="$MAX_PRODUCT" 'BEGIN {print int(sqrt(max))}')
|
||||
|
||||
if awk "BEGIN {exit !($RAM_GB <= 16)}"; then
|
||||
# If RAM is 16GB or less, set NVCC_THREADS to 1 and MAX_JOBS to 2
|
||||
NVCC_THREADS=1
|
||||
MAX_JOBS=2
|
||||
elif (( BASE_THREADS <= 4 )); then
|
||||
NVCC_THREADS=$BASE_THREADS
|
||||
MAX_JOBS=$BASE_THREADS
|
||||
else
|
||||
NVCC_THREADS=4
|
||||
MAX_JOBS=$((MAX_PRODUCT / NVCC_THREADS))
|
||||
fi
|
||||
|
||||
# Ensure minimum values of 1
|
||||
MAX_JOBS=$((MAX_JOBS < 1 ? 1 : MAX_JOBS))
|
||||
NVCC_THREADS=$((NVCC_THREADS < 1 ? 1 : NVCC_THREADS))
|
||||
fi
|
||||
|
||||
echo "Build parallelism settings:"
|
||||
echo " MAX_JOBS: $MAX_JOBS"
|
||||
echo " NVCC_THREADS: $NVCC_THREADS"
|
||||
Reference in New Issue
Block a user