Patch summary:
  * test-self-hosted-linux.yml: publish the built wheel's file name as the step
    output WHEEL_NAME (via GITHUB_OUTPUT) instead of the env var wheel_name.
    NOTE(review): consumers must read steps.<id>.outputs.WHEEL_NAME, which
    requires an `id` on this step -- not visible in this hunk, please confirm.
  * build_linux.sh: cap NVCC_THREADS at 4 and budget 2.25GB (was 2.5GB) of RAM
    per MAX_JOBS x NVCC_THREADS unit when auto-sizing the build parallelism.

diff --git a/.github/workflows/test-self-hosted-linux.yml b/.github/workflows/test-self-hosted-linux.yml
index 949177b..b3f8e1c 100644
--- a/.github/workflows/test-self-hosted-linux.yml
+++ b/.github/workflows/test-self-hosted-linux.yml
@@ -89,7 +89,7 @@ jobs:
           chmod +x build_linux.sh
           ./build_linux.sh ${{ matrix.flash-attn-version }} ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }}
           wheel_name=$(basename $(ls flash-attention/dist/*.whl | head -n 1))
-          echo "wheel_name=$wheel_name" >> $GITHUB_ENV
+          echo "WHEEL_NAME=$wheel_name" >> "$GITHUB_OUTPUT"
 
       - name: Install Test
         shell: bash
diff --git a/build_linux.sh b/build_linux.sh
index 31e8d70..0819c3b 100755
--- a/build_linux.sh
+++ b/build_linux.sh
@@ -54,15 +54,21 @@ echo " RAM: ${RAM_GB}GB"
 # Determine MAX_JOBS and NVCC_THREADS based on system resources
 if [[ -z "${MAX_JOBS:-}" && -z "${NVCC_THREADS:-}" ]]; then
     # Calculate max product based on following constraints:
-    # - MAX_JOBS x NVCC_THREADS <= NUM_THREADS
-    # - 2.5GB x MAX_JOBS x NVCC_THREADS <= RAM_GB
+    # - MAX_JOBS x NVCC_THREADS(<= 4) <= NUM_THREADS
+    # - 2.25GB x MAX_JOBS x NVCC_THREADS(<= 4) <= RAM_GB
     MAX_PRODUCT_CPU=$NUM_THREADS
-    MAX_PRODUCT_RAM=$(awk -v ram="$RAM_GB" 'BEGIN {print int(ram / 2.5)}')
+    MAX_PRODUCT_RAM=$(awk -v ram="$RAM_GB" 'BEGIN {print int(ram / 2.25)}')
     MAX_PRODUCT=$((MAX_PRODUCT_CPU < MAX_PRODUCT_RAM ? MAX_PRODUCT_CPU : MAX_PRODUCT_RAM))
 
-    # Set MAX_JOBS = NVCC_THREADS = floor(sqrt(MAX_PRODUCT))
-    MAX_JOBS=$(awk -v max="$MAX_PRODUCT" 'BEGIN {print int(sqrt(max))}')
-    NVCC_THREADS=$MAX_JOBS
+    # Set MAX_JOBS and NVCC_THREADS so that MAX_JOBS x NVCC_THREADS ≈ MAX_PRODUCT with NVCC_THREADS <= 4
+    BASE_THREADS=$(awk -v max="$MAX_PRODUCT" 'BEGIN {print int(sqrt(max))}')
+    if (( BASE_THREADS <= 4 )); then
+        MAX_JOBS=$BASE_THREADS
+        NVCC_THREADS=$BASE_THREADS
+    else
+        NVCC_THREADS=4
+        MAX_JOBS=$((MAX_PRODUCT / NVCC_THREADS))
+    fi
 
     # Ensure minimum values of 1
     MAX_JOBS=$((MAX_JOBS < 1 ? 1 : MAX_JOBS))