@@ -102,10 +102,11 @@ fi
 echo "Using PyTorch index URL: ${PYTORCH_INDEX_URL}"
 if echo "${PYTORCH_INDEX_URL}" | grep -q "rocm.nightlies.amd.com"; then
-    pip install --pre torch torchvision torchaudio --extra-index-url ${PYTORCH_INDEX_URL}
+    pip install --pre torch torchvision torchaudio pytorch-triton-rocm --extra-index-url ${PYTORCH_INDEX_URL}
 else
-    pip install --pre torch torchvision torchaudio --index-url ${PYTORCH_INDEX_URL}
+    pip install --pre torch torchvision torchaudio pytorch-triton-rocm --index-url ${PYTORCH_INDEX_URL}
 fi
+pip install flash-attn --index-url https://pypi.org/simple
 echo "Installing ComfyUI requirements..."
 pip install -r requirements.txt
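A quick way to sanity-check the install step in the hunk above, once the environment is built, is to ask the freshly installed PyTorch what it was compiled against. This is an editor's sketch, not part of the commit; it assumes the pip installs above succeeded and that python resolves to the same environment.

python -c "import torch; print('torch', torch.__version__)"
python -c "import torch; print('HIP runtime:', torch.version.hip)"          # non-empty on ROCm wheels, None on CPU/CUDA wheels
python -c "import torch; print('GPU visible:', torch.cuda.is_available())"  # True when the ROCm device is reachable

If torch.version.hip prints None, the wheel most likely did not come from ${PYTORCH_INDEX_URL}.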
@@ -116,7 +117,7 @@ if [ -f "start.sh" ]; then
     ./start.sh
 else
     echo "No start.sh found, creating default startup script..."
-    echo "python main.py --listen 0.0.0.0 --port 8188 --use-split-cross-attention" > start.sh
+    echo "python main.py --listen 0.0.0.0 --port 8188 --use-split-cross-attention --use-quad-cross-attention" > start.sh
     chmod +x start.sh
     ./start.sh
 fi
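The fallback in the hunk above only fires when the ComfyUI checkout has no start.sh of its own, so the launch flags can be pinned without editing the image by dropping a hand-written script next to main.py beforehand. A minimal sketch (hypothetical, not part of the commit) that mirrors what the echo line generates:

cat > start.sh <<'EOF'
#!/usr/bin/env bash
# Hypothetical custom launcher; pick the attention flag that suits the GPU.
python main.py --listen 0.0.0.0 --port 8188 --use-split-cross-attention
EOF
chmod +x start.sh
./start.sh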
@@ -97,15 +97,42 @@ services:
    image: docker.io/getterup/comfyui-rocm7.1:latest
    container_name: comfyui
    environment:
      - ROCR_VISIBLE_DEVICES=1
      - COMFYUI_ENABLE_ROCM=True
      - GPU_ARCH=gfx110X
      - PYTORCH_TUNABLEOP_ENABLED=0
      - MIOPEN_FIND_MODE=NORMAL
      - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
      - AMD_SERIALIZE_KERNEL=1
      - MIOPEN_USER_DB_PATH=/tmp/.miopen
      - MIOPEN_CUSTOM_CACHE_DIR=/tmp/.miopen
      # === ROCm paths ===
      - HIP_VISIBLE_DEVICES=0
      - ROCR_VISIBLE_DEVICES=1
      # === GPU targeting ===
      - HCC_AMDGPU_TARGET="gfx1100"  # Change for your GPU
      - PYTORCH_ROCM_ARCH="gfx1100"  # e.g., gfx1030 for RX 6800/6900
      # === Memory allocator tuning ===
      - PYTORCH_HIP_ALLOC_CONF="garbage_collection_threshold:0.6,max_split_size_mb:6144"
      # === Precision and performance ===
      - TORCH_BLAS_PREFER_HIPBLASLT=0
      - TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDS="CK,TRITON,ROCBLAS"
      - TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE="BEST"
      - TORCHINDUCTOR_FORCE_FALLBACK=0
      # === Flash Attention ===
      - FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
      - FLASH_ATTENTION_BACKEND="flash_attn_triton_amd"
      - FLASH_ATTENTION_TRITON_AMD_SEQ_LEN=4096
      - USE_CK=ON
      - TRANSFORMERS_USE_FLASH_ATTENTION=1
      - TRITON_USE_ROCM=ON
      - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
      # === CPU threading ===
      - OMP_NUM_THREADS=8
      - MKL_NUM_THREADS=8
      - NUMEXPR_NUM_THREADS=8
      # === Experimental ROCm flags ===
      - HSA_ENABLE_ASYNC_COPY=1
      - HSA_ENABLE_SDMA=1
      - MIOPEN_FIND_MODE=2
      - MIOPEN_ENABLE_CACHE=1
    ports:
      - "8188:8188"
    networks:
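GPU_ARCH, HCC_AMDGPU_TARGET, and PYTORCH_ROCM_ARCH in the hunk above are hard-wired to the gfx1100 (RDNA3) family, and the inline comments already say to change them for other GPUs. One way to look the correct value up, assuming the ROCm userspace tools (rocminfo) are installed on the host, which the compose file itself does not guarantee:

rocminfo | grep -m1 -oE 'gfx[0-9a-f]+'   # e.g. gfx1100 on an RX 7900 XTX, gfx1030 on an RX 6800/6900

The printed string is the value those three variables expect.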