@@ -102,10 +102,11 @@ fi
 echo "Using PyTorch index URL: ${PYTORCH_INDEX_URL}"
 if echo "${PYTORCH_INDEX_URL}" | grep -q "rocm.nightlies.amd.com"; then
-    pip install --pre torch torchvision torchaudio --extra-index-url ${PYTORCH_INDEX_URL}
+    pip install --pre torch torchvision torchaudio pytorch-triton-rocm --extra-index-url ${PYTORCH_INDEX_URL}
 else
-    pip install --pre torch torchvision torchaudio --index-url ${PYTORCH_INDEX_URL}
+    pip install --pre torch torchvision torchaudio pytorch-triton-rocm --index-url ${PYTORCH_INDEX_URL}
 fi
+pip install flash-attn --index-url https://pypi.org/simple
 echo "Installing ComfyUI requirements..."
 pip install -r requirements.txt
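A quick way to sanity-check the install step in the hunk above, once the environment is built, is to ask the freshly installed PyTorch what it was compiled against. This is an editor's sketch, not part of the commit; it assumes the pip installs above succeeded and that python resolves to the same environment.

python -c "import torch; print('torch', torch.__version__)"
python -c "import torch; print('HIP runtime:', torch.version.hip)"          # non-empty on ROCm wheels, None on CPU/CUDA wheels
python -c "import torch; print('GPU visible:', torch.cuda.is_available())"  # True when the ROCm device is reachable

If torch.version.hip prints None, the wheel most likely did not come from ${PYTORCH_INDEX_URL}.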
@@ -116,7 +117,7 @@ if [ -f "start.sh" ]; then
     ./start.sh
 else
     echo "No start.sh found, creating default startup script..."
-    echo "python main.py --listen 0.0.0.0 --port 8188 --use-split-cross-attention" > start.sh
+    echo "python main.py --listen 0.0.0.0 --port 8188 --use-split-cross-attention --use-quad-cross-attention" > start.sh
     chmod +x start.sh
     ./start.sh
 fi
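The fallback in the hunk above only fires when the ComfyUI checkout has no start.sh of its own, so the launch flags can be pinned without editing the image by dropping a hand-written script next to main.py beforehand. A minimal sketch (hypothetical, not part of the commit) that mirrors what the echo line generates:

cat > start.sh <<'EOF'
#!/usr/bin/env bash
# Hypothetical custom launcher; pick the attention flag that suits the GPU.
python main.py --listen 0.0.0.0 --port 8188 --use-split-cross-attention
EOF
chmod +x start.sh
./start.sh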
@@ -97,15 +97,42 @@ services:
    image: docker.io/getterup/comfyui-rocm7.1:latest
    container_name: comfyui
    environment:
      - ROCR_VISIBLE_DEVICES=1
      - COMFYUI_ENABLE_ROCM=True
      - GPU_ARCH=gfx110X
      - PYTORCH_TUNABLEOP_ENABLED=0
      - MIOPEN_FIND_MODE=NORMAL
      - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
      - AMD_SERIALIZE_KERNEL=1
      - MIOPEN_USER_DB_PATH=/tmp/.miopen
      - MIOPEN_CUSTOM_CACHE_DIR=/tmp/.miopen
      # === ROCm paths ===
      - HIP_VISIBLE_DEVICES=0
      - ROCR_VISIBLE_DEVICES=1
      # === GPU targeting ===
      - HCC_AMDGPU_TARGET="gfx1100"  # Change for your GPU
      - PYTORCH_ROCM_ARCH="gfx1100"  # e.g., gfx1030 for RX 6800/6900
      # === Memory allocator tuning ===
      - PYTORCH_HIP_ALLOC_CONF="garbage_collection_threshold:0.6,max_split_size_mb:6144"
      # === Precision and performance ===
      - TORCH_BLAS_PREFER_HIPBLASLT=0
      - TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDS="CK,TRITON,ROCBLAS"
      - TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE="BEST"
      - TORCHINDUCTOR_FORCE_FALLBACK=0
      # === Flash Attention ===
      - FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
      - FLASH_ATTENTION_BACKEND="flash_attn_triton_amd"
      - FLASH_ATTENTION_TRITON_AMD_SEQ_LEN=4096
      - USE_CK=ON
      - TRANSFORMERS_USE_FLASH_ATTENTION=1
      - TRITON_USE_ROCM=ON
      - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
      # === CPU threading ===
      - OMP_NUM_THREADS=8
      - MKL_NUM_THREADS=8
      - NUMEXPR_NUM_THREADS=8
      # === Experimental ROCm flags ===
      - HSA_ENABLE_ASYNC_COPY=1
      - HSA_ENABLE_SDMA=1
      - MIOPEN_FIND_MODE=2
      - MIOPEN_ENABLE_CACHE=1
    ports:
      - "8188:8188"
    networks:
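GPU_ARCH, HCC_AMDGPU_TARGET, and PYTORCH_ROCM_ARCH in the hunk above are hard-wired to the gfx1100 (RDNA3) family, and the inline comments already say to change them for other GPUs. One way to look the correct value up, assuming the ROCm userspace tools (rocminfo) are installed on the host, which the compose file itself does not guarantee:

rocminfo | grep -m1 -oE 'gfx[0-9a-f]+'   # e.g. gfx1100 on an RX 7900 XTX, gfx1030 on an RX 6800/6900

The printed string is the value those three variables expect.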