John Doe
2025-11-23 22:22:14 -05:00
parent 2720aedbef
commit 4824fd795b
2 changed files with 34 additions and 6 deletions

View File

@@ -102,10 +102,11 @@ fi
echo "Using PyTorch index URL: ${PYTORCH_INDEX_URL}"
if echo "${PYTORCH_INDEX_URL}" | grep -q "rocm.nightlies.amd.com"; then
pip install --pre torch torchvision torchaudio --extra-index-url ${PYTORCH_INDEX_URL}
pip install --pre torch torchvision torchaudio pytorch-triton-rocm --extra-index-url ${PYTORCH_INDEX_URL}
else
pip install --pre torch torchvision torchaudio --index-url ${PYTORCH_INDEX_URL}
pip install --pre torch torchvision torchaudio pytorch-triton-rocm --index-url ${PYTORCH_INDEX_URL}
fi
pip install flash-attn --index-url https://pypi.org/simple
echo "Installing ComfyUI requirements..."
pip install -r requirements.txt
@@ -116,7 +117,7 @@ if [ -f "start.sh" ]; then
./start.sh
else
echo "No start.sh found, creating default startup script..."
echo "python main.py --listen 0.0.0.0 --port 8188 --use-split-cross-attention" > start.sh
echo "python main.py --listen 0.0.0.0 --port 8188 --use-split-cross-attention --use-quad-cross-attention" > start.sh
chmod +x start.sh
./start.sh
fi

View File

@@ -97,15 +97,42 @@ services:
image: docker.io/getterup/comfyui-rocm7.1:latest
container_name: comfyui
environment:
- ROCR_VISIBLE_DEVICES=1
- COMFYUI_ENABLE_ROCM=True
- GPU_ARCH=gfx110X
- PYTORCH_TUNABLEOP_ENABLED=0
- MIOPEN_FIND_MODE=NORMAL
- TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
- AMD_SERIALIZE_KERNEL=1
- MIOPEN_USER_DB_PATH=/tmp/.miopen
- MIOPEN_CUSTOM_CACHE_DIR=/tmp/.miopen
# === ROCm device visibility ===
- HIP_VISIBLE_DEVICES=0
- ROCR_VISIBLE_DEVICES=1
# === GPU targeting ===
- HCC_AMDGPU_TARGET="gfx1100" # Change for your GPU
- PYTORCH_ROCM_ARCH="gfx1100" # e.g., gfx1030 for RX 6800/6900
# === Memory allocator tuning ===
- PYTORCH_HIP_ALLOC_CONF="garbage_collection_threshold:0.6,max_split_size_mb:6144"
# === Precision and performance ===
- TORCH_BLAS_PREFER_HIPBLASLT=0
- TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDS="CK,TRITON,ROCBLAS"
- TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE="BEST"
- TORCHINDUCTOR_FORCE_FALLBACK=0
# === Flash Attention ===
- FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
- FLASH_ATTENTION_BACKEND="flash_attn_triton_amd"
- FLASH_ATTENTION_TRITON_AMD_SEQ_LEN=4096
- USE_CK=ON
- TRANSFORMERS_USE_FLASH_ATTENTION=1
- TRITON_USE_ROCM=ON
- TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
# === CPU threading ===
- OMP_NUM_THREADS=8
- MKL_NUM_THREADS=8
- NUMEXPR_NUM_THREADS=8
# === Experimental ROCm flags ===
- HSA_ENABLE_ASYNC_COPY=1
- HSA_ENABLE_SDMA=1
- MIOPEN_FIND_MODE=2
- MIOPEN_ENABLE_CACHE=1
ports:
- "8188:8188"
networks: