Stable Diffusion ComfyUI Scaling
ComfyUI Node-based GUI สำหรับ Stable Diffusion สร้างภาพ AI Visual Workflow SDXL SD1.5 ControlNet LoRA IPAdapter Scale สำหรับ Production Multi-GPU Multi-Node
| Scale Level | Setup | Users | Throughput |
|---|---|---|---|
| Single GPU | 1x RTX 4090 | 1-5 | 2-5 img/min |
| Multi-GPU | 2-4x GPU | 5-20 | 10-20 img/min |
| Docker Compose | 4-8 Workers | 20-50 | 20-40 img/min |
| Kubernetes | Auto-scaling | 50-500+ | 50-200+ img/min |
Docker Setup
# === ComfyUI Docker Setup ===
# Dockerfile
# FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
#
# RUN apt-get update && apt-get install -y \
# python3 python3-pip git wget \
# && rm -rf /var/lib/apt/lists/*
#
# WORKDIR /app
# RUN git clone https://github.com/comfyanonymous/ComfyUI.git .
# RUN pip3 install -r requirements.txt
# RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu121
#
# EXPOSE 8188
# CMD ["python3", "main.py", "--listen", "0.0.0.0", "--port", "8188"]
# docker-compose.yml
# version: '3.8'   # NOTE: the top-level "version" key is obsolete in Compose v2 and may be omitted
# services:
# comfyui-worker-1:
# build: .
# runtime: nvidia
# environment:
# - NVIDIA_VISIBLE_DEVICES=0
# ports:
# - "8188:8188"
# volumes:
# - ./models:/app/models
# - ./output:/app/output
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
#
# comfyui-worker-2:
# build: .
# runtime: nvidia
# environment:
# - NVIDIA_VISIBLE_DEVICES=1
# ports:
# - "8189:8188"
# volumes:
# - ./models:/app/models
# - ./output:/app/output
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
#
# nginx:
# image: nginx:alpine
# ports:
# - "80:80"
# volumes:
# - ./nginx.conf:/etc/nginx/nginx.conf
# depends_on:
# - comfyui-worker-1
# - comfyui-worker-2
#
# redis:
# image: redis:7-alpine
# ports:
# - "6379:6379"
# nginx.conf — Load Balancer
# upstream comfyui_workers {
# least_conn;
# server comfyui-worker-1:8188;
# server comfyui-worker-2:8188;
# }
#
# server {
# listen 80;
# location / {
# proxy_pass http://comfyui_workers;
# proxy_http_version 1.1;
# proxy_set_header Upgrade $http_upgrade;
# proxy_set_header Connection "upgrade";
# proxy_read_timeout 300s;
# }
# }
# Commands
# docker compose up -d
# docker compose logs -f comfyui-worker-1
# docker compose up -d --scale comfyui-worker-1=4   # "docker compose scale" is deprecated; remove the fixed host-port mapping before scaling a service
# docker compose down
import json
from dataclasses import dataclass
from typing import Dict, List, Optional
@dataclass
class WorkerConfig:
    """Static description and scheduling status of one ComfyUI worker process."""

    name: str  # worker identifier, e.g. "worker-1"
    gpu_id: int  # CUDA device index this worker is pinned to
    port: int  # HTTP port the worker listens on
    vram_gb: float  # GPU memory available to the worker, in gigabytes
    status: str = "idle"  # scheduling state: "idle" or "busy"
class ComfyUICluster:
    """Tracks a pool of ComfyUI workers and hands out idle ones.

    This is a plain in-process registry: no health checks or networking —
    it only flips the ``status`` field on registered workers.
    """

    def __init__(self):
        # All registered workers, in registration order.
        self.workers: List["WorkerConfig"] = []

    def add_worker(self, worker: "WorkerConfig"):
        """Register *worker* with the cluster."""
        self.workers.append(worker)

    def get_idle_worker(self) -> Optional["WorkerConfig"]:
        """Claim and return the first idle worker (marking it busy).

        Returns:
            The claimed worker, or ``None`` when every worker is busy.
            (Fixed: the original annotation claimed a worker is always
            returned, but the all-busy path returns ``None``.)
        """
        for worker in self.workers:
            if worker.status == "idle":
                worker.status = "busy"
                return worker
        return None

    def release_worker(self, name: str):
        """Mark the worker called *name* idle again; unknown names are ignored."""
        for worker in self.workers:
            if worker.name == name:
                worker.status = "idle"
                return  # names are assumed unique — stop at the first match

    def show_status(self):
        """Print a one-line status summary for every registered worker."""
        print("ComfyUI Cluster Status:")
        for worker in self.workers:
            print(f" {worker.name} | GPU:{worker.gpu_id} | Port:{worker.port} | "
                  f"VRAM:{worker.vram_gb}GB | {worker.status}")
# Demo: register four identical 24 GB workers (GPU i on port 8188 + i)
# and print the cluster table.
cluster = ComfyUICluster()
for idx in range(4):
    cluster.add_worker(WorkerConfig(f"worker-{idx + 1}", idx, 8188 + idx, 24.0))
cluster.show_status()
Kubernetes Deployment
# === Kubernetes ComfyUI Deployment ===
# comfyui-deployment.yaml
# apiVersion: apps/v1
# kind: Deployment
# metadata:
# name: comfyui-worker
# labels:
# app: comfyui
# spec:
# replicas: 3
# selector:
# matchLabels:
# app: comfyui
# template:
# metadata:
# labels:
# app: comfyui
# spec:
# containers:
# - name: comfyui
# image: comfyui:latest
# ports:
# - containerPort: 8188
# resources:
# limits:
# nvidia.com/gpu: 1
# memory: "16Gi"
# requests:
# nvidia.com/gpu: 1
# memory: "8Gi"
# volumeMounts:
# - name: models
# mountPath: /app/models
# env:
# - name: COMFYUI_LISTEN
# value: "0.0.0.0"
# volumes:
# - name: models
# persistentVolumeClaim:
# claimName: comfyui-models-pvc
# nodeSelector:
# gpu: "true"
# tolerations:
# - key: "nvidia.com/gpu"
# operator: "Exists"
# effect: "NoSchedule"
#
# ---
# apiVersion: v1
# kind: Service
# metadata:
# name: comfyui-service
# spec:
# selector:
# app: comfyui
# ports:
# - port: 80
# targetPort: 8188
# type: ClusterIP
#
# ---
# apiVersion: autoscaling/v2
# kind: HorizontalPodAutoscaler
# metadata:
# name: comfyui-hpa
# spec:
# scaleTargetRef:
# apiVersion: apps/v1
# kind: Deployment
# name: comfyui-worker
# minReplicas: 2
# maxReplicas: 10
# metrics:
# - type: Pods
# pods:
# metric:
# name: queue_length
# target:
# type: AverageValue
# averageValue: "5"
# kubectl commands
# kubectl apply -f comfyui-deployment.yaml
# kubectl get pods -l app=comfyui
# kubectl scale deployment comfyui-worker --replicas=5
# kubectl logs -f deployment/comfyui-worker
# kubectl top pods -l app=comfyui
# Queue System with Redis (the class below is an in-memory demo; swap in a Redis backend for production)
import time
from typing import Optional
class ImageQueue:
    """In-memory priority job queue for ComfyUI workflows.

    NOTE(review): despite the *redis_url* parameter, this implementation is a
    pure in-process stub — nothing is ever sent to Redis. The parameter is
    kept only for interface compatibility with a future Redis-backed version.
    """

    def __init__(self, redis_url: str = "redis://localhost:6379"):
        self.queue = []       # pending job dicts; sorted by priority on demand
        self.results = {}     # job_id -> completion record
        self.job_counter = 0  # monotonically increasing id source

    def submit_job(self, workflow: dict, priority: int = 0) -> str:
        """Enqueue *workflow* and return its generated job id (e.g. "job-000001")."""
        self.job_counter += 1
        job_id = f"job-{self.job_counter:06d}"
        self.queue.append({
            "id": job_id,
            "workflow": workflow,
            "priority": priority,
            "status": "queued",
            "submitted_at": time.time(),
        })
        return job_id

    def get_next_job(self) -> Optional[dict]:
        """Pop and return the highest-priority job, or ``None`` when empty.

        Python's sort is stable, so jobs of equal priority keep FIFO order.
        """
        if not self.queue:
            return None
        self.queue.sort(key=lambda job: -job["priority"])
        job = self.queue.pop(0)
        job["status"] = "processing"
        return job

    def complete_job(self, job_id: str, result: dict):
        """Record *result* for a finished job; visible via :meth:`get_status`."""
        self.results[job_id] = {
            "status": "completed",
            "result": result,
            "completed_at": time.time(),
        }

    def get_status(self, job_id: str) -> dict:
        """Return the completion record, the queued status + position, or not_found."""
        if job_id in self.results:
            return self.results[job_id]
        # enumerate gives the position directly — the original re-scanned the
        # list with queue.index(job) for every match.
        for position, job in enumerate(self.queue):
            if job["id"] == job_id:
                return {"status": job["status"], "position": position}
        return {"status": "not_found"}
# Demo: enqueue two jobs, inspect one, then pull the highest-priority job.
queue = ImageQueue()
job1 = queue.submit_job({"prompt": "a cat", "steps": 20}, priority=1)
job2 = queue.submit_job({"prompt": "a dog", "steps": 30}, priority=2)
print("Submitted: {}, {}".format(job1, job2))
print("Status: {}".format(queue.get_status(job1)))
next_job = queue.get_next_job()
print("Processing: " + next_job["id"])
Performance Optimization
# performance.py — ComfyUI Performance Tips
# Catalogue of ComfyUI performance levers: tip, expected impact, and the
# command / setting that enables each one.
optimizations = {
    "Model Loading": {
        "tip": "ใช้ --highvram หรือ --gpu-only ให้ Model อยู่ใน VRAM",
        "impact": "ลดเวลา Load 80%",
        "command": "python main.py --highvram --listen 0.0.0.0",
    },
    "xformers": {
        "tip": "ติดตั้ง xformers สำหรับ Memory Efficient Attention",
        "impact": "ลด VRAM 30% เร็วขึ้น 20%",
        "command": "pip install xformers",
    },
    "FP16/BF16": {
        "tip": "ใช้ Half Precision ลด VRAM และเพิ่มความเร็ว",
        "impact": "ลด VRAM 50% เร็วขึ้น 30%",
        "command": "python main.py --force-fp16",
    },
    "Batch Processing": {
        "tip": "รวม Requests เป็น Batch ประมวลผลพร้อมกัน",
        "impact": "เพิ่ม Throughput 40%",
        "command": "ตั้ง batch_size ใน Workflow",
    },
    "Model Caching": {
        "tip": "Cache Models ใน RAM/VRAM ไม่ต้อง Load จาก Disk",
        "impact": "ลดเวลา First Image จาก 30s เหลือ 3s",
        "command": "python main.py --highvram --disable-smart-memory",
    },
    "TensorRT": {
        "tip": "Compile Model เป็น TensorRT Engine",
        "impact": "เร็วขึ้น 40-60%",
        "command": "ใช้ ComfyUI-TensorRT Node",
    },
}
print("ComfyUI Performance Optimization:")
for opt_name, details in optimizations.items():
    print(f"\n [{opt_name}]")
    # Labels are capitalized in the output but lowercase as dict keys.
    for label in ("Tip", "Impact", "Command"):
        print(f" {label}: {details[label.lower()]}")
# Minimum / recommended VRAM and rough throughput per model family.
vram_req = {
    "SD 1.5 (512x512)": {"min": "4GB", "rec": "6GB", "speed": "~5 img/min"},
    "SD 1.5 (768x768)": {"min": "6GB", "rec": "8GB", "speed": "~3 img/min"},
    "SDXL (1024x1024)": {"min": "8GB", "rec": "12GB", "speed": "~2 img/min"},
    "SDXL + Refiner": {"min": "12GB", "rec": "16GB", "speed": "~1 img/min"},
    "SDXL + ControlNet": {"min": "12GB", "rec": "24GB", "speed": "~1.5 img/min"},
    "Flux.1 Dev": {"min": "16GB", "rec": "24GB", "speed": "~0.5 img/min"},
}
print("\n\nVRAM Requirements:")
for model_name, spec in vram_req.items():
    print(f" {model_name}: Min {spec['min']} | Rec {spec['rec']} | {spec['speed']}")
เคล็ดลับ
- GPU: RTX 4090 24GB ดีที่สุดสำหรับ Price/Performance
- Queue: ใช้ Redis Queue จัดคิว ป้องกัน GPU Overload
- Models: แชร์ Models ผ่าน NFS/PVC ไม่ต้อง Download ทุก Worker
- Monitoring: ใช้ Prometheus + Grafana Monitor GPU VRAM Queue
- Autoscale: Scale ตาม Queue Length ไม่ใช่ CPU Usage
ComfyUI คืออะไร
Node-based GUI สำหรับ Stable Diffusion Visual Workflow SDXL SD1.5 ControlNet LoRA IPAdapter Custom Node Python Browser
ทำไมต้อง Scale ComfyUI
Users มาก GPU ตัวเดียวไม่พอ Queue ยาว Scale เพิ่ม Workers Load Balancer Queue System Concurrent Users Throughput
ComfyUI รัน Docker ได้ไหม
ได้ NVIDIA Container Toolkit GPU Passthrough Dockerfile Models Volume Port 8188 docker compose Reproducible Kubernetes
ComfyUI Scale บน Kubernetes อย่างไร
GPU Node Pool Resource Limits HPA Scale Queue Length Redis Ingress Load Balancer PVC Models แชร์ Pods Autoscaler
สรุป
Stable Diffusion ComfyUI Scaling Strategy Docker Compose Multi-GPU Kubernetes HPA Redis Queue Nginx Load Balancer TensorRT xformers Performance Optimization Production Deployment
