SiamCafe · Blog
Stable Diffusion ComfyUI Scaling Strategy วิธี
บทความ

Stable Diffusion ComfyUI Scaling Strategy วิธี

เผยแพร่ 28 พฤษภาคม 2569

Stable Diffusion ComfyUI Scaling

Stable Diffusion ComfyUI Scaling Strategy วิธี

ComfyUI Node-based GUI สำหรับ Stable Diffusion สร้างภาพ AI Visual Workflow SDXL SD1.5 ControlNet LoRA IPAdapter Scale สำหรับ Production Multi-GPU Multi-Node

Scale Level | Setup | Users | Throughput
Single GPU | 1x RTX 4090 | 1-5 | 2-5 img/min
Multi-GPU | 2-4x GPU | 5-20 | 10-20 img/min
Docker Compose | 4-8 Workers | 20-50 | 20-40 img/min
Kubernetes | Auto-scaling | 50-500+ | 50-200+ img/min

Docker Setup

=== ComfyUI Docker Setup ===

Dockerfile

# ComfyUI worker image: CUDA 12.1 runtime base with ComfyUI cloned into /app.
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04

# System packages needed to fetch and run ComfyUI. The apt lists are removed
# in the same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y \
        python3 python3-pip git wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

RUN git clone https://github.com/comfyanonymous/ComfyUI.git .

# Install the CUDA 12.1 PyTorch wheels BEFORE requirements.txt. In the
# opposite order pip has already resolved torch from the default index, so a
# later "pip3 install torch --index-url .../cu121" is a no-op.
RUN pip3 install --no-cache-dir torch torchvision torchaudio \
        --index-url https://download.pytorch.org/whl/cu121

RUN pip3 install --no-cache-dir -r requirements.txt

EXPOSE 8188

# Listen on all interfaces so the published port is reachable from outside
# the container.
CMD ["python3", "main.py", "--listen", "0.0.0.0", "--port", "8188"]

docker-compose.yml

# Two ComfyUI workers (one GPU each) behind an nginx load balancer, plus Redis.
version: '3.8'

services:
  # Worker pinned to GPU 0, published on host port 8188.
  comfyui-worker-1:
    build: .
    runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=0
    ports:
      - "8188:8188"
    volumes:
      # Models and outputs are bind-mounted so both workers share them.
      - ./models:/app/models
      - ./output:/app/output
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Worker pinned to GPU 1, published on host port 8189.
  comfyui-worker-2:
    build: .
    runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=1
    ports:
      - "8189:8188"
    volumes:
      - ./models:/app/models
      - ./output:/app/output
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Load balancer in front of the workers; configuration comes from ./nginx.conf.
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
    depends_on:
      - comfyui-worker-1
      - comfyui-worker-2

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"

nginx.conf — Load Balancer

# Load balancer for the ComfyUI workers (HTTP and WebSocket).
# This file is mounted as /etc/nginx/nginx.conf, i.e. it is the main
# configuration file, so it must declare the top-level `events` and `http`
# contexts itself; `upstream` and `server` are only valid inside `http`.
events {}

http {
    upstream comfyui_workers {
        # Send each new connection to the worker with the fewest active ones.
        least_conn;
        server comfyui-worker-1:8188;
        server comfyui-worker-2:8188;
    }

    server {
        listen 80;

        location / {
            proxy_pass http://comfyui_workers;

            # HTTP/1.1 plus the Upgrade/Connection headers are required for
            # WebSocket connections to pass through the proxy.
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";

            # Allow up to 300 s between reads from a worker before timing out.
            proxy_read_timeout 300s;
        }
    }
}

Commands

# Start every service from docker-compose.yml in the background.
docker compose up -d

# Follow the log output of the first worker.
docker compose logs -f comfyui-worker-1

# Run four containers of the comfyui-worker-1 service.
# NOTE(review): comfyui-worker-1 publishes the fixed host port "8188:8188",
# so extra replicas will presumably fail to bind that port - confirm before use.
docker compose scale comfyui-worker-1=4

# Stop and remove the containers created by `up`.
docker compose down

import json
from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass
class WorkerConfig:
    """Description and current state of a single ComfyUI worker."""

    name: str  # identifier used to look the worker up, e.g. "worker-1"
    gpu_id: int  # GPU index the worker is bound to
    port: int  # port the worker is published on
    vram_gb: float  # VRAM of the worker's GPU, in GB
    status: str = "idle"  # "idle" or "busy"

class ComfyUICluster:
    """In-memory registry of ComfyUI workers with idle/busy bookkeeping."""

    def __init__(self):
        self.workers: List["WorkerConfig"] = []

    def add_worker(self, worker: "WorkerConfig"):
        """Register *worker* with the cluster."""
        self.workers.append(worker)

    def get_idle_worker(self) -> Optional["WorkerConfig"]:
        """Claim and return the first idle worker.

        The returned worker is marked "busy". Returns ``None`` when every
        worker is busy or the cluster has no workers.
        """
        for w in self.workers:
            if w.status == "idle":
                w.status = "busy"
                return w
        return None

    def release_worker(self, name: str):
        """Mark every worker called *name* as idle; unknown names are ignored."""
        for w in self.workers:
            if w.name == name:
                w.status = "idle"

    def show_status(self):
        """Print one status line per registered worker."""
        print("ComfyUI Cluster Status:")
        for w in self.workers:
            print(f" {w.name} | GPU:{w.gpu_id} | Port:{w.port} | "
                  f"VRAM:{w.vram_gb}GB | {w.status}")

# Demo: register four 24 GB workers on GPUs 0-3 (ports 8188-8191) and print
# the resulting cluster status.
cluster = ComfyUICluster()
for gpu_id in range(4):
    cluster.add_worker(
        WorkerConfig(f"worker-{gpu_id + 1}", gpu_id, 8188 + gpu_id, 24.0)
    )
cluster.show_status()

Kubernetes Deployment

=== Kubernetes ComfyUI Deployment ===

comfyui-deployment.yaml

# Deployment: ComfyUI worker pods, one GPU per pod, sharing a models volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: comfyui-worker
  labels:
    app: comfyui
spec:
  replicas: 3
  selector:
    matchLabels:
      app: comfyui
  template:
    metadata:
      labels:
        app: comfyui
    spec:
      containers:
        - name: comfyui
          image: comfyui:latest
          ports:
            - containerPort: 8188
          resources:
            limits:
              nvidia.com/gpu: 1
              memory: "16Gi"
            requests:
              nvidia.com/gpu: 1
              memory: "8Gi"
          volumeMounts:
            - name: models
              mountPath: /app/models
          env:
            # NOTE(review): nothing in this article reads COMFYUI_LISTEN; the
            # image's CMD already passes "--listen 0.0.0.0" - confirm it is needed.
            - name: COMFYUI_LISTEN
              value: "0.0.0.0"
      volumes:
        # Model files come from a shared PersistentVolumeClaim.
        - name: models
          persistentVolumeClaim:
            claimName: comfyui-models-pvc
      # Schedule only onto nodes labelled gpu=true ...
      nodeSelector:
        gpu: "true"
      # ... and tolerate the NoSchedule taint keyed nvidia.com/gpu.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"

---

# Service: cluster-internal endpoint on port 80 that forwards to port 8188 of
# the pods labelled app=comfyui.
apiVersion: v1
kind: Service
metadata:
  name: comfyui-service
spec:
  selector:
    app: comfyui
  ports:
    - port: 80
      targetPort: 8188
  type: ClusterIP

---

# HorizontalPodAutoscaler: keeps the comfyui-worker Deployment between 2 and
# 10 replicas, targeting an average queue_length of 5 per pod.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: comfyui-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: comfyui-worker
  minReplicas: 2
  maxReplicas: 10
  metrics:
    # "Pods" metrics are served by the custom metrics API, so queue_length
    # has to be exported per pod by a metrics adapter.
    - type: Pods
      pods:
        metric:
          name: queue_length
        target:
          type: AverageValue
          averageValue: "5"

kubectl commands

# Create or update the resources defined in the manifest.
kubectl apply -f comfyui-deployment.yaml

# List the pods labelled app=comfyui.
kubectl get pods -l app=comfyui

# Manually set the Deployment to five replicas.
# NOTE(review): comfyui-hpa targets this same Deployment, so a manual replica
# count will presumably be adjusted again by the autoscaler - confirm.
kubectl scale deployment comfyui-worker --replicas=5

# Follow the logs of the comfyui-worker Deployment.
kubectl logs -f deployment/comfyui-worker

# Show resource usage of the pods labelled app=comfyui.
kubectl top pods -l app=comfyui

Queue System with Redis

import time

from typing import Optional

class ImageQueue:
    """In-memory priority queue of ComfyUI jobs.

    Jobs move through three stages: ``queue`` (waiting), ``processing``
    (handed out by :meth:`get_next_job`) and ``results`` (finished via
    :meth:`complete_job`). Higher ``priority`` values are dispatched first;
    jobs with equal priority keep their submission order.
    """

    def __init__(self, redis_url: str = "redis://localhost:6379"):
        # The URL is only stored; this implementation keeps all state in
        # process memory and never connects to Redis.
        self.redis_url = redis_url
        self.queue = []
        self.processing = {}
        self.results = {}
        self.job_counter = 0

    def submit_job(self, workflow: dict, priority: int = 0) -> str:
        """Enqueue *workflow* and return its generated job id."""
        self.job_counter += 1
        job_id = f"job-{self.job_counter:06d}"
        self.queue.append({
            "id": job_id,
            "workflow": workflow,
            "priority": priority,
            "status": "queued",
            "submitted_at": time.time(),
        })
        return job_id

    def get_next_job(self) -> Optional[dict]:
        """Pop the highest-priority job, or return ``None`` if none is waiting."""
        if not self.queue:
            return None
        # list.sort is stable, so equal priorities stay in submission order.
        self.queue.sort(key=lambda x: -x["priority"])
        job = self.queue.pop(0)
        job["status"] = "processing"
        # Remember in-flight jobs so get_status can still find them.
        self.processing[job["id"]] = job
        return job

    def complete_job(self, job_id: str, result: dict):
        """Record *result* for *job_id* and mark the job completed."""
        self.processing.pop(job_id, None)
        self.results[job_id] = {
            "status": "completed",
            "result": result,
            "completed_at": time.time(),
        }

    def get_status(self, job_id: str) -> dict:
        """Return the status dict for *job_id*.

        Completed jobs return their stored result record, in-flight jobs
        ``{"status": "processing"}``, waiting jobs their status plus the
        zero-based ``position`` in dispatch order, and unknown ids
        ``{"status": "not_found"}``.
        """
        if job_id in self.results:
            return self.results[job_id]
        if job_id in self.processing:
            return {"status": self.processing[job_id]["status"]}
        # Rank by dispatch order rather than raw list index, so the reported
        # position matches what get_next_job will actually do.
        ordered = sorted(self.queue, key=lambda x: -x["priority"])
        for position, job in enumerate(ordered):
            if job["id"] == job_id:
                return {"status": job["status"], "position": position}
        return {"status": "not_found"}

# Demo: submit two workflows with different priorities, report the first
# job's status, then fetch the next job to run.
queue = ImageQueue()
job1, job2 = (
    queue.submit_job(workflow, priority=priority)
    for priority, workflow in (
        (1, {"prompt": "a cat", "steps": 20}),
        (2, {"prompt": "a dog", "steps": 30}),
    )
)
print(f"Submitted: {job1}, {job2}")
print(f"Status: {queue.get_status(job1)}")

next_job = queue.get_next_job()
print(f"Processing: {next_job['id']}")

Performance Optimization

# performance.py — ComfyUI Performance Tips
# Each row is (name, tip, impact, command); the rows are expanded into the
# name -> {"tip", "impact", "command"} mapping printed below.
_OPTIMIZATION_ROWS = (
    (
        "Model Loading",
        "ใช้ --highvram หรือ --gpu-only ให้ Model อยู่ใน VRAM",
        "ลดเวลา Load 80%",
        "python main.py --highvram --listen 0.0.0.0",
    ),
    (
        "xformers",
        "ติดตั้ง xformers สำหรับ Memory Efficient Attention",
        "ลด VRAM 30% เร็วขึ้น 20%",
        "pip install xformers",
    ),
    (
        "FP16/BF16",
        "ใช้ Half Precision ลด VRAM และเพิ่มความเร็ว",
        "ลด VRAM 50% เร็วขึ้น 30%",
        "python main.py --force-fp16",
    ),
    (
        "Batch Processing",
        "รวม Requests เป็น Batch ประมวลผลพร้อมกัน",
        "เพิ่ม Throughput 40%",
        "ตั้ง batch_size ใน Workflow",
    ),
    (
        "Model Caching",
        "Cache Models ใน RAM/VRAM ไม่ต้อง Load จาก Disk",
        "ลดเวลา First Image จาก 30s เหลือ 3s",
        "python main.py --highvram --disable-smart-memory",
    ),
    (
        "TensorRT",
        "Compile Model เป็น TensorRT Engine",
        "เร็วขึ้น 40-60%",
        "ใช้ ComfyUI-TensorRT Node",
    ),
)

optimizations = {
    name: {"tip": tip, "impact": impact, "command": command}
    for name, tip, impact, command in _OPTIMIZATION_ROWS
}

print("ComfyUI Performance Optimization:")
for opt, info in optimizations.items():
    # One print call per entry; sep="\n" yields the same four output lines.
    print(
        f"\n  [{opt}]",
        f"    Tip: {info['tip']}",
        f"    Impact: {info['impact']}",
        f"    Command: {info['command']}",
        sep="\n",
    )

# VRAM Requirements
# Each row is (model, minimum VRAM, recommended VRAM, approximate speed).
_VRAM_ROWS = (
    ("SD 1.5 (512x512)", "4GB", "6GB", "~5 img/min"),
    ("SD 1.5 (768x768)", "6GB", "8GB", "~3 img/min"),
    ("SDXL (1024x1024)", "8GB", "12GB", "~2 img/min"),
    ("SDXL + Refiner", "12GB", "16GB", "~1 img/min"),
    ("SDXL + ControlNet", "12GB", "24GB", "~1.5 img/min"),
    ("Flux.1 Dev", "16GB", "24GB", "~0.5 img/min"),
)

vram_req = {
    name: {"min": minimum, "rec": recommended, "speed": speed}
    for name, minimum, recommended, speed in _VRAM_ROWS
}

print("\n\nVRAM Requirements:")
for model, req in vram_req.items():
    columns = (f"Min {req['min']}", f"Rec {req['rec']}", req["speed"])
    print(f"  {model}: " + " | ".join(columns))

เคล็ดลับ

  • GPU: RTX 4090 24GB ดีที่สุดสำหรับ Price/Performance
  • Queue: ใช้ Redis Queue จัดคิว ป้องกัน GPU Overload
  • Models: แชร์ Models ผ่าน NFS/PVC ไม่ต้อง Download ทุก Worker
  • Monitoring: ใช้ Prometheus + Grafana Monitor GPU VRAM Queue
  • Autoscale: Scale ตาม Queue Length ไม่ใช่ CPU Usage

ComfyUI คืออะไร

Node-based GUI สำหรับ Stable Diffusion Visual Workflow SDXL SD1.5 ControlNet LoRA IPAdapter Custom Node Python Browser