Stable Diffusion ComfyUI Load Testing Strategy คืออะไร
Stable Diffusion เป็น open-source AI image generation model ที่สร้างภาพจาก text prompts ComfyUI เป็น node-based GUI สำหรับ Stable Diffusion ที่ยืดหยุ่นสูง ออกแบบ workflow ได้อิสระ รองรับ custom nodes และ advanced pipelines Load Testing คือการทดสอบระบบด้วย traffic จำนวนมากเพื่อวัด performance, scalability และ breaking point การทำ load test สำหรับ Stable Diffusion/ComfyUI infrastructure สำคัญมากเมื่อ deploy เป็น service สำหรับหลาย users เพื่อหา concurrent user capacity, GPU utilization threshold และ queue management strategy
ComfyUI Architecture
# comfyui_arch.py — ComfyUI architecture overview
import json
class ComfyUIArchitecture:
    """Reference overview of ComfyUI's main components and workflow types.

    Pure presentation class: holds two constant dictionaries and prints them.
    """

    # Core pieces of a ComfyUI deployment, keyed by a short component id.
    COMPONENTS = {
        "frontend": {
            "name": "Frontend (Web UI)",
            "description": "Node-based editor ใน browser — ลาก nodes เชื่อมกันสร้าง workflow",
            "tech": "HTML/JS, LiteGraph.js",
        },
        "backend": {
            "name": "Backend (Python Server)",
            "description": "รับ workflow JSON → queue → execute nodes ตามลำดับ",
            "tech": "Python, aiohttp, PyTorch",
        },
        "queue": {
            "name": "Execution Queue",
            "description": "จัดคิว generation requests — FIFO, priority queue support",
        },
        "gpu_executor": {
            "name": "GPU Executor",
            "description": "รัน inference บน GPU — load models, denoise, VAE decode",
            "bottleneck": "GPU VRAM + compute — จำกัด concurrent generations",
        },
        "api": {
            "name": "REST API",
            "description": "HTTP API สำหรับ submit workflows, check status, download results",
            "endpoints": ["/prompt", "/queue", "/history", "/view"],
        },
    }

    # Generation workflow categories supported by node graphs.
    WORKFLOW_TYPES = {
        "txt2img": "Text to Image — สร้างภาพจาก text prompt",
        "img2img": "Image to Image — แปลงภาพเดิมด้วย prompt",
        "inpainting": "Inpainting — แก้ไขส่วนเฉพาะของภาพ",
        "upscale": "Upscale — เพิ่ม resolution ด้วย AI",
        "controlnet": "ControlNet — ควบคุม pose, depth, edge",
        "lora": "LoRA — fine-tuned style/character models",
    }

    def show_components(self):
        """Print each component's name and description, one paragraph each."""
        print("=== ComfyUI Architecture ===\n")
        for component in self.COMPONENTS.values():
            print(f"[{component['name']}]")
            print(f" {component['description']}")
            print()

    def show_workflows(self):
        """Print the supported workflow types as a bracketed list."""
        print("=== Workflow Types ===")
        for workflow_name, summary in self.WORKFLOW_TYPES.items():
            print(f" [{workflow_name}] {summary}")
# Demo: print the architecture overview followed by the workflow list.
arch = ComfyUIArchitecture()
for section in (arch.show_components, arch.show_workflows):
    section()
Load Testing Framework
# load_test.py — Load testing framework for ComfyUI
import json
class LoadTestFramework:
    """Holds a self-contained reference load-testing script for the ComfyUI API.

    The script lives in CODE as a plain string; this class only displays it
    (a prefix of it) — nothing in CODE is executed here.
    """

    # Runtime string: the reference script printed by show_code().
    # The embedded script submits txt2img workflows to /prompt, polls
    # /history/<id> until completion, and aggregates latency statistics.
    CODE = """
# comfyui_loadtest.py — Load test ComfyUI API
import asyncio
import aiohttp
import json
import time
import random
from dataclasses import dataclass, field
from typing import List
import statistics

@dataclass
class TestResult:
    request_id: str
    status: str  # success, error, timeout
    queue_time_ms: float = 0
    generation_time_ms: float = 0
    total_time_ms: float = 0
    error: str = ""

class ComfyUILoadTester:
    def __init__(self, base_url="http://localhost:8188", timeout=300):
        self.base_url = base_url
        self.timeout = timeout
        self.results: List[TestResult] = []

    def create_workflow(self, prompt="a beautiful landscape",
                        steps=20, width=512, height=512, seed=None):
        '''Create a basic txt2img workflow'''
        if seed is None:
            seed = random.randint(0, 2**32)
        return {
            "3": {
                "class_type": "KSampler",
                "inputs": {
                    "seed": seed,
                    "steps": steps,
                    "cfg": 7.0,
                    "sampler_name": "euler",
                    "scheduler": "normal",
                    "denoise": 1.0,
                    "model": ["4", 0],
                    "positive": ["6", 0],
                    "negative": ["7", 0],
                    "latent_image": ["5", 0],
                }
            },
            "4": {
                "class_type": "CheckpointLoaderSimple",
                "inputs": {"ckpt_name": "sd_xl_base_1.0.safetensors"}
            },
            "5": {
                "class_type": "EmptyLatentImage",
                "inputs": {"width": width, "height": height, "batch_size": 1}
            },
            "6": {
                "class_type": "CLIPTextEncode",
                "inputs": {"text": prompt, "clip": ["4", 1]}
            },
            "7": {
                "class_type": "CLIPTextEncode",
                "inputs": {"text": "bad quality, blurry", "clip": ["4", 1]}
            },
            "8": {
                "class_type": "VAEDecode",
                "inputs": {"samples": ["3", 0], "vae": ["4", 2]}
            },
            "9": {
                "class_type": "SaveImage",
                "inputs": {"filename_prefix": "loadtest", "images": ["8", 0]}
            },
        }

    async def submit_and_wait(self, session, workflow, client_id):
        '''Submit workflow and wait for completion'''
        start = time.time()
        result = TestResult(request_id=client_id, status="pending")
        try:
            # Submit prompt
            payload = {"prompt": workflow, "client_id": client_id}
            async with session.post(
                f"{self.base_url}/prompt", json=payload
            ) as resp:
                if resp.status != 200:
                    result.status = "error"
                    result.error = f"HTTP {resp.status}"
                    return result
                data = await resp.json()
                prompt_id = data.get("prompt_id")
            queue_end = time.time()
            result.queue_time_ms = (queue_end - start) * 1000
            # Poll for completion
            while time.time() - start < self.timeout:
                async with session.get(f"{self.base_url}/history/{prompt_id}") as resp:
                    history = await resp.json()
                    if prompt_id in history:
                        result.status = "success"
                        break
                await asyncio.sleep(1)
            else:
                result.status = "timeout"
            end = time.time()
            result.generation_time_ms = (end - queue_end) * 1000
            result.total_time_ms = (end - start) * 1000
        except Exception as e:
            result.status = "error"
            result.error = str(e)
            result.total_time_ms = (time.time() - start) * 1000
        return result

    async def run_load_test(self, concurrent_users=5, requests_per_user=3):
        '''Run load test with concurrent users'''
        prompts = [
            "a beautiful sunset over mountains",
            "cyberpunk city at night, neon lights",
            "cute cat wearing a hat, digital art",
            "fantasy castle in clouds, epic",
            "underwater coral reef, vibrant colors",
        ]
        async with aiohttp.ClientSession() as session:
            tasks = []
            for user in range(concurrent_users):
                for req in range(requests_per_user):
                    workflow = self.create_workflow(
                        prompt=random.choice(prompts),
                        steps=20,
                    )
                    client_id = f"user-{user}-req-{req}"
                    tasks.append(self.submit_and_wait(session, workflow, client_id))
            self.results = await asyncio.gather(*tasks)
        return self.generate_report()

    def generate_report(self):
        '''Generate load test report'''
        successful = [r for r in self.results if r.status == "success"]
        failed = [r for r in self.results if r.status != "success"]
        if not successful:
            return {"error": "No successful requests"}
        gen_times = [r.generation_time_ms for r in successful]
        total_times = [r.total_time_ms for r in successful]
        return {
            "total_requests": len(self.results),
            "successful": len(successful),
            "failed": len(failed),
            "success_rate": round(len(successful) / len(self.results) * 100, 1),
            "avg_generation_ms": round(statistics.mean(gen_times)),
            "p50_generation_ms": round(statistics.median(gen_times)),
            "p95_generation_ms": round(sorted(gen_times)[int(len(gen_times)*0.95)]),
            "avg_total_ms": round(statistics.mean(total_times)),
            "throughput_per_min": round(len(successful) / (max(total_times) / 60000), 1),
        }

# tester = ComfyUILoadTester("http://localhost:8188")
# report = asyncio.run(tester.run_load_test(concurrent_users=5, requests_per_user=3))
# print(json.dumps(report, indent=2))
"""

    def show_code(self):
        """Print a header plus the first 600 characters of the reference script."""
        print("=== Load Test Framework ===")
        # Only a prefix is shown; the full script remains in self.CODE.
        print(self.CODE[:600])
# Demo: instantiate the framework and print (a prefix of) the embedded script.
framework = LoadTestFramework()
framework.show_code()
Scaling Strategies
# scaling.py — Scaling strategies for ComfyUI
import json
import random
class ScalingStrategies:
    """Catalog of ComfyUI scaling approaches plus example capacity scenarios.

    Pure presentation class: prints constant reference data only.
    """

    # Scaling options, keyed by a short strategy id. Each entry always has
    # name/description/pros/cons; "capacity" is present only where a rough
    # throughput figure makes sense.
    STRATEGIES = {
        "vertical": {
            "name": "Vertical Scaling (GPU Upgrade)",
            "description": "เพิ่ม GPU ที่แรงกว่า — RTX 3090 → RTX 4090 → A100",
            "pros": "ง่าย, ไม่ต้องเปลี่ยน architecture",
            "cons": "จำกัด, แพง, single point of failure",
            "capacity": "1 GPU = 2-6 images/min (ขึ้นกับ model + resolution)",
        },
        "horizontal": {
            "name": "Horizontal Scaling (Multiple GPUs/Nodes)",
            "description": "เพิ่มจำนวน GPU workers — load balance requests",
            "pros": "Scalable, fault tolerant, cost effective",
            "cons": "ซับซ้อนกว่า, ต้อง queue management",
            "capacity": "N GPUs = N × single GPU throughput",
        },
        "queue_based": {
            "name": "Queue-Based Architecture",
            "description": "ใช้ message queue (Redis, RabbitMQ) กระจาย requests ไป workers",
            "pros": "Decouple frontend/backend, handle spikes, priority queues",
            "cons": "เพิ่ม latency เล็กน้อย, infrastructure complexity",
        },
        "serverless_gpu": {
            "name": "Serverless GPU (RunPod, Modal, Replicate)",
            "description": "ใช้ cloud GPU on-demand — scale to zero เมื่อไม่มี requests",
            "pros": "จ่ายตามใช้, auto-scale, ไม่ต้อง manage hardware",
            "cons": "Cold start latency, cost ต่อ image สูงกว่า self-hosted",
        },
    }

    def show_strategies(self):
        """Print name, description, pros and cons for every strategy."""
        print("=== Scaling Strategies ===\n")
        for strategy in self.STRATEGIES.values():
            print(f"[{strategy['name']}]")
            print(f" {strategy['description']}")
            print(f" Pros: {strategy['pros']}")
            print(f" Cons: {strategy['cons']}")
            print()

    def capacity_planning(self):
        """Print example sizing scenarios: user count vs. daily volume vs. GPUs."""
        print("=== Capacity Planning ===")
        scenarios = (
            (10, 100, 1, "RTX 3090"),
            (50, 500, 2, "RTX 4090"),
            (200, 2000, 4, "A100"),
            (1000, 10000, 8, "A100 (cloud)"),
        )
        for users, images_day, gpus, gpu_type in scenarios:
            print(f" {users:>5} users | {images_day:>6} img/day | {gpus}x {gpu_type}")
# Demo: print the strategy catalog, then the capacity-planning table.
scaling = ScalingStrategies()
for section in (scaling.show_strategies, scaling.capacity_planning):
    section()
Monitoring & Metrics
# monitoring.py — Monitoring ComfyUI performance
import json
import random
class PerformanceMonitoring:
    """Displays a reference Prometheus-based monitor script and a mock dashboard.

    The monitor script lives in CODE as a plain string; it is printed for
    reference, never executed here.
    """

    # Runtime string: reference monitor that exports GPU and queue metrics
    # via prometheus_client, reading GPU stats from nvidia-smi and queue
    # depth from the ComfyUI /queue endpoint.
    # NOTE(review): the embedded nvidia-smi arguments contain spaces after
    # commas ("--format=csv, noheader, nounits"); nvidia-smi may reject that
    # form — confirm against the nvidia-smi documentation before copying.
    CODE = """
# comfyui_monitor.py — Monitor ComfyUI performance
import psutil
import subprocess
import json
import time
from prometheus_client import Gauge, Histogram, Counter, start_http_server

# Prometheus metrics
GPU_UTIL = Gauge('comfyui_gpu_utilization_percent', 'GPU utilization')
GPU_MEMORY = Gauge('comfyui_gpu_memory_used_mb', 'GPU memory used')
GPU_TEMP = Gauge('comfyui_gpu_temperature_celsius', 'GPU temperature')
QUEUE_SIZE = Gauge('comfyui_queue_size', 'Number of pending requests')
GEN_TIME = Histogram('comfyui_generation_seconds', 'Image generation time',
                     buckets=[5, 10, 15, 20, 30, 45, 60, 90, 120])
GEN_COUNT = Counter('comfyui_generations_total', 'Total generations', ['status'])

class ComfyUIMonitor:
    def __init__(self, comfyui_url="http://localhost:8188"):
        self.url = comfyui_url

    def get_gpu_stats(self):
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=utilization.gpu, memory.used, memory.total, temperature.gpu',
             '--format=csv, noheader, nounits'],
            capture_output=True, text=True
        )
        parts = result.stdout.strip().split(', ')
        stats = {
            'utilization': float(parts[0]),
            'memory_used_mb': float(parts[1]),
            'memory_total_mb': float(parts[2]),
            'temperature': float(parts[3]),
        }
        GPU_UTIL.set(stats['utilization'])
        GPU_MEMORY.set(stats['memory_used_mb'])
        GPU_TEMP.set(stats['temperature'])
        return stats

    def get_queue_status(self):
        import requests
        resp = requests.get(f"{self.url}/queue")
        data = resp.json()
        pending = len(data.get('queue_pending', []))
        running = len(data.get('queue_running', []))
        QUEUE_SIZE.set(pending)
        return {'pending': pending, 'running': running}

# start_http_server(9090)  # Prometheus metrics
# monitor = ComfyUIMonitor()
# while True:
#     monitor.get_gpu_stats()
#     monitor.get_queue_status()
#     time.sleep(15)
"""

    def show_code(self):
        """Print a header plus the first 600 characters of the monitor script."""
        print("=== Performance Monitor ===")
        # Only a prefix is shown; the full script remains in self.CODE.
        print(self.CODE[:600])

    def sample_dashboard(self):
        """Print a mock dashboard with randomly generated illustrative values.

        Values are random each call — this is a display sample, not real telemetry.
        """
        print(f"\n=== ComfyUI Dashboard ===")
        print(f" GPU Utilization: {random.randint(60, 99)}%")
        print(f" GPU Memory: {random.randint(8, 22)}/{24} GB")
        print(f" GPU Temp: {random.randint(55, 82)}°C")
        print(f" Queue Pending: {random.randint(0, 15)}")
        print(f" Queue Running: {random.randint(0, 2)}")
        print(f" Avg Gen Time: {random.uniform(8, 30):.1f}s")
        print(f" Throughput: {random.uniform(2, 8):.1f} img/min")
        print(f" Success Rate: {random.uniform(95, 100):.1f}%")
# Demo: print the monitor script excerpt, then a mock dashboard snapshot.
mon = PerformanceMonitoring()
for section in (mon.show_code, mon.sample_dashboard):
    section()
Docker Deployment
# deployment.py — ComfyUI Docker deployment
import json
class DockerDeployment:
    """Displays a reference docker-compose stack and a deployment checklist.

    The compose file lives in COMPOSE as a plain string; it is printed for
    reference only.
    """

    # Runtime string: two GPU-pinned ComfyUI workers (one per GPU) behind an
    # nginx load balancer, plus Redis and Prometheus sidecars.
    COMPOSE = """
# docker-compose.yml — ComfyUI with load balancing
version: '3.8'
services:
  comfyui-worker-1:
    image: comfyui:latest
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    volumes:
      - ./models:/comfyui/models
      - ./output:/comfyui/output
    environment:
      - NVIDIA_VISIBLE_DEVICES=0
    ports:
      - "8188:8188"
  comfyui-worker-2:
    image: comfyui:latest
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['1']
              capabilities: [gpu]
    volumes:
      - ./models:/comfyui/models
      - ./output:/comfyui/output
    environment:
      - NVIDIA_VISIBLE_DEVICES=1
    ports:
      - "8189:8188"
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
    depends_on:
      - comfyui-worker-1
      - comfyui-worker-2
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
  prometheus:
    image: prom/prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
"""

    def show_compose(self):
        """Print a header plus the first 600 characters of the compose file."""
        print("=== Docker Compose ===")
        # Only a prefix is shown; the full file remains in self.COMPOSE.
        print(self.COMPOSE[:600])

    def deployment_checklist(self):
        """Print a bullet list of pre-deployment checks."""
        print(f"\n=== Deployment Checklist ===")
        checks = [
            "NVIDIA Container Toolkit installed",
            "Models downloaded to shared volume",
            "GPU memory sufficient for model (SDXL = 6GB+)",
            "Nginx load balancer configured",
            "Prometheus + Grafana for monitoring",
            "Queue management (Redis) for request buffering",
            "Auto-restart on OOM or crash",
            "Rate limiting per user/IP",
        ]
        for c in checks:
            print(f" • {c}")
# Demo: print the compose file excerpt, then the deployment checklist.
deploy = DockerDeployment()
for section in (deploy.show_compose, deploy.deployment_checklist):
    section()
FAQ - คำถามที่พบบ่อย
Q: ComfyUI รองรับ concurrent users กี่คน?
A: ขึ้นกับ GPU — 1x RTX 3090: ~2-4 img/min (queue 5-10 users ได้) · 1x RTX 4090: ~4-8 img/min (queue 10-20 users) · 1x A100: ~8-15 img/min (queue 20-50 users) · Scale: เพิ่ม GPU workers + load balancer สำหรับ users มากขึ้น · สำคัญ: ใช้ queue system — ให้ users รอคิว ไม่ใช่ reject requests
Q: GPU ไหนคุ้มที่สุดสำหรับ Stable Diffusion?
A: คุ้มที่สุด: RTX 3090 (24GB VRAM) — มือสองราคาดี, VRAM เยอะ ดีที่สุด: RTX 4090 (24GB) — เร็วกว่า 3090 ~2x Cloud: A100/H100 — สำหรับ production scale ไม่แนะนำ: GPU ที่ VRAM < 8GB (SDXL ต้อง 6GB+) Budget: RTX 3060 12GB — VRAM พอ แต่ช้า
Q: Load test ควรวัดอะไรบ้าง?
A: Generation time: เวลาสร้างภาพ (P50, P95, P99) · Queue time: เวลารอคิว · Throughput: images per minute · Success rate: % requests ที่สำเร็จ · GPU utilization: ควร 80-95% (ต่ำไป = underutilized, สูงไป = throttle) · Memory usage: VRAM ต้องไม่เกิน 90% · Error rate: OOM errors, timeout errors
Q: ComfyUI กับ Automatic1111 อันไหนดีสำหรับ production?
A: ComfyUI: ดีกว่าสำหรับ production — API design ดี, workflow flexibility, memory efficient กว่า A1111: ดีกว่าสำหรับ casual use — UI ง่ายกว่า, extensions เยอะกว่า สำหรับ API service: ComfyUI แนะนำ — node-based workflow = reproducible, API-first design สำหรับ personal use: A1111 ง่ายกว่าสำหรับมือใหม่
