Building an ML Pipeline for Stable Diffusion with ComfyUI
Stable Diffusion is a generative AI model that creates images from text prompts. Wrapping it in an ML pipeline adds the automation needed to manage training, inference, evaluation, and deployment of image generation models end to end.
ComfyUI provides a node-based interface for Stable Diffusion in which pipelines are built as workflows that can be exported to JSON and driven through an HTTP API. This makes workflows easy to automate, reproduce, and version-control — effectively infrastructure-as-code for image generation.
An ML pipeline for Stable Diffusion typically consists of: a Data Pipeline that prepares training data (images + captions); a Training Pipeline that fine-tunes models (LoRA, DreamBooth, Textual Inversion); an Inference Pipeline that generates images from trained models; an Evaluation Pipeline that measures quality (FID, CLIP score); and a Deployment Pipeline that deploys models to production.
Installing ComfyUI for the ML Pipeline
Set up ComfyUI and the pipeline tooling
# === ComfyUI ML Pipeline Setup ===
# Installs ComfyUI, ML-pipeline tooling, custom nodes, and the SDXL base model,
# then configures model paths and a systemd unit serving the API on port 8188.
# 1. Install ComfyUI
cd /opt
git clone https://github.com/comfyanonymous/ComfyUI.git
cd ComfyUI
# Dedicated virtualenv keeps ComfyUI's pinned dependencies off the system Python.
python3.11 -m venv venv
source venv/bin/activate
# CUDA 12.1 wheel index — adjust --index-url if the host runs a different CUDA.
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -r requirements.txt
# 2. Install ML Pipeline Dependencies
# Experiment tracking (wandb/mlflow) and dataset versioning (dvc).
pip install wandb mlflow dvc
pip install datasets transformers accelerate
pip install clip-interrogator # For auto-captioning
pip install lpips # Perceptual similarity
pip install cleanfid # FID score
# 3. Install Custom Nodes for Pipeline
cd custom_nodes/
git clone https://github.com/ltdrdata/ComfyUI-Manager.git
git clone https://github.com/Fannovel16/comfyui_controlnet_aux.git
git clone https://github.com/cubiq/ComfyUI_IPAdapter_plus.git
cd ..
# 4. Download Models
mkdir -p models/checkpoints models/loras models/vae models/controlnet
# SDXL Base
wget -P models/checkpoints/ \
"https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors"
# 5. API Server Configuration
# Quoted 'EOF' delimiter: heredoc contents are written verbatim (no expansion).
cat > extra_model_paths.yaml << 'EOF'
comfyui:
base_path: /opt/ComfyUI/
checkpoints: models/checkpoints/
loras: models/loras/
vae: models/vae/
controlnet: models/controlnet/
pipeline:
training_data: /data/training/
output: /data/output/
evaluation: /data/evaluation/
EOF
# 6. Start ComfyUI API Server
# NOTE(review): this runs in the foreground and blocks the rest of the script;
# the systemd unit below is the intended long-running service — confirm whether
# this manual start should be backgrounded or removed.
python main.py --listen 0.0.0.0 --port 8188 --disable-auto-launch
# 7. Systemd Service
cat > /etc/systemd/system/comfyui-pipeline.service << 'EOF'
[Unit]
Description=ComfyUI ML Pipeline Server
After=network.target
[Service]
Type=simple
User=ml
WorkingDirectory=/opt/ComfyUI
Environment=PATH=/opt/ComfyUI/venv/bin:/usr/bin
ExecStart=/opt/ComfyUI/venv/bin/python main.py --listen 0.0.0.0 --port 8188 --disable-auto-launch
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
# Enable at boot and start immediately.
systemctl enable --now comfyui-pipeline
echo "ComfyUI ML pipeline setup complete"
Building the Image Generation Pipeline
An automated image generation pipeline driven through the ComfyUI API
#!/usr/bin/env python3
# image_pipeline.py ??? Stable Diffusion Image Generation Pipeline
import json
import logging
import urllib.request
import uuid
import time
from typing import Dict, List
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("pipeline")
class SDImagePipeline:
    """Client for a ComfyUI server.

    Builds SDXL text-to-image workflows programmatically and queues them
    for execution over ComfyUI's HTTP API.
    """

    def __init__(self, server_url: str = "http://localhost:8188"):
        """server_url: base URL of the ComfyUI API server (default: local)."""
        self.server_url = server_url
        # Unique id so ComfyUI can associate queued prompts with this client.
        self.client_id = str(uuid.uuid4())

    def build_workflow(self, prompt, negative="ugly, blurry, low quality",
                       width=1024, height=1024, steps=30, cfg=7.5,
                       checkpoint="sd_xl_base_1.0.safetensors",
                       lora=None, lora_strength=0.8, seed=None):
        """Build a ComfyUI workflow graph (node-id -> node spec) for one image.

        Args:
            prompt: positive text prompt.
            negative: negative prompt.
            width, height: latent image size (SDXL native is 1024x1024).
            steps: sampler step count.
            cfg: classifier-free guidance scale.
            checkpoint: checkpoint filename known to the server.
            lora: optional LoRA filename; when given, a LoraLoader node is
                spliced between the checkpoint loader and its consumers.
            lora_strength: strength applied to both model and CLIP weights.
            seed: explicit sampler seed; None (the default) derives one from
                the current time, as the original behavior did.

        Returns:
            dict mapping node ids to node specs, in ComfyUI API JSON format.
        """
        if seed is None:
            # Time-derived fallback, kept inside the uint32 range the
            # KSampler seed field expects.
            seed = int(time.time()) % 2**32
        workflow = {
            "3": {
                "class_type": "KSampler",
                "inputs": {
                    "seed": seed,
                    "steps": steps,
                    "cfg": cfg,
                    "sampler_name": "euler_ancestral",
                    "scheduler": "normal",
                    "denoise": 1.0,
                    "model": ["4", 0],
                    "positive": ["6", 0],
                    "negative": ["7", 0],
                    "latent_image": ["5", 0],
                },
            },
            "4": {
                "class_type": "CheckpointLoaderSimple",
                "inputs": {"ckpt_name": checkpoint},
            },
            "5": {
                "class_type": "EmptyLatentImage",
                "inputs": {"width": width, "height": height, "batch_size": 1},
            },
            "6": {
                "class_type": "CLIPTextEncode",
                "inputs": {"text": prompt, "clip": ["4", 1]},
            },
            "7": {
                "class_type": "CLIPTextEncode",
                "inputs": {"text": negative, "clip": ["4", 1]},
            },
            "8": {
                "class_type": "VAEDecode",
                "inputs": {"samples": ["3", 0], "vae": ["4", 2]},
            },
            "9": {
                "class_type": "SaveImage",
                "inputs": {"filename_prefix": "pipeline", "images": ["8", 0]},
            },
        }
        if lora:
            # Splice the LoRA between the checkpoint loader and its consumers:
            # the sampler takes the patched model; both text encoders take the
            # patched CLIP.
            workflow["10"] = {
                "class_type": "LoraLoader",
                "inputs": {
                    "lora_name": lora,
                    "strength_model": lora_strength,
                    "strength_clip": lora_strength,
                    "model": ["4", 0],
                    "clip": ["4", 1],
                },
            }
            workflow["3"]["inputs"]["model"] = ["10", 0]
            workflow["6"]["inputs"]["clip"] = ["10", 1]
            workflow["7"]["inputs"]["clip"] = ["10", 1]
        return workflow

    def queue_prompt(self, workflow):
        """POST a workflow to /prompt and return the server's JSON reply.

        The reply contains a 'prompt_id' identifying the queued job.

        Raises:
            urllib.error.URLError: if the server is unreachable.
        """
        payload = json.dumps({"prompt": workflow, "client_id": self.client_id}).encode()
        req = urllib.request.Request(
            f"{self.server_url}/prompt",
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        # Context manager closes the HTTP response even if parsing fails;
        # the original leaked the response object.
        with urllib.request.urlopen(req) as response:
            return json.loads(response.read())

    def batch_generate(self, prompts, **kwargs):
        """Queue one workflow per prompt; kwargs pass through to build_workflow.

        Returns:
            list of {"prompt": ..., "prompt_id": ...} dicts, one per prompt.
        """
        results = []
        for i, prompt in enumerate(prompts):
            workflow = self.build_workflow(prompt, **kwargs)
            result = self.queue_prompt(workflow)
            results.append({"prompt": prompt, "prompt_id": result.get("prompt_id")})
            logger.info(f"Queued {i+1}/{len(prompts)}: {prompt[:50]}...")
            time.sleep(1)  # pace submissions so the server queue is not flooded
        return results
# Demo: build one SDXL workflow locally and report its node count.
demo_pipeline = SDImagePipeline()
demo_workflow = demo_pipeline.build_workflow(
    prompt="a beautiful mountain landscape at sunset, 8k, detailed",
    width=1024,
    height=1024,
    steps=30,
)
print(f"Workflow nodes: {len(demo_workflow)}")
print("Ready for batch generation")
Training Pipeline for Custom Models
Fine-tune Stable Diffusion models
#!/usr/bin/env python3
# training_pipeline.py ??? SD Model Training Pipeline
import json
import logging
import os
from typing import Dict, List
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("training")
class SDTrainingPipeline:
    """Reference configuration for fine-tuning Stable Diffusion models.

    The methods return plain-dict "recipes" — dataset preparation steps,
    kohya_ss LoRA hyper-parameters, and an evaluation-metric catalogue —
    rather than executing any training themselves.
    """

    def __init__(self):
        # Reserved for per-run configuration; not used by the recipe methods.
        self.config = {}

    def prepare_dataset(self, image_dir: str, output_dir: str) -> dict:
        """Return the dataset-preparation playbook with auto-captioning.

        Args:
            image_dir: directory holding the raw source images.
            output_dir: directory receiving resized and captioned images.
        """
        return {
            "steps": [
                {
                    "name": "Collect Images",
                    "action": f"Gather 20-200 images in {image_dir}",
                    "requirements": "High quality, consistent subject, varied poses/angles",
                },
                {
                    "name": "Resize & Crop",
                    "command": f"python resize_images.py --input {image_dir} --output {output_dir} --size 1024",
                    "note": "SDXL uses 1024x1024, SD1.5 uses 512x512",
                },
                {
                    "name": "Auto-Caption",
                    "command": f"python caption_images.py --input {output_dir} --model blip2",
                    "note": "Generate .txt caption file for each image",
                },
                {
                    "name": "Review Captions",
                    "action": "Manually review and edit captions for accuracy",
                    "note": "Add trigger word (e.g., 'sks style') to all captions",
                },
            ],
        }

    def lora_training_config(self) -> dict:
        """Return kohya_ss LoRA hyper-parameters, launch command and ETA table."""
        return {
            "kohya_ss_config": {
                "pretrained_model": "sd_xl_base_1.0.safetensors",
                "train_data_dir": "/data/training/processed",
                "output_dir": "/data/models/lora",
                "output_name": "custom_style_v1",
                "resolution": "1024,1024",
                "train_batch_size": 1,
                "max_train_epochs": 20,
                "learning_rate": 1e-4,
                "unet_lr": 1e-4,
                "text_encoder_lr": 5e-5,
                "lr_scheduler": "cosine_with_restarts",
                "network_module": "networks.lora",
                # dim/alpha = 32/16: moderate LoRA capacity with alpha scaling.
                "network_dim": 32,
                "network_alpha": 16,
                "mixed_precision": "bf16",
                "save_every_n_epochs": 5,
                "optimizer_type": "AdamW8bit",
                "max_token_length": 225,
                "seed": 42,
            },
            "training_command": """
accelerate launch --num_cpu_threads_per_process=2 train_network.py \\
--pretrained_model_name_or_path="sd_xl_base_1.0.safetensors" \\
--train_data_dir="/data/training/processed" \\
--output_dir="/data/models/lora" \\
--output_name="custom_style_v1" \\
--resolution="1024,1024" \\
--train_batch_size=1 \\
--max_train_epochs=20 \\
--learning_rate=1e-4 \\
--network_module="networks.lora" \\
--network_dim=32 \\
--network_alpha=16 \\
--mixed_precision="bf16" \\
--optimizer_type="AdamW8bit" \\
--save_every_n_epochs=5
""",
            "estimated_time": {
                "20_images_20_epochs_rtx4090": "15-30 minutes",
                "100_images_20_epochs_rtx4090": "1-2 hours",
                "20_images_20_epochs_a100": "10-20 minutes",
            },
        }

    def evaluation_metrics(self) -> dict:
        """Return the catalogue of model-evaluation metrics with target scores."""
        return {
            "fid_score": {
                # FIX: source string carried a mojibake'd dash ("???"); restored.
                "description": "Frechet Inception Distance - measures image quality distribution",
                "good_score": "< 50 (lower is better)",
                "command": "python -m cleanfid --real /data/real --generated /data/generated",
            },
            "clip_score": {
                "description": "CLIP similarity between prompt and generated image",
                "good_score": "> 0.25 (higher is better)",
            },
            "lpips": {
                "description": "Learned Perceptual Image Patch Similarity",
                "good_score": "< 0.5 (lower means more similar to reference)",
            },
            "human_eval": {
                "description": "Human preference rating",
                "method": "A/B testing with baseline model",
            },
        }
# Demo: print the key LoRA hyper-parameters and the evaluation-metric catalogue.
trainer = SDTrainingPipeline()
lora_cfg = trainer.lora_training_config()
kohya = lora_cfg["kohya_ss_config"]
print(f"Network dim: {kohya['network_dim']}")
print(f"Epochs: {kohya['max_train_epochs']}")
print(f"Time estimate: {lora_cfg['estimated_time']['20_images_20_epochs_rtx4090']}")
metric_catalogue = trainer.evaluation_metrics()
print("\nEvaluation Metrics:")
for metric_name, metric_info in metric_catalogue.items():
    print(f" {metric_name}: {metric_info['description'][:60]}...")
Batch Processing and Automation
Run batch generation on a schedule, fully automated
# === Batch Processing Pipeline ===
# Writes three artifacts: a batch-generation script, a cron schedule, and a
# docker-compose stack. All heredoc delimiters are quoted ('PYEOF'/'EOF') so
# the embedded $-signs and Python f-strings are written verbatim, with no
# shell expansion.
# 1. Batch Generation Script
cat > batch_generate.py << 'PYEOF'
#!/usr/bin/env python3
"""Batch image generation with ComfyUI API"""
import json
import urllib.request
import time
import os
import csv
from datetime import datetime
SERVER = "http://localhost:8188"
def queue_prompt(workflow):
    data = json.dumps({"prompt": workflow}).encode()
    req = urllib.request.Request(f"{SERVER}/prompt", data=data,
    headers={"Content-Type": "application/json"})
    return json.loads(urllib.request.urlopen(req).read())
def build_sdxl_workflow(prompt, negative, seed, width=1024, height=1024):
    return {
    "3": {"class_type": "KSampler", "inputs": {
    "seed": seed, "steps": 30, "cfg": 7.5,
    "sampler_name": "euler_ancestral", "scheduler": "normal",
    "denoise": 1.0, "model": ["4",0], "positive": ["6",0],
    "negative": ["7",0], "latent_image": ["5",0]}},
    "4": {"class_type": "CheckpointLoaderSimple",
    "inputs": {"ckpt_name": "sd_xl_base_1.0.safetensors"}},
    "5": {"class_type": "EmptyLatentImage",
    "inputs": {"width": width, "height": height, "batch_size": 1}},
    "6": {"class_type": "CLIPTextEncode",
    "inputs": {"text": prompt, "clip": ["4",1]}},
    "7": {"class_type": "CLIPTextEncode",
    "inputs": {"text": negative, "clip": ["4",1]}},
    "8": {"class_type": "VAEDecode",
    "inputs": {"samples": ["3",0], "vae": ["4",2]}},
    "9": {"class_type": "SaveImage",
    "inputs": {"filename_prefix": f"batch_{seed}", "images": ["8",0]}},
    }
# Read prompts from CSV
# Format: prompt,negative,seed
prompts = [
    ("mountain landscape sunset, 8k", "ugly, blurry", 42),
    ("cyberpunk city night, neon lights", "ugly, blurry", 43),
    ("portrait beautiful woman, studio lighting", "ugly, deformed", 44),
]
print(f"Batch: {len(prompts)} images")
for i, (prompt, negative, seed) in enumerate(prompts):
    workflow = build_sdxl_workflow(prompt, negative, seed)
    result = queue_prompt(workflow)
    print(f" [{i+1}/{len(prompts)}] Queued: {prompt[:40]}...")
    time.sleep(2)
print("Batch complete")
PYEOF
# 2. Scheduled Pipeline (cron)
# Jobs run as user 'ml'; stdout and stderr append to per-task log files.
cat > /etc/cron.d/sd-pipeline << 'EOF'
# Generate daily content at 2 AM
0 2 * * * ml cd /opt/pipeline && python batch_generate.py --config daily.csv >> /var/log/sd-pipeline.log 2>&1
# Train new LoRA weekly on Sunday
0 4 * * 0 ml cd /opt/pipeline && python train_lora.py --config weekly_train.yaml >> /var/log/sd-training.log 2>&1
# Evaluate models daily
0 6 * * * ml cd /opt/pipeline && python evaluate.py >> /var/log/sd-eval.log 2>&1
EOF
# 3. Docker Compose for Full Pipeline
# GPU access uses the compose device-reservations syntax, which requires the
# NVIDIA container toolkit on the host.
cat > docker-compose.yml << 'EOF'
version: '3.8'
services:
  comfyui:
    image: ghcr.io/myorg/comfyui:latest
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    ports:
      - "8188:8188"
    volumes:
      - models:/opt/ComfyUI/models
      - output:/opt/ComfyUI/output
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
  worker:
    image: ghcr.io/myorg/sd-worker:latest
    depends_on: [comfyui, redis]
    environment:
      - COMFYUI_URL=http://comfyui:8188
      - REDIS_URL=redis://redis:6379
  api:
    image: ghcr.io/myorg/sd-api:latest
    depends_on: [redis]
    ports:
      - "8000:8000"
volumes:
  models:
  output:
EOF
echo "Batch processing configured"
Monitoring and Quality Control
Monitor pipeline health and the quality of generated images
#!/usr/bin/env python3
# pipeline_monitor.py ??? SD Pipeline Monitoring
import json
import logging
from datetime import datetime
from typing import Dict, List
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("monitor")
class SDPipelineMonitor:
    """Monitoring helpers for the SD pipeline.

    Provides a dashboard snapshot (static demo figures), quality-gate
    definitions for generated images, and a simple A/B model comparison.
    """

    def __init__(self):
        # Placeholder for collected metrics; unused by the demo methods below.
        self.metrics = {}

    def dashboard(self) -> dict:
        """Return a point-in-time ops dashboard snapshot (static demo data)."""
        return {
            "generation": {
                "images_today": 1250,
                "avg_time_per_image": "18.5 seconds",
                "success_rate": 99.2,
                "gpu_utilization": 85,
                "queue_depth": 5,
            },
            "training": {
                "active_jobs": 1,
                "completed_today": 2,
                # FIX: source string carried a mojibake'd dash ("???"); restored.
                "last_training": "custom_style_v3 - completed 2h ago",
                "best_fid_score": 32.5,
            },
            "quality": {
                "avg_clip_score": 0.28,
                "avg_aesthetic_score": 6.2,
                "nsfw_detection_rate": 0.1,
                "flagged_images": 3,
            },
            "costs": {
                "gpu_hours_today": 18.5,
                "cost_today": 18.63,
                "cost_per_image": 0.015,
                "monthly_estimate": 558.90,
            },
        }

    def quality_gates(self) -> dict:
        """Return quality gates for generated images plus on-failure actions."""
        return {
            "automatic_checks": {
                "clip_score_min": 0.20,
                "aesthetic_score_min": 5.0,
                "nsfw_check": True,
                "face_detection": "If portrait, verify face quality",
                "resolution_check": "Must match requested resolution",
                "artifact_detection": "Check for common SD artifacts",
            },
            "actions_on_failure": {
                "clip_score_low": "Re-generate with modified prompt",
                "nsfw_detected": "Block and log for review",
                "artifacts_detected": "Re-generate with different seed",
                "face_quality_low": "Re-generate with face restoration",
            },
        }

    def a_b_test_models(self, model_a_scores, model_b_scores) -> dict:
        """Compare two models by mean score.

        Args:
            model_a_scores, model_b_scores: non-empty lists of scores.

        Returns:
            dict with rounded averages, winner, difference and significance.

        Raises:
            ValueError: if either score list is empty (previously an
                unhelpful ZeroDivisionError).
        """
        if not model_a_scores or not model_b_scores:
            raise ValueError("both score lists must be non-empty")
        avg_a = sum(model_a_scores) / len(model_a_scores)
        avg_b = sum(model_b_scores) / len(model_b_scores)
        return {
            "model_a_avg": round(avg_a, 3),
            "model_b_avg": round(avg_b, 3),
            # Ties deliberately go to Model A (the incumbent/baseline).
            "winner": "Model B" if avg_b > avg_a else "Model A",
            "difference": round(abs(avg_b - avg_a), 3),
            "significant": abs(avg_b - avg_a) > 0.02,
        }
# Demo: print a dashboard summary and run a sample A/B comparison.
monitor = SDPipelineMonitor()
dash = monitor.dashboard()
print(f"Today: {dash['generation']['images_today']} images, {dash['generation']['avg_time_per_image']}/image")
print(f"Quality: CLIP {dash['quality']['avg_clip_score']}, Aesthetic {dash['quality']['avg_aesthetic_score']}")
# FIX: the original f-string had lost its placeholders ("Cost: /image, /month");
# restore the cost figures from the dashboard snapshot.
print(f"Cost: ${dash['costs']['cost_per_image']}/image, ${dash['costs']['monthly_estimate']}/month")
ab = monitor.a_b_test_models([0.27, 0.29, 0.26, 0.28], [0.30, 0.31, 0.29, 0.32])
print(f"\nA/B Test: {ab['winner']} wins (diff: {ab['difference']})")
FAQ — Frequently Asked Questions
Q: Why choose ComfyUI over AUTOMATIC1111 for an ML pipeline?
A: ComfyUI is built for automation: workflows export/import as JSON and can be version-controlled; the API lets you queue prompts over plain HTTP; the node-based design expresses complex pipelines; it is memory-efficient (lower VRAM use); and runs are reproducible. AUTOMATIC1111 is designed for interactive use — its API is more limited and its extensions frequently conflict. For a production pipeline, use ComfyUI.
Q: How do LoRA, DreamBooth, and Textual Inversion differ?
A: LoRA trains small low-rank matrices alongside the base model: 20-200 images, 15-60 minutes of training, a 10-200 MB adapter, and the best flexibility — the most popular choice. DreamBooth fine-tunes the entire model: 5-30 images, 1-4 hours of training, and a full 2-6 GB model as output; it captures specific subjects very well but needs far more resources. Textual Inversion learns only a new text embedding: 5-20 images, 1-3 hours of training, an output under 1 MB, but the least expressive results. For most pipelines, LoRA gives the best balance of quality, size, and training time.
Q: Which GPU should I use for an SD pipeline?
A: It depends on the workload. Inference only: an RTX 3060 12GB can run SDXL; an RTX 4090 24GB is the most cost-effective; an A10G 24GB is a good cloud option. Training: RTX 4090 24GB is the minimum for SDXL LoRA; use an A100 40/80GB for DreamBooth or multiple concurrent training jobs. Production pipelines that batch-generate 1000+ images/day: 2-4x RTX 4090, or cloud GPUs (RunPod, Lambda). An RTX 4090 costs ~$1,600; above roughly 2000 GPU-hours/year, buying beats cloud — a cloud A10G runs about $1/hr on-demand.
Q: How do I measure the quality of generated images?
A: Combine several metrics: FID Score (Frechet Inception Distance) compares the distribution of generated images against real images — below 50 is good; CLIP Score measures how well each image matches its prompt — above 0.25 is good; Aesthetic Score rates visual appeal on a 1-10 scale — above 5 is good; LPIPS measures perceptual similarity to a reference; and Human Evaluation remains the most reliable signal. In an automated pipeline, use CLIP Score + Aesthetic Score as a quality gate and re-generate any image that falls below the thresholds.
