Stable Diffusion
Stable Diffusion เป็น Open-source Text-to-Image AI สร้างภาพจาก Prompt รันบนเครื่องตัวเองฟรี ปรับแต่งได้ทุกอย่าง Community ใหญ่มาก Models Extensions มากมาย
รองรับ txt2img img2img inpainting ControlNet LoRA Training Custom Models สำหรับงาน Production ใช้ ComfyUI หรือ A1111 WebUI
Installation และ Setup
# === Stable Diffusion Installation ===
# 1. AUTOMATIC1111 WebUI (แนะนำสำหรับเริ่มต้น)
# git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
# cd stable-diffusion-webui
# Windows
# webui-user.bat
# Linux/Mac
# ./webui.sh
# Arguments สำหรับ GPU VRAM น้อย
# set COMMANDLINE_ARGS=--medvram --xformers
# set COMMANDLINE_ARGS=--lowvram # สำหรับ 4GB VRAM
# 2. ComfyUI (แนะนำสำหรับ Advanced/Production)
# git clone https://github.com/comfyanonymous/ComfyUI.git
# cd ComfyUI
# pip install -r requirements.txt
# python main.py
# 3. Diffusers (Python Library)
# pip install diffusers transformers accelerate torch
from dataclasses import dataclass, field
from typing import List, Dict, Optional
@dataclass
class SDConfig:
    """Stable Diffusion Configuration.

    Defaults target SDXL base at its native 1024x1024 resolution.
    """
    # Checkpoint / sampling settings
    model: str = "stabilityai/stable-diffusion-xl-base-1.0"
    scheduler: str = "DPM++ 2M Karras"
    steps: int = 30
    cfg_scale: float = 7.0  # classifier-free guidance strength
    width: int = 1024
    height: int = 1024
    seed: int = -1  # -1 = random
    sampler: str = "Euler a"
    clip_skip: int = 2  # skip last N CLIP layers — NOTE(review): 2 is typical for anime checkpoints; confirm per model
    vae: str = "auto"
    # Performance
    use_xformers: bool = True  # memory-efficient attention
    use_fp16: bool = True  # half-precision inference
    batch_size: int = 1
@dataclass
class SDPrompt:
    """A positive/negative prompt pair with optional LoRA and ControlNet add-ons."""
    positive: str  # main prompt text
    negative: str = ""  # negative prompt; build() falls back to a default when empty
    lora: List[str] = field(default_factory=list)  # LoRA model names to inject into the prompt
    controlnet: Optional[str] = None  # ControlNet model name, informational only here

    def build(self) -> Dict:
        """Return the generation payload (prompt + negative_prompt).

        Bug fix: the original loop appended a bare space (``f" "``) for each
        LoRA — the ``<lora:...>`` tag had evidently been stripped — so every
        LoRA was silently dropped. Restored the AUTOMATIC1111 inline syntax
        ``<lora:name:weight>`` with a default weight of 1.
        """
        prompt = self.positive
        # Append each LoRA as an A1111 inline tag, e.g. "<lora:myLora:1>".
        for lora in self.lora:
            prompt += f" <lora:{lora}:1>"
        return {
            "prompt": prompt,
            "negative_prompt": self.negative or
                "low quality, bad anatomy, blurry, watermark, text, "
                "deformed, ugly, duplicate, error",
        }
# Recommended checkpoint models (name -> native size / strengths).
models = {
    "SDXL 1.0": "1024x1024, General purpose, High quality",
    "SD 1.5": "512x512, Fast, Huge ecosystem of LoRAs",
    "Juggernaut XL": "Photorealistic, Portraits",
    "DreamShaper XL": "Fantasy, Illustration",
    "RealVisXL": "Photorealistic, Product photos",
    "AnimagineXL": "Anime style",
}
# Sampler cheat-sheet (name -> when to use it).
samplers = {
    "DPM++ 2M Karras": "Best overall quality/speed balance",
    "Euler a": "Creative, good for exploration",
    "DPM++ SDE Karras": "High quality, slower",
    "DDIM": "Deterministic, good for img2img",
}

# Display the default configuration, then the model list.
config = SDConfig()
config_lines = [
    "Stable Diffusion Config:",
    f" Model: {config.model}",
    f" Steps: {config.steps} | CFG: {config.cfg_scale}",
    f" Size: {config.width}x{config.height}",
    f" Sampler: {config.sampler}",
]
print("\n".join(config_lines))
print("\nRecommended Models:")
for model_name, description in models.items():
    print(f" {model_name}: {description}")
Prompt Writing และ ControlNet
# sd_prompts.py — Stable Diffusion Prompt Engineering
from dataclasses import dataclass, field
from typing import List
@dataclass
class PromptTemplate:
    """A named, categorized prompt preset with recommended generation settings."""
    name: str  # display name of the template
    category: str  # grouping key used when listing templates
    positive: str  # positive prompt text
    negative: str  # negative prompt text
    settings: dict = field(default_factory=dict)  # e.g. {"steps": 30, "cfg": 7.0, "size": "1024x1024"}
class SDPromptLibrary:
    """In-memory library of reusable Stable Diffusion prompt templates."""

    def __init__(self):
        # Registered templates, kept in insertion order.
        self.templates: List[PromptTemplate] = []

    def add(self, template: PromptTemplate):
        """Register one template."""
        self.templates.append(template)

    def get_by_category(self, category: str):
        """Return every template whose category matches exactly."""
        matches = []
        for tpl in self.templates:
            if tpl.category == category:
                matches.append(tpl)
        return matches

    def show_all(self):
        """Print all templates grouped by category (categories sorted)."""
        for cat in sorted({tpl.category for tpl in self.templates}):
            print(f"\n [{cat}]")
            for tpl in self.get_by_category(cat):
                print(f" {tpl.name}:")
                print(f" + {tpl.positive[:70]}...")
# Prompt Templates — one preset per common use case; register more via library.add().
library = SDPromptLibrary()
templates = [
    PromptTemplate(
        "Product Photo", "Commercial",
        "professional product photography, luxury perfume bottle on marble surface, "
        "soft studio lighting, shallow depth of field, bokeh background, "
        "8k uhd, sharp focus, commercial quality",
        "low quality, blurry, watermark, text, deformed",
        {"steps": 35, "cfg": 7.5, "size": "1024x1024"},
    ),
    PromptTemplate(
        "Portrait", "Photography",
        "professional portrait photography, beautiful woman, natural skin texture, "
        "golden hour sunlight, shallow depth of field, canon EOS R5, "
        "85mm lens f/1.4, raw photo, film grain",
        "bad anatomy, deformed face, ugly, blurry, watermark",
        # 832x1216 is a portrait-orientation SDXL resolution.
        {"steps": 30, "cfg": 7.0, "size": "832x1216"},
    ),
    PromptTemplate(
        "Anime Character", "Illustration",
        "masterpiece, best quality, 1girl, cyberpunk outfit, neon city background, "
        "detailed eyes, dynamic pose, night scene, rain, reflections, "
        "volumetric lighting, anime style",
        "low quality, worst quality, bad anatomy, extra fingers",
        {"steps": 25, "cfg": 8.0, "size": "832x1216"},
    ),
    PromptTemplate(
        "Architecture", "Design",
        "architectural visualization, modern minimalist house, "
        "floor-to-ceiling windows, infinity pool, tropical garden, "
        "golden hour, dramatic sky, photorealistic rendering, 8k",
        "low quality, blurry, watermark, people",
        # 1216x832 is a landscape-orientation SDXL resolution.
        {"steps": 40, "cfg": 7.0, "size": "1216x832"},
    ),
    PromptTemplate(
        "UI Dashboard", "Web Design",
        "modern SaaS dashboard UI design, dark mode, data visualization, "
        "clean minimal layout, glassmorphism, gradient accents, "
        "professional design, figma mockup style",
        "realistic photo, 3d render, blurry text",
        {"steps": 30, "cfg": 7.0, "size": "1216x832"},
    ),
]
for t in templates:
    library.add(t)
# Print the whole library grouped by category.
library.show_all()
# ControlNet Types: preprocessor name -> short description (Thai text kept verbatim).
controlnet_types = {
    "Canny": "ตรวจจับเส้นขอบ ควบคุม Structure ของภาพ",
    "OpenPose": "ตรวจจับท่าทางร่างกาย ควบคุม Pose",
    "Depth": "ตรวจจับความลึก ควบคุม Perspective",
    "Scribble": "ใช้ร่างคร่าวๆ เป็น Guide",
    "Tile": "Upscale ภาพ เพิ่มรายละเอียด",
    "IP-Adapter": "ใช้ภาพต้นแบบ ควบคุม Style",
    "Reference": "ใช้ภาพ Reference สำหรับ Style Transfer",
}
# Fix: original used an f-string with no placeholders (ruff F541); plain string is equivalent.
print("\n\nControlNet Types:")
for cn_type, desc in controlnet_types.items():
    print(f" {cn_type}: {desc}")
LoRA Training และ Deployment
# === LoRA Training for Stable Diffusion ===
# pip install kohya-ss
# 1. Dataset Preparation
# dataset/
# ├── img/
# │ ├── 20_concept_name/ # 20 = repeat count
# │ │ ├── image_001.png
# │ │ ├── image_001.txt # Caption
# │ │ ├── image_002.png
# │ │ └── image_002.txt
# ├── reg/ # Regularization images (optional)
# │ └── 1_class/
# │ ├── reg_001.png
# │ └── reg_002.png
# 2. Training Config (kohya_ss)
# Keys mirror kohya_ss sdxl_train_network.py CLI flags (see command below).
training_config = {
    # Model
    "pretrained_model": "stabilityai/stable-diffusion-xl-base-1.0",
    "output_dir": "./output/my_lora",
    "output_name": "my_custom_lora",
    # LoRA
    "network_dim": 32, # LoRA rank (8-128)
    "network_alpha": 16, # Alpha (usually rank/2)
    "network_module": "networks.lora",
    # Training
    "learning_rate": 1e-4,
    "unet_lr": 1e-4,
    "text_encoder_lr": 5e-5,  # text encoder trained at a lower LR than the UNet
    "lr_scheduler": "cosine_with_restarts",
    "train_batch_size": 1,
    "max_train_epochs": 20,
    "resolution": "1024,1024", # SDXL
    "mixed_precision": "bf16",
    # Optimization (memory savers for consumer GPUs)
    "optimizer_type": "AdamW8bit",
    "xformers": True,
    "gradient_checkpointing": True,
    "cache_latents": True,
    "cache_latents_to_disk": True,
    # Dataset
    "train_data_dir": "./dataset/img",
    "reg_data_dir": "./dataset/reg",
}
# 3. Training Command (kohya_ss)
# accelerate launch --num_cpu_threads_per_process=2 \
#   sdxl_train_network.py \
#   --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
#   --train_data_dir="./dataset/img" \
#   --output_dir="./output" \
#   --output_name="my_lora" \
#   --network_module=networks.lora \
#   --network_dim=32 \
#   --network_alpha=16 \
#   --learning_rate=1e-4 \
#   --lr_scheduler=cosine_with_restarts \
#   --train_batch_size=1 \
#   --max_train_epochs=20 \
#   --resolution=1024,1024 \
#   --mixed_precision=bf16 \
#   --optimizer_type=AdamW8bit \
#   --xformers \
#   --gradient_checkpointing \
#   --cache_latents
# 4. Using LoRA in A1111
# Place the .safetensors file in models/Lora/
# Use in the prompt via the inline tag, e.g. <lora:my_custom_lora:0.8>
# NOTE(review): the original example tag appears to have been stripped from this
# line (likely eaten as HTML) — confirm the intended weight value.
# 5. ComfyUI Workflow
# Load Checkpoint -> Load LoRA -> KSampler -> VAE Decode -> Save Image
# 6. Production API
# from diffusers import StableDiffusionXLPipeline, AutoencoderKL
# import torch
#
# pipe = StableDiffusionXLPipeline.from_pretrained(
#     "stabilityai/stable-diffusion-xl-base-1.0",
#     torch_dtype=torch.float16,
#     variant="fp16",
# ).to("cuda")
#
# pipe.load_lora_weights("./output/my_lora.safetensors")
#
# image = pipe(
#     prompt="your prompt here",
#     negative_prompt="low quality",
#     num_inference_steps=30,
#     guidance_scale=7.0,
#     width=1024, height=1024,
# ).images[0]
#
# image.save("output.png")
print("LoRA Training Config:")
for key, value in training_config.items():
    print(f" {key}: {value}")
Best Practices
- Model Selection: SDXL สำหรับคุณภาพสูง SD 1.5 สำหรับความเร็วและ LoRA ecosystem
- Prompt Structure: Subject + Style + Lighting + Details + Quality Tags
- Negative Prompt: ใส่เสมอเพื่อตัดสิ่งที่ไม่ต้องการออก เช่น low quality, blurry, watermark
- ControlNet: ใช้ Canny/OpenPose ควบคุม Structure ของภาพ
- LoRA Training: ใช้ 20-50 ภาพคุณภาพสูง Caption ให้ละเอียด
- Batch Processing: ใช้ ComfyUI Workflow สำหรับ Production Pipeline
Stable Diffusion คืออะไร
Open-source Text-to-Image AI Stability AI สร้างภาพจาก Prompt รันเครื่องตัวเองฟรี img2img inpainting ControlNet LoRA Community ใหญ่ Models Extensions มาก
Stable Diffusion ต่างจาก Midjourney อย่างไร
SD เป็น Open-source รันบนเครื่องตัวเองฟรี ปรับแต่งได้ทุกอย่างและ Train Custom Models ได้ ส่วน Midjourney เป็น Cloud Service จ่ายรายเดือน ใช้ผ่าน Discord ง่ายกว่าและได้ภาพสวยตั้งแต่เริ่มต้น แต่ปรับแต่งได้น้อยกว่า
ต้องใช้ GPU อะไรรัน Stable Diffusion
NVIDIA GPU VRAM 8GB+ RTX 3060 12GB RTX 3070 RTX 4060 SDXL ควร 12GB+ --medvram --lowvram VRAM น้อย Google Colab ฟรี
ControlNet คืออะไร
Extension ควบคุมการสร้างภาพด้วย Input เพิ่ม Canny Edge เส้นขอบ OpenPose ท่าทาง Depth Map ความลึก Scribble ร่าง ควบคุมผลลัพธ์แม่นยำกว่า Prompt อย่างเดียว
สรุป
Stable Diffusion เป็น Open-source Text-to-Image AI รันเครื่องตัวเองฟรี SDXL คุณภาพสูง SD 1.5 เร็ว LoRA ecosystem ControlNet ควบคุม Structure LoRA Training ด้วย kohya_ss ComfyUI สำหรับ Production Diffusers Library สำหรับ Python API
