Stable Diffusion
Stable Diffusion เป็น Open-source Text-to-Image AI สร้างภาพจาก Prompt รันบนเครื่องตัวเองฟรี ปรับแต่งได้ทุกอย่าง Community ใหญ่มาก Models Extensions มากมาย
รองรับ txt2img img2img inpainting ControlNet LoRA Training Custom Models สำหรับงาน Production ใช้ ComfyUI หรือ A1111 WebUI
Installation และ Setup
# === Stable Diffusion Installation ===
# 1. AUTOMATIC1111 WebUI (แนะนำสำหรับเริ่มต้น)
# git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
# cd stable-diffusion-webui
# Windows
# webui-user.bat
# Linux/Mac
# ./webui.sh
# Arguments สำหรับ GPU VRAM น้อย
# set COMMANDLINE_ARGS=--medvram --xformers
# set COMMANDLINE_ARGS=--lowvram # สำหรับ 4GB VRAM
# 2. ComfyUI (แนะนำสำหรับ Advanced/Production)
# git clone https://github.com/comfyanonymous/ComfyUI.git
# cd ComfyUI
# pip install -r requirements.txt
# python main.py
# 3. Diffusers (Python Library)
# pip install diffusers transformers accelerate torch
from dataclasses import dataclass, field
from typing import List, Dict, Optional
@dataclass
class SDConfig:
    """Stable Diffusion Configuration.

    Defaults target SDXL base at its native 1024x1024 resolution.
    """
    # Checkpoint / sampling settings
    model: str = "stabilityai/stable-diffusion-xl-base-1.0"
    scheduler: str = "DPM++ 2M Karras"
    steps: int = 30
    cfg_scale: float = 7.0  # classifier-free guidance strength
    width: int = 1024
    height: int = 1024
    seed: int = -1  # -1 = random
    sampler: str = "Euler a"
    clip_skip: int = 2  # skip last N CLIP layers — NOTE(review): 2 is typical for anime checkpoints; confirm per model
    vae: str = "auto"
    # Performance
    use_xformers: bool = True  # memory-efficient attention
    use_fp16: bool = True  # half-precision inference
    batch_size: int = 1
@dataclass
class SDPrompt:
    """A positive/negative prompt pair with optional LoRA and ControlNet add-ons."""
    positive: str  # main prompt text
    negative: str = ""  # negative prompt; build() falls back to a default when empty
    lora: List[str] = field(default_factory=list)  # LoRA model names to inject into the prompt
    controlnet: Optional[str] = None  # ControlNet model name, informational only here

    def build(self) -> Dict:
        """Return the generation payload (prompt + negative_prompt).

        Bug fix: the original loop appended a bare space (``f" "``) for each
        LoRA — the ``<lora:...>`` tag had evidently been stripped — so every
        LoRA was silently dropped. Restored the AUTOMATIC1111 inline syntax
        ``<lora:name:weight>`` with a default weight of 1.
        """
        prompt = self.positive
        # Append each LoRA as an A1111 inline tag, e.g. "<lora:myLora:1>".
        for lora in self.lora:
            prompt += f" <lora:{lora}:1>"
        return {
            "prompt": prompt,
            "negative_prompt": self.negative or
                "low quality, bad anatomy, blurry, watermark, text, "
                "deformed, ugly, duplicate, error",
        }
# Recommended checkpoint models (name -> native size / strengths).
models = {
    "SDXL 1.0": "1024x1024, General purpose, High quality",
    "SD 1.5": "512x512, Fast, Huge ecosystem of LoRAs",
    "Juggernaut XL": "Photorealistic, Portraits",
    "DreamShaper XL": "Fantasy, Illustration",
    "RealVisXL": "Photorealistic, Product photos",
    "AnimagineXL": "Anime style",
}
# Sampler cheat-sheet (name -> when to use it).
samplers = {
    "DPM++ 2M Karras": "Best overall quality/speed balance",
    "Euler a": "Creative, good for exploration",
    "DPM++ SDE Karras": "High quality, slower",
    "DDIM": "Deterministic, good for img2img",
}

# Display the default configuration, then the model list.
config = SDConfig()
config_lines = [
    "Stable Diffusion Config:",
    f" Model: {config.model}",
    f" Steps: {config.steps} | CFG: {config.cfg_scale}",
    f" Size: {config.width}x{config.height}",
    f" Sampler: {config.sampler}",
]
print("\n".join(config_lines))
print("\nRecommended Models:")
for model_name, description in models.items():
    print(f" {model_name}: {description}")
Prompt Writing และ ControlNet
# sd_prompts.py — Stable Diffusion Prompt Engineering
from dataclasses import dataclass, field
from typing import List
@dataclass
class PromptTemplate:
    """A named, categorized prompt preset with recommended generation settings."""
    name: str  # display name of the template
    category: str  # grouping key used when listing templates
    positive: str  # positive prompt text
    negative: str  # negative prompt text
    settings: dict = field(default_factory=dict)  # e.g. {"steps": 30, "cfg": 7.0, "size": "1024x1024"}
class SDPromptLibrary:
    """In-memory library of reusable Stable Diffusion prompt templates."""

    def __init__(self):
        # Registered templates, kept in insertion order.
        self.templates: List[PromptTemplate] = []

    def add(self, template: PromptTemplate):
        """Register one template."""
        self.templates.append(template)

    def get_by_category(self, category: str):
        """Return every template whose category matches exactly."""
        matches = []
        for tpl in self.templates:
            if tpl.category == category:
                matches.append(tpl)
        return matches

    def show_all(self):
        """Print all templates grouped by category (categories sorted)."""
        for cat in sorted({tpl.category for tpl in self.templates}):
            print(f"\n [{cat}]")
            for tpl in self.get_by_category(cat):
                print(f" {tpl.name}:")
                print(f" + {tpl.positive[:70]}...")
# Prompt Templates — one preset per common use case; register more via library.add().
library = SDPromptLibrary()
templates = [
    PromptTemplate(
        "Product Photo", "Commercial",
        "professional product photography, luxury perfume bottle on marble surface, "
        "soft studio lighting, shallow depth of field, bokeh background, "
        "8k uhd, sharp focus, commercial quality",
        "low quality, blurry, watermark, text, deformed",
        {"steps": 35, "cfg": 7.5, "size": "1024x1024"},
    ),
    PromptTemplate(
        "Portrait", "Photography",
        "professional portrait photography, beautiful woman, natural skin texture, "
        "golden hour sunlight, shallow depth of field, canon EOS R5, "
        "85mm lens f/1.4, raw photo, film grain",
        "bad anatomy, deformed face, ugly, blurry, watermark",
        # 832x1216 is a portrait-orientation SDXL resolution.
        {"steps": 30, "cfg": 7.0, "size": "832x1216"},
    ),
    PromptTemplate(
        "Anime Character", "Illustration",
        "masterpiece, best quality, 1girl, cyberpunk outfit, neon city background, "
        "detailed eyes, dynamic pose, night scene, rain, reflections, "
        "volumetric lighting, anime style",
        "low quality, worst quality, bad anatomy, extra fingers",
        {"steps": 25, "cfg": 8.0, "size": "832x1216"},
    ),
    PromptTemplate(
        "Architecture", "Design",
        "architectural visualization, modern minimalist house, "
        "floor-to-ceiling windows, infinity pool, tropical garden, "
        "golden hour, dramatic sky, photorealistic rendering, 8k",
        "low quality, blurry, watermark, people",
        # 1216x832 is a landscape-orientation SDXL resolution.
        {"steps": 40, "cfg": 7.0, "size": "1216x832"},
    ),
    PromptTemplate(
        "UI Dashboard", "Web Design",
        "modern SaaS dashboard UI design, dark mode, data visualization, "
        "clean minimal layout, glassmorphism, gradient accents, "
        "professional design, figma mockup style",
        "realistic photo, 3d render, blurry text",
        {"steps": 30, "cfg": 7.0, "size": "1216x832"},
    ),
]
for t in templates:
    library.add(t)
# Print the whole library grouped by category.
library.show_all()
# ControlNet Types: preprocessor name -> short description (Thai text kept verbatim).
controlnet_types = {
    "Canny": "ตรวจจับเส้นขอบ ควบคุม Structure ของภาพ",
    "OpenPose": "ตรวจจับท่าทางร่างกาย ควบคุม Pose",
    "Depth": "ตรวจจับความลึก ควบคุม Perspective",
    "Scribble": "ใช้ร่างคร่าวๆ เป็น Guide",
    "Tile": "Upscale ภาพ เพิ่มรายละเอียด",
    "IP-Adapter": "ใช้ภาพต้นแบบ ควบคุม Style",
    "Reference": "ใช้ภาพ Reference สำหรับ Style Transfer",
}
# Fix: original used an f-string with no placeholders (ruff F541); plain string is equivalent.
print("\n\nControlNet Types:")
for cn_type, desc in controlnet_types.items():
    print(f" {cn_type}: {desc}")
LoRA Training และ Deployment
# === LoRA Training for Stable Diffusion ===
# pip install kohya-ss
# 1. Dataset Preparation
# dataset/
# ├── img/
# │ ├── 20_concept_name/ # 20 = repeat count
# │ │ ├── image_001.png
# │ │ ├── image_001.txt # Caption
# │ │ ├── image_002.png
# │ │ └── image_002.txt
# ├── reg/ # Regularization images (optional)
# │ └── 1_class/
# │ ├── reg_001.png
# │ └── reg_002.png
# 2. Training Config (kohya_ss)
# Keys mirror kohya_ss sdxl_train_network.py CLI flags (see command below).
training_config = {
    # Model
    "pretrained_model": "stabilityai/stable-diffusion-xl-base-1.0",
    "output_dir": "./output/my_lora",
    "output_name": "my_custom_lora",
    # LoRA
    "network_dim": 32, # LoRA rank (8-128)
    "network_alpha": 16, # Alpha (usually rank/2)
    "network_module": "networks.lora",
    # Training
    "learning_rate": 1e-4,
    "unet_lr": 1e-4,
    "text_encoder_lr": 5e-5,  # text encoder trained at a lower LR than the UNet
    "lr_scheduler": "cosine_with_restarts",
    "train_batch_size": 1,
    "max_train_epochs": 20,
    "resolution": "1024,1024", # SDXL
    "mixed_precision": "bf16",
    # Optimization (memory savers for consumer GPUs)
    "optimizer_type": "AdamW8bit",
    "xformers": True,
    "gradient_checkpointing": True,
    "cache_latents": True,
    "cache_latents_to_disk": True,
    # Dataset
    "train_data_dir": "./dataset/img",
    "reg_data_dir": "./dataset/reg",
}
# 3. Training Command (kohya_ss)
# accelerate launch --num_cpu_threads_per_process=2 \
#   sdxl_train_network.py \
#   --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
#   --train_data_dir="./dataset/img" \
#   --output_dir="./output" \
#   --output_name="my_lora" \
#   --network_module=networks.lora \
#   --network_dim=32 \
#   --network_alpha=16 \
#   --learning_rate=1e-4 \
#   --lr_scheduler=cosine_with_restarts \
#   --train_batch_size=1 \
#   --max_train_epochs=20 \
#   --resolution=1024,1024 \
#   --mixed_precision=bf16 \
#   --optimizer_type=AdamW8bit \
#   --xformers \
#   --gradient_checkpointing \
#   --cache_latents
# 4. Using LoRA in A1111
# Place the .safetensors file in models/Lora/
# Use in the prompt via the inline tag, e.g. <lora:my_custom_lora:0.8>
# NOTE(review): the original example tag appears to have been stripped from this
# line (likely eaten as HTML) — confirm the intended weight value.
# 5. ComfyUI Workflow
# Load Checkpoint -> Load LoRA -> KSampler -> VAE Decode -> Save Image
# 6. Production API
# from diffusers import StableDiffusionXLPipeline, AutoencoderKL
# import torch
#
# pipe = StableDiffusionXLPipeline.from_pretrained(
#     "stabilityai/stable-diffusion-xl-base-1.0",
#     torch_dtype=torch.float16,
#     variant="fp16",
# ).to("cuda")
#
# pipe.load_lora_weights("./output/my_lora.safetensors")
#
# image = pipe(
#     prompt="your prompt here",
#     negative_prompt="low quality",
#     num_inference_steps=30,
#     guidance_scale=7.0,
#     width=1024, height=1024,
# ).images[0]
#
# image.save("output.png")
print("LoRA Training Config:")
for key, value in training_config.items():
    print(f" {key}: {value}")
Best Practices
- Model Selection: SDXL สำหรับคุณภาพสูง SD 1.5 สำหรับความเร็วและ LoRA ecosystem
- Prompt Structure: Subject + Style + Lighting + Details + Quality Tags
- Negative Prompt: ใส่เสมอเพื่อตัดสิ่งที่ไม่ต้องการออก เช่น low quality, blurry, watermark
- ControlNet: ใช้ Canny/OpenPose ควบคุม Structure ของภาพ
- LoRA Training: ใช้ 20-50 ภาพคุณภาพสูง Caption ให้ละเอียด
- Batch Processing: ใช้ ComfyUI Workflow สำหรับ Production Pipeline
Stable Diffusion คืออะไร
Open-source Text-to-Image AI Stability AI สร้างภาพจาก Prompt รันเครื่องตัวเองฟรี img2img inpainting ControlNet LoRA Community ใหญ่ Models Extensions มาก
Stable Diffusion ต่างจาก Midjourney อย่างไร
SD เป็น Open-source รันบนเครื่องตัวเองฟรี ปรับแต่งได้ทุกอย่างและ Train Custom Models ได้ ส่วน Midjourney เป็น Cloud Service จ่ายรายเดือน ใช้ผ่าน Discord ง่ายกว่าและได้ภาพสวยตั้งแต่เริ่มต้น แต่ปรับแต่งได้น้อยกว่า
ต้องใช้ GPU อะไรรัน Stable Diffusion
NVIDIA GPU VRAM 8GB+ RTX 3060 12GB RTX 3070 RTX 4060 SDXL ควร 12GB+ --medvram --lowvram VRAM น้อย Google Colab ฟรี
ControlNet คืออะไร
Extension ควบคุมการสร้างภาพด้วย Input เพิ่ม Canny Edge เส้นขอบ OpenPose ท่าทาง Depth Map ความลึก Scribble ร่าง ควบคุมผลลัพธ์แม่นยำกว่า Prompt อย่างเดียว
สรุป
Stable Diffusion เป็น Open-source Text-to-Image AI รันเครื่องตัวเองฟรี SDXL คุณภาพสูง SD 1.5 เร็ว LoRA ecosystem ControlNet ควบคุม Structure LoRA Training ด้วย kohya_ss ComfyUI สำหรับ Production Diffusers Library สำหรับ Python API
