Stable Diffusion img2img

Stable Diffusion img2img Image-to-Image Denoising Strength ControlNet Style Transfer Inpainting Prompt CFG Scale Sampler Production

เนื้อหาเกี่ยวข้อง — ทำความเข้าใจ Bond แปลว่าอะไร — ข้อมูลครบถ้วน 2026

Parameter	Range	แนะนำ	ผล
Denoising Strength	0.0 - 1.0	0.4 - 0.6	มาก = เปลี่ยนมาก น้อย = คงเดิม
CFG Scale	1 - 30	7 - 12	สูง = ตาม Prompt มาก ต่ำ = อิสระ
Sampling Steps	1 - 150	20 - 30	มาก = ละเอียดกว่า ช้ากว่า
ControlNet Weight	0.0 - 2.0	0.5 - 1.0	สูง = ตาม Control มาก
Image Size	256 - 2048	512x512 / 768x768	ใหญ่ = ใช้ VRAM มาก ช้า

img2img Pipeline

# === Stable Diffusion img2img with diffusers ===

# pip install diffusers transformers accelerate torch

# from diffusers import StableDiffusionImg2ImgPipeline
# from PIL import Image
# import torch
#
# # Load model
# pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5",
#     torch_dtype=torch.float16,
#     safety_checker=None,
# ).to("cuda")
#
# # Load input image
# init_image = Image.open("input.jpg").convert("RGB").resize((512, 512))
#
# # Generate
# result = pipe(
#     prompt="oil painting of a beautiful landscape, masterpiece",
#     negative_prompt="blurry, low quality, deformed",
#     image=init_image,
#     strength=0.5,          # Denoising Strength
#     guidance_scale=7.5,    # CFG Scale
#     num_inference_steps=30,
#     generator=torch.Generator("cuda").manual_seed(42),
# ).images[0]
#
# result.save("output.jpg")

from dataclasses import dataclass

@dataclass
class Img2ImgUseCase:
    """One img2img use case with its suggested settings, kept as display text."""
    # Name of the use case, e.g. "Style Transfer".
    use_case: str
    # Suggested denoising strength range as text, e.g. "0.4 - 0.6".
    denoising: str
    # Suggested CFG scale range as text, e.g. "7 - 10".
    cfg_scale: str
    # Hint on how to word the prompt for this use case.
    prompt_tip: str
    # Short description of an example transformation.
    example: str

# Catalogue of img2img use cases; every value is human-readable display text.
use_cases = [
    Img2ImgUseCase(
        use_case="Style Transfer",
        denoising="0.4 - 0.6",
        cfg_scale="7 - 10",
        prompt_tip="ระบุ Style ชัดเจน เช่น 'oil painting' 'anime style'",
        example="ภาพถ่ายเป็นภาพวาดสีน้ำมัน",
    ),
    Img2ImgUseCase(
        use_case="Color Correction",
        denoising="0.1 - 0.3",
        cfg_scale="5 - 7",
        prompt_tip="ระบุ Tone สี เช่น 'warm colors' 'golden hour'",
        example="แก้สีภาพให้สว่างขึ้น Warm Tone",
    ),
    Img2ImgUseCase(
        use_case="Concept Art from Sketch",
        denoising="0.6 - 0.8",
        cfg_scale="8 - 12",
        prompt_tip="ระบุ Detail มาก เพราะจะสร้างใหม่เยอะ",
        example="Sketch ดินสอ → Concept Art สี Full Detail",
    ),
    Img2ImgUseCase(
        use_case="Upscale + Detail",
        denoising="0.2 - 0.4",
        cfg_scale="7 - 10",
        prompt_tip="ใช้ Prompt เดิม เพิ่ม 'highly detailed' '4k'",
        example="ภาพ 512x512 → 1024x1024 เพิ่ม Detail",
    ),
    Img2ImgUseCase(
        use_case="Background Change",
        denoising="0.5 - 0.7 (with Inpainting)",
        cfg_scale="7 - 10",
        prompt_tip="ใช้ Inpainting Mask พื้นหลัง Prompt พื้นหลังใหม่",
        example="เปลี่ยนพื้นหลังจากห้อง → ทะเล",
    ),
]

# Report: a heading, then a blank line and four text lines per use case.
print("=== img2img Use Cases ===")
for u in use_cases:
    print(
        f"\n  [{u.use_case}]",
        f"    Denoising: {u.denoising} | CFG: {u.cfg_scale}",
        f"    Prompt Tip: {u.prompt_tip}",
        f"    Example: {u.example}",
        sep="\n",
    )

ControlNet Integration

# === ControlNet + img2img ===

# from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
# import cv2, numpy as np
#
# # Load ControlNet (Canny Edge)
# controlnet = ControlNetModel.from_pretrained(
#     "lllyasviel/sd-controlnet-canny",
#     torch_dtype=torch.float16
# ).to("cuda")
#
# pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5",
#     controlnet=controlnet,
#     torch_dtype=torch.float16,
# ).to("cuda")
#
# # Create Canny edge map
# image = cv2.imread("input.jpg")
# canny = cv2.Canny(image, 100, 200)
# canny_image = Image.fromarray(canny)
#
# result = pipe(
#     prompt="beautiful anime girl, masterpiece",
#     image=init_image,
#     control_image=canny_image,
#     strength=0.5,
#     controlnet_conditioning_scale=0.8,
#     num_inference_steps=30,
# ).images[0]

@dataclass
class ControlNetType:
    """One ControlNet variant and how it is meant to be used, kept as display text."""
    # Display name of the variant, e.g. "Canny Edge".
    name: str
    # Model identifier string, e.g. "lllyasviel/sd-controlnet-canny".
    model_id: str
    # Kind of conditioning input the variant takes, e.g. "Depth map (MiDaS)".
    input_type: str
    # Short description of what the variant is used for.
    use_case: str
    # Suggested ControlNet weight range as text, e.g. "0.5 - 1.0".
    weight: str

# Catalogue of ControlNet variants; every value is human-readable display text.
controlnets = [
    ControlNetType(
        name="Canny Edge",
        model_id="lllyasviel/sd-controlnet-canny",
        input_type="Edge detection (Canny)",
        use_case="คงโครงสร้างขอบภาพ เปลี่ยน Style",
        weight="0.5 - 1.0",
    ),
    ControlNetType(
        name="Depth",
        model_id="lllyasviel/sd-controlnet-depth",
        input_type="Depth map (MiDaS)",
        use_case="คง 3D Structure เปลี่ยน Content",
        weight="0.5 - 1.0",
    ),
    ControlNetType(
        name="OpenPose",
        model_id="lllyasviel/sd-controlnet-openpose",
        input_type="Human pose keypoints",
        use_case="คงท่าทางคน เปลี่ยน Outfit Style",
        weight="0.5 - 0.8",
    ),
    ControlNetType(
        name="Scribble",
        model_id="lllyasviel/sd-controlnet-scribble",
        input_type="Hand-drawn scribble",
        use_case="วาดมือคร่าวๆ ให้ AI สร้างภาพ",
        weight="0.5 - 1.0",
    ),
    ControlNetType(
        name="IP-Adapter",
        model_id="h94/IP-Adapter",
        input_type="Reference image (style)",
        use_case="ใช้ภาพเป็น Style Reference ไม่ต้องเขียน Prompt",
        weight="0.3 - 0.8",
    ),
]

# Report: a heading, then four text lines per ControlNet variant.
print("=== ControlNet Types ===")
for c in controlnets:
    print(
        f"  [{c.name}] Weight: {c.weight}",
        f"    Model: {c.model_id}",
        f"    Input: {c.input_type}",
        f"    Use: {c.use_case}",
        sep="\n",
    )

Production API

# === Production img2img API ===

# from fastapi import FastAPI, UploadFile, File, Form, Response
# from diffusers import StableDiffusionImg2ImgPipeline
# from PIL import Image
# import io, torch
#
# app = FastAPI()
# pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5",
#     torch_dtype=torch.float16
# ).to("cuda")
#
# @app.post("/img2img")
# async def img2img(
#     image: UploadFile = File(...),
#     prompt: str = Form(...),
#     strength: float = Form(0.5),
#     cfg_scale: float = Form(7.5),
# ):
#     img = Image.open(io.BytesIO(await image.read())).convert("RGB")
#     img = img.resize((512, 512))
#     result = pipe(prompt=prompt, image=img,
#                   strength=strength, guidance_scale=cfg_scale).images[0]
#     buffer = io.BytesIO()
#     result.save(buffer, format="PNG")
#     return Response(content=buffer.getvalue(), media_type="image/png")

@dataclass
class ProductionTip:
    """One production deployment tip for an img2img service, kept as display text."""
    # Area the tip addresses, e.g. "Model Loading".
    aspect: str
    # What to do for that area.
    recommendation: str
    # Note on GPU / VRAM usage for this tip.
    gpu: str
    # Note on the latency effect of this tip.
    latency: str

# Catalogue of production tips; every value is human-readable display text.
tips = [
    ProductionTip(
        aspect="Model Loading",
        recommendation="Load Model ตอน Startup ไม่ใช่ทุก Request",
        gpu="ใช้ VRAM 4-6GB (fp16)",
        latency="First Request 0ms (Model อยู่ใน Memory)",
    ),
    ProductionTip(
        aspect="Batch Processing",
        recommendation="รวมหลาย Request เป็น Batch ลด Overhead",
        gpu="Batch 4 ใช้ VRAM 12-16GB",
        latency="4 Images พร้อมกัน เร็วกว่า 4 ทีละ 1",
    ),
    ProductionTip(
        aspect="ONNX/TensorRT",
        recommendation="Export Model เป็น TensorRT Inference เร็วขึ้น 2-3x",
        gpu="เท่ากัน",
        latency="512x512 จาก 3s → 1s",
    ),
    ProductionTip(
        aspect="Queue System",
        recommendation="Redis + Celery Async Processing",
        gpu="Worker per GPU",
        latency="ไม่ Block API Response ทันที",
    ),
    ProductionTip(
        aspect="Auto-scaling",
        recommendation="Kubernetes HPA Scale GPU Pods ตาม Queue",
        gpu="ขึ้นกับ Demand",
        latency="Scale 0 → N ตาม Traffic",
    ),
]

# Report: a heading, then two text lines per tip.
print("=== Production Tips ===")
for t in tips:
    print(
        f"  [{t.aspect}] {t.recommendation}",
        f"    GPU: {t.gpu} | Latency: {t.latency}",
        sep="\n",
    )

เคล็ดลับ

Denoising: เริ่ม 0.5 ปรับขึ้นลงตามต้องการ
Prompt: ใส่ Negative Prompt เสมอ ลด Artifact
ControlNet: ใช้ Canny + img2img คงโครงสร้างดีที่สุด
VRAM: ใช้ fp16 เพื่อลดการใช้ VRAM ประมาณ 50% และเปิด xformers เพื่อเพิ่มความเร็ว
Seed: Fix Seed ทดสอบ Parameter ทีละตัว

img2img คืออะไร

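img2img (Image-to-Image) คือโหมดของ Stable Diffusion ที่รับภาพต้นฉบับร่วมกับ Prompt แล้วสร้างภาพใหม่จากภาพนั้น โดยใช้ Denoising Strength กำหนดว่าจะเปลี่ยนจากภาพเดิมมากหรือน้อย ใช้ได้กับงาน Style Transfer, Inpainting, Outpainting, Upscale และ Concept Art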

แนะนำเพิ่มเติม — ดูสัญญาณเทรดที่ XM Signal

เนื้อหาเกี่ยวข้อง — แนะนำให้อ่าน mô hình tăng giá chứng khoán

เนื้อหาเกี่ยวข้อง — บทความที่เกี่ยวข้อง: what is ethical hacking