SiamCafe · Blog
Ceph Storage Cluster Developer Experience (DX)
บทความ

Ceph Storage Cluster Developer Experience (DX)

เผยแพร่ 28 พฤษภาคม 2569

Ceph Storage Cluster

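Ceph เป็น Software-defined Storage แบบ Distributed ที่ให้บริการทั้ง Block (RBD), File (CephFS) และ Object (RGW ผ่าน S3 API) ในระบบเดียว ใช้อัลกอริทึม CRUSH กระจายข้อมูล มีความสามารถ Self-healing ขยายแบบ Scale-out ได้ ใช้งานบน Kubernetes ผ่าน Rook และเหมาะกับงานระดับ Enterprise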

Storage Type | Protocol | Use Case | Kubernetes
RBD (Block) | RADOS Block Device | VM Disk, Container Volume | StorageClass RBD
CephFS (File) | POSIX Filesystem | Shared Storage, NFS | StorageClass CephFS
RGW (Object) | S3 / Swift API | Backup, Media, Data Lake | ObjectBucketClaim

Architecture & Deployment

# === Ceph Cluster Architecture ===

# Rook Operator Deployment (Kubernetes)
# kubectl create -f https://raw.githubusercontent.com/rook/rook/release-1.13/deploy/examples/crds.yaml
# kubectl create -f https://raw.githubusercontent.com/rook/rook/release-1.13/deploy/examples/common.yaml
# kubectl create -f https://raw.githubusercontent.com/rook/rook/release-1.13/deploy/examples/operator.yaml
# kubectl create -f cluster.yaml

# cephadm Bootstrap (Bare Metal)
# curl --silent --remote-name --location https://github.com/ceph/ceph/raw/quincy/src/cephadm/cephadm
# chmod +x cephadm
# ./cephadm bootstrap --mon-ip 192.168.1.10
# ceph orch host add node2 192.168.1.11
# ceph orch host add node3 192.168.1.12
# ceph orch apply osd --all-available-devices

from dataclasses import dataclass

@dataclass
class CephComponent:
    """One entry of the Ceph daemon reference table.

    Every field is free-form display text; nothing is parsed or validated.
    """
    component: str  # daemon name with its abbreviation, e.g. "MON (Monitor)"
    role: str  # short description of what the daemon is responsible for
    min_count: str  # recommended minimum number of instances, kept as text
    resource: str  # suggested CPU / RAM / disk sizing
    tip: str  # one-line deployment hint

# Reference table of the core Ceph daemons. All values are display text that
# is printed verbatim below.
components = [
    CephComponent(
        component="MON (Monitor)",
        role="จัดการ Cluster Map ลงคะแนน Leader",
        min_count="3 (ต้องเป็นเลขคี่)",
        resource="CPU 1 core, RAM 2GB, SSD 50GB",
        tip="กระจายไปต่าง Node เสมอ",
    ),
    CephComponent(
        component="OSD (Object Storage Daemon)",
        role="เก็บข้อมูลจริงบน Disk",
        min_count="3+ (ยิ่งมากยิ่ง Capacity สูง)",
        resource="CPU 1 core/OSD, RAM 4GB/OSD, SSD/HDD",
        tip="ใช้ SSD สำหรับ WAL+DB แยกจาก HDD Data",
    ),
    CephComponent(
        component="MGR (Manager)",
        role="จัดการ Dashboard Prometheus Module",
        min_count="2 (Active + Standby)",
        resource="CPU 1 core, RAM 1GB",
        tip="เปิด Dashboard Module สำหรับ Web UI",
    ),
    CephComponent(
        component="MDS (Metadata Server)",
        role="จัดการ Metadata สำหรับ CephFS",
        min_count="2+ (ถ้าใช้ CephFS)",
        resource="CPU 2 core, RAM 4GB",
        tip="ต้องมีถ้าใช้ CephFS ไม่ต้องถ้าใช้แค่ RBD",
    ),
    CephComponent(
        component="RGW (RADOS Gateway)",
        role="S3/Swift API Gateway",
        min_count="2+ (ถ้าใช้ Object Storage)",
        resource="CPU 2 core, RAM 4GB",
        tip="ใช้ Load Balancer หน้า RGW สำหรับ HA",
    ),
]

print("=== Ceph Components ===")
for c in components:
    # Emit the four-line entry in one write; the leading "\n" puts a blank
    # line between entries.
    print(
        f"\n  [{c.component}] {c.role}\n"
        f"    Min: {c.min_count}\n"
        f"    Resource: {c.resource}\n"
        f"    Tip: {c.tip}"
    )

Developer Workflow

# === Developer Workflow with Ceph ===

@dataclass
class DevWorkflow:
    """One developer task against a Ceph cluster and how to confirm it worked.

    Every field is free-form display text; the commands are printed, never
    executed.
    """
    task: str  # what the developer wants to accomplish
    tool: str  # CLI or client used for the task
    command: str  # command text to run; may span several "\n"-separated lines
    verify: str  # command or expected result that confirms success

# Common developer tasks against a Ceph cluster. The commands are display text
# only; this script prints them and never executes them.
workflows = [
    DevWorkflow(
        task="สร้าง Block Volume (RBD)",
        tool="kubectl + StorageClass",
        command=(
            "kubectl apply -f pvc-rbd.yaml\n"
            "# apiVersion: v1\n"
            "# kind: PersistentVolumeClaim\n"
            "# spec:\n"
            "#   storageClassName: rook-ceph-block\n"
            "#   resources:\n"
            "#     requests:\n"
            "#       storage: 10Gi"
        ),
        verify="kubectl get pvc (Status: Bound)",
    ),
    DevWorkflow(
        task="สร้าง Shared Filesystem (CephFS)",
        tool="kubectl + StorageClass",
        command=(
            "kubectl apply -f pvc-cephfs.yaml\n"
            "# storageClassName: rook-cephfs\n"
            "# accessModes: [ReadWriteMany]"
        ),
        verify="kubectl get pvc (Bound + RWX)",
    ),
    DevWorkflow(
        task="ใช้ S3 API (RGW)",
        tool="boto3 / aws-cli",
        command=(
            "# aws --endpoint-url http://rook-ceph-rgw:80 s3 ls\n"
            "# aws --endpoint-url http://rook-ceph-rgw:80 s3 cp file.txt s3://mybucket/"
        ),
        # Verification has to target the same RGW endpoint as the upload;
        # without --endpoint-url the CLI would query AWS S3 instead.
        verify="aws --endpoint-url http://rook-ceph-rgw:80 s3 ls s3://mybucket/",
    ),
    DevWorkflow(
        task="Monitor Cluster",
        tool="ceph CLI / Dashboard",
        command="ceph status\nceph osd tree\nceph df\nceph health detail",
        verify="HEALTH_OK = ปกติ",
    ),
    DevWorkflow(
        task="Benchmark Performance",
        tool="rados bench",
        # --no-cleanup keeps the written objects so the seq read test has
        # data to read; the final cleanup removes them from the pool.
        command=(
            "rados bench -p testpool 30 write --no-cleanup\n"
            "rados bench -p testpool 30 seq\n"
            "rados -p testpool cleanup"
        ),
        verify="ดู Bandwidth IOPS Latency",
    ),
]

print("=== Developer Workflows ===")
for w in workflows:
    # One write per workflow; the leading "\n" separates entries with a
    # blank line.
    print(
        f"\n  [{w.task}] Tool: {w.tool}\n"
        f"    Command: {w.command}\n"
        f"    Verify: {w.verify}"
    )

Troubleshooting

# === Common Issues & Fixes ===

@dataclass
class CephIssue:
    """One troubleshooting entry: a problem, how it shows up, and what to do.

    Every field is free-form display text; nothing is parsed or executed.
    """
    issue: str  # short name of the problem
    symptom: str  # health message or behavior the operator observes
    diagnose: str  # commands to narrow the problem down, "\n"-separated
    fix: str  # remediation steps, "\n"-separated

# Troubleshooting table for common Ceph problems. All values are display text
# that is printed verbatim below.
issues = [
    CephIssue(
        issue="OSD Down",
        symptom="HEALTH_WARN: 1 osds down",
        diagnose="ceph osd tree (ดู OSD ไหน down)\nsystemctl status ceph-osd@ID",
        fix="systemctl restart ceph-osd@ID\nดู Log: journalctl -u ceph-osd@ID",
    ),
    CephIssue(
        issue="Cluster Near Full",
        # Crossing the nearfull ratio is a warning-level health check; only
        # a full OSD escalates the cluster to HEALTH_ERR.
        symptom="HEALTH_WARN: OSD near full (> 85%)",
        diagnose="ceph df (ดู Usage)\nceph osd df tree",
        fix="ลบข้อมูลไม่จำเป็น หรือ เพิ่ม OSD\nceph osd set-nearfull-ratio 0.90",
    ),
    CephIssue(
        issue="PG Not Active+Clean",
        symptom="HEALTH_WARN: Degraded PGs",
        diagnose="ceph pg stat\nceph pg dump_stuck",
        fix="รอ Recovery อัตโนมัติ\nถ้านาน: ceph pg repair PG_ID",
    ),
    CephIssue(
        issue="Slow OSD / High Latency",
        symptom="Slow requests, Client timeout",
        diagnose="ceph osd perf\nsmartctl -a /dev/sdX",
        fix="เปลี่ยน Disk ที่ช้า\nแยก WAL+DB ไป SSD\nตรวจ Network",
    ),
    CephIssue(
        issue="Clock Skew",
        symptom="HEALTH_WARN: clock skew detected",
        diagnose="ceph time-sync-status",
        fix="ตั้ง NTP ทุก Node: timedatectl set-ntp true",
    ),
    CephIssue(
        issue="Mon Quorum Lost",
        symptom="HEALTH_ERR: mon quorum lost",
        diagnose="ceph mon stat\nceph mon dump",
        fix="Restart Mon ที่ down\nถ้า Majority down: Recovery จาก Backup",
    ),
]

print("=== Troubleshooting ===")
for i in issues:
    # One write per issue; the leading "\n" separates entries with a blank
    # line.
    print(
        f"\n  [{i.issue}] {i.symptom}\n"
        f"    Diagnose: {i.diagnose}\n"
        f"    Fix: {i.fix}"
    )

เคล็ดลับ

  • Rook: ใช้ Rook Operator บน Kubernetes ง่ายที่สุด
  • SSD: ใช้ SSD สำหรับ WAL+DB แยกจาก HDD Data เร็วขึ้นมาก
  • Network: แยก Public Network และ Cluster Network
  • 3 Node: ขั้นต่ำ 3 Node สำหรับ HA อย่าใช้ 1-2 Node ใน Production
  • Monitor: เปิด Prometheus + Grafana Dashboard ติดตามตลอด

Ceph คืออะไร

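Ceph คือ Open Source Software-defined Storage แบบ Distributed ที่รวม Block (RBD), File (CephFS) และ Object (RGW ผ่าน S3 API) ไว้ในระบบเดียว ใช้ CRUSH กระจายข้อมูล มี Self-healing ขยายแบบ Scale-out ได้ ใช้งานบน Kubernetes ผ่าน Rook และมีผู้ใช้และผู้สนับสนุนรายใหญ่ เช่น CERN และ Red Hat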

อ่านเพิ่ม: Kubernetes Storage คืออะไร? สอน PV, PVC, StorageClass และ CS · อ่านเพิ่ม: Let's Encrypt SSL ฟรี ติดตั้ง HTTPS บน Server Linux · อ่านเพิ่ม: WireGuard vs OpenVPN 2026 เลือก VPN Protocol อะไรดี