Ceph Storage Cluster Progressive Delivery คืออะไร
Ceph เป็น open source distributed storage system ที่รองรับ Object Storage (S3-compatible), Block Storage (RBD) และ File System (CephFS) ในระบบเดียว ส่วน Progressive Delivery คือแนวทางการ deploy changes แบบค่อยๆ เปิดให้ผู้ใช้ทีละกลุ่ม เพื่อลดความเสี่ยงจาก bad deployments — ครอบคลุมเทคนิคอย่าง canary releases, blue-green deployments และ feature flags การนำสองแนวคิดนี้มารวมกันช่วยให้ upgrade Ceph clusters ได้อย่างปลอดภัย ลด downtime และป้องกัน data loss จาก faulty updates
Ceph Architecture Overview
# ceph_arch.py — Ceph architecture overview
import json
class CephArchitecture:
    """Overview of the Ceph daemon types and their upgrade-risk profiles.

    ``COMPONENTS`` maps a short daemon key (mon/osd/mgr/mds/rgw) to
    display metadata: human-readable name, role description, typical
    deployment count, and how risky it is to upgrade that daemon type.
    """

    COMPONENTS = {
        "mon": {
            "name": "Ceph Monitor (MON)",
            "description": "รักษา cluster map — track สถานะ OSD, PG, CRUSH map",
            "count": "อย่างน้อย 3 (odd number สำหรับ quorum)",
            "upgrade_risk": "สูง — ถ้า majority down = cluster unavailable",
        },
        "osd": {
            "name": "Ceph OSD (Object Storage Daemon)",
            "description": "เก็บข้อมูลจริง — 1 OSD ต่อ 1 disk/SSD",
            "count": "ขึ้นกับ capacity — ทั่วไป 10-1000+",
            "upgrade_risk": "ปานกลาง — upgrade ทีละ OSD, data replicated",
        },
        "mgr": {
            "name": "Ceph Manager (MGR)",
            "description": "Dashboard, monitoring, orchestration",
            "count": "2 (active-standby)",
            "upgrade_risk": "ต่ำ — standby takeover อัตโนมัติ",
        },
        "mds": {
            "name": "Metadata Server (MDS)",
            "description": "จัดการ metadata สำหรับ CephFS",
            "count": "1-2 active + standby",
            "upgrade_risk": "ปานกลาง — CephFS unavailable ชั่วคราว",
        },
        "rgw": {
            "name": "RADOS Gateway (RGW)",
            "description": "S3/Swift compatible object storage API",
            "count": "2+ behind load balancer",
            "upgrade_risk": "ต่ำ — load balancer route around upgrading instance",
        },
    }

    def show_components(self):
        """Print name, description and upgrade risk for every component."""
        print("=== Ceph Components ===\n")
        for details in self.COMPONENTS.values():
            # One summary paragraph per component, trailing "" yields the
            # blank separator line the original emitted with print().
            summary = (
                f"[{details['name']}]",
                f" {details['description']}",
                f" Upgrade risk: {details['upgrade_risk']}",
                "",
            )
            print("\n".join(summary))
# Module demo: render the component overview when this script runs.
arch = CephArchitecture()
arch.show_components()
Progressive Delivery Strategies
# progressive.py — Progressive delivery for Ceph
import json
class ProgressiveDelivery:
    """Catalog of progressive-delivery strategies for Ceph upgrades.

    ``STRATEGIES`` maps a strategy key to display metadata: name,
    description, ordered step list, and an overall risk assessment.
    """

    STRATEGIES = {
        "rolling_upgrade": {
            "name": "1. Rolling Upgrade (ทีละ daemon)",
            "description": "Upgrade Ceph daemons ทีละตัว — MON → MGR → OSD → MDS → RGW",
            "steps": [
                "1. Set noout flag (ป้องกัน rebalance ขณะ upgrade)",
                "2. Upgrade MON ทีละตัว (รอ quorum กลับก่อนทำตัวถัดไป)",
                "3. Upgrade MGR (active → standby → switch)",
                "4. Upgrade OSD ทีละกลุ่ม (ทีละ rack/host)",
                "5. Upgrade MDS + RGW",
                "6. Unset noout flag",
            ],
            "risk": "ต่ำ — ทีละตัว, rollback ได้ทันที",
        },
        "canary_osd": {
            "name": "2. Canary OSD Upgrade",
            "description": "Upgrade OSD กลุ่มเล็กก่อน (1-2 hosts) → monitor → ค่อยขยาย",
            "steps": [
                "1. เลือก canary hosts (1-2 hosts ที่มี non-critical data)",
                "2. Upgrade OSDs บน canary hosts",
                "3. Monitor 24-48 ชั่วโมง: I/O latency, error rate, recovery",
                "4. ถ้า OK → upgrade batch ถัดไป (25% → 50% → 100%)",
                "5. ถ้ามีปัญหา → rollback canary hosts",
            ],
            "risk": "ต่ำมาก — impact แค่ canary hosts",
        },
        "blue_green_rgw": {
            "name": "3. Blue-Green RGW Deployment",
            "description": "Deploy RGW version ใหม่คู่กับเก่า → switch traffic ที่ load balancer",
            "steps": [
                "1. Deploy new RGW instances (green) คู่กับ existing (blue)",
                "2. Route 10% traffic ไป green",
                "3. Monitor: S3 API latency, error rate, compatibility",
                "4. Gradually increase: 10% → 25% → 50% → 100%",
                "5. Decommission blue instances",
            ],
            "risk": "ต่ำ — rollback = switch traffic กลับ blue",
        },
    }

    def show_strategies(self):
        """Print each strategy: name, description, risk, and a preview of
        its first four steps, separated by blank lines."""
        print("=== Progressive Delivery Strategies ===\n")
        for plan in self.STRATEGIES.values():
            header = [
                f"[{plan['name']}]",
                f" {plan['description']}",
                f" Risk: {plan['risk']}",
            ]
            # Only the first four steps are shown to keep output compact.
            preview = [f" {step}" for step in plan["steps"][:4]]
            print("\n".join(header + preview))
            print()
# Module demo: list the delivery strategies when this script runs.
pd = ProgressiveDelivery()
pd.show_strategies()
Automation Scripts
# automation.py — Ceph upgrade automation
import json
import random
class CephUpgradeAutomation:
    """Demo wrapper around a reference script for automated progressive
    Ceph upgrades.

    ``CODE`` holds the script as display-only text (it is printed, never
    executed here); ``upgrade_dashboard`` renders a static mock of
    batch-by-batch upgrade progress.
    """

    # NOTE(review): the snippet below is string data, not live code. Two
    # things to confirm before lifting it into production:
    #  - CephUpgrader.upgrade_osd_host calls self._get_osds_on_host,
    #    which is never defined anywhere in the snippet;
    #  - progressive_upgrade slices hosts[:n] each batch, so earlier
    #    batches are revisited as n grows — presumably the per-host
    #    upgrade is idempotent, but verify.
    CODE = """
# ceph_upgrade.py — Automated progressive Ceph upgrade
import subprocess
import time
import json
class CephUpgrader:
    def __init__(self, target_version):
        self.target_version = target_version
        self.cluster_status = None
    def get_health(self):
        result = subprocess.run(
            ['ceph', 'health', '--format=json'],
            capture_output=True, text=True
        )
        return json.loads(result.stdout)
    def wait_healthy(self, timeout=600):
        start = time.time()
        while time.time() - start < timeout:
            health = self.get_health()
            if health.get('status') == 'HEALTH_OK':
                return True
            print(f" Waiting for HEALTH_OK... ({health.get('status')})")
            time.sleep(30)
        return False
    def set_noout(self):
        subprocess.run(['ceph', 'osd', 'set', 'noout'])
        print("Set noout flag")
    def unset_noout(self):
        subprocess.run(['ceph', 'osd', 'unset', 'noout'])
        print("Unset noout flag")
    def upgrade_mon(self, host):
        print(f"Upgrading MON on {host}...")
        subprocess.run(['ssh', host, f'apt install -y ceph-mon={self.target_version}'])
        subprocess.run(['ssh', host, 'systemctl restart ceph-mon.target'])
        time.sleep(30)
        return self.wait_healthy(300)
    def upgrade_osd_host(self, host):
        print(f"Upgrading OSDs on {host}...")
        # Get OSDs on this host
        result = subprocess.run(
            ['ceph', 'osd', 'tree', '--format=json'],
            capture_output=True, text=True
        )
        tree = json.loads(result.stdout)
        # Upgrade package
        subprocess.run(['ssh', host, f'apt install -y ceph-osd={self.target_version}'])
        # Restart OSDs one by one
        for osd in self._get_osds_on_host(host, tree):
            subprocess.run(['ssh', host, f'systemctl restart ceph-osd@{osd}'])
            time.sleep(10)
        return self.wait_healthy(600)
    def progressive_upgrade(self, hosts, batch_pct=[10, 25, 50, 100]):
        self.set_noout()
        for pct in batch_pct:
            n = max(1, len(hosts) * pct // 100)
            batch = hosts[:n]
            print(f"\\n=== Batch: {pct}% ({n}/{len(hosts)} hosts) ===")
            for host in batch:
                success = self.upgrade_osd_host(host)
                if not success:
                    print(f"FAILED on {host}! Stopping.")
                    self.unset_noout()
                    return False
            print(f"Batch {pct}% complete. Monitoring...")
            time.sleep(3600) # Monitor 1 hour
        self.unset_noout()
        return True
upgrader = CephUpgrader("18.2.1")
# upgrader.progressive_upgrade(osd_hosts, [10, 25, 50, 100])
"""

    def show_code(self):
        """Print a header plus the first 600 characters of the embedded
        upgrade script (a preview, not the full text)."""
        print("=== Upgrade Automation ===")
        print(self.CODE[:600])

    def upgrade_dashboard(self):
        """Print a static, illustrative upgrade-progress table.

        The phase data is hard-coded sample output, not live cluster
        state.
        """
        print(f"\n=== Upgrade Progress Dashboard ===")
        phases = [
            {"phase": "MON upgrade", "status": "Complete", "hosts": "3/3"},
            {"phase": "MGR upgrade", "status": "Complete", "hosts": "2/2"},
            {"phase": "OSD canary (10%)", "status": "Complete", "hosts": "3/30"},
            {"phase": "OSD batch 2 (25%)", "status": "In Progress", "hosts": "5/8"},
            {"phase": "OSD batch 3 (50%)", "status": "Pending", "hosts": "0/15"},
            {"phase": "OSD batch 4 (100%)", "status": "Pending", "hosts": "0/30"},
        ]
        for p in phases:
            print(f" [{p['status']:>12}] {p['phase']:<25} Hosts: {p['hosts']}")
# Module demo: show the script preview and the mock progress dashboard.
auto = CephUpgradeAutomation()
auto.show_code()
auto.upgrade_dashboard()
Health Monitoring
# monitoring.py — Ceph health monitoring during upgrade
import json
import random
class CephMonitoring:
    """Health checks, alert thresholds, and a simulated live dashboard
    for monitoring a Ceph cluster during an upgrade."""

    # Check name -> CLI command cheat-sheet entry.
    HEALTH_CHECKS = {
        "cluster_health": "ceph health detail — ตรวจ HEALTH_OK/WARN/ERR",
        "osd_status": "ceph osd tree — ดู OSD up/down status",
        "pg_status": "ceph pg stat — ดู PG states (active+clean = ดี)",
        "io_stats": "ceph osd pool stats — ดู read/write IOPS per pool",
        "recovery": "ceph -s — ดู recovery/rebalance progress",
        "slow_ops": "ceph daemon osd.X perf dump — ดู slow operations",
    }

    # Metric -> threshold that should pause/stop the upgrade, and the action.
    ALERT_THRESHOLDS = {
        "osd_latency_p99": {"threshold": "50ms", "action": "Pause upgrade, investigate"},
        "pg_not_clean": {"threshold": "> 0 for 30min", "action": "Wait for PGs to clean"},
        "recovery_rate": {"threshold": "< 100MB/s", "action": "Check network, disk I/O"},
        "osd_down": {"threshold": "> 0 unexpected", "action": "Investigate, rollback if needed"},
        "cluster_health": {"threshold": "HEALTH_ERR", "action": "Stop upgrade immediately"},
    }

    def show_checks(self):
        """List every health check with the command that performs it."""
        print("=== Health Checks ===\n")
        for name in self.HEALTH_CHECKS:
            print(f" [{name}] {self.HEALTH_CHECKS[name]}")

    def show_alerts(self):
        """List alert thresholds and the action each one triggers."""
        print(f"\n=== Alert Thresholds ===")
        for metric, rule in self.ALERT_THRESHOLDS.items():
            print(f" [{metric}] Threshold: {rule['threshold']} → {rule['action']}")

    def live_dashboard(self):
        """Print one snapshot of simulated cluster metrics.

        The random draws happen in a fixed order, so output is
        reproducible when ``random`` is seeded.
        """
        print(f"\n=== Live Cluster Status ===")
        rows = [
            f" Health: HEALTH_OK",
            f" OSDs: {random.randint(28, 30)}/30 up",
            f" PGs: {random.randint(250, 256)} active+clean / 256 total",
            f" Read IOPS: {random.randint(1000, 5000):,}",
            f" Write IOPS: {random.randint(500, 3000):,}",
            f" Latency P99: {random.uniform(5, 30):.1f}ms",
            f" Recovery: {random.uniform(0, 500):.0f} MB/s",
            f" Capacity: {random.uniform(40, 80):.1f}% used",
        ]
        for row in rows:
            print(row)
# Module demo: run every monitoring view once.
mon = CephMonitoring()
mon.show_checks()
mon.show_alerts()
mon.live_dashboard()
Rollback Procedures
# rollback.py — Ceph rollback procedures
import json
class RollbackProcedures:
    """Rollback runbooks for a failed Ceph upgrade, plus a quick
    symptom-to-action decision matrix."""

    ROLLBACK_STEPS = {
        "osd_rollback": {
            "name": "OSD Rollback",
            "steps": [
                "1. ceph osd set noout (ป้องกัน rebalance)",
                "2. ssh systemctl stop ceph-osd.target",
                "3. ssh apt install ceph-osd=",
                "4. ssh systemctl start ceph-osd.target",
                "5. ceph -s (verify HEALTH_OK)",
                "6. Repeat สำหรับ hosts ที่ upgrade แล้ว",
                "7. ceph osd unset noout",
            ],
            "time": "5-15 นาทีต่อ host",
        },
        "mon_rollback": {
            "name": "MON Rollback",
            "steps": [
                "1. Stop upgraded MON: systemctl stop ceph-mon@",
                "2. Downgrade package: apt install ceph-mon=",
                "3. Start MON: systemctl start ceph-mon@",
                "4. Verify quorum: ceph mon stat",
                "5. Repeat ทีละตัว — รักษา quorum ตลอด",
            ],
            "time": "2-5 นาทีต่อ MON",
        },
        "full_rollback": {
            "name": "Full Cluster Rollback",
            "steps": [
                "1. ceph osd set noout && ceph osd set norebalance",
                "2. Rollback RGW → MDS → OSD → MGR → MON (reverse order)",
                "3. Verify cluster health after each component",
                "4. Unset flags: ceph osd unset noout && ceph osd unset norebalance",
                "5. Monitor 24 hours",
            ],
            "time": "30 นาที - 2 ชั่วโมง (ขึ้นกับ cluster size)",
        },
    }

    def show_rollback(self):
        """Print each procedure's name, duration estimate, and the first
        four of its steps."""
        print("=== Rollback Procedures ===\n")
        for runbook in self.ROLLBACK_STEPS.values():
            print(f"[{runbook['name']}] Time: {runbook['time']}")
            # Preview only; the full runbooks live in ROLLBACK_STEPS.
            for item in runbook["steps"][:4]:
                print(f" {item}")
            print()

    def decision_matrix(self):
        """Print which rollback action each failure symptom calls for."""
        print("=== Rollback Decision Matrix ===")
        decisions = [
            {"condition": "1 OSD failed to start", "action": "Rollback that OSD only"},
            {"condition": "Multiple OSDs slow", "action": "Pause upgrade, investigate, rollback batch"},
            {"condition": "MON quorum lost", "action": "Emergency: rollback MON immediately"},
            {"condition": "Data corruption detected", "action": "Full cluster rollback + data verification"},
            {"condition": "Performance degraded < 20%", "action": "Monitor, may be temporary"},
        ]
        for entry in decisions:
            print(f" [{entry['condition']}] → {entry['action']}")
# Module demo: print the rollback runbooks and the decision matrix.
rb = RollbackProcedures()
rb.show_rollback()
rb.decision_matrix()
FAQ - คำถามที่พบบ่อย
Q: Upgrade Ceph ต้อง downtime ไหม?
A: ไม่จำเป็น — ใช้ rolling upgrade ไม่มี downtime (ถ้าทำถูกต้อง) Key: upgrade ทีละ daemon, รักษา quorum (MON), data replication (OSD) set noout flag ป้องกัน unnecessary rebalance ขณะ upgrade I/O อาจช้าลงเล็กน้อยระหว่าง upgrade — แต่ไม่ down
Q: Progressive delivery จำเป็นไหมสำหรับ Ceph?
A: จำเป็นมาก เพราะ: Ceph เก็บข้อมูลสำคัญ — ถ้า upgrade ผิดพลาดอาจ data loss Bad upgrade อาจ impact ทั้ง cluster — ถ้า upgrade ทีเดียว ไม่มีทางรู้ก่อน Progressive (canary → batch) ช่วย detect ปัญหาก่อน impact ทั้ง cluster Rollback ง่ายกว่า เมื่อ upgrade แค่บางส่วน
Q: Upgrade order สำคัญไหม?
A: สำคัญมาก ลำดับที่ถูกต้อง: 1) MON (cluster map) 2) MGR (dashboard/orchestration) 3) OSD (storage daemons) 4) MDS (CephFS metadata) 5) RGW (S3 gateway) ถ้า upgrade ผิดลำดับอาจเกิด compatibility issues ระหว่าง versions
Q: Monitor อะไรระหว่าง upgrade?
A: Critical: ceph health (ต้อง OK/WARN), PG states (ต้อง active+clean), OSD up count Performance: I/O latency, IOPS, throughput, recovery rate Alert: HEALTH_ERR, OSD down unexpected, PG stuck, slow ops ใช้ Prometheus + Grafana dashboard สำหรับ real-time monitoring
