Unity Catalog Deployment
Databricks Unity Catalog Blue-Green Canary Deploy Delta Lake Time Travel Rollback Data Quality ML Model Pipeline Production
| Strategy | Risk | Downtime | Rollback Speed | Use Case |
|---|---|---|---|---|
| Blue-Green | ต่ำ | Zero | ทันที (Switch) | Major Pipeline Change, Schema Change |
| Canary | ต่ำมาก | Zero | เร็ว (Route back) | ML Model, New Transform Logic |
| Rolling | ปานกลาง | Zero | ปานกลาง | Minor Config Change, Small Fix |
| Recreate | สูง | มี Downtime | ช้า (Rebuild) | Major Schema Migration |
Blue-Green Setup
# === Blue-Green Deployment with Unity Catalog ===
# Step 1: Create Blue and Green Catalogs
# CREATE CATALOG IF NOT EXISTS production_blue;
# CREATE CATALOG IF NOT EXISTS production_green;
# CREATE SCHEMA IF NOT EXISTS production_blue.sales;
# CREATE SCHEMA IF NOT EXISTS production_green.sales;
# Step 2: Deploy new pipeline to Green
# -- New pipeline writes to Green
# spark.sql("""
# INSERT INTO production_green.sales.orders
# SELECT * FROM raw.sales.orders
# WHERE date >= '2025-01-01'
# """)
# Step 3: Validate Green
# -- Compare row counts
# blue_count = spark.sql("SELECT count(*) FROM production_blue.sales.orders").first()[0]
# green_count = spark.sql("SELECT count(*) FROM production_green.sales.orders").first()[0]
# assert abs(blue_count - green_count) / blue_count < 0.01 # < 1% diff
# Step 4: Switch (Create View pointing to Green)
# CREATE OR REPLACE VIEW production.sales.orders AS
# SELECT * FROM production_green.sales.orders;
# Step 5: Rollback (if needed)
# CREATE OR REPLACE VIEW production.sales.orders AS
# SELECT * FROM production_blue.sales.orders;
from dataclasses import dataclass


@dataclass
class DeployStep:
    """One step of the blue-green deployment runbook."""

    # Ordinal position of the step in the runbook.
    step: int
    # Short human-readable action name.
    action: str
    # SQL / command sketch executed in this step.
    sql: str
    # Success criteria that must hold before moving on.
    validation: str


# (step, action, sql, validation) rows for the runbook, kept as plain
# tuples so the table reads compactly; hydrated into DeployStep below.
_STEP_ROWS = [
    (1, "Prepare Green Environment",
     "CREATE CATALOG production_green + CREATE SCHEMA",
     "Catalog exists Schema exists Empty tables"),
    (2, "Deploy Pipeline to Green",
     "INSERT INTO production_green.sales.orders ...",
     "Pipeline completes No errors Row count > 0"),
    (3, "Data Quality Validation",
     "Compare Blue vs Green: row count schema null% range",
     "Row diff < 1% Schema match Null% < threshold"),
    (4, "Switch Traffic to Green",
     "CREATE OR REPLACE VIEW production.sales.orders AS SELECT * FROM green",
     "View updated Consumers read from Green"),
    (5, "Monitor Post-switch",
     "Check Dashboard Alerts Consumer feedback",
     "No errors No data quality issues 30 min monitor"),
    (6, "Cleanup or Rollback",
     "DROP old Blue OR Rollback: switch View back to Blue",
     "Rollback < 1 min if needed"),
]

steps = [DeployStep(*row) for row in _STEP_ROWS]

# Print the runbook in execution order.
print("=== Blue-Green Deploy Steps ===")
for entry in steps:
    print(f" Step {entry.step}: {entry.action}")
    print(f" SQL: {entry.sql}")
    print(f" Validate: {entry.validation}")
Canary Deploy
# === Canary Deployment for ML Model ===
# import mlflow
# from databricks.sdk import WorkspaceClient
#
# # Deploy canary model version
# client = mlflow.tracking.MlflowClient()
# client.set_registered_model_alias(
# name="sales_forecast",
# alias="canary",
# version=12 # new version
# )
#
# # Serving endpoint with traffic split
# # 95% → champion (v11), 5% → canary (v12)
# endpoint_config = {
# "served_entities": [
# {"entity_name": "sales_forecast",
# "entity_version": "11",
# "scale_to_zero_enabled": True,
# "traffic_percentage": 95},
# {"entity_name": "sales_forecast",
# "entity_version": "12",
# "scale_to_zero_enabled": True,
# "traffic_percentage": 5},
# ]
# }
@dataclass
class CanaryStage:
    """One stage in the progressive canary rollout of an ML model."""

    # Stage label (e.g. initial canary, half traffic, full rollout).
    stage: str
    # Traffic split between canary and champion, as display text.
    traffic_pct: str
    # How long to hold the stage before expanding.
    duration: str
    # Metrics that must pass before promoting to the next stage.
    metrics_check: str
    # Rollback action taken when the metrics check fails.
    action_if_fail: str


# (stage, traffic, duration, metrics, fail-action) rows hydrated below.
_CANARY_ROWS = [
    ("Initial Canary",
     "5% canary / 95% champion",
     "1 hour",
     "Accuracy >= champion Latency P99 < 200ms Error Rate < 0.1%",
     "Rollback to 0% canary"),
    ("Expand Canary",
     "25% canary / 75% champion",
     "4 hours",
     "Same metrics + Business KPI unchanged",
     "Rollback to 5% or 0%"),
    ("Half Traffic",
     "50% canary / 50% champion",
     "12 hours",
     "Statistical significance test A/B",
     "Rollback to 25% or 0%"),
    ("Full Rollout",
     "100% canary (new champion)",
     "Permanent",
     "Continuous monitoring",
     "RESTORE previous version Time Travel"),
]

canary_stages = [CanaryStage(*row) for row in _CANARY_ROWS]

# Print the rollout plan stage by stage.
print("=== Canary Stages ===")
for st in canary_stages:
    print(f" [{st.stage}] Traffic: {st.traffic_pct}")
    print(f" Duration: {st.duration}")
    print(f" Check: {st.metrics_check}")
    print(f" Fail: {st.action_if_fail}")
Rollback & Monitoring
# === Rollback Strategies ===
# Delta Lake Time Travel
# RESTORE TABLE production.sales.orders VERSION AS OF 10;
# RESTORE TABLE production.sales.orders TIMESTAMP AS OF '2025-01-15T10:00:00';
# SELECT * FROM production.sales.orders VERSION AS OF 10; -- read old version
@dataclass
class RollbackMethod:
    """One rollback strategy with its command, speed, and trade-offs."""

    # Name of the rollback technique.
    method: str
    # Command or API call sketch for executing the rollback.
    command: str
    # Expected rollback speed (display text, Thai/English).
    speed: str
    # Whether data loss is possible (display text, Thai/English).
    data_loss: str
    # Situation where this method is the right choice.
    use_case: str


# (method, command, speed, data_loss, use_case) rows hydrated below.
_ROLLBACK_ROWS = [
    ("Delta Time Travel",
     "RESTORE TABLE ... VERSION AS OF N",
     "เร็วมาก (seconds)",
     "ไม่มี (restore full state)",
     "Table Data ผิดพลาด Wrong INSERT UPDATE"),
    ("View Switch",
     "ALTER VIEW ... AS SELECT FROM old_catalog",
     "ทันที (instant)",
     "ไม่มี",
     "Blue-Green Switch กลับ"),
    ("Model Version Rollback",
     "set_registered_model_alias('prod', 'champion', old_version)",
     "เร็ว (< 1 min)",
     "ไม่มี",
     "ML Model Canary ไม่ผ่าน"),
    ("Pipeline Re-run",
     "Databricks Workflows trigger old pipeline version",
     "ช้า (depends on pipeline)",
     "อาจมี ถ้า Overwrite",
     "Pipeline Logic ผิด ต้องรันใหม่"),
    ("Git Revert + Redeploy",
     "git revert + CI/CD redeploy",
     "ปานกลาง (5-15 min)",
     "ไม่มี (code level)",
     "Code Change ผิด"),
]

rollbacks = [RollbackMethod(*row) for row in _ROLLBACK_ROWS]

# Print the catalogue of rollback options.
print("=== Rollback Methods ===")
for rb in rollbacks:
    print(f" [{rb.method}] Speed: {rb.speed}")
    print(f" Command: {rb.command}")
    print(f" Data Loss: {rb.data_loss}")
    print(f" Use: {rb.use_case}")
เคล็ดลับ
- Time Travel: ใช้ Delta Lake Time Travel เป็น Safety Net เสมอ
- View: ใช้ View เป็น Abstraction Layer Switch ได้ทันที
- Validate: ตรวจ Data Quality ก่อน Switch ทุกครั้ง
- Canary: เริ่ม 5% ค่อยๆเพิ่ม อย่ารีบ 100%
- Monitor: เฝ้าดู 30 นาทีหลัง Switch ก่อน Cleanup
การบริหารจัดการฐานข้อมูลอย่างมืออาชีพ
Database Management ที่ดีเริ่มจากการออกแบบ Schema ที่เหมาะสม ใช้ Normalization ลด Data Redundancy สร้าง Index บน Column ที่ Query บ่อย วิเคราะห์ Query Plan เพื่อ Optimize Performance และทำ Regular Maintenance เช่น VACUUM สำหรับ PostgreSQL หรือ OPTIMIZE TABLE สำหรับ MySQL
เรื่อง High Availability ควรติดตั้ง Replication อย่างน้อย 1 Replica สำหรับ Read Scaling และ Disaster Recovery ใช้ Connection Pooling เช่น PgBouncer หรือ ProxySQL ลดภาระ Connection ที่เปิดพร้อมกัน และตั้ง Automated Failover ให้ระบบสลับไป Replica อัตโนมัติเมื่อ Primary ล่ม
Backup ต้องทำทั้ง Full Backup รายวัน และ Incremental Backup ทุก 1-4 ชั่วโมง เก็บ Binary Log หรือ WAL สำหรับ Point-in-Time Recovery ทดสอบ Restore เป็นประจำ และเก็บ Backup ไว้ Off-site ด้วยเสมอ
Unity Catalog คืออะไร
Databricks Governance 3-level Namespace Access Control Lineage Audit Discovery Delta Sharing Catalog Schema Table GRANT REVOKE
Blue-Green Deploy ทำอย่างไร
2 Catalogs Blue Green Deploy Green Validate Switch View Rollback Instant Zero Downtime Data Quality Row Count Schema Compare
Canary Deploy ทำอย่างไร
5% → 25% → 50% → 100% Traffic Split ML Model Serving Endpoint Metrics Accuracy Latency Error Rate A/B Test Rollback
Rollback ทำอย่างไร
Delta Time Travel RESTORE VERSION View Switch Model Version Pipeline Re-run Git Revert Instant Rollback Data Quality Monitor
สรุป
Databricks Unity Catalog Blue-Green Canary Deploy Delta Lake Time Travel Rollback View Switch Model Version Data Quality Production
