Unity Catalog Deployment
Databricks Unity Catalog Blue-Green Canary Deploy Delta Lake Time Travel Rollback Data Quality ML Model Pipeline Production
| Strategy | Risk | Downtime | Rollback Speed | Use Case |
|---|---|---|---|---|
| Blue-Green | ต่ำ | Zero | ทันที (Switch) | Major Pipeline Change, Schema Change |
| Canary | ต่ำมาก | Zero | เร็ว (Route back) | ML Model, New Transform Logic |
| Rolling | ปานกลาง | Zero | ปานกลาง | Minor Config Change, Small Fix |
| Recreate | สูง | มี Downtime | ช้า (Rebuild) | Major Schema Migration |
Blue-Green Setup
# === Blue-Green Deployment with Unity Catalog ===
# Step 1: Create Blue and Green Catalogs
# CREATE CATALOG IF NOT EXISTS production_blue;
# CREATE CATALOG IF NOT EXISTS production_green;
# CREATE SCHEMA IF NOT EXISTS production_blue.sales;
# CREATE SCHEMA IF NOT EXISTS production_green.sales;
# Step 2: Deploy new pipeline to Green
# -- New pipeline writes to Green
# spark.sql("""
# INSERT INTO production_green.sales.orders
# SELECT * FROM raw.sales.orders
# WHERE date >= '2025-01-01'
# """)
# Step 3: Validate Green
# -- Compare row counts
# blue_count = spark.sql("SELECT count(*) FROM production_blue.sales.orders").first()[0]
# green_count = spark.sql("SELECT count(*) FROM production_green.sales.orders").first()[0]
# assert abs(blue_count - green_count) / blue_count < 0.01 # < 1% diff
# Step 4: Switch (Create View pointing to Green)
# CREATE OR REPLACE VIEW production.sales.orders AS
# SELECT * FROM production_green.sales.orders;
# Step 5: Rollback (if needed)
# CREATE OR REPLACE VIEW production.sales.orders AS
# SELECT * FROM production_blue.sales.orders;
from dataclasses import dataclass


@dataclass
class DeployStep:
    """One step of the blue-green deployment runbook."""

    # Ordinal position of the step in the runbook.
    step: int
    # Short human-readable action name.
    action: str
    # SQL / command sketch executed in this step.
    sql: str
    # Success criteria that must hold before moving on.
    validation: str


# (step, action, sql, validation) rows for the runbook, kept as plain
# tuples so the table reads compactly; hydrated into DeployStep below.
_STEP_ROWS = [
    (1, "Prepare Green Environment",
     "CREATE CATALOG production_green + CREATE SCHEMA",
     "Catalog exists Schema exists Empty tables"),
    (2, "Deploy Pipeline to Green",
     "INSERT INTO production_green.sales.orders ...",
     "Pipeline completes No errors Row count > 0"),
    (3, "Data Quality Validation",
     "Compare Blue vs Green: row count schema null% range",
     "Row diff < 1% Schema match Null% < threshold"),
    (4, "Switch Traffic to Green",
     "CREATE OR REPLACE VIEW production.sales.orders AS SELECT * FROM green",
     "View updated Consumers read from Green"),
    (5, "Monitor Post-switch",
     "Check Dashboard Alerts Consumer feedback",
     "No errors No data quality issues 30 min monitor"),
    (6, "Cleanup or Rollback",
     "DROP old Blue OR Rollback: switch View back to Blue",
     "Rollback < 1 min if needed"),
]

steps = [DeployStep(*row) for row in _STEP_ROWS]

# Print the runbook in execution order.
print("=== Blue-Green Deploy Steps ===")
for entry in steps:
    print(f" Step {entry.step}: {entry.action}")
    print(f" SQL: {entry.sql}")
    print(f" Validate: {entry.validation}")
Canary Deploy
# === Canary Deployment for ML Model ===
# import mlflow
# from databricks.sdk import WorkspaceClient
#
# # Deploy canary model version
# client = mlflow.tracking.MlflowClient()
# client.set_registered_model_alias(
# name="sales_forecast",
# alias="canary",
# version=12 # new version
# )
#
# # Serving endpoint with traffic split
# # 95% → champion (v11), 5% → canary (v12)
# endpoint_config = {
# "served_entities": [
# {"entity_name": "sales_forecast",
# "entity_version": "11",
# "scale_to_zero_enabled": True,
# "traffic_percentage": 95},
# {"entity_name": "sales_forecast",
# "entity_version": "12",
# "scale_to_zero_enabled": True,
# "traffic_percentage": 5},
# ]
# }
@dataclass
class CanaryStage:
    """One stage in the progressive canary rollout of an ML model."""

    # Stage label (e.g. initial canary, half traffic, full rollout).
    stage: str
    # Traffic split between canary and champion, as display text.
    traffic_pct: str
    # How long to hold the stage before expanding.
    duration: str
    # Metrics that must pass before promoting to the next stage.
    metrics_check: str
    # Rollback action taken when the metrics check fails.
    action_if_fail: str


# (stage, traffic, duration, metrics, fail-action) rows hydrated below.
_CANARY_ROWS = [
    ("Initial Canary",
     "5% canary / 95% champion",
     "1 hour",
     "Accuracy >= champion Latency P99 < 200ms Error Rate < 0.1%",
     "Rollback to 0% canary"),
    ("Expand Canary",
     "25% canary / 75% champion",
     "4 hours",
     "Same metrics + Business KPI unchanged",
     "Rollback to 5% or 0%"),
    ("Half Traffic",
     "50% canary / 50% champion",
     "12 hours",
     "Statistical significance test A/B",
     "Rollback to 25% or 0%"),
    ("Full Rollout",
     "100% canary (new champion)",
     "Permanent",
     "Continuous monitoring",
     "RESTORE previous version Time Travel"),
]

canary_stages = [CanaryStage(*row) for row in _CANARY_ROWS]

# Print the rollout plan stage by stage.
print("=== Canary Stages ===")
for st in canary_stages:
    print(f" [{st.stage}] Traffic: {st.traffic_pct}")
    print(f" Duration: {st.duration}")
    print(f" Check: {st.metrics_check}")
    print(f" Fail: {st.action_if_fail}")
Rollback & Monitoring
# === Rollback Strategies ===
# Delta Lake Time Travel
# RESTORE TABLE production.sales.orders VERSION AS OF 10;
# RESTORE TABLE production.sales.orders TIMESTAMP AS OF '2025-01-15T10:00:00';
# SELECT * FROM production.sales.orders VERSION AS OF 10; -- read old version
@dataclass
class RollbackMethod:
    """One rollback strategy with its command, speed, and trade-offs."""

    # Name of the rollback technique.
    method: str
    # Command or API call sketch for executing the rollback.
    command: str
    # Expected rollback speed (display text, Thai/English).
    speed: str
    # Whether data loss is possible (display text, Thai/English).
    data_loss: str
    # Situation where this method is the right choice.
    use_case: str


# (method, command, speed, data_loss, use_case) rows hydrated below.
_ROLLBACK_ROWS = [
    ("Delta Time Travel",
     "RESTORE TABLE ... VERSION AS OF N",
     "เร็วมาก (seconds)",
     "ไม่มี (restore full state)",
     "Table Data ผิดพลาด Wrong INSERT UPDATE"),
    ("View Switch",
     "ALTER VIEW ... AS SELECT FROM old_catalog",
     "ทันที (instant)",
     "ไม่มี",
     "Blue-Green Switch กลับ"),
    ("Model Version Rollback",
     "set_registered_model_alias('prod', 'champion', old_version)",
     "เร็ว (< 1 min)",
     "ไม่มี",
     "ML Model Canary ไม่ผ่าน"),
    ("Pipeline Re-run",
     "Databricks Workflows trigger old pipeline version",
     "ช้า (depends on pipeline)",
     "อาจมี ถ้า Overwrite",
     "Pipeline Logic ผิด ต้องรันใหม่"),
    ("Git Revert + Redeploy",
     "git revert + CI/CD redeploy",
     "ปานกลาง (5-15 min)",
     "ไม่มี (code level)",
     "Code Change ผิด"),
]

rollbacks = [RollbackMethod(*row) for row in _ROLLBACK_ROWS]

# Print the catalogue of rollback options.
print("=== Rollback Methods ===")
for rb in rollbacks:
    print(f" [{rb.method}] Speed: {rb.speed}")
    print(f" Command: {rb.command}")
    print(f" Data Loss: {rb.data_loss}")
    print(f" Use: {rb.use_case}")
เคล็ดลับ
- Time Travel: ใช้ Delta Lake Time Travel เป็น Safety Net เสมอ
- View: ใช้ View เป็น Abstraction Layer Switch ได้ทันที
- Validate: ตรวจ Data Quality ก่อน Switch ทุกครั้ง
- Canary: เริ่ม 5% ค่อยๆเพิ่ม อย่ารีบ 100%
- Monitor: เฝ้าดู 30 นาทีหลัง Switch ก่อน Cleanup
การบริหารจัดการฐานข้อมูลอย่างมืออาชีพ
Database Management ที่ดีเริ่มจากการออกแบบ Schema ที่เหมาะสม ใช้ Normalization ลด Data Redundancy สร้าง Index บน Column ที่ Query บ่อย วิเคราะห์ Query Plan เพื่อ Optimize Performance และทำ Regular Maintenance เช่น VACUUM สำหรับ PostgreSQL หรือ OPTIMIZE TABLE สำหรับ MySQL
เรื่อง High Availability ควรติดตั้ง Replication อย่างน้อย 1 Replica สำหรับ Read Scaling และ Disaster Recovery ใช้ Connection Pooling เช่น PgBouncer หรือ ProxySQL ลดภาระ Connection ที่เปิดพร้อมกัน และตั้ง Automated Failover ให้ระบบสลับไป Replica อัตโนมัติเมื่อ Primary ล่ม
Backup ต้องทำทั้ง Full Backup รายวัน และ Incremental Backup ทุก 1-4 ชั่วโมง เก็บ Binary Log หรือ WAL สำหรับ Point-in-Time Recovery ทดสอบ Restore เป็นประจำ และเก็บ Backup ไว้ Off-site ด้วยเสมอ
Unity Catalog คืออะไร
Databricks Governance 3-level Namespace Access Control Lineage Audit Discovery Delta Sharing Catalog Schema Table GRANT REVOKE
Blue-Green Deploy ทำอย่างไร
2 Catalogs Blue Green Deploy Green Validate Switch View Rollback Instant Zero Downtime Data Quality Row Count Schema Compare
Canary Deploy ทำอย่างไร
5% → 25% → 50% → 100% Traffic Split ML Model Serving Endpoint Metrics Accuracy Latency Error Rate A/B Test Rollback
Rollback ทำอย่างไร
Delta Time Travel RESTORE VERSION View Switch Model Version Pipeline Re-run Git Revert Instant Rollback Data Quality Monitor
สรุป
Databricks Unity Catalog Blue-Green Canary Deploy Delta Lake Time Travel Rollback View Switch Model Version Data Quality Production
