Monte Carlo Observability RBAC ABAC Policy คืออะไร
Monte Carlo เป็น data observability platform ที่ตรวจจับ data quality issues อัตโนมัติ เช่น data freshness, volume anomalies, schema changes และ distribution shifts ช่วยให้ทีม data engineering รู้ปัญหาก่อนที่ downstream users จะได้รับผลกระทบ RBAC (Role-Based Access Control) คือการควบคุม access ตาม role ของผู้ใช้ ABAC (Attribute-Based Access Control) คือการควบคุม access ตาม attributes ต่างๆ เช่น department, data classification, time of day การรวม Monte Carlo กับ RBAC/ABAC policies ช่วยให้จัดการ data observability อย่างปลอดภัยและเป็นระบบ
Monte Carlo Platform
# monte_carlo.py — Monte Carlo data observability
import json
class MonteCarloObservability:
    """Static catalogue of data observability pillars and integrations.

    The class holds two lookup tables and two methods that print them to
    stdout. It keeps no per-instance state.
    """

    # Every pillar has "name" and "description". The monitoring pillars carry
    # an "alert" example; "lineage" carries a "benefit" entry instead.
    PILLARS = {
        "freshness": {
            "name": "Freshness",
            "description": "ข้อมูลอัพเดทล่าสุดเมื่อไหร่ — ตรวจจับ stale data",
            "alert": "Table ไม่ update ตามกำหนด → alert ทีม data engineering",
        },
        "volume": {
            "name": "Volume",
            "description": "จำนวน rows/records เปลี่ยนแปลงผิดปกติไหม",
            "alert": "Row count ลด 50% จาก baseline → possible data loss",
        },
        "schema": {
            "name": "Schema",
            "description": "โครงสร้าง table เปลี่ยนไหม — columns added/removed/type changed",
            "alert": "Column dropped หรือ type changed → breaking change",
        },
        "distribution": {
            "name": "Distribution",
            "description": "การกระจายตัวของ data เปลี่ยนไหม — null rates, unique values",
            "alert": "Null rate เพิ่มจาก 1% เป็น 30% → data quality issue",
        },
        "lineage": {
            "name": "Lineage",
            "description": "Data มาจากไหน ไปไหน — impact analysis เมื่อมีปัญหา",
            "benefit": "รู้ว่า table มีปัญหา → กระทบ downstream dashboards/models อะไรบ้าง",
        },
    }

    # Integration category -> comma-separated list of supported tools.
    INTEGRATIONS = {
        "warehouses": "Snowflake, BigQuery, Databricks, Redshift, PostgreSQL",
        "orchestrators": "Airflow, dbt, Fivetran, Prefect",
        "notification": "Slack, PagerDuty, email, Jira, OpsGenie",
        "bi_tools": "Tableau, Looker, Power BI — impact analysis",
    }

    def show_pillars(self) -> None:
        """Print each pillar's name, description and alert (or benefit) line."""
        print("=== Data Observability Pillars ===\n")
        for pillar in self.PILLARS.values():
            print(f"[{pillar['name']}]")
            print(f" {pillar['description']}")
            # "lineage" has no "alert" key, so indexing pillar['alert']
            # unconditionally raised KeyError on the last pillar.
            if "alert" in pillar:
                print(f" Alert: {pillar['alert']}")
            elif "benefit" in pillar:
                print(f" Benefit: {pillar['benefit']}")
            print()

    def show_integrations(self) -> None:
        """Print one line per integration category."""
        print("=== Integrations ===")
        for cat, tools in self.INTEGRATIONS.items():
            print(f" [{cat}] {tools}")
# Demo driver: build the catalogue object and print both sections to stdout.
mc = MonteCarloObservability()
mc.show_pillars()
mc.show_integrations()
RBAC & ABAC Fundamentals
# access_control.py — RBAC and ABAC fundamentals
import json
class AccessControl:
    """Reference summary of the RBAC and ABAC access-control models.

    Holds one descriptive table per model and prints selected parts of each
    to stdout. It keeps no per-instance state.
    """

    # RBAC summary: definition, role -> description, and pros/cons notes.
    RBAC = {
        "definition": "Role-Based Access Control — กำหนด permissions ตาม role",
        "roles": {
            "admin": "Full access — manage users, settings, integrations, all data",
            "data_engineer": "View all monitors, manage monitors, acknowledge incidents",
            "data_analyst": "View monitors and incidents สำหรับ tables ที่เกี่ยวข้อง",
            "viewer": "Read-only — ดู dashboards, incidents, lineage",
        },
        "pros": "Simple, easy to manage, widely understood",
        "cons": "Coarse-grained — ไม่สามารถ control ตาม data sensitivity หรือ context",
    }

    # ABAC summary: definition, attribute category -> examples, a sample
    # policy expression, and pros/cons notes.
    ABAC = {
        "definition": "Attribute-Based Access Control — กำหนด permissions ตาม attributes",
        "attributes": {
            "user": "department, role, team, location, clearance_level",
            "resource": "data_classification (public/internal/confidential/restricted), schema, table",
            "environment": "time_of_day, ip_address, device_type",
            "action": "view, edit, delete, export, share",
        },
        "example_policy": "IF user.department='finance' AND resource.classification='confidential' AND environment.network='corporate' THEN allow",
        "pros": "Fine-grained, context-aware, dynamic",
        "cons": "Complex to implement and manage, requires attribute infrastructure",
    }

    def show_rbac(self) -> None:
        """Print the RBAC definition followed by one line per role."""
        print("=== RBAC ===")
        print(f" Definition: {self.RBAC['definition']}")
        # Plain literal: no placeholders, so no f-string is needed.
        print("\n Roles:")
        for role, desc in self.RBAC["roles"].items():
            print(f" [{role}] {desc}")

    def show_abac(self) -> None:
        """Print the ABAC definition followed by one line per attribute category."""
        print("\n=== ABAC ===")
        print(f" Definition: {self.ABAC['definition']}")
        print("\n Attributes:")
        for attr_type, attrs in self.ABAC["attributes"].items():
            print(f" [{attr_type}] {attrs}")
# Demo driver: print the RBAC summary first, then the ABAC summary.
ac = AccessControl()
ac.show_rbac()
ac.show_abac()
Python Policy Engine
# policy_engine.py — Python RBAC/ABAC policy engine
import json
class PolicyEngine:
    """Holds a sample RBAC/ABAC policy engine as source text and prints a preview.

    ``CODE`` is a complete example module (``data_access_policy.py``). Its
    lines are kept at column 0 inside the literal so the text is valid Python
    on its own and can be copied out or compiled as-is.
    """

    # The sample's indentation is restored here: the previous text had every
    # line flush-left, so the displayed example was not valid Python.
    CODE = """
# data_access_policy.py — RBAC/ABAC policy engine for data observability
import json
from datetime import datetime
from dataclasses import dataclass, field
from typing import List, Optional, Dict
@dataclass
class User:
    id: str
    name: str
    roles: List[str]
    department: str
    team: str
    clearance: str = "internal" # public, internal, confidential, restricted
@dataclass
class DataResource:
    table: str
    schema: str
    database: str
    classification: str = "internal"
    owner_team: str = ""
    tags: List[str] = field(default_factory=list)
@dataclass
class AccessRequest:
    user: User
    resource: DataResource
    action: str # view, edit, manage, export
    context: Dict = field(default_factory=dict)
class RBACPolicy:
    '''Role-Based Access Control'''
    def __init__(self):
        self.permissions = {
            'admin': ['view', 'edit', 'manage', 'export', 'delete', 'admin'],
            'data_engineer': ['view', 'edit', 'manage', 'export'],
            'data_analyst': ['view', 'export'],
            'viewer': ['view'],
        }
    def check(self, request: AccessRequest) -> dict:
        for role in request.user.roles:
            allowed = self.permissions.get(role, [])
            if request.action in allowed:
                return {'allowed': True, 'reason': f'Role {role} permits {request.action}'}
        return {'allowed': False, 'reason': f'No role permits {request.action}'}
class ABACPolicy:
    '''Attribute-Based Access Control'''
    def __init__(self):
        self.rules = []
    def add_rule(self, name, condition_fn, priority=0):
        self.rules.append({
            'name': name,
            'condition': condition_fn,
            'priority': priority,
        })
        self.rules.sort(key=lambda r: -r['priority'])
    def check(self, request: AccessRequest) -> dict:
        for rule in self.rules:
            result = rule['condition'](request)
            if result is not None:
                return {
                    'allowed': result,
                    'rule': rule['name'],
                }
        return {'allowed': False, 'reason': 'No matching rule (default deny)'}
class CombinedPolicy:
    '''RBAC + ABAC combined policy'''
    def __init__(self):
        self.rbac = RBACPolicy()
        self.abac = ABACPolicy()
        self._setup_abac_rules()
    def _setup_abac_rules(self):
        # Rule: Restricted data only for clearance=restricted
        self.abac.add_rule(
            'restricted_data',
            lambda req: False if req.resource.classification == 'restricted'
            and req.user.clearance != 'restricted' else None,
            priority=100,
        )
        # Rule: Confidential data only for internal/confidential/restricted clearance
        self.abac.add_rule(
            'confidential_data',
            lambda req: False if req.resource.classification == 'confidential'
            and req.user.clearance == 'public' else None,
            priority=90,
        )
        # Rule: Export only during business hours
        self.abac.add_rule(
            'export_hours',
            lambda req: False if req.action == 'export'
            and not (9 <= datetime.now().hour <= 18) else None,
            priority=80,
        )
        # Rule: Team members get extra access to own data
        self.abac.add_rule(
            'team_ownership',
            lambda req: True if req.resource.owner_team == req.user.team
            and req.action in ['view', 'edit'] else None,
            priority=70,
        )
    def evaluate(self, request: AccessRequest) -> dict:
        # ABAC first (deny rules have priority)
        abac_result = self.abac.check(request)
        if not abac_result.get('allowed', True) and 'rule' in abac_result:
            return {'allowed': False, 'source': 'abac', **abac_result}
        # Then RBAC
        rbac_result = self.rbac.check(request)
        # If ABAC explicitly allowed (team ownership), override RBAC
        if abac_result.get('allowed') is True:
            return {'allowed': True, 'source': 'abac', **abac_result}
        return {'allowed': rbac_result['allowed'], 'source': 'rbac', **rbac_result}
# policy = CombinedPolicy()
# user = User('u1', 'Alice', ['data_analyst'], 'finance', 'data-team', 'confidential')
# resource = DataResource('transactions', 'finance', 'prod', 'confidential', 'data-team')
# request = AccessRequest(user, resource, 'view')
# result = policy.evaluate(request)
"""

    def show_code(self, max_chars=600):
        """Print a header and the leading part of the sample source.

        Args:
            max_chars: Number of leading characters of ``CODE`` to print.
                Defaults to 600, the previously hard-coded preview length.
                Pass ``None`` to print the whole sample.
        """
        print("=== Policy Engine ===")
        # Slicing with None as the stop index yields the full string.
        print(self.CODE[:max_chars])
# Demo driver: print the policy-engine code preview to stdout.
engine = PolicyEngine()
engine.show_code()
Monte Carlo Policy Configuration
# mc_policy.py — Monte Carlo access policies
import json
class MCPolicyConfig:
    """Descriptive access policies and audit-trail categories.

    Holds two lookup tables and prints them to stdout. It keeps no
    per-instance state.
    """

    # Policy key -> display name plus an ordered list of rule descriptions.
    POLICIES = {
        "monitor_access": {
            "name": "Monitor Access Policy",
            "rules": [
                "Data Engineers: create/edit/delete monitors สำหรับ tables ของ team",
                "Data Analysts: view monitors + acknowledge incidents",
                "Viewers: read-only access to dashboards",
                "Admins: full control ทุก monitors",
            ],
        },
        "incident_management": {
            "name": "Incident Management Policy",
            "rules": [
                "Auto-assign incidents ให้ table owner team",
                "SEV1 incidents: notify data engineering lead + stakeholders",
                "Only data engineers can mark incidents as resolved",
                "All incident actions logged for audit",
            ],
        },
        "data_classification": {
            "name": "Data Classification Policy",
            "rules": [
                "PII tables: restricted access — only approved users",
                "Financial data: confidential — finance team + approved data engineers",
                "Public metrics: viewable by all authenticated users",
                "Lineage of restricted data: limited to data engineers",
            ],
        },
    }

    # Audit event category -> description of what is recorded.
    AUDIT = {
        "login_events": "ทุก login/logout + failed attempts",
        "policy_changes": "ทุกการเปลี่ยนแปลง policy — who, what, when",
        "data_access": "ทุก access to monitors, incidents, lineage",
        "export_events": "ทุก data export — track data movement",
    }

    def show_policies(self, max_rules=3):
        """Print each policy's name followed by its leading rules.

        Args:
            max_rules: Maximum number of rules printed per policy. Defaults
                to 3, the previously hard-coded limit, which omits the fourth
                rule of every policy. Pass ``None`` to print every rule.
        """
        print("=== Monte Carlo Policies ===\n")
        for policy in self.POLICIES.values():
            print(f"[{policy['name']}]")
            # Slicing with None as the stop index keeps the full rule list.
            for rule in policy["rules"][:max_rules]:
                print(f" • {rule}")
            print()

    def show_audit(self) -> None:
        """Print one line per audit event category."""
        print("=== Audit Trail ===")
        for event, desc in self.AUDIT.items():
            print(f" [{event}] {desc}")
# Demo driver: print the policy summary, then the audit-trail categories.
config = MCPolicyConfig()
config.show_policies()
config.show_audit()
Implementation Best Practices
# best_practices.py — RBAC/ABAC best practices
import json
class BestPractices:
    """Catalogue of RBAC/ABAC implementation best practices.

    Holds one lookup table and prints it to stdout. It keeps no per-instance
    state.
    """

    # Practice key -> display name and a one-line description.
    PRACTICES = {
        "least_privilege": {
            "name": "Principle of Least Privilege",
            "description": "ให้ minimum permissions ที่จำเป็น — เริ่มจาก viewer แล้วเพิ่มทีละ",
        },
        "separation_of_duties": {
            "name": "Separation of Duties",
            "description": "แยก roles ไม่ให้คนเดียวมี power มากเกิน — เช่น ไม่ให้ create + approve",
        },
        "regular_review": {
            "name": "Regular Access Review",
            "description": "Review access ทุก quarter — remove stale permissions, inactive users",
        },
        "default_deny": {
            "name": "Default Deny",
            "description": "ถ้าไม่มี rule ที่ allow → deny — explicit allow only",
        },
        "attribute_hygiene": {
            "name": "Attribute Hygiene (ABAC)",
            "description": "ดูแล attributes ให้ up-to-date — department changes, role changes, data classification",
        },
        "audit_everything": {
            "name": "Audit Everything",
            "description": "Log ทุก access decision — ใช้สำหรับ compliance, forensics, optimization",
        },
    }

    def show_practices(self) -> None:
        """Print each practice's name and description, separated by blank lines."""
        print("=== Best Practices ===\n")
        # Only the values are used, so iterate them directly.
        for practice in self.PRACTICES.values():
            print(f"[{practice['name']}]")
            print(f" {practice['description']}")
            print()
# Demo driver: print the best-practices catalogue to stdout.
bp = BestPractices()
bp.show_practices()
FAQ - คำถามที่พบบ่อย
Q: RBAC กับ ABAC ควรเลือกอันไหน?
A: RBAC: เหมาะกับ team เล็ก-กลาง, simple permissions — ง่าย implement, ง่าย manage
ABAC: เหมาะกับ enterprise, complex policies — fine-grained, context-aware
แนะนำ: เริ่มจาก RBAC → เพิ่ม ABAC rules เมื่อต้องการ fine-grained control
Combined: ใช้ RBAC เป็น base + ABAC สำหรับ exceptions/overrides (เช่น data classification)
Q: Monte Carlo ราคาเท่าไหร่?
A: Monte Carlo ไม่เปิดเผยราคาบน website — ต้อง contact sales
Estimate: $30,000-200,000+/year ขึ้นกับ data volume, tables, integrations
Alternatives ฟรี/ถูกกว่า: Great Expectations (open-source), Elementary (dbt-native), Soda (open-core)
คุ้มเมื่อ: data team > 5 คน, tables > 100, data quality critical สำหรับ business decisions
Q: Data Observability ต่างจาก Data Quality อย่างไร?
A: Data Quality: ตรวจสอบว่า data ถูกต้องตาม rules ที่กำหนด (schema, range, format) — reactive
Data Observability: monitor ทุก aspect ของ data health อัตโนมัติ (freshness, volume, schema, distribution) — proactive
Data Quality: ต้องเขียน rules เอง, Data Observability: ML-based anomaly detection อัตโนมัติ
ใช้ร่วมกัน: Data Quality checks ใน pipeline + Data Observability monitor overall health
Q: ต้อง tag data classification ทุก table ไหม?
A: ควร — อย่างน้อย tables ที่มี PII, financial data, health data
เริ่มจาก: tag top 20% tables ที่สำคัญที่สุด (Pareto principle)
Levels: public → internal → confidential → restricted
Automate: ใช้ tools scan columns หา PII patterns (email, phone, SSN) → auto-tag
สำคัญ: data classification เป็นพื้นฐานของ ABAC — ถ้าไม่มี classification ก็ทำ ABAC ไม่ได้
