Opsgenie Alert RBAC ABAC Policy คืออะไร
Opsgenie เป็น incident management platform จาก Atlassian ที่จัดการ alerts, on-call schedules และ escalation policies
- RBAC (Role-Based Access Control) คือการควบคุมสิทธิ์ตาม role ที่กำหนดให้ผู้ใช้
- ABAC (Attribute-Based Access Control) คือการควบคุมสิทธิ์ตาม attributes หลายมิติ เช่น team, environment, severity
การรวม RBAC และ ABAC กับ Opsgenie ช่วยให้ organizations จัดการว่าใครเห็น alert อะไร ใครรับผิดชอบ incident ไหน และ escalate ไปยังทีมใด ตาม policies ที่กำหนด
Opsgenie Architecture
# opsgenie_arch.py — Opsgenie alert architecture
import json
class OpsgenieArch:
    """Static catalogue of Opsgenie building blocks plus a console printer.

    ``COMPONENTS`` maps a short key to a dict that always carries ``name`` and
    ``description`` and may carry one extra detail field (``example``,
    ``examples``, ``flow`` or ``types``).
    """

    # Fields printed in the fixed header layout; everything else in a
    # component entry is treated as an extra detail line.
    _CORE_FIELDS = ("name", "description")

    COMPONENTS = {
        "alerts": {
            "name": "Alerts",
            "description": "การแจ้งเตือนจาก monitoring tools (Prometheus, Datadog, CloudWatch)",
            "flow": "Source → Integration → Alert → Routing → Notification",
        },
        "teams": {
            "name": "Teams",
            "description": "กลุ่มผู้ใช้ที่รับผิดชอบ services เฉพาะ",
            "examples": "Platform Team, Backend Team, Security Team",
        },
        "schedules": {
            "name": "On-Call Schedules",
            "description": "ตารางเวรรับ alert (rotation weekly/daily)",
            "types": "Primary, Secondary, Manager escalation",
        },
        "escalations": {
            "name": "Escalation Policies",
            "description": "กฎ escalate alert เมื่อไม่ได้รับ acknowledge",
            "example": "5 min → primary on-call, 15 min → secondary, 30 min → manager",
        },
        "routing": {
            "name": "Alert Routing Rules",
            "description": "กฎ route alert ไปยังทีมที่ถูกต้องตาม conditions",
            "example": "severity=P1 AND service=payment → Payment Team",
        },
    }

    def show_components(self):
        """Print every component: name, description, then its detail fields.

        Previously only the ``example`` key was printed, so the ``flow``,
        ``examples`` and ``types`` details were silently dropped. Every
        non-core field is now printed as ``<Field>: <value>``; entries that
        use ``example`` produce exactly the same line as before.
        """
        print("=== Opsgenie Components ===\n")
        for comp in self.COMPONENTS.values():
            print(f"[{comp['name']}]")
            print(f" {comp['description']}")
            for field, value in comp.items():
                if field in self._CORE_FIELDS:
                    continue
                # "example" -> "Example", "flow" -> "Flow", ...
                print(f" {field.capitalize()}: {value}")
            print()


arch = OpsgenieArch()
arch.show_components()
RBAC Configuration
# rbac.py — Role-Based Access Control for Opsgenie
import json
class OpsgenieRBAC:
    """Role catalogue and an embedded API sample for Opsgenie RBAC.

    ``ROLES`` maps a role key to its display name, permission list and the
    kind of people it is meant for. ``RBAC_CONFIG`` is sample source code kept
    as a string; this class only prints it and never executes it.
    """

    ROLES = {
        "admin": {
            "name": "Admin",
            "permissions": ["Manage users/teams", "Configure integrations", "Edit global policies", "View all alerts", "Manage billing"],
            "assign_to": "Platform/DevOps leads, IT managers",
        },
        "owner": {
            "name": "Team Owner",
            "permissions": ["Manage team members", "Edit team schedules", "Configure team routing", "Acknowledge/close team alerts"],
            "assign_to": "Team leads, senior engineers",
        },
        "member": {
            "name": "Team Member",
            "permissions": ["View team alerts", "Acknowledge alerts", "Add notes", "View on-call schedules"],
            "assign_to": "Engineers on-call, team members",
        },
        "stakeholder": {
            "name": "Stakeholder",
            "permissions": ["View alerts (read-only)", "View status pages", "Subscribe to notifications"],
            "assign_to": "Managers, product owners, executives",
        },
        "restricted": {
            "name": "Restricted Access",
            "permissions": ["View specific team alerts only", "No configuration access"],
            "assign_to": "Contractors, external partners",
        },
    }

    # Sample script shown to the reader. Indentation inside the sample has
    # been restored so the printed text is syntactically valid Python.
    RBAC_CONFIG = """
# opsgenie_rbac.py — RBAC configuration via API
import requests
OPSGENIE_API = "https://api.opsgenie.com/v2"
API_KEY = "your-api-key"
HEADERS = {"Authorization": f"GenieKey {API_KEY}", "Content-Type": "application/json"}
class OpsgenieRBACConfig:
    def create_team(self, name, description, members):
        payload = {
            "name": name,
            "description": description,
            "members": [{"user": {"username": m["email"]}, "role": m["role"]} for m in members],
        }
        resp = requests.post(f"{OPSGENIE_API}/teams", json=payload, headers=HEADERS)
        print(f"Team '{name}': {resp.status_code}")
        return resp.json()
    def assign_role(self, team_id, user_email, role):
        payload = {"user": {"username": user_email}, "role": role}
        resp = requests.post(f"{OPSGENIE_API}/teams/{team_id}/members", json=payload, headers=HEADERS)
        print(f"Assigned {role} to {user_email}: {resp.status_code}")
# Usage
rbac = OpsgenieRBACConfig()
rbac.create_team("backend-team", "Backend Services", [
    {"email": "alice@company.com", "role": "admin"},
    {"email": "bob@company.com", "role": "user"},
])
"""

    def show_roles(self, max_permissions=3):
        """Print each role with its leading permissions and intended assignees.

        Args:
            max_permissions: Upper slice bound applied to each role's
                permission list. Defaults to 3 (the previous hard-coded
                value); pass ``None`` to list every permission.
        """
        print("=== RBAC Roles ===\n")
        for role in self.ROLES.values():
            shown = role["permissions"][:max_permissions]
            print(f"[{role['name']}]")
            print(f" Permissions: {', '.join(shown)}")
            print(f" Assign to: {role['assign_to']}")
            print()

    def show_config(self, max_chars=500):
        """Print a heading followed by the start of ``RBAC_CONFIG``.

        Args:
            max_chars: Upper slice bound applied to the sample text. Defaults
                to 500 (the previous hard-coded value); pass ``None`` to
                print the whole sample.
        """
        print("=== RBAC API Config ===")
        print(self.RBAC_CONFIG[:max_chars])


rbac = OpsgenieRBAC()
rbac.show_roles()
rbac.show_config()
ABAC Policies
# abac.py — Attribute-Based Access Control
import json
class OpsgenieABAC:
    """Attribute catalogue, example policies and a routing-rule API sample.

    ``ATTRIBUTES`` groups attribute names by category, ``POLICIES`` holds
    human-readable IF/THEN policy descriptions, and ``ROUTING_RULES`` is
    sample source code kept as a string; this class only prints it.
    """

    ATTRIBUTES = {
        "user": ["team", "role", "location", "department", "seniority"],
        "alert": ["severity", "service", "environment", "source", "priority"],
        "action": ["acknowledge", "close", "escalate", "reassign", "add_note"],
        "context": ["time_of_day", "is_oncall", "business_hours", "maintenance_window"],
    }

    POLICIES = {
        "p1_routing": {
            "name": "P1 Alert Routing",
            "condition": "alert.severity == 'P1' AND alert.environment == 'production'",
            "action": "Route to primary on-call + notify team lead + page manager after 5 min",
            "teams": ["platform-team", "sre-team"],
        },
        "env_isolation": {
            "name": "Environment Isolation",
            "condition": "alert.environment == 'staging'",
            "action": "Route to dev team only, no escalation to management",
            "teams": ["dev-team"],
        },
        "security_alerts": {
            "name": "Security Alert Policy",
            "condition": "alert.source == 'waf' OR alert.tag contains 'security'",
            "action": "Route to security team, always escalate P1/P2",
            "teams": ["security-team"],
        },
        "business_hours": {
            "name": "Business Hours Policy",
            "condition": "context.business_hours == true AND alert.severity in ['P3', 'P4']",
            "action": "Slack notification only (no page)",
            "teams": ["all-teams"],
        },
        "contractor_limit": {
            "name": "Contractor Access Limit",
            "condition": "user.role == 'contractor'",
            "action": "View alerts for assigned services only, cannot acknowledge P1/P2",
            "teams": ["contractor-team"],
        },
    }

    # Sample script shown to the reader. Two fixes compared with the earlier
    # text: indentation is restored so the sample is valid Python, and the
    # custom-property condition uses field "extra-properties" with a separate
    # "key", which is how the Opsgenie routing-rule condition schema addresses
    # a single extra property (a dotted "extra-properties.environment" field
    # name is not part of that schema).
    ROUTING_RULES = """
# routing_rules.py — Alert routing configuration
import requests
OPSGENIE_API = "https://api.opsgenie.com/v2"
API_KEY = "your-api-key"
HEADERS = {"Authorization": f"GenieKey {API_KEY}", "Content-Type": "application/json"}
def create_routing_rule(team_id, name, conditions, notify):
    payload = {
        "name": name,
        "order": 1,
        "criteria": {
            "type": "match-all-conditions",
            "conditions": conditions,
        },
        "notify": notify,
    }
    resp = requests.post(
        f"{OPSGENIE_API}/teams/{team_id}/routing-rules",
        json=payload, headers=HEADERS
    )
    return resp.json()
# P1 Production alerts → immediate page
create_routing_rule("team-123", "P1 Production", [
    {"field": "priority", "operation": "equals", "expectedValue": "P1"},
    {"field": "extra-properties", "key": "environment", "operation": "equals", "expectedValue": "production"},
], {"type": "schedule", "name": "primary-oncall"})
"""

    def show_attributes(self):
        """Print one line per attribute category with its attribute names."""
        print("=== ABAC Attributes ===\n")
        for category, attrs in self.ATTRIBUTES.items():
            print(f" [{category}]: {', '.join(attrs)}")

    def show_policies(self):
        """Print each policy as a name followed by its IF and THEN lines."""
        print("\n=== ABAC Policies ===\n")
        for policy in self.POLICIES.values():
            print(f"[{policy['name']}]")
            print(f" IF: {policy['condition']}")
            print(f" THEN: {policy['action']}")
            print()

    def show_routing(self, max_chars=500):
        """Print a heading followed by the start of ``ROUTING_RULES``.

        Args:
            max_chars: Upper slice bound applied to the sample text. Defaults
                to 500 (the previous hard-coded value); pass ``None`` to
                print the whole sample.
        """
        print("=== Routing Rules API ===")
        print(self.ROUTING_RULES[:max_chars])


abac = OpsgenieABAC()
abac.show_attributes()
abac.show_policies()
abac.show_routing()
Escalation & On-Call
# escalation.py — Escalation policies and on-call
import json
import random
class EscalationSetup:
    """Example escalation policies and a hard-coded on-call roster printer.

    ``POLICIES`` maps a policy key to a display name and an ordered list of
    steps, each with a ``delay`` label and a ``notify`` target description.
    """

    POLICIES = {
        "standard": {
            "name": "Standard Escalation",
            "steps": [
                {"delay": "0 min", "notify": "Primary on-call (SMS + Push)"},
                {"delay": "5 min", "notify": "Primary on-call (Phone call)"},
                {"delay": "15 min", "notify": "Secondary on-call (SMS + Push)"},
                {"delay": "30 min", "notify": "Team lead (Phone call)"},
                {"delay": "60 min", "notify": "Engineering manager"},
            ],
        },
        "critical": {
            "name": "Critical (P1) Escalation",
            "steps": [
                {"delay": "0 min", "notify": "All on-call + Team lead (SMS + Push + Phone)"},
                {"delay": "5 min", "notify": "Engineering manager"},
                {"delay": "10 min", "notify": "VP Engineering + Incident commander"},
                {"delay": "15 min", "notify": "CTO"},
            ],
        },
    }

    def show_policies(self, max_steps=4):
        """Print each escalation policy with its leading steps.

        Args:
            max_steps: Upper slice bound applied to each policy's step list.
                Defaults to 4 (the previous hard-coded value), which hides the
                fifth step of the standard policy; pass ``None`` to print the
                complete chain.
        """
        print("=== Escalation Policies ===\n")
        for policy in self.POLICIES.values():
            print(f"[{policy['name']}]")
            for step in policy["steps"][:max_steps]:
                # Delay label is right-aligned to 8 columns so arrows line up.
                print(f" {step['delay']:>8} → {step['notify']}")
            print()

    def oncall_status(self):
        """Print the primary and secondary on-call person for three fixed teams."""
        print("=== Current On-Call ===")
        roster = (
            ("Platform", "Alice", "Bob"),
            ("Backend", "Charlie", "Diana"),
            ("Security", "Eve", "Frank"),
        )
        for team, primary, secondary in roster:
            print(f" [{team}] Primary: {primary} | Secondary: {secondary}")


esc = EscalationSetup()
esc.show_policies()
esc.oncall_status()
Monitoring & Audit
# audit.py — RBAC/ABAC audit and monitoring
import json
import random
class PolicyAudit:
    """Demo printers for an access audit log, policy metrics and a checklist.

    All data is illustrative: the audit events are hard-coded and the metric
    values and two of the checklist results are drawn from a random source.
    """

    def access_audit(self):
        """Print one line per hard-coded audit event (result, user, action, alert)."""
        print("=== Access Audit Log ===\n")
        events = [
            {"user": "alice@co.com", "action": "acknowledge", "alert": "P1-payment-down", "result": "ALLOWED", "policy": "team_owner"},
            {"user": "bob@co.com", "action": "close", "alert": "P2-high-cpu", "result": "ALLOWED", "policy": "team_member"},
            {"user": "contractor@ext.com", "action": "acknowledge", "alert": "P1-db-failure", "result": "DENIED", "policy": "contractor_limit"},
            {"user": "eve@co.com", "action": "escalate", "alert": "P3-disk-warning", "result": "ALLOWED", "policy": "security_team"},
        ]
        for event in events:
            icon = "OK" if event["result"] == "ALLOWED" else "DENY"
            print(f" [{icon:>4}] {event['user']:<25} {event['action']:<15} {event['alert']}")

    def policy_metrics(self, rng=None):
        """Print randomly generated sample metrics.

        Args:
            rng: Object providing ``randint(a, b)``, e.g. a seeded
                ``random.Random``. Defaults to the module-level ``random``
                functions, which matches the previous behaviour.
        """
        rng = random if rng is None else rng
        print("\n=== Policy Metrics ===")
        # Draw order is part of the behaviour: with a fixed seed each metric
        # always receives the same value.
        metrics = {
            "Total alerts today": rng.randint(50, 200),
            "P1 alerts": rng.randint(0, 5),
            "MTTR (P1)": f"{rng.randint(5, 30)} minutes",
            "Escalations triggered": rng.randint(2, 15),
            "Access denied events": rng.randint(0, 10),
            "Policy violations": rng.randint(0, 3),
        }
        for label, value in metrics.items():
            print(f" {label}: {value}")

    def compliance(self, rng=None):
        """Print a PASS/FAIL checklist; two entries are chosen at random.

        Args:
            rng: Object providing ``choice(seq)``, e.g. a seeded
                ``random.Random``. Defaults to the module-level ``random``
                functions, which matches the previous behaviour.
        """
        rng = random if rng is None else rng
        print("\n=== Compliance Checklist ===")
        checks = [
            ("RBAC roles reviewed quarterly", True),
            ("On-call schedules up to date", True),
            ("Escalation policies tested", rng.choice([True, False])),
            ("Contractor access limited", True),
            ("Audit logs retained 90+ days", True),
            # Two of three options are True, so this passes more often than not.
            ("P1 response SLO met", rng.choice([True, True, False])),
        ]
        for name, passed in checks:
            icon = "PASS" if passed else "FAIL"
            print(f" [{icon:>4}] {name}")


audit = PolicyAudit()
audit.access_audit()
audit.policy_metrics()
audit.compliance()
FAQ - คำถามที่พบบ่อย
Q: RBAC กับ ABAC ใช้อันไหนดี?
A:
- RBAC: ง่าย, เหมาะทีมเล็ก-กลาง, กำหนดสิทธิ์ตาม role
- ABAC: ยืดหยุ่นกว่า, เหมาะทีมใหญ่, กำหนดตามหลาย attributes
- แนะนำ: เริ่มด้วย RBAC → เพิ่ม ABAC policies เมื่อต้องการ fine-grained control
- ส่วนใหญ่ใช้ hybrid: RBAC เป็น base + ABAC สำหรับ routing rules
Q: Opsgenie กับ PagerDuty อันไหนดี?
A:
- Opsgenie: ราคาถูกกว่า, Atlassian integration ดี (Jira, Confluence), features ครบ
- PagerDuty: mature กว่า, enterprise features เยอะกว่า, ecosystem ใหญ่กว่า
- เลือก Opsgenie เมื่อ: ใช้ Atlassian suite อยู่แล้ว, budget จำกัด
- เลือก PagerDuty เมื่อ: เป็น enterprise, ต้องการ advanced analytics
Q: On-call rotation ควรตั้งอย่างไร?
A:
- Weekly rotation: เปลี่ยนทุกสัปดาห์ (พอดี sprint cycle)
- Primary + Secondary: 2 คนเสมอ (backup)
- Follow-the-sun: ทีม global ใช้ timezone-based rotation
- Compensation: on-call allowance, comp time off
- สำคัญ: ไม่ให้คนเดียว on-call ตลอด → burnout
Q: Alert fatigue แก้อย่างไร?
A:
1. ลด noise: suppress duplicate alerts, group related alerts
2. ปรับ thresholds: alert เฉพาะ actionable items
3. แยก severity ชัดเจน: P1 = page, P3/P4 = Slack only
4. Review alerts monthly: ลบ alerts ที่ไม่มีคน action
5. Auto-resolve: alerts ที่ recover เอง → auto-close
