OIDC High Availability
OpenID Connect High Availability คือการออกแบบให้ OIDC Provider ทำงานได้ต่อเนื่องโดยมี Uptime 99.99% ด้วย Keycloak Cluster หลาย Instances หลัง Load Balancer พร้อม Database Replication และ Session Replication
Auth เป็น Critical Service ถ้าล่ม ทุก Service จะ Login ไม่ได้ API จะ Reject Request และเกิด Revenue Loss การทำ HA จึงช่วยให้ Auth ทำงานได้ 24/7
Keycloak HA Cluster
# === Keycloak HA on Kubernetes ===
# 1. Helm Chart Installation
# helm repo add bitnami https://charts.bitnami.com/bitnami
# helm install keycloak bitnami/keycloak \
# --set replicaCount=3 \
# --set auth.adminUser=admin \
# --set auth.adminPassword=admin123 \
# --set postgresql.enabled=true \
# --set postgresql.architecture=replication \
# --set postgresql.readReplicas.replicaCount=2 \
# --set cache.enabled=true \
# --set proxy=edge \
# --set production=true
# 2. Kubernetes Deployment
# apiVersion: apps/v1
# kind: Deployment
# metadata:
# name: keycloak
# spec:
# replicas: 3
# selector:
# matchLabels:
# app: keycloak
# template:
# metadata:
# labels:
# app: keycloak
# spec:
# containers:
# - name: keycloak
# image: quay.io/keycloak/keycloak:23.0
# args: ["start"]
# env:
# - name: KC_DB
# value: postgres
# - name: KC_DB_URL
# value: jdbc:postgresql://postgres-primary:5432/keycloak
# - name: KC_DB_USERNAME
# valueFrom:
# secretKeyRef:
# name: keycloak-db
# key: username
# - name: KC_DB_PASSWORD
# valueFrom:
# secretKeyRef:
# name: keycloak-db
# key: password
# - name: KC_CACHE
# value: ispn
# - name: KC_CACHE_STACK
# value: kubernetes
# - name: KC_PROXY
# value: edge
# - name: KC_HOSTNAME
# value: auth.example.com
# - name: KC_HEALTH_ENABLED
# value: "true"
# - name: JAVA_OPTS_APPEND
# value: "-Djgroups.dns.query=keycloak-headless"
# ports:
# - containerPort: 8080
# readinessProbe:
# httpGet:
# path: /health/ready
# port: 8080
# initialDelaySeconds: 30
# periodSeconds: 10
# livenessProbe:
# httpGet:
# path: /health/live
# port: 8080
# initialDelaySeconds: 60
# periodSeconds: 30
# resources:
# requests:
# cpu: 500m
# memory: 1Gi
# limits:
# cpu: 2
# memory: 2Gi
# 3. Headless Service (for JGroups discovery)
# apiVersion: v1
# kind: Service
# metadata:
# name: keycloak-headless
# spec:
# clusterIP: None
# selector:
# app: keycloak
# ports:
# - port: 7800
# name: jgroups
from dataclasses import dataclass
from typing import List
@dataclass
class HAComponent:
    """Describe one component of the high-availability auth stack.

    Instances are plain records; the class adds no behavior beyond what
    ``@dataclass`` generates (``__init__``, ``__repr__``, ``__eq__``).
    """

    name: str  # display name of the component
    replicas: int  # number of instances of this component
    failover: str  # free-text description of the failover mechanism
    health_check: str  # endpoint or command used to probe health
# Components of the HA deployment, in the order they are printed below.
# NOTE(review): the "Redis Cluster" row names Sentinel as its failover
# mechanism; Sentinel is normally paired with non-cluster Redis — confirm
# which topology is actually intended.
ha_components = [
    HAComponent("Keycloak", 3, "Load Balancer + Health Check", "/health/ready"),
    HAComponent("PostgreSQL Primary", 1, "Patroni Auto-failover", "pg_isready"),
    HAComponent("PostgreSQL Replica", 2, "Read-only Replicas", "pg_isready"),
    HAComponent("Redis Cluster", 6, "Sentinel Auto-failover", "PING"),
    HAComponent("Nginx Ingress", 2, "DNS Failover", "/healthz"),
]

# Print a three-line summary per component.
print("Keycloak HA Components:")
for comp in ha_components:
    print(f" [{comp.name}] x{comp.replicas}")
    print(f" Failover: {comp.failover}")
    print(f" Health: {comp.health_check}")
Token Management
# token_ha.py — Token Management for HA
import time
import hashlib
import json
from dataclasses import dataclass, field
from typing import Dict, Optional
@dataclass
class TokenCache:
    """In-memory cache for validated token claims and the provider's JWKS.

    All state lives in plain attributes of this object, so it is local to
    the current process; sharing it between instances would require an
    external store, which this class does not implement.
    """

    # token hash -> {"claims": <dict>, "expiry": <epoch seconds>}
    cache: Dict[str, dict] = field(default_factory=dict)
    # last JWKS document stored by cache_jwks(), or None if never stored
    jwks_cache: Optional[dict] = None
    # epoch seconds after which jwks_cache is considered stale
    jwks_expiry: float = 0

    def cache_token(self, token_hash: str, claims: dict, ttl: int = 300):
        """Store ``claims`` under ``token_hash`` for ``ttl`` seconds."""
        self.cache[token_hash] = {
            "claims": claims,
            "expiry": time.time() + ttl,
        }

    def get_cached_token(self, token_hash: str) -> Optional[dict]:
        """Return the cached claims for ``token_hash``, or None.

        None is returned when the hash is unknown or its entry has expired.
        An expired entry is removed on lookup so the dict does not keep
        growing with dead tokens.
        """
        entry = self.cache.get(token_hash)
        if entry is None:
            return None
        if time.time() < entry["expiry"]:
            return entry["claims"]
        # Expired: evict instead of leaving the stale entry behind.
        self.cache.pop(token_hash, None)
        return None

    def cache_jwks(self, jwks: dict, ttl: int = 3600):
        """Store the JWKS document for ``ttl`` seconds."""
        self.jwks_cache = jwks
        self.jwks_expiry = time.time() + ttl

    def get_jwks(self) -> Optional[dict]:
        """Return the cached JWKS if one is stored and not yet expired."""
        if self.jwks_cache and time.time() < self.jwks_expiry:
            return self.jwks_cache
        return None
class OIDCValidator:
    """Validate OIDC tokens using a claims cache, a JWKS cache and fallbacks.

    NOTE: signature verification and the JWKS HTTP request are stubs in
    this example; ``validate_token`` returns fixed claims and
    ``_fetch_jwks`` returns a fixed key set.
    """

    def __init__(self, issuer: str, cache: "TokenCache"):
        """Remember the primary issuer URL and the cache to use."""
        self.issuer = issuer
        self.cache = cache
        self.fallback_issuers: list = []

    def add_fallback(self, issuer: str):
        """Add a fallback OIDC provider, tried after the primary issuer."""
        self.fallback_issuers.append(issuer)

    def validate_token(self, token: str) -> dict:
        """Return the claims for ``token``, consulting the cache first.

        Raises:
            RuntimeError: if no JWKS is cached and none could be fetched
                from the primary or any fallback issuer. Without keys the
                token cannot be verified, so it must not be accepted.
        """
        # Use the full SHA-256 digest as the cache key: a truncated digest
        # could let two different tokens share one cache entry.
        token_hash = hashlib.sha256(token.encode()).hexdigest()

        # 1. Check the cache first.
        cached = self.cache.get_cached_token(token_hash)
        if cached is not None:
            print(" [CACHE HIT] Token validated from cache")
            return cached

        # 2. Get the JWKS, from cache if possible, otherwise fetch and cache.
        jwks = self.cache.get_jwks()
        if not jwks:
            jwks = self._fetch_jwks()
            if jwks:
                self.cache.cache_jwks(jwks)
        if not jwks:
            # Fail closed when no provider could supply keys.
            raise RuntimeError(
                "No JWKS available from any OIDC provider; cannot verify token"
            )

        # 3. Verify the token (production: use PyJWT). Stubbed claims here.
        claims = {"sub": "user-123", "email": "user@example.com", "exp": time.time() + 3600}
        self.cache.cache_token(token_hash, claims)
        print(" [VALIDATED] Token verified and cached")
        return claims

    def _fetch_jwks(self) -> Optional[dict]:
        """Return a JWKS from the first provider that answers, else None.

        Providers are tried in order: primary issuer, then fallbacks.
        """
        providers = [self.issuer] + self.fallback_issuers
        for provider in providers:
            try:
                # TODO: real request, e.g.
                # response = httpx.get(f"{provider}/.well-known/jwks.json", timeout=5)
                # NOTE(review): the JWKS path differs per provider; it is
                # presumably best taken from `jwks_uri` in the provider's
                # discovery document — confirm for the deployed provider.
                print(f" [JWKS] Fetched from {provider}")
                return {"keys": [{"kty": "RSA", "kid": "key-1"}]}
            except Exception:
                # Best effort: move on to the next provider.
                print(f" [JWKS] Failed {provider}, trying fallback...")
        return None
# Example: validate the same token twice against one shared cache.
cache = TokenCache()
validator = OIDCValidator(issuer="https://auth.example.com", cache=cache)
validator.add_fallback(issuer="https://auth-backup.example.com")

print("OIDC Token Validation with HA:")
# The first pass does the full validation; the second is served from cache.
for _attempt in ("full validation", "cache hit"):
    validator.validate_token("eyJhbGciOiJSUzI1NiJ9.test1")
# HA strategies: strategy name -> short description, printed as a summary.
strategies = {
    "Multi-Instance": "หลาย Keycloak Instances + Load Balancer",
    "Database HA": "PostgreSQL Primary-Replica + Patroni Failover",
    "Session Cache": "Infinispan/Redis Cluster Session Replication",
    "Token Cache": "Cache JWT Public Keys + Validated Tokens",
    "Multi-Region": "Deploy หลาย Region DNS GeoDNS",
    "Fallback Provider": "Backup OIDC Provider ถ้า Primary ล่ม",
}

print("\n\nHA Strategies:")
for strategy, desc in strategies.items():
    print(f" {strategy}: {desc}")
Monitoring Auth
# auth_monitoring.py — Auth Service Monitoring
# Each entry maps a display name to its metric expression, healthy
# threshold and alert rule. The values are only printed below.
# NOTE(review): as PromQL, a histogram quantile would normally be computed
# over a rate() of a `_bucket` series — confirm these expressions before
# copying them into real alert rules.
monitoring = {
    "Login Success Rate": {
        "metric": "keycloak_login_total{status='success'} / keycloak_login_total",
        "threshold": "> 99%",
        "alert": "Login Success < 95% -> PagerDuty",
    },
    "Login Latency P99": {
        "metric": "histogram_quantile(0.99, keycloak_login_duration_seconds)",
        "threshold": "< 500ms",
        "alert": "P99 > 1s -> Slack Warning",
    },
    "Token Issuance Rate": {
        "metric": "rate(keycloak_token_total[5m])",
        "threshold": "Baseline +/- 30%",
        "alert": "Spike/Drop -> Investigate",
    },
    "Active Sessions": {
        "metric": "keycloak_active_sessions",
        "threshold": "< Max Capacity",
        "alert": "> 80% Capacity -> Scale Up",
    },
    "Database Connections": {
        "metric": "pg_stat_activity_count",
        "threshold": "< Pool Max",
        "alert": "> 90% Pool -> Warning",
    },
    "Cache Hit Ratio": {
        "metric": "infinispan_cache_hits / infinispan_cache_requests",
        "threshold": "> 90%",
        "alert": "< 80% -> Check Cache Config",
    },
}

print("Auth Service Monitoring:")
for metric_name, info in monitoring.items():
    print(f"\n [{metric_name}]")
    print(f" Metric: {info['metric']}")
    print(f" Threshold: {info['threshold']}")
    print(f" Alert: {info['alert']}")
# Disaster recovery plan: item -> target or policy, printed as a summary.
dr_plan = {
    "RPO (Recovery Point Objective)": "0 นาที (Synchronous Replication)",
    "RTO (Recovery Time Objective)": "< 5 นาที (Auto-failover)",
    "Backup Schedule": "Database: ทุก 6 ชั่วโมง, Config: ทุก Commit",
    "Failover Test": "ทดสอบ Failover ทุกเดือน",
    "Runbook": "Documented ใน Mintlify Docs",
}

print("\n\nDisaster Recovery Plan:")
for item, value in dr_plan.items():
    print(f" {item}: {value}")
Best Practices
- Replicas: Keycloak อย่างน้อย 3 Replicas Database อย่างน้อย 1 Primary + 2 Replica
- Cache: ใช้ Token Cache ลด Load บน Auth Server
- JWKS Cache: Cache Public Keys ที่ Client ไม่ต้องเรียกทุกครั้ง
- Health Checks: ตรวจ /health/ready ทุก 10 วินาที
- Failover Test: ทดสอบ Failover ทุกเดือน
- Monitoring: ติดตาม Login Success Rate, Latency, Sessions
OpenID Connect HA คืออะไร
คือการทำให้ OIDC Provider ทำงานได้ต่อเนื่องที่ Uptime 99.99% โดยใช้ Keycloak Cluster หลาย Instances, Load Balancer, Database Replication และ Session Replication
Keycloak Cluster ตั้งค่าอย่างไร
Deploy บน Kubernetes หลาย Replicas ใช้ Infinispan Distributed Cache ทำ Session Replication ใช้ PostgreSQL แบบ Primary-Replica และใช้ Ingress เป็น Load Balancer พร้อม Sticky Sessions
HA สำหรับ Auth สำคัญอย่างไร
Auth เป็น Critical Service ถ้าล่ม ทุก Service จะ Login ไม่ได้ API จะ Reject Request และเกิด Revenue Loss การทำ HA ช่วยให้ Auth ทำงานได้ 24/7 แม้มี Node ล่ม
Token Caching ช่วย HA อย่างไร
Cache JWT Public Keys ไว้ที่ฝั่ง Client และใช้ Redis Cluster เป็น Token Cache เมื่อ OIDC Provider ล่มชั่วคราว Client ยังใช้ Cached Keys เพื่อ Verify Token ได้ และช่วยลด Load บน Auth Server
สรุป
OpenID Connect HA ประกอบด้วย Keycloak Cluster หลาย Replicas, PostgreSQL Replication, Infinispan Cache, Token Cache, JWKS Cache และ Fallback Provider ควบคู่กับ Monitoring (Login Success Rate, Latency), แผน Disaster Recovery และการทำ Failover Test เป็นประจำ
