Certificate Manager คืออะไร
Certificate Manager เป็นระบบจัดการ digital certificates (SSL/TLS certificates) สำหรับ encrypt การสื่อสารระหว่าง clients และ servers ครอบคลุมทั้ง certificate lifecycle ตั้งแต่ issuance, deployment, renewal และ revocation
ประเภทของ Certificate Managers ได้แก่ cert-manager สำหรับ Kubernetes จัดการ certificates อัตโนมัติผ่าน Let's Encrypt หรือ internal CA, AWS Certificate Manager (ACM) managed service สำหรับ AWS resources, HashiCorp Vault PKI สำหรับ internal certificate authority, Google Certificate Manager สำหรับ Google Cloud และ OpenSSL/cfssl สำหรับ manual certificate management
Backup และ Recovery Strategy สำคัญมากเพราะ certificate expiration ทำให้ services ใช้งานไม่ได้, private key สูญหายทำให้ต้อง re-issue certificate ทั้งหมด, disaster recovery ต้อง restore certificates ให้เร็วที่สุด และ compliance requirements บังคับให้มี backup plan สำหรับ cryptographic materials
ติดตั้ง Certificate Management Tools
ติดตั้งเครื่องมือจัดการ certificates
# === Certificate Management Tools Setup ===
# Walk-through of the tooling used in this guide: cert-manager on Kubernetes,
# a local OpenSSL CA, and cfssl. Run the sections you need; each is independent.

# 1. cert-manager for Kubernetes
# ===================================
# Install with Helm (the --set flags below ask the chart to install its CRDs
# and to enable Prometheus metrics).
helm repo add jetstack https://charts.jetstack.io
helm repo update
helm install cert-manager jetstack/cert-manager \
  --namespace cert-manager \
  --create-namespace \
  --set installCRDs=true \
  --set prometheus.enabled=true

# Verify
kubectl get pods -n cert-manager
# cert-manager-xxx 1/1 Running
# cert-manager-cainjector-xxx 1/1 Running
# cert-manager-webhook-xxx 1/1 Running

# 2. Create ClusterIssuer (Let's Encrypt)
# The heredoc is quoted ('EOF') so the shell does not expand anything inside
# the manifest. YAML nesting is significant: keep the indentation as shown.
cat <<'EOF' | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: admin@example.com
    privateKeySecretRef:
      name: letsencrypt-prod-key
    solvers:
      - http01:
          ingress:
            class: nginx
EOF

# 3. Request Certificate
cat <<'EOF' | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: app-tls
  namespace: default
spec:
  secretName: app-tls-secret
  issuerRef:
    name: letsencrypt-prod
    kind: ClusterIssuer
  dnsNames:
    - app.example.com
    - www.app.example.com
  duration: 2160h # 90 days
  renewBefore: 720h # Renew 30 days before expiry
EOF

# 4. OpenSSL for Local Certs
# ===================================
# Generate CA
openssl genrsa -out ca.key 4096
openssl req -x509 -new -nodes -key ca.key -sha256 -days 3650 \
  -out ca.crt -subj "/CN=Internal CA/O=MyOrg"

# Generate server cert
openssl genrsa -out server.key 2048
# Private keys must not be readable by other users.
chmod 600 ca.key server.key
openssl req -new -key server.key -out server.csr \
  -subj "/CN=server.internal/O=MyOrg"
# Modern TLS clients match the hostname against subjectAltName and ignore CN,
# so the SAN has to be added explicitly when signing the CSR.
printf 'subjectAltName=DNS:server.internal\n' > server.ext
openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key \
  -CAcreateserial -out server.crt -days 365 -sha256 \
  -extfile server.ext

# Verify cert
openssl x509 -in server.crt -text -noout | head -20
openssl verify -CAfile ca.crt server.crt

# 5. cfssl (CloudFlare's PKI toolkit)
# ===================================
go install github.com/cloudflare/cfssl/cmd/cfssl@latest
go install github.com/cloudflare/cfssl/cmd/cfssljson@latest

echo "Certificate tools installed"
Backup Strategy สำหรับ Certificates
วางแผน backup certificates
#!/usr/bin/env python3
# cert_backup_strategy.py — Certificate Backup Planning
import json
import logging
from datetime import datetime, timedelta
from typing import Dict, List
from dataclasses import dataclass, field
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("cert_backup")
@dataclass
class CertificateInfo:
    """Plain record describing one tracked certificate.

    The first eight fields are required and positional; their order is part
    of the constructor signature. ``secret_name`` and ``backup_locations``
    are optional.
    """

    # --- identity ---
    name: str
    domain: str
    issuer: str

    # --- validity window, kept as strings (the sample data uses YYYY-MM-DD) ---
    not_before: str
    not_after: str

    # --- key material description (sample data: "RSA"/2048, "ECDSA"/256) ---
    key_type: str
    key_size: int

    # --- where the live certificate is stored ---
    location: str
    secret_name: str = ""

    # Names of the places holding a backup copy. A factory is used so that
    # every instance gets its own list instead of sharing one.
    backup_locations: List[str] = field(default_factory=list)
class CertBackupPlanner:
    """Collect certificate records and derive audit, backup-plan and runbook data.

    Attributes:
        certificates: Records added through ``add_certificate``, in insertion
            order.
        backup_policies: Policy dicts keyed by environment name
            ("production", "staging", "development").
    """

    def __init__(self):
        self.certificates: List["CertificateInfo"] = []
        self.backup_policies = {
            "production": {
                "frequency": "daily",
                "retention_days": 365,
                "encryption": "AES-256-GCM",
                "locations": ["local_vault", "cloud_kms", "offline_hsm"],
                "min_copies": 3,
            },
            "staging": {
                "frequency": "weekly",
                "retention_days": 90,
                "encryption": "AES-256-GCM",
                "locations": ["local_vault", "cloud_kms"],
                "min_copies": 2,
            },
            "development": {
                "frequency": "monthly",
                "retention_days": 30,
                "encryption": "AES-256-GCM",
                "locations": ["local_vault"],
                "min_copies": 1,
            },
        }

    def add_certificate(self, cert: "CertificateInfo"):
        """Register ``cert`` so later audits and plans include it."""
        self.certificates.append(cert)

    @staticmethod
    def _as_utc(moment):
        """Return ``moment`` as an aware UTC datetime (naive values are taken as UTC)."""
        # Local import: the module header only imports datetime and timedelta.
        from datetime import timezone

        if moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment.astimezone(timezone.utc)

    def audit_certificates(self, now=None, warning_days=30):
        """Classify every registered certificate by time left until expiry.

        Args:
            now: Reference time. Defaults to the current UTC time; a naive
                datetime is interpreted as UTC.
            warning_days: Certificates with fewer whole days left than this
                (but not yet expired) are reported as expiring soon.

        Returns:
            A dict with ``total`` plus the lists ``expired``,
            ``expiring_soon`` and ``healthy``. Each list entry carries the
            certificate name, domain, whole days until expiry, backup
            information and a ``severity`` of "critical", "warning" or "ok".

        Raises:
            ValueError: If a certificate's ``not_after`` is not an ISO date.
        """
        from datetime import timezone

        reference = self._as_utc(datetime.now(timezone.utc) if now is None else now)
        results = {"total": len(self.certificates), "expiring_soon": [], "expired": [], "healthy": []}

        for cert in self.certificates:
            # Normalising both sides to aware UTC lets not_after carry an
            # explicit offset without breaking the subtraction.
            expiry = self._as_utc(datetime.fromisoformat(cert.not_after))
            days_until_expiry = (expiry - reference).days
            backup_count = len(cert.backup_locations)

            if days_until_expiry < 0:
                severity, bucket = "critical", "expired"
            elif days_until_expiry < warning_days:
                severity, bucket = "warning", "expiring_soon"
            else:
                severity, bucket = "ok", "healthy"

            results[bucket].append({
                "name": cert.name,
                "domain": cert.domain,
                "days_until_expiry": days_until_expiry,
                "has_backup": backup_count > 0,
                "backup_count": backup_count,
                "severity": severity,
            })
        return results

    def generate_backup_plan(self, environment="production"):
        """Build a backup plan for every registered certificate.

        Args:
            environment: Key into ``backup_policies``. An unknown name falls
                back to the production policy (a warning is logged).

        Returns:
            A dict with the requested ``environment``, a copy of the applied
            ``policy`` and one ``certificates_to_backup`` entry per
            certificate; ``needs_backup`` is True when the certificate has
            fewer backup locations than the policy's ``min_copies``.
        """
        source = self.backup_policies.get(environment)
        if source is None:
            logging.getLogger("cert_backup").warning(
                "Unknown environment %r; using the production backup policy", environment
            )
            source = self.backup_policies["production"]
        # Hand out a copy so callers editing the plan cannot alter the planner's policies.
        policy = {key: (list(value) if isinstance(value, list) else value) for key, value in source.items()}
        required = policy["min_copies"]

        entries = []
        for cert in self.certificates:
            current = len(cert.backup_locations)
            entries.append({
                "name": cert.name,
                "domain": cert.domain,
                "current_backups": current,
                "required_backups": required,
                "needs_backup": current < required,
                "items_to_backup": [
                    f"{cert.name}.key (private key — CRITICAL)",
                    f"{cert.name}.crt (certificate)",
                    f"{cert.name}-chain.crt (CA chain)",
                    f"{cert.name}.csr (certificate signing request)",
                ],
            })

        return {
            "environment": environment,
            "policy": policy,
            "certificates_to_backup": entries,
        }

    def generate_recovery_runbook(self):
        """Return the static recovery runbook: numbered steps plus RTO/RPO targets."""
        actions = [
            "Identify affected certificates and services",
            "Retrieve backup from primary location (Vault/KMS)",
            "Verify backup integrity (checksum + test decrypt)",
            "Deploy certificates to target systems",
            "Verify TLS connectivity on all endpoints",
            "Update monitoring and alerting",
            "Document incident and update procedures",
        ]
        return {
            "title": "Certificate Recovery Runbook",
            "steps": [{"step": number, "action": action} for number, action in enumerate(actions, start=1)],
            "rto": "30 minutes (target)",
            "rpo": "Last successful backup",
        }
# Demo run: register two sample certificates, then print the audit report and
# the backup plan for the default environment as indented JSON.
planner = CertBackupPlanner()

planner.add_certificate(
    CertificateInfo(
        name="app-tls",
        domain="app.example.com",
        issuer="Let's Encrypt",
        not_before="2025-01-01",
        not_after="2025-03-31",
        key_type="RSA",
        key_size=2048,
        location="kubernetes/default/app-tls-secret",
        backup_locations=["vault"],
    )
)
planner.add_certificate(
    CertificateInfo(
        name="api-tls",
        domain="api.example.com",
        issuer="DigiCert",
        not_before="2024-06-01",
        not_after="2025-06-01",
        key_type="ECDSA",
        key_size=256,
        location="kubernetes/default/api-tls-secret",
        backup_locations=["vault", "s3"],
    )
)

print("Audit:", json.dumps(planner.audit_certificates(), indent=2))
print("Plan:", json.dumps(planner.generate_backup_plan(), indent=2))
Automated Backup Scripts
Scripts สำหรับ backup certificates อัตโนมัติ
#!/bin/bash
# cert_backup.sh — Automated Certificate Backup
#
# Collects cert-manager secrets and resources from Kubernetes plus local
# certificate directories into a dated staging directory, packs and encrypts
# it, writes a SHA-256 checksum next to it, prunes old backups and verifies
# the new one. Any failing command aborts the run (set -e).
set -euo pipefail
umask 077  # staging files contain private keys; keep them owner-only

BACKUP_BASE="/secure-backup/certificates"
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="${BACKUP_BASE}/${DATE}"       # plaintext staging directory
ARCHIVE="${BACKUP_DIR}.tar.gz"            # plaintext tarball (temporary)
ENCRYPTED="${ARCHIVE}.enc"                # final artefact
ENCRYPTION_KEY_FILE="/etc/cert-backup/.encryption-key"
RETENTION_DAYS="${RETENTION_DAYS:-90}"    # override via environment if needed
export VAULT_ADDR="https://vault.internal:8200"  # read by the vault CLI in step 5
LOG_FILE="/var/log/cert-backup.log"

log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"; }

# Never leave unencrypted key material behind, even when a step fails.
cleanup_plaintext() { rm -rf "$BACKUP_DIR" "$ARCHIVE"; }
trap cleanup_plaintext EXIT

mkdir -p "$BACKUP_DIR"
log "Starting certificate backup to $BACKUP_DIR"

# === 1. Backup Kubernetes cert-manager secrets ===
log "Backing up Kubernetes certificate secrets..."
NAMESPACES=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}')
for ns in $NAMESPACES; do
  # A namespace we cannot list is treated as having no matching secrets.
  SECRETS=$(kubectl get secrets -n "$ns" \
    -l cert-manager.io/certificate-name \
    -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || echo "")
  for secret in $SECRETS; do
    kubectl get secret "$secret" -n "$ns" -o yaml \
      > "${BACKUP_DIR}/k8s_${ns}_${secret}.yaml"
    log "  Backed up: ${ns}/${secret}"
  done
done

# === 2. Backup local certificates ===
log "Backing up local certificates..."
# NOTE(review): /etc/letsencrypt/live usually holds symlinks into ../archive;
# confirm the archive directory is covered too if certbot is in use.
CERT_DIRS=("/etc/ssl/private" "/etc/letsencrypt/live" "/opt/certs")
for dir in "${CERT_DIRS[@]}"; do
  if [ -d "$dir" ]; then
    # Store members relative to / so that extracting with `tar -C /` puts
    # every file back at its original absolute location.
    tar -czf "${BACKUP_DIR}/local_$(echo "$dir" | tr '/' '_').tar.gz" \
      -C / "${dir#/}"
    log "  Backed up: $dir"
  fi
done

# === 3. Backup cert-manager resources ===
log "Backing up cert-manager CRDs..."
kubectl get certificates --all-namespaces -o yaml \
  > "${BACKUP_DIR}/cert-manager-certificates.yaml"
kubectl get clusterissuers -o yaml \
  > "${BACKUP_DIR}/cert-manager-clusterissuers.yaml"
kubectl get issuers --all-namespaces -o yaml \
  > "${BACKUP_DIR}/cert-manager-issuers.yaml"

# === 4. Encrypt backup ===
log "Encrypting backup..."
tar -czf "$ARCHIVE" -C "$BACKUP_BASE" "$(basename "$BACKUP_DIR")"
# `openssl enc` does not support AEAD modes such as GCM, so use CBC with a
# PBKDF2-derived key. CBC gives no authentication; the checksum below only
# detects accidental corruption.
openssl enc -aes-256-cbc -salt -pbkdf2 -iter 200000 \
  -in "$ARCHIVE" \
  -out "$ENCRYPTED" \
  -pass file:"$ENCRYPTION_KEY_FILE"

# Generate checksum (recorded by file name only, so the pair can be verified
# from whatever directory it is later copied to)
(
  cd "$BACKUP_BASE"
  sha256sum "$(basename "$ENCRYPTED")" > "$(basename "$ENCRYPTED").sha256"
)

# Clean unencrypted files
cleanup_plaintext
log "Encrypted backup: $ENCRYPTED"

# === 5. Upload to remote storage ===
log "Uploading to remote storage..."
# AWS S3
# aws s3 cp "$ENCRYPTED" \
#   "s3://cert-backup-bucket/$(basename "$ENCRYPTED")" \
#   --sse aws:kms
# HashiCorp Vault
# vault kv put "secret/cert-backups/${DATE}" \
#   backup=@"$ENCRYPTED"

# === 6. Cleanup old backups ===
# Remove the checksum files together with the archives they describe.
find "$BACKUP_BASE" \( -name "*.tar.gz.enc" -o -name "*.tar.gz.enc.sha256" \) \
  -mtime +"$RETENTION_DAYS" -delete
log "Cleaned up backups older than $RETENTION_DAYS days"

# === 7. Verify backup ===
log "Verifying backup integrity..."
# Test the command directly: under `set -e` a separate `$?` check would never
# be reached on failure.
if (cd "$BACKUP_BASE" && sha256sum -c "$(basename "$ENCRYPTED").sha256"); then
  log "Backup verified successfully"
else
  log "ERROR: Backup verification failed!"
  exit 1
fi

log "Certificate backup completed successfully"
# log() already appends to $LOG_FILE, so only stderr needs redirecting in cron:
# Crontab: 0 2 * * * /opt/scripts/cert_backup.sh >/dev/null 2>>/var/log/cert-backup.log
Recovery Procedures
ขั้นตอน restore certificates
#!/bin/bash
# cert_restore.sh — Certificate Recovery Script
#
# Usage: cert_restore.sh <encrypted-backup-file>
#
# Verifies and decrypts a backup archive, re-applies the Kubernetes secrets
# and cert-manager resources it contains, restores local certificate
# directories, checks the resulting secrets and restarts the ingress.
set -euo pipefail
umask 077  # the extracted files contain private keys

BACKUP_FILE="${1:-}"
ENCRYPTION_KEY_FILE="/etc/cert-backup/.encryption-key"
LOG_FILE="/var/log/cert-restore.log"

log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"; }

if [ -z "$BACKUP_FILE" ] || [ ! -f "$BACKUP_FILE" ]; then
  echo "Usage: $0 <encrypted-backup-file>" >&2
  exit 2
fi

# mktemp gives an unpredictable, owner-only directory; the trap removes the
# decrypted material on every exit path.
RESTORE_DIR=$(mktemp -d "/tmp/cert-restore-XXXXXX")
trap 'rm -rf "$RESTORE_DIR"' EXIT

# Drop server-assigned metadata so the manifest can be applied to a cluster
# where the object does not exist (or has since changed).
strip_server_fields() {
  sed -i '/resourceVersion:/d; /uid:/d; /creationTimestamp:/d' "$1"
}

log "Starting certificate restore from $BACKUP_FILE"

# === 1. Verify and decrypt ===
if [ -f "${BACKUP_FILE}.sha256" ]; then
  # Compare digests directly so the check does not depend on the path that
  # was recorded inside the checksum file.
  EXPECTED=$(awk '{print $1; exit}' "${BACKUP_FILE}.sha256")
  ACTUAL=$(sha256sum "$BACKUP_FILE" | awk '{print $1}')
  if [ "$EXPECTED" != "$ACTUAL" ]; then
    log "ERROR: Checksum mismatch for $BACKUP_FILE"
    exit 1
  fi
  log "Checksum verified"
else
  log "WARNING: No checksum file found; skipping integrity check"
fi

# `openssl enc` cannot process AEAD modes such as GCM. The cipher, key
# derivation and iteration count here must match the ones used to encrypt.
openssl enc -d -aes-256-cbc -pbkdf2 -iter 200000 \
  -in "$BACKUP_FILE" \
  -out "${RESTORE_DIR}/backup.tar.gz" \
  -pass file:"$ENCRYPTION_KEY_FILE"
tar -xzf "${RESTORE_DIR}/backup.tar.gz" -C "$RESTORE_DIR"
log "Backup decrypted and extracted"

# Unmatched globs expand to nothing instead of the literal pattern.
shopt -s nullglob

# === 2. Restore Kubernetes secrets ===
log "Restoring Kubernetes secrets..."
for yaml_file in "$RESTORE_DIR"/*/k8s_*.yaml; do
  strip_server_fields "$yaml_file"
  kubectl apply -f "$yaml_file"
  log "  Restored: $(basename "$yaml_file")"
done

# === 3. Restore cert-manager resources ===
# Issuers are applied before the Certificates that reference them.
log "Restoring cert-manager resources..."
for resource in clusterissuers issuers certificates; do
  yaml_file=$(find "$RESTORE_DIR" -name "cert-manager-${resource}.yaml" -print -quit)
  if [ -n "$yaml_file" ] && [ -f "$yaml_file" ]; then
    strip_server_fields "$yaml_file"
    kubectl apply -f "$yaml_file"
    log "  Restored: $resource"
  fi
done

# === 4. Restore local certificates ===
log "Restoring local certificates..."
for tar_file in "$RESTORE_DIR"/*/local_*.tar.gz; do
  # Extract to original location. This assumes the archive members are
  # stored relative to / — verify against the job that produced the backup.
  tar -xzf "$tar_file" -C /
  log "  Restored: $(basename "$tar_file")"
done

# === 5. Verify restored certificates ===
log "Verifying restored certificates..."
ERRORS=0
# Check Kubernetes secrets
SECRETS=$(kubectl get secrets --all-namespaces \
  -l cert-manager.io/certificate-name \
  -o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}{"\n"}{end}')
for secret in $SECRETS; do
  NS=${secret%%/*}
  NAME=${secret#*/}
  # Verify cert is valid. The `|| EXPIRY=""` keeps a failing pipeline from
  # aborting the script (pipefail + set -e) so the error can be counted.
  EXPIRY=$(kubectl get secret "$NAME" -n "$NS" -o jsonpath='{.data.tls\.crt}' \
    | base64 -d \
    | openssl x509 -noout -enddate 2>/dev/null \
    | cut -d= -f2) || EXPIRY=""
  if [ -n "$EXPIRY" ]; then
    log "  OK: $secret (expires: $EXPIRY)"
  else
    log "  ERROR: $secret — invalid certificate"
    # Not ((ERRORS++)): that returns status 1 when ERRORS is 0 and would
    # terminate the script under `set -e`.
    ERRORS=$((ERRORS + 1))
  fi
done

# === 6. Restart affected services ===
log "Restarting services to pick up restored certificates..."
# Restart nginx ingress (best effort: the deployment may not exist)
kubectl rollout restart deployment -n ingress-nginx ingress-nginx-controller 2>/dev/null || true
# Restart applications that use TLS
# kubectl rollout restart deployment -n default my-app

# === 7. Cleanup ===
rm -rf "$RESTORE_DIR"

if [ "$ERRORS" -eq 0 ]; then
  log "Certificate restore completed successfully"
else
  log "WARNING: Restore completed with $ERRORS errors"
  exit 1
fi
Monitoring และ Alerting
ตรวจสอบ certificate status
#!/usr/bin/env python3
# cert_monitor.py — Certificate Monitoring and Alerting
import subprocess
import json
import ssl
import socket
import logging
from datetime import datetime
from typing import Dict, List
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("cert_monitor")
class CertificateMonitor:
    """Probe TLS endpoints and summarise how close their certificates are to expiry.

    Every probe result is appended to ``self.checks``; ``get_summary``
    aggregates everything collected so far on this instance.
    """

    # Resolved by name so the class does not depend on a module-level variable.
    _log = logging.getLogger("cert_monitor")

    def __init__(self):
        self.checks = []

    @staticmethod
    def _severity_for(days_left):
        """Map whole days until expiry to "ok" (>30), "warning" (>7) or "critical"."""
        if days_left > 30:
            return "ok"
        if days_left > 7:
            return "warning"
        return "critical"

    @staticmethod
    def _build_result(hostname, port, cert, now=None):
        """Turn a ``getpeercert()`` dict into the result record for one endpoint.

        Args:
            hostname: Host that was probed.
            port: Port that was probed.
            cert: Certificate dict in the format returned by
                ``SSLSocket.getpeercert()``.
            now: Naive UTC reference time; defaults to the current UTC time.

        Raises:
            KeyError: If ``cert`` has no ``notAfter`` entry.
            ValueError: If ``notAfter`` is not in the expected format.
        """
        # Local import: the module header only imports the datetime class.
        from datetime import timezone

        # cert_time_to_seconds parses the certificate time format without
        # depending on the process locale (strptime's %b does).
        expiry_epoch = ssl.cert_time_to_seconds(cert["notAfter"])
        # Kept naive so isoformat() output has no "+00:00" suffix.
        not_after = datetime.fromtimestamp(expiry_epoch, tz=timezone.utc).replace(tzinfo=None)
        if now is None:
            now = datetime.now(timezone.utc).replace(tzinfo=None)
        days_left = (not_after - now).days

        # issuer/subject are sequences of RDNs; the first pair of each RDN is used.
        issuer = dict(rdn[0] for rdn in cert.get("issuer", ()))
        subject = dict(rdn[0] for rdn in cert.get("subject", ()))
        san = [entry[1] for entry in cert.get("subjectAltName", [])]

        return {
            "hostname": hostname,
            "port": port,
            "status": "ok",
            "subject": subject.get("commonName", ""),
            "issuer": issuer.get("organizationName", ""),
            "not_after": not_after.isoformat(),
            "days_until_expiry": days_left,
            "san": san,
            "severity": CertificateMonitor._severity_for(days_left),
        }

    def check_endpoint(self, hostname, port=443, timeout=10):
        """Connect to ``hostname:port`` over TLS and record the certificate status.

        Args:
            hostname: Server name, also used for SNI and hostname verification.
            port: TCP port, 443 by default.
            timeout: Connection timeout in seconds.

        Returns:
            The result dict, which is also appended to ``self.checks``. Any
            failure (connection, handshake, verification, parsing) yields a
            record with ``status`` "error" and ``severity`` "critical"
            instead of raising.
        """
        try:
            context = ssl.create_default_context()
            with socket.create_connection((hostname, port), timeout=timeout) as sock:
                with context.wrap_socket(sock, server_hostname=hostname) as ssock:
                    cert = ssock.getpeercert()
            result = self._build_result(hostname, port, cert)
        except Exception as e:
            # Deliberate catch-all: a monitoring sweep must report a broken
            # endpoint rather than stop, but the cause is logged.
            self._log.warning("TLS check failed for %s:%s: %s", hostname, port, e)
            result = {
                "hostname": hostname,
                "port": port,
                "status": "error",
                "error": str(e),
                "severity": "critical",
            }
        self.checks.append(result)
        return result

    def check_multiple(self, endpoints, timeout=10):
        """Check several endpoints and return their results in order.

        Args:
            endpoints: Iterable of host strings (port 443) or dicts with a
                ``"host"`` key and an optional ``"port"`` key.
            timeout: Per-endpoint connection timeout in seconds.
        """
        results = []
        for ep in endpoints:
            if isinstance(ep, str):
                host, port = ep, 443
            else:
                host, port = ep["host"], ep.get("port", 443)
            results.append(self.check_endpoint(host, port, timeout=timeout))
        return results

    def get_summary(self):
        """Aggregate all recorded checks into counts, an overall status and an issue list."""
        from datetime import timezone

        counts = {"ok": 0, "warning": 0, "critical": 0}
        for check in self.checks:
            if check["severity"] in counts:
                counts[check["severity"]] += 1

        if counts["critical"] > 0:
            overall = "critical"
        elif counts["warning"] > 0:
            overall = "warning"
        else:
            overall = "ok"

        return {
            "checked_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            "total_endpoints": len(self.checks),
            "ok": counts["ok"],
            "warning": counts["warning"],
            "critical": counts["critical"],
            "overall_status": overall,
            "issues": [c for c in self.checks if c["severity"] != "ok"],
        }
# === Prometheus Metrics ===
# cert_manager_certificate_ready_status{name="app-tls", namespace="default"} 1
# cert_manager_certificate_expiration_timestamp_seconds{name="app-tls"} 1743465600
#
# Alert rules:
# - alert: CertificateExpiringSoon
# expr: (cert_manager_certificate_expiration_timestamp_seconds - time()) < 604800
# labels: {severity: warning}
# annotations: {summary: "Certificate expiring in less than 7 days"}
#
# - alert: CertificateExpired
# expr: (cert_manager_certificate_expiration_timestamp_seconds - time()) < 0
# labels: {severity: critical}
# monitor = CertificateMonitor()
# monitor.check_multiple(["google.com", "github.com", "example.com"])
# print(json.dumps(monitor.get_summary(), indent=2))
FAQ คำถามที่พบบ่อย
Q: ต้อง backup อะไรบ้างสำหรับ certificates?
A: ต้อง backup Private key (สำคัญที่สุด สูญหายเท่ากับต้อง re-issue certificate), Certificate file (.crt/.pem), CA chain/intermediate certificates, CSR (Certificate Signing Request) สำหรับ re-issue, cert-manager CRD resources (ClusterIssuers, Issuers, Certificates) และ ACME account keys สำหรับ Let's Encrypt ส่วนบน Kubernetes ต้อง backup Secret objects ที่เก็บ certificates (ซึ่งถูกเก็บอยู่ใน etcd) ด้วย
Q: ควร backup certificates บ่อยแค่ไหน?
A: Production certificates ควร backup ทุกวัน หรือทันทีหลัง renew/issue certificate ใหม่ Staging ทุกสัปดาห์ Development ทุกเดือน สำคัญที่สุดคือ trigger backup ทันทีเมื่อมี certificate ใหม่ ไม่ใช่แค่ scheduled backup cert-manager มี events ที่ใช้ trigger backup ได้ retention ขั้นต่ำ 1 ปีสำหรับ production เพื่อ compliance
Q: เก็บ private keys อย่างไรให้ปลอดภัย?
A: ห้ามเก็บ private keys ใน plaintext บน disk ใช้ encrypted storage เสมอ เช่น HashiCorp Vault, AWS KMS, Azure Key Vault, GCP Secret Manager สำหรับ Kubernetes ใช้ sealed-secrets หรือ external-secrets operator encrypt backups ด้วย AES-256 แยก encryption key จาก backup data ใช้ HSM (Hardware Security Module) สำหรับ CA private keys จำกัด access ด้วย RBAC เฉพาะคนที่จำเป็น
Q: cert-manager ทำ auto-renewal ได้ไหม?
A: cert-manager renew certificates อัตโนมัติก่อน expiry โดย default จะเริ่ม renew เมื่อผ่านไป 2/3 ของอายุ certificate (ตั้งค่าเองได้ด้วย renewBefore) เช่น Let's Encrypt certificates (อายุ 90 วัน) จะ renew อัตโนมัติประมาณวันที่ 60 คือ 30 วันก่อนหมดอายุ ไม่ต้องทำอะไรเพิ่ม แต่ต้อง monitor ว่า renewal สำเร็จ เพราะอาจ fail จาก DNS issues, rate limits, network problems ตั้ง alerting สำหรับ certificate expiry เพื่อ catch failures
