CDK Business Continuity
AWS CDK Construct Business Continuity Infrastructure as Code Multi-AZ DR Automation Backup RTO RPO Multi-Region Failover Chaos Engineering TypeScript Python
| DR Strategy | RTO | RPO | Cost | เหมาะกับ |
|---|---|---|---|---|
| Backup & Restore | ชั่วโมง | ชั่วโมง | ต่ำ | Non-critical |
| Pilot Light | 10-30 นาที | นาที | ต่ำ-ปานกลาง | Core systems |
| Warm Standby | นาที | วินาที | ปานกลาง | Important |
| Multi-site Active | วินาที | 0 | สูง | Mission-critical |
CDK Multi-AZ Stack
# === CDK Business Continuity Construct ===
# npm install aws-cdk-lib constructs
# // lib/bc-stack.ts
# import * as cdk from 'aws-cdk-lib';
# import * as ec2 from 'aws-cdk-lib/aws-ec2';
# import * as rds from 'aws-cdk-lib/aws-rds';
# import * as ecs from 'aws-cdk-lib/aws-ecs';
# import * as s3 from 'aws-cdk-lib/aws-s3';
# import * as route53 from 'aws-cdk-lib/aws-route53';
# import { Construct } from 'constructs';
#
# export class BCStack extends cdk.Stack {
# constructor(scope: Construct, id: string, props?: cdk.StackProps) {
# super(scope, id, props);
#
# // Multi-AZ VPC
# const vpc = new ec2.Vpc(this, 'BCVPC', {
# maxAzs: 3,
# natGateways: 2,
# subnetConfiguration: [
# { name: 'Public', subnetType: ec2.SubnetType.PUBLIC },
# { name: 'Private', subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },
# { name: 'Isolated', subnetType: ec2.SubnetType.PRIVATE_ISOLATED },
# ],
# });
#
# // RDS Multi-AZ
# const database = new rds.DatabaseInstance(this, 'Database', {
# engine: rds.DatabaseInstanceEngine.postgres({
# version: rds.PostgresEngineVersion.VER_16,
# }),
# instanceType: ec2.InstanceType.of(
# ec2.InstanceClass.R6G, ec2.InstanceSize.LARGE
# ),
# vpc,
# multiAz: true,
# storageEncrypted: true,
# backupRetention: cdk.Duration.days(30),
# deletionProtection: true,
# autoMinorVersionUpgrade: true,
# });
#
# // S3 bucket prepared for Cross-Region Replication (versioning is a prerequisite; the replication rules themselves are not shown here)
# const bucket = new s3.Bucket(this, 'DataBucket', {
# versioned: true,
# encryption: s3.BucketEncryption.S3_MANAGED,
# lifecycleRules: [{
# transitions: [{
# storageClass: s3.StorageClass.INTELLIGENT_TIERING,
# transitionAfter: cdk.Duration.days(30),
# }],
# }],
# });
# }
# }
from dataclasses import dataclass
@dataclass
class AWSResource:
    """One AWS service entry in the business-continuity architecture summary.

    Every field is free-form display text that is printed as-is by the
    architecture report; no field is parsed or validated.
    """

    service: str  # service name, e.g. "RDS PostgreSQL"
    config: str  # how the service is deployed, e.g. "Multi-AZ Standby"
    ha_feature: str  # high-availability mechanism, e.g. "Auto Failover"
    rto: str  # recovery time objective as display text, e.g. "60s"
    rpo: str  # recovery point objective as display text, e.g. "Minutes"
# Services that make up the architecture, each with its HA feature and
# RTO/RPO display values.
resources = [
    AWSResource("VPC", "3 AZs, 2 NAT GW", "Multi-AZ Subnets", "0", "0"),
    AWSResource("RDS PostgreSQL", "Multi-AZ Standby", "Auto Failover", "60s", "0"),
    AWSResource("S3", "Cross-Region Replication", "11 nines durability", "0", "Minutes"),
    AWSResource("ECS Fargate", "Multi-AZ Tasks", "Auto-replace failed", "30s", "0"),
    AWSResource("ALB", "Cross-AZ", "Health Check Routing", "0", "0"),
    AWSResource("Route53", "Health Check Failover", "DNS Failover", "60s", "0"),
    AWSResource("ElastiCache", "Multi-AZ Replica", "Auto Failover", "30s", "Seconds"),
]

# Report: two lines per resource (identity, then HA / RTO / RPO).
print("=== BC Architecture ===")
for r in resources:
    print(f" [{r.service}] {r.config}")
    print(f" HA: {r.ha_feature} | RTO: {r.rto} | RPO: {r.rpo}")
Backup และ Recovery
# === Automated Backup Strategy ===
# CDK — AWS Backup Plan
# import * as backup from 'aws-cdk-lib/aws-backup';
# import * as events from 'aws-cdk-lib/aws-events';
#
# const plan = new backup.BackupPlan(this, 'BackupPlan', {
# backupPlanRules: [
# new backup.BackupPlanRule({
# ruleName: 'DailyBackup',
# scheduleExpression: events.Schedule.cron({
# hour: '2', minute: '0'
# }),
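# deleteAfter: cdk.Duration.days(30),
# }),
# new backup.BackupPlanRule({
# ruleName: 'WeeklyBackup',
# scheduleExpression: events.Schedule.cron({
# weekDay: 'SUN', hour: '3', minute: '0'
# }),
# // Cold storage needs deleteAfter >= moveToColdStorageAfter + 90 days,
# // so the transition belongs on the 365-day rule, not the 30-day one.
# moveToColdStorageAfter: cdk.Duration.days(90),
# deleteAfter: cdk.Duration.days(365),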
# }),
# ],
# });
#
# plan.addSelection('Selection', {
# resources: [
# backup.BackupResource.fromRdsDatabaseInstance(database),
# backup.BackupResource.fromDynamoDbTable(table),
# backup.BackupResource.fromEfsFileSystem(efs),
# ],
# });
# SSM Automation — DR Runbook
# aws ssm create-document --name "DR-Failover" --document-type "Automation" --content '{
# "schemaVersion": "0.3",
# "mainSteps": [
# { "action": "aws:executeAwsApi",
# "name": "PromoteRDSReadReplica",
# "inputs": {
# "Service": "rds",
# "Api": "PromoteReadReplica",
# "DBInstanceIdentifier": "dr-replica"
# }
# },
# { "action": "aws:executeAwsApi",
# "name": "UpdateRoute53",
# "inputs": {
# "Service": "route53",
# "Api": "ChangeResourceRecordSets",
# ...
# }
# }
# ]
# }'
@dataclass
class BackupPolicy:
    """Backup settings for one resource, as shown in the backup report.

    The string fields are free-form display text; only ``cross_region`` is
    interpreted (the report maps it to "CRR" or "Same Region").
    """

    resource: str  # what is backed up, e.g. "RDS PostgreSQL"
    frequency: str  # backup schedule as display text, e.g. "Daily 2AM"
    retention: str  # how long backups are kept, e.g. "30 days"
    cross_region: bool  # True when the backup is copied to another region
    tested: str  # how often a restore is tested, e.g. "Monthly"
# Backup policy per resource: schedule, retention, cross-region copy flag,
# and restore-test cadence.
policies = [
    BackupPolicy("RDS PostgreSQL", "Daily 2AM", "30 days", True, "Weekly"),
    BackupPolicy("DynamoDB", "Continuous (PITR)", "35 days", True, "Monthly"),
    BackupPolicy("S3 Data", "Versioned + CRR", "Indefinite", True, "Quarterly"),
    BackupPolicy("EFS Files", "Daily 3AM", "30 days", False, "Monthly"),
    BackupPolicy("EC2 AMI", "Weekly Sunday", "90 days", True, "Monthly"),
    BackupPolicy("Secrets Manager", "Replicated", "N/A", True, "Quarterly"),
]

# Report: three lines per policy; the boolean flag is rendered as a label.
print("\n=== Backup Policies ===")
for p in policies:
    crr = "CRR" if p.cross_region else "Same Region"
    print(f" [{p.resource}]")
    print(f" Frequency: {p.frequency} | Retention: {p.retention}")
    print(f" Region: {crr} | Test: {p.tested}")
Chaos Testing
# === Chaos Engineering & DR Testing ===
# AWS Fault Injection Simulator
# aws fis create-experiment-template \
# --description "AZ Failure Test" \
# --targets '{
# "Instances": {
# "resourceType": "aws:ec2:instance",
# "selectionMode": "ALL",
# "filters": [{"path": "Placement.AvailabilityZone", "values": ["ap-southeast-1a"]}]
# }
# }' \
# --actions '{
# "StopInstances": {
# "actionId": "aws:ec2:stop-instances",
# "targets": {"Instances": "Instances"},
# "parameters": {"startInstancesAfterDuration": "PT10M"}
# }
# }' \
# --stop-conditions '[{"source": "none"}]' \
# --role-arn arn:aws:iam::123456789012:role/FISRole
@dataclass
class ChaosTest:
    """One chaos / DR test scenario, as shown in the chaos test report.

    Every field is free-form display text that is printed as-is; no field
    is parsed or validated.
    """

    name: str  # scenario name, e.g. "AZ Failure"
    target: str  # what the experiment does, e.g. "Stop all instances in 1 AZ"
    impact: str  # expected blast radius, e.g. "33% capacity"
    expected_behavior: str  # how the system should respond
    frequency: str  # how often the test runs, e.g. "Monthly"
    last_result: str  # outcome of the latest run, e.g. "Pass"
# Chaos / DR test scenarios with their cadence and latest outcome.
tests = [
    ChaosTest("AZ Failure", "Stop all instances in 1 AZ", "33% capacity", "Auto-recover in other AZs", "Monthly", "Pass"),
    ChaosTest("RDS Failover", "Force failover to standby", "60s DB downtime", "App reconnects auto", "Monthly", "Pass"),
    ChaosTest("Instance Termination", "Random instance kill", "1 instance lost", "ASG replaces in 2min", "Weekly", "Pass"),
    ChaosTest("Network Disruption", "Block traffic to 1 AZ", "Partial outage", "ALB routes to healthy AZ", "Monthly", "Pass"),
    ChaosTest("Region Failover", "Simulate region outage", "Full region down", "Route53 switches to DR", "Quarterly", "Pass"),
    ChaosTest("Data Corruption", "Restore from backup", "Data loss to RPO", "Restore completes in RTO", "Quarterly", "Pass"),
]

# Report: four lines per scenario, led by the latest result.
print("Chaos Tests:")
for t in tests:
    print(f" [{t.last_result}] {t.name} ({t.frequency})")
    print(f" Target: {t.target}")
    print(f" Impact: {t.impact}")
    print(f" Expected: {t.expected_behavior}")
# Headline DR figures, keyed by display label; values are display text.
dr_metrics = {
    "RTO Target": "5 minutes",
    "RTO Actual (last test)": "3.5 minutes",
    "RPO Target": "1 minute",
    "RPO Actual (last test)": "0 seconds (Multi-AZ)",
    "DR Tests (12 months)": "12 passed, 0 failed",
    "MTTR (Mean Time to Recover)": "4.2 minutes",
    "Availability (12 months)": "99.99%",
}

# Report: one "label: value" line per metric, in insertion order.
print("\n\nDR Metrics:")
for k, v in dr_metrics.items():
    print(f" {k}: {v}")
เคล็ดลับ
- Multi-AZ: ทุก Resource ต้อง Multi-AZ เป็นอย่างน้อย
- Backup: Test Backup Restore ทุกเดือน ไม่ใช่แค่ Backup
- CDK: สร้าง Reusable Construct แชร์ทั้งองค์กร
- Chaos: ทำ Chaos Testing ทุกเดือน ไม่รอให้ล่มจริง
- Runbook: Automate DR Runbook ด้วย SSM ลดเวลา Manual
AWS CDK คืออะไร
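AWS CDK คือเครื่องมือ Infrastructure as Code ที่ใช้ภาษา TypeScript, Python หรือ Java เขียน Construct และ Stack เพื่อสร้าง AWS Resource โดย Construct เป็น Building Block ที่แบ่งเป็นระดับ L1, L2 และ L3 (Pattern) แล้ว Deploy ด้วยคำสั่ง cdk deploy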
Business Continuity คืออะไร
Business Continuity คือการทำให้ธุรกิจทำงานต่อได้เมื่อเกิดเหตุการณ์ไม่คาดคิด เช่น Server ล่ม หรือ Cyber Attack โดยกำหนดเป้าหมาย RTO และ RPO แล้วใช้ Multi-AZ, Multi-Region, Backup & Restore และ DR Site Failover รองรับ
สร้าง DR ด้วย CDK อย่างไร
ใช้ CDK Construct สร้าง RDS แบบ Multi-AZ, S3 CRR, DynamoDB Global Tables, Route53 Failover, CloudFront, Lambda, Aurora Global Database, ECS และ ASG แล้วรวมเป็น Reusable Construct เพื่อใช้ซ้ำได้
ทดสอบ DR Plan อย่างไร
ทดสอบด้วย Chaos Engineering ผ่าน AWS FIS โดยจำลอง AZ Failure และ Region Failure จัด Game Day ทุก Quarter ทำ DR Drill ตาม Runbook ที่ Automate ด้วย SSM Automation วัด RTO และ RPO จริง และทำ Continuous Verification
สรุป
CDK Construct Business Continuity AWS Multi-AZ DR Automation Backup RTO RPO Chaos Engineering FIS Route53 Failover Runbook SSM Production Resilience
