Technology

CDK Construct Chaos Engineering

cdk construct chaos engineering
CDK Construct Chaos Engineering | SiamCafe Blog
2026-01-26· อ. บอม — SiamCafe.net· 10,308 คำ

CDK Construct Chaos Engineering

AWS CDK Construct Chaos Engineering FIS Fault Injection Simulator EC2 ECS EKS RDS Stop Condition CloudWatch IaC TypeScript Production

| FIS Action | Target | Effect | Duration |
|---|---|---|---|
| aws:ec2:stop-instances | EC2 Instance | หยุด Instance ทดสอบ HA | 5-30 นาที |
| aws:ecs:drain-container-instances | ECS Container | Drain Tasks ทดสอบ Scaling | 5-15 นาที |
| aws:fis:inject-api-internal-error | AWS API | จำลอง API Error 500 | 5-10 นาที |
| aws:network:disrupt-connectivity | VPC Subnet | ตัด Network Connectivity | 5-15 นาที |
| aws:ssm:send-command | EC2 via SSM | รัน Stress Test CPU/Memory | 5-30 นาที |
| aws:rds:failover-db-cluster | RDS Aurora | Failover Primary → Replica | 1-5 นาที |

CDK Chaos Construct

# === CDK Chaos Engineering Construct ===

# TypeScript CDK Construct
# import * as cdk from 'aws-cdk-lib';
# import { Construct } from 'constructs';
# import * as fis from 'aws-cdk-lib/aws-fis';
# import * as iam from 'aws-cdk-lib/aws-iam';
#
# export class ChaosExperimentConstruct extends Construct {
#   constructor(scope: Construct, id: string, props: ChaosProps) {
#     super(scope, id);
#
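#     // IAM Role for FIS
#     // NOTE: PowerUserAccess is far broader than this experiment needs. For real
#     // use, scope the role down (e.g. the AWSFaultInjectionSimulatorEC2Access
#     // managed policy, or a custom policy limited to the tagged instances).
#     const fisRole = new iam.Role(this, 'FISRole', {
#       assumedBy: new iam.ServicePrincipal('fis.amazonaws.com'),
#       managedPolicies: [
#         iam.ManagedPolicy.fromAwsManagedPolicyName('PowerUserAccess'),
#       ],
#     });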
#
#     // FIS Experiment Template
#     new fis.CfnExperimentTemplate(this, 'ChaosExperiment', {
#       description: props.description,
#       roleArn: fisRole.roleArn,
#       stopConditions: [{
#         source: 'aws:cloudwatch:alarm',
#         value: props.stopAlarmArn,
#       }],
#       targets: {
#         'ec2-instances': {
#           resourceType: 'aws:ec2:instance',
#           selectionMode: 'COUNT(1)',
#           resourceTags: { 'chaos': 'true' },
#         },
#       },
#       actions: {
#         'stop-instance': {
#           actionId: 'aws:ec2:stop-instances',
#           parameters: { startInstancesAfterDuration: 'PT5M' },
#           targets: { Instances: 'ec2-instances' },
#         },
#       },
#       tags: { Environment: props.environment },
#     });
#   }
# }

from dataclasses import dataclass

@dataclass
class CDKConstruct:
    """One catalogue entry describing a chaos-engineering CDK construct.

    Every field is a plain display value; the class performs no validation
    and only relies on the methods generated by ``@dataclass``.
    """

    # Construct class name, e.g. "ChaosEC2StopConstruct".
    construct: str
    # Construct level label, e.g. "L3 (Pattern)".
    level: str
    # Human-readable summary of what the construct bundles,
    # e.g. "FIS Template + IAM Role + CloudWatch Alarm".
    resources: str
    # Prop names as a single comma-separated string,
    # e.g. "targetTag, duration, stopAlarmArn, environment".
    props: str
    # Flag shown on the "Reusable:" line of the printed summary.
    reusable: bool

# Catalogue of the chaos constructs covered by the article, one entry each.
constructs = [
    CDKConstruct(
        construct="ChaosEC2StopConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + CloudWatch Alarm",
        props="targetTag, duration, stopAlarmArn, environment",
        reusable=True,
    ),
    CDKConstruct(
        construct="ChaosNetworkDisruptConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + VPC Subnet Target",
        props="subnetIds, duration, stopAlarmArn",
        reusable=True,
    ),
    CDKConstruct(
        construct="ChaosECSConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + ECS Cluster Target",
        props="clusterArn, duration, stopAlarmArn",
        reusable=True,
    ),
    CDKConstruct(
        construct="ChaosRDSFailoverConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + RDS Cluster Target",
        props="dbClusterIdentifier, stopAlarmArn",
        reusable=True,
    ),
    CDKConstruct(
        construct="ChaosCPUStressConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + SSM Document",
        props="targetTag, cpuPercent, duration, stopAlarmArn",
        reusable=True,
    ),
]

# Print a banner, then a four-line summary for every catalogue entry.
print("=== CDK Chaos Constructs ===")
for c in constructs:
    print(
        f"  [{c.construct}] Level: {c.level}",
        f"    Resources: {c.resources}",
        f"    Props: {c.props}",
        f"    Reusable: {c.reusable}",
        sep="\n",
    )

CI/CD Integration

# === Chaos in CI/CD Pipeline ===

# GitHub Actions
# name: Chaos Engineering Pipeline
# on:
#   schedule:
#     - cron: '0 10 * * 1'  # Every Monday 10 AM
#   workflow_dispatch:
# jobs:
#   deploy-staging:
#     runs-on: ubuntu-latest
#     steps:
#       - uses: actions/checkout@v4
#       - run: npm ci && npx cdk deploy --app 'npx ts-node app.ts' ChaosStack
#
#   run-chaos:
#     needs: deploy-staging
#     runs-on: ubuntu-latest
#     steps:
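#       - name: Start FIS Experiment
#         run: |
#           EXPERIMENT_ID=$(aws fis start-experiment \
#             --experiment-template-id $TEMPLATE_ID \
#             --query 'experiment.id' --output text)
#           echo "Started: $EXPERIMENT_ID"
#           # Shell variables do not survive between steps; export via GITHUB_ENV
#           echo "EXPERIMENT_ID=$EXPERIMENT_ID" >> "$GITHUB_ENV"
#       - name: Wait for Completion
#         run: |
#           # Poll until the experiment reaches a terminal state
#           while true; do
#             STATUS=$(aws fis get-experiment --id "$EXPERIMENT_ID" \
#               --query 'experiment.state.status' --output text)
#             echo "Status: $STATUS"
#             case "$STATUS" in
#               completed) break ;;
#               stopped|failed) exit 1 ;;
#             esac
#             sleep 15
#           done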
#       - name: Verify Recovery
#         run: |
#           curl -f https://staging.example.com/health || exit 1

@dataclass
class PipelinePhase:
    """One stage of the chaos pipeline summary printed by this snippet.

    Every field is a plain display string; the class performs no validation
    and only relies on the methods generated by ``@dataclass``.
    """

    # Stage name, e.g. "Pre-chaos Baseline" or "Report".
    phase: str
    # What starts the stage or when it runs, e.g. "aws fis start-experiment".
    trigger: str
    # Description of the work done in this stage.
    action: str
    # Condition shown on the "Success:" line of the printed summary.
    success_criteria: str
    # Follow-up shown on the "Failure:" line of the printed summary.
    failure_action: str

# Ordered stages of a chaos run, from baseline capture through reporting.
phases = [
    PipelinePhase(
        phase="Pre-chaos Baseline",
        trigger="Before Experiment",
        action="บันทึก Metrics Baseline (Latency Error Rate Throughput)",
        success_criteria="Baseline recorded ไม่มี Pre-existing Issues",
        failure_action="Fix Issues ก่อน Run Chaos",
    ),
    PipelinePhase(
        phase="Start Experiment",
        trigger="aws fis start-experiment",
        action="Run FIS Experiment ตาม Template",
        success_criteria="Experiment Started สถานะ running",
        failure_action="ตรวจ IAM Permission Template Config",
    ),
    PipelinePhase(
        phase="Monitor During Chaos",
        trigger="CloudWatch Dashboard",
        action="ดู Metrics ระหว่าง Chaos ทำงาน",
        success_criteria="Recovery ใน SLA Target (เช่น 2 นาที)",
        failure_action="Stop Condition Trigger หรือ Manual Stop",
    ),
    PipelinePhase(
        phase="Post-chaos Verify",
        trigger="Health Check + Metrics",
        action="ตรวจ Service ฟื้นตัวปกติ Metrics กลับ Baseline",
        success_criteria="Health OK Metrics ปกติใน 5 นาที",
        failure_action="Investigate ทำไม Recovery ช้า/ล้มเหลว",
    ),
    PipelinePhase(
        phase="Report",
        trigger="หลัง Experiment เสร็จ",
        action="สร้าง Report สรุปผล Hypothesis vs Actual",
        success_criteria="Report Generated Findings Documented",
        failure_action="สร้าง Action Items สำหรับ Improvement",
    ),
]

# Print a banner, then a four-line summary for every stage.
print("=== CI/CD Chaos Pipeline ===")
for p in phases:
    print(
        f"  [{p.phase}] Trigger: {p.trigger}",
        f"    Action: {p.action}",
        f"    Success: {p.success_criteria}",
        f"    Failure: {p.failure_action}",
        sep="\n",
    )

Best Practices

# === Chaos Best Practices ===

@dataclass
class BestPractice:
    """One chaos-engineering recommendation printed by this snippet.

    Every field is a plain display string; the class performs no validation
    and only relies on the methods generated by ``@dataclass``.
    """

    # Short title of the recommendation, e.g. "Always Set Stop Conditions".
    practice: str
    # Rationale shown on the "Why:" line of the printed summary.
    why: str
    # Approach shown on the "How:" line of the printed summary.
    how: str
    # Illustrative CDK fragment or code comment shown on the "CDK:" line.
    cdk_implementation: str

# Recommendations paired with the CDK fragment that expresses each one.
practices = [
    BestPractice(
        practice="Always Set Stop Conditions",
        why="ป้องกัน Blast Radius เกินที่คาดไว้",
        how="CloudWatch Alarm ตรวจ Error Rate Latency",
        cdk_implementation="stopConditions: [{ source: 'aws:cloudwatch:alarm', value: alarmArn }]",
    ),
    BestPractice(
        practice="Tag-based Targeting",
        why="ควบคุม Target ชัดเจน ไม่กระทบ Resource อื่น",
        how="ใช้ Tag chaos=true เฉพาะ Resource ที่ต้องการ",
        cdk_implementation="resourceTags: { chaos: 'true', environment: 'staging' }",
    ),
    BestPractice(
        practice="Start Small",
        why="ลด Risk เริ่มจาก 1 Instance/Container",
        how="selectionMode COUNT(1) ก่อนเพิ่ม",
        cdk_implementation="selectionMode: 'COUNT(1)' → 'PERCENT(25)'",
    ),
    BestPractice(
        practice="Staging First",
        why="ตรวจ Experiment ทำงานถูกต้องก่อน Production",
        how="Deploy Chaos Stack ไป Staging ก่อน",
        cdk_implementation="new ChaosStack(app, 'ChaosStaging', { env: staging })",
    ),
    BestPractice(
        practice="Hypothesis-driven",
        why="ต้องรู้ว่าคาดหวังอะไรก่อน Run",
        how="เขียน Hypothesis เป็น Comment ใน CDK Code",
        cdk_implementation="// Hypothesis: ASG launches new instance within 2 min",
    ),
]

# Print a banner, then a four-line summary for every recommendation.
print("=== Best Practices ===")
for p in practices:
    print(
        f"  [{p.practice}]",
        f"    Why: {p.why}",
        f"    How: {p.how}",
        f"    CDK: {p.cdk_implementation}",
        sep="\n",
    )

เคล็ดลับ

AWS CDK คืออะไร

IaC Framework TypeScript Python Java Construct L1 L2 L3 Stack App Synth Deploy Diff CloudFormation Type Safety IDE Testing

Chaos Engineering กับ CDK ทำอย่างไร

CDK สร้าง FIS Experiment Template IaC Custom Construct Reusable Target Tag IAM Stop Condition Git PR Review CI/CD Staging

FIS Experiment สร้างอย่างไร

CfnExperimentTemplate Actions stop-instances drain network-disrupt api-error Targets Tag selectionMode Stop CloudWatch Alarm Duration

Best Practices มีอะไร

Stop Condition เสมอ Tag Target Staging First Start Small Hypothesis Report Schedule Weekly Runbook Team Buy-in Documentation

สรุป

CDK Construct Chaos Engineering AWS FIS Custom Construct Reusable CI/CD Stop Condition Tag Target Staging Hypothesis Report Production

📖 บทความที่เกี่ยวข้อง

- Ansible AWX Tower Chaos Engineering — อ่านบทความ →
- CDK Construct API Gateway Pattern — อ่านบทความ →
- CDK Construct Database Migration — อ่านบทความ →
- CDK Construct Kubernetes Deployment — อ่านบทความ →
- CDK Construct Cloud Migration Strategy — อ่านบทความ →

📚 ดูบทความทั้งหมด →