SiamCafe · Blog
CDK Construct Chaos Engineering — ทดสอบ AWS
บทความ

CDK Construct Chaos Engineering — ทดสอบ AWS

เผยแพร่ 28 พฤษภาคม 2569

CDK Construct Chaos Engineering

CDK Construct Chaos Engineering — ทดสอบ AWS

AWS CDK Construct Chaos Engineering FIS Fault Injection Simulator EC2 ECS EKS RDS Stop Condition CloudWatch IaC TypeScript Production

FIS Action | Target | Effect | Duration
aws:ec2:stop-instances | EC2 Instance | หยุด Instance ทดสอบ HA | 5-30 นาที
aws:ecs:drain-container-instances | ECS Container | Drain Tasks ทดสอบ Scaling | 5-15 นาที
aws:fis:inject-api-internal-error | AWS API | จำลอง API Error 500 | 5-10 นาที
aws:network:disrupt-connectivity | VPC Subnet | ตัด Network Connectivity | 5-15 นาที
aws:ssm:send-command | EC2 via SSM | รัน Stress Test CPU/Memory | 5-30 นาที
aws:rds:failover-db-cluster | RDS Aurora | Failover Primary → Replica | 1-5 นาที

CDK Chaos Construct

# === CDK Chaos Engineering Construct ===

# TypeScript CDK Construct
# import * as cdk from 'aws-cdk-lib';
# import * as fis from 'aws-cdk-lib/aws-fis';
# import * as iam from 'aws-cdk-lib/aws-iam';
# import { Construct } from 'constructs';  // CDK v2: Construct lives in 'constructs'
#
# export class ChaosExperimentConstruct extends Construct {
#   constructor(scope: Construct, id: string, props: ChaosProps) {
#     super(scope, id);
#
#     // IAM Role for FIS
#     const fisRole = new iam.Role(this, 'FISRole', {
#       assumedBy: new iam.ServicePrincipal('fis.amazonaws.com'),
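#       // Least privilege: use the FIS-scoped policy, not PowerUserAccess
#       managedPolicies: [
#         iam.ManagedPolicy.fromAwsManagedPolicyName(
#           'service-role/AWSFaultInjectionSimulatorEC2Access'),
#       ],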
#     });
#
#     // FIS Experiment Template
#     new fis.CfnExperimentTemplate(this, 'ChaosExperiment', {
#       description: props.description,
#       roleArn: fisRole.roleArn,
#       stopConditions: [{
#         source: 'aws:cloudwatch:alarm',
#         value: props.stopAlarmArn,
#       }],
#       targets: {
#         'ec2-instances': {
#           resourceType: 'aws:ec2:instance',
#           selectionMode: 'COUNT(1)',
#           resourceTags: { 'chaos': 'true' },
#         },
#       },
#       actions: {
#         'stop-instance': {
#           actionId: 'aws:ec2:stop-instances',
#           parameters: { startInstancesAfterDuration: 'PT5M' },
#           targets: { Instances: 'ec2-instances' },
#         },
#       },
#       tags: { Environment: props.environment },
#     });
#   }
# }

from dataclasses import dataclass

@dataclass
class CDKConstruct:
    """Catalogue record describing one chaos-engineering CDK construct.

    Instances are plain data holders; they are listed and printed by the
    report that follows this definition.
    """

    # Construct class name, e.g. "ChaosEC2StopConstruct".
    construct: str
    # Construct level label, e.g. "L3 (Pattern)".
    level: str
    # Human-readable summary of the resources the construct bundles.
    resources: str
    # Comma-separated names of the props the construct accepts.
    props: str
    # Whether the construct is intended to be reused.
    reusable: bool

# Catalogue of the chaos constructs discussed in the article. Keyword
# arguments keep every record self-describing.
constructs = [
    CDKConstruct(
        construct="ChaosEC2StopConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + CloudWatch Alarm",
        props="targetTag, duration, stopAlarmArn, environment",
        reusable=True,
    ),
    CDKConstruct(
        construct="ChaosNetworkDisruptConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + VPC Subnet Target",
        props="subnetIds, duration, stopAlarmArn",
        reusable=True,
    ),
    CDKConstruct(
        construct="ChaosECSConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + ECS Cluster Target",
        props="clusterArn, duration, stopAlarmArn",
        reusable=True,
    ),
    CDKConstruct(
        construct="ChaosRDSFailoverConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + RDS Cluster Target",
        props="dbClusterIdentifier, stopAlarmArn",
        reusable=True,
    ),
    CDKConstruct(
        construct="ChaosCPUStressConstruct",
        level="L3 (Pattern)",
        resources="FIS Template + IAM Role + SSM Document",
        props="targetTag, cpuPercent, duration, stopAlarmArn",
        reusable=True,
    ),
]

# Header first, then a four-line summary for each construct.
print("=== CDK Chaos Constructs ===")
for c in constructs:
    print(
        f"  [{c.construct}] Level: {c.level}",
        f"    Resources: {c.resources}",
        f"    Props: {c.props}",
        f"    Reusable: {c.reusable}",
        sep="\n",
    )

CI/CD Integration

CDK Construct Chaos Engineering — ทดสอบ AWS
# === Chaos in CI/CD Pipeline ===

# GitHub Actions
# name: Chaos Engineering Pipeline
# on:
#   schedule:
#     - cron: '0 10 * * 1'  # Every Monday 10 AM
#   workflow_dispatch:
# jobs:
#   deploy-staging:
#     runs-on: ubuntu-latest
#     steps:
#       - uses: actions/checkout@v4
#       - run: npm ci && npx cdk deploy --app 'npx ts-node app.ts' ChaosStack
#
#   run-chaos:
#     needs: deploy-staging
#     runs-on: ubuntu-latest
#     steps:
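#       - name: Start FIS Experiment
#         run: |
#           EXPERIMENT_ID=$(aws fis start-experiment \
#             --experiment-template-id $TEMPLATE_ID \
#             --query 'experiment.id' --output text)
#           echo "Started: $EXPERIMENT_ID"
#           # Shell variables do not survive between steps; export via GITHUB_ENV
#           echo "EXPERIMENT_ID=$EXPERIMENT_ID" >> "$GITHUB_ENV"
#       - name: Wait for Completion
#         run: |
#           # Poll until the experiment reaches a terminal state
#           while true; do
#             STATUS=$(aws fis get-experiment --id "$EXPERIMENT_ID" \
#               --query 'experiment.state.status' --output text)
#             echo "Status: $STATUS"
#             case "$STATUS" in
#               completed) break ;;
#               stopped|failed|cancelled) exit 1 ;;
#             esac
#             sleep 15
#           done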
#       - name: Verify Recovery
#         run: |
#           curl -f https://staging.example.com/health || exit 1

@dataclass
class PipelinePhase:
    """Record describing one phase of the chaos CI/CD pipeline.

    Instances are plain data holders; they are listed and printed by the
    report that follows this definition.
    """

    # Phase name, e.g. "Pre-chaos Baseline".
    phase: str
    # What starts the phase (a point in time, a command, or a tool).
    trigger: str
    # What is done during the phase.
    action: str
    # Condition under which the phase counts as successful.
    success_criteria: str
    # What to do when the success criteria are not met.
    failure_action: str

# Ordered phases of the chaos pipeline, from baseline capture to reporting.
# Keyword arguments keep every record self-describing.
phases = [
    PipelinePhase(
        phase="Pre-chaos Baseline",
        trigger="Before Experiment",
        action="บันทึก Metrics Baseline (Latency Error Rate Throughput)",
        success_criteria="Baseline recorded ไม่มี Pre-existing Issues",
        failure_action="Fix Issues ก่อน Run Chaos",
    ),
    PipelinePhase(
        phase="Start Experiment",
        trigger="aws fis start-experiment",
        action="Run FIS Experiment ตาม Template",
        success_criteria="Experiment Started สถานะ running",
        failure_action="ตรวจ IAM Permission Template Config",
    ),
    PipelinePhase(
        phase="Monitor During Chaos",
        trigger="CloudWatch Dashboard",
        action="ดู Metrics ระหว่าง Chaos ทำงาน",
        success_criteria="Recovery ใน SLA Target (เช่น 2 นาที)",
        failure_action="Stop Condition Trigger หรือ Manual Stop",
    ),
    PipelinePhase(
        phase="Post-chaos Verify",
        trigger="Health Check + Metrics",
        action="ตรวจ Service ฟื้นตัวปกติ Metrics กลับ Baseline",
        success_criteria="Health OK Metrics ปกติใน 5 นาที",
        failure_action="Investigate ทำไม Recovery ช้า/ล้มเหลว",
    ),
    PipelinePhase(
        phase="Report",
        trigger="หลัง Experiment เสร็จ",
        action="สร้าง Report สรุปผล Hypothesis vs Actual",
        success_criteria="Report Generated Findings Documented",
        failure_action="สร้าง Action Items สำหรับ Improvement",
    ),
]

# Header first, then a four-line summary for each phase.
print("=== CI/CD Chaos Pipeline ===")
for p in phases:
    print(
        f"  [{p.phase}] Trigger: {p.trigger}",
        f"    Action: {p.action}",
        f"    Success: {p.success_criteria}",
        f"    Failure: {p.failure_action}",
        sep="\n",
    )

Best Practices

# === Chaos Best Practices ===

@dataclass
class BestPractice:
    """Record describing one chaos-engineering best practice.

    Instances are plain data holders; they are listed and printed by the
    report that follows this definition.
    """

    # Short name of the practice, e.g. "Start Small".
    practice: str
    # Rationale for following the practice.
    why: str
    # How to apply the practice.
    how: str
    # Snippet of CDK/TypeScript text illustrating the practice.
    cdk_implementation: str

# Best practices highlighted by the article, each paired with a CDK snippet.
# Keyword arguments keep every record self-describing.
practices = [
    BestPractice(
        practice="Always Set Stop Conditions",
        why="ป้องกัน Blast Radius เกินที่คาดไว้",
        how="CloudWatch Alarm ตรวจ Error Rate Latency",
        cdk_implementation="stopConditions: [{ source: 'aws:cloudwatch:alarm', value: alarmArn }]",
    ),
    BestPractice(
        practice="Tag-based Targeting",
        why="ควบคุม Target ชัดเจน ไม่กระทบ Resource อื่น",
        how="ใช้ Tag chaos=true เฉพาะ Resource ที่ต้องการ",
        cdk_implementation="resourceTags: { chaos: 'true', environment: 'staging' }",
    ),
    BestPractice(
        practice="Start Small",
        why="ลด Risk เริ่มจาก 1 Instance/Container",
        how="selectionMode COUNT(1) ก่อนเพิ่ม",
        cdk_implementation="selectionMode: 'COUNT(1)' → 'PERCENT(25)'",
    ),
    BestPractice(
        practice="Staging First",
        why="ตรวจ Experiment ทำงานถูกต้องก่อน Production",
        how="Deploy Chaos Stack ไป Staging ก่อน",
        cdk_implementation="new ChaosStack(app, 'ChaosStaging', { env: staging })",
    ),
    BestPractice(
        practice="Hypothesis-driven",
        why="ต้องรู้ว่าคาดหวังอะไรก่อน Run",
        how="เขียน Hypothesis เป็น Comment ใน CDK Code",
        cdk_implementation="// Hypothesis: ASG launches new instance within 2 min",
    ),
]

# Header first, then a four-line summary for each practice.
print("=== Best Practices ===")
for p in practices:
    print(
        f"  [{p.practice}]",
        f"    Why: {p.why}",
        f"    How: {p.how}",
        f"    CDK: {p.cdk_implementation}",
        sep="\n",
    )

เคล็ดลับ

  • Stop Condition: ทุก Experiment ต้องมี Stop Condition เสมอ
  • Tag: ใช้ Tag chaos=true เลือก Target ชัดเจน
  • Construct Library: สร้าง Reusable Construct เป็น npm Package
  • Schedule: Run Chaos ทุกสัปดาห์ สร้าง Confidence
  • Report: บันทึกผลทุก Experiment เป็น Knowledge Base

AWS CDK คืออะไร

IaC Framework TypeScript Python Java Construct L1 L2 L3 Stack App Synth Deploy Diff CloudFormation Type Safety IDE Testing