
CDK Construct Chaos Engineering — ทดสอบ AWS
CDK Construct Chaos Engineering

ใช้ AWS CDK Construct ทำ Chaos Engineering ด้วย AWS FIS (Fault Injection Simulator) กับ EC2, ECS, EKS และ RDS โดยกำหนด Stop Condition ผ่าน CloudWatch และจัดการทั้งหมดเป็น IaC ด้วย TypeScript สำหรับ Production
| FIS Action | Target | Effect | Duration |
|---|---|---|---|
| aws:ec2:stop-instances | EC2 Instance | หยุด Instance ทดสอบ HA | 5-30 นาที |
| aws:ecs:drain-container-instances | ECS Container | Drain Tasks ทดสอบ Scaling | 5-15 นาที |
| aws:fis:inject-api-internal-error | AWS API | จำลอง API Error 500 | 5-10 นาที |
| aws:network:disrupt-connectivity | VPC Subnet | ตัด Network Connectivity | 5-15 นาที |
| aws:ssm:send-command | EC2 via SSM | รัน Stress Test CPU/Memory | 5-30 นาที |
| aws:rds:failover-db-cluster | RDS Aurora | Failover Primary → Replica | 1-5 นาที |
CDK Chaos Construct
# === CDK Chaos Engineering Construct ===
# TypeScript CDK Construct
# import * as cdk from 'aws-cdk-lib';
# import * as fis from 'aws-cdk-lib/aws-fis';
# import * as iam from 'aws-cdk-lib/aws-iam';
# import { Construct } from 'constructs';
#
# export class ChaosExperimentConstruct extends Construct {
# constructor(scope: Construct, id: string, props: ChaosProps) {
# super(scope, id);
#
# // IAM Role for FIS
# const fisRole = new iam.Role(this, 'FISRole', {
# assumedBy: new iam.ServicePrincipal('fis.amazonaws.com'),
# managedPolicies: [
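# // NOTE: PowerUserAccess is far broader than FIS needs; for production prefer a
# // least-privilege policy (e.g. the AWS managed 'service-role/AWSFaultInjectionSimulatorEC2Access').
# iam.ManagedPolicy.fromAwsManagedPolicyName('PowerUserAccess'),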
# ],
# });
#
# // FIS Experiment Template
# new fis.CfnExperimentTemplate(this, 'ChaosExperiment', {
# description: props.description,
# roleArn: fisRole.roleArn,
# stopConditions: [{
# source: 'aws:cloudwatch:alarm',
# value: props.stopAlarmArn,
# }],
# targets: {
# 'ec2-instances': {
# resourceType: 'aws:ec2:instance',
# selectionMode: 'COUNT(1)',
# resourceTags: { 'chaos': 'true' },
# },
# },
# actions: {
# 'stop-instance': {
# actionId: 'aws:ec2:stop-instances',
# parameters: { startInstancesAfterDuration: 'PT5M' },
# targets: { Instances: 'ec2-instances' },
# },
# },
# tags: { Environment: props.environment },
# });
# }
# }
from dataclasses import dataclass
@dataclass
class CDKConstruct:
    """Catalog entry describing one CDK chaos-engineering construct."""

    construct: str  # construct class name, e.g. "ChaosEC2StopConstruct"
    level: str  # CDK construct level label, e.g. "L3 (Pattern)"
    resources: str  # free-text summary of the resources the construct bundles
    props: str  # comma-separated names of the props the construct accepts
    reusable: bool  # flag shown on the "Reusable" line of the listing
# Catalog of the chaos constructs covered by this article, one entry per
# FIS experiment type.
constructs = [
    CDKConstruct(
        "ChaosEC2StopConstruct",
        "L3 (Pattern)",
        "FIS Template + IAM Role + CloudWatch Alarm",
        "targetTag, duration, stopAlarmArn, environment",
        True,
    ),
    CDKConstruct(
        "ChaosNetworkDisruptConstruct",
        "L3 (Pattern)",
        "FIS Template + IAM Role + VPC Subnet Target",
        "subnetIds, duration, stopAlarmArn",
        True,
    ),
    CDKConstruct(
        "ChaosECSConstruct",
        "L3 (Pattern)",
        "FIS Template + IAM Role + ECS Cluster Target",
        "clusterArn, duration, stopAlarmArn",
        True,
    ),
    CDKConstruct(
        "ChaosRDSFailoverConstruct",
        "L3 (Pattern)",
        "FIS Template + IAM Role + RDS Cluster Target",
        "dbClusterIdentifier, stopAlarmArn",
        True,
    ),
    CDKConstruct(
        "ChaosCPUStressConstruct",
        "L3 (Pattern)",
        "FIS Template + IAM Role + SSM Document",
        "targetTag, cpuPercent, duration, stopAlarmArn",
        True,
    ),
]

# Print a four-line summary for every catalog entry.
print("=== CDK Chaos Constructs ===")
for c in constructs:
    print(f" [{c.construct}] Level: {c.level}")
    print(f" Resources: {c.resources}")
    print(f" Props: {c.props}")
    print(f" Reusable: {c.reusable}")
CI/CD Integration

# === Chaos in CI/CD Pipeline ===
# GitHub Actions
# name: Chaos Engineering Pipeline
# on:
# schedule:
# - cron: '0 10 * * 1' # Every Monday 10:00 UTC (GitHub Actions schedules run in UTC)
# workflow_dispatch:
# jobs:
# deploy-staging:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# - run: npm ci && npx cdk deploy --app 'npx ts-node app.ts' ChaosStack
#
# run-chaos:
# needs: deploy-staging
# runs-on: ubuntu-latest
# steps:
# - name: Start FIS Experiment
# run: |
# EXPERIMENT_ID=$(aws fis start-experiment \
# --experiment-template-id $TEMPLATE_ID \
# --query 'experiment.id' --output text)
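# echo "Started: $EXPERIMENT_ID"
# # Shell variables do not survive across steps; export the ID for later steps
# echo "EXPERIMENT_ID=$EXPERIMENT_ID" >> "$GITHUB_ENV"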
# - name: Wait for Completion
# run: |
# aws fis get-experiment --id $EXPERIMENT_ID \
# --query 'experiment.state.status'
# - name: Verify Recovery
# run: |
# curl -f https://staging.example.com/health || exit 1
@dataclass
class PipelinePhase:
    """One phase of the chaos-engineering CI/CD pipeline."""

    phase: str  # phase name, e.g. "Pre-chaos Baseline"
    trigger: str  # what starts or drives the phase (event, command, or tool)
    action: str  # what is done during the phase
    success_criteria: str  # condition under which the phase counts as passed
    failure_action: str  # what to do when the success criteria are not met
# Ordered phases of a chaos run inside the CI/CD pipeline.
phases = [
    PipelinePhase(
        "Pre-chaos Baseline",
        "Before Experiment",
        "บันทึก Metrics Baseline (Latency Error Rate Throughput)",
        "Baseline recorded ไม่มี Pre-existing Issues",
        "Fix Issues ก่อน Run Chaos",
    ),
    PipelinePhase(
        "Start Experiment",
        "aws fis start-experiment",
        "Run FIS Experiment ตาม Template",
        "Experiment Started สถานะ running",
        "ตรวจ IAM Permission Template Config",
    ),
    PipelinePhase(
        "Monitor During Chaos",
        "CloudWatch Dashboard",
        "ดู Metrics ระหว่าง Chaos ทำงาน",
        "Recovery ใน SLA Target (เช่น 2 นาที)",
        "Stop Condition Trigger หรือ Manual Stop",
    ),
    PipelinePhase(
        "Post-chaos Verify",
        "Health Check + Metrics",
        "ตรวจ Service ฟื้นตัวปกติ Metrics กลับ Baseline",
        "Health OK Metrics ปกติใน 5 นาที",
        "Investigate ทำไม Recovery ช้า/ล้มเหลว",
    ),
    PipelinePhase(
        "Report",
        "หลัง Experiment เสร็จ",
        "สร้าง Report สรุปผล Hypothesis vs Actual",
        "Report Generated Findings Documented",
        "สร้าง Action Items สำหรับ Improvement",
    ),
]

# Print a four-line summary for every pipeline phase.
print("=== CI/CD Chaos Pipeline ===")
for p in phases:
    print(f" [{p.phase}] Trigger: {p.trigger}")
    print(f" Action: {p.action}")
    print(f" Success: {p.success_criteria}")
    print(f" Failure: {p.failure_action}")
Best Practices
# === Chaos Best Practices ===
@dataclass
class BestPractice:
    """A chaos-engineering best practice and how to express it in CDK."""

    practice: str  # short name of the practice, e.g. "Start Small"
    why: str  # reason the practice matters
    how: str  # how to apply the practice
    cdk_implementation: str  # CDK code fragment illustrating the practice
# Best practices covered by this article, each paired with a CDK snippet.
practices = [
    BestPractice(
        "Always Set Stop Conditions",
        "ป้องกัน Blast Radius เกินที่คาดไว้",
        "CloudWatch Alarm ตรวจ Error Rate Latency",
        "stopConditions: [{ source: 'aws:cloudwatch:alarm', value: alarmArn }]",
    ),
    BestPractice(
        "Tag-based Targeting",
        "ควบคุม Target ชัดเจน ไม่กระทบ Resource อื่น",
        "ใช้ Tag chaos=true เฉพาะ Resource ที่ต้องการ",
        "resourceTags: { chaos: 'true', environment: 'staging' }",
    ),
    BestPractice(
        "Start Small",
        "ลด Risk เริ่มจาก 1 Instance/Container",
        "selectionMode COUNT(1) ก่อนเพิ่ม",
        "selectionMode: 'COUNT(1)' → 'PERCENT(25)'",
    ),
    BestPractice(
        "Staging First",
        "ตรวจ Experiment ทำงานถูกต้องก่อน Production",
        "Deploy Chaos Stack ไป Staging ก่อน",
        "new ChaosStack(app, 'ChaosStaging', { env: staging })",
    ),
    BestPractice(
        "Hypothesis-driven",
        "ต้องรู้ว่าคาดหวังอะไรก่อน Run",
        "เขียน Hypothesis เป็น Comment ใน CDK Code",
        "// Hypothesis: ASG launches new instance within 2 min",
    ),
]

# Print a four-line summary for every best practice.
print("=== Best Practices ===")
for p in practices:
    print(f" [{p.practice}]")
    print(f" Why: {p.why}")
    print(f" How: {p.how}")
    print(f" CDK: {p.cdk_implementation}")
เคล็ดลับ
- Stop Condition: ทุก Experiment ต้องมี Stop Condition เสมอ
- Tag: ใช้ Tag chaos=true เลือก Target ชัดเจน
- Construct Library: สร้าง Reusable Construct เป็น npm Package
- Schedule: Run Chaos ทุกสัปดาห์ สร้าง Confidence
- Report: บันทึกผลทุก Experiment เป็น Knowledge Base
AWS CDK คืออะไร
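AWS CDK เป็น IaC Framework ที่เขียน Infrastructure ด้วยภาษาโปรแกรม เช่น TypeScript, Python และ Java ประกอบด้วย Construct ระดับ L1, L2 และ L3 ที่รวมกันเป็น Stack และ App ใช้คำสั่ง Synth, Diff และ Deploy เพื่อสร้างและ Deploy CloudFormation Template พร้อมข้อดีด้าน Type Safety, IDE Support และ Testing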