Pulumi IaC Capacity Planning
Pulumi Infrastructure as Code Capacity Planning Auto-scaling Cost Optimization Python TypeScript AWS Azure GCP Kubernetes
| Feature | Pulumi | Terraform | CDK (AWS) |
|---|---|---|---|
| Language | Python, TS, Go, C#, Java | HCL | Python, TS, Java, C# |
| State | Pulumi Cloud / S3 / Local | S3 / TF Cloud / Local | CloudFormation |
| Multi-cloud | AWS, Azure, GCP, K8s, 100+ | AWS, Azure, GCP, 3000+ | AWS only |
| Policy | CrossGuard (Python/TS) | Sentinel (paid) | cdk-nag |
| Testing | Unit test with language tools | Terratest (Go) | assertions lib |
| Learning Curve | ต่ำ (ใช้ภาษาที่รู้) | กลาง (เรียน HCL) | กลาง (เรียน Constructs) |
Capacity Planning with Pulumi
# === Pulumi Capacity Planning ===
# import pulumi
# import pulumi_aws as aws
#
# config = pulumi.Config()
# env = pulumi.get_stack()  # dev, staging, prod
#
# # Capacity config per environment
# capacity = {
#     "dev": {"instance_type": "t3.small", "min": 1, "max": 2, "desired": 1},
#     "staging": {"instance_type": "t3.medium", "min": 2, "max": 4, "desired": 2},
#     "prod": {"instance_type": "c6i.xlarge", "min": 3, "max": 20, "desired": 5},
# }
# cap = capacity[env]
#
# # Auto Scaling Group
# asg = aws.autoscaling.Group("app-asg",
#     min_size=cap["min"],
#     max_size=cap["max"],
#     desired_capacity=cap["desired"],
#     launch_template=aws.autoscaling.GroupLaunchTemplateArgs(
#         id=launch_template.id,
#         version="$Latest",
#     ),
#     target_group_arns=[target_group.arn],
#     vpc_zone_identifiers=private_subnet_ids,
# )
#
# # Target Tracking Scaling Policy
# cpu_policy = aws.autoscaling.Policy("cpu-scaling",
#     autoscaling_group_name=asg.name,
#     policy_type="TargetTrackingScaling",
#     target_tracking_configuration=aws.autoscaling.PolicyTargetTrackingConfigurationArgs(
#         predefined_metric_specification=aws.autoscaling.PolicyTargetTrackingConfigurationPredefinedMetricSpecificationArgs(
#             predefined_metric_type="ASGAverageCPUUtilization",
#         ),
#         target_value=70.0,
#     ),
# )
from dataclasses import dataclass
@dataclass
class CapacityPlan:
    """Capacity plan for a single service, stored as display-ready values.

    The load, growth, instance and cost fields are free-form text meant for
    printing in a report; only the instance bounds are numeric.
    """

    service: str  # service name, e.g. "API Gateway"
    current_load: str  # description of the load observed today
    growth_6mo: str  # expected growth over the next six months
    instance_type: str  # instance type, optionally with its specs
    min_instances: int  # lower bound of the scaling range
    max_instances: int  # upper bound of the scaling range
    scaling_metric: str  # metric(s) that drive scaling
    cost_monthly: str  # monthly cost description
# Capacity plans for each service tier: current load, six-month growth,
# instance type, scaling range (min, max), scaling metric and monthly cost.
plans = [
    CapacityPlan(
        "API Gateway",
        "500 RPS, 50ms p99", "+100% (1000 RPS)",
        "c6i.xlarge (4 vCPU, 8GB)", 3, 20,
        "CPU 70% + Request Count",
        "$450 (baseline) - $3000 (peak)",
    ),
    CapacityPlan(
        "Worker Service",
        "10K jobs/hr, 200ms avg", "+50% (15K jobs/hr)",
        "c6i.2xlarge (8 vCPU, 16GB)", 2, 15,
        "SQS Queue Depth",
        "$600 (baseline) - $4500 (peak)",
    ),
    CapacityPlan(
        "Database (RDS)",
        "5K QPS, 80% read", "+80% (9K QPS)",
        "r6i.2xlarge (8 vCPU, 64GB)", 1, 5,
        "Read Replica auto + CPU",
        "$800 (primary) + $400/replica",
    ),
    CapacityPlan(
        "Cache (ElastiCache)",
        "50K ops/sec, 4GB data", "+60% (80K ops/sec)",
        "r6g.xlarge (4 vCPU, 26GB)", 2, 6,
        "Memory + Connections",
        "$300 (baseline)",
    ),
]

# Print a short multi-line report for every plan.
print("=== Capacity Plans ===")
for p in plans:
    print(f" [{p.service}] {p.instance_type}")
    print(f" Current: {p.current_load}")
    print(f" Growth 6mo: {p.growth_6mo}")
    print(f" Scale: {p.min_instances}-{p.max_instances} | Metric: {p.scaling_metric}")
    print(f" Cost: {p.cost_monthly}")
Cost Optimization
# === Pulumi Cost Policies ===
# CrossGuard Policy (Python)
# from pulumi_policy import (
#     EnforcementLevel, PolicyPack, ResourceValidationPolicy
# )
#
# def max_instance_size(args, report_violation):
#     if args.resource_type == "aws:ec2/instance:Instance":
#         instance_type = args.props.get("instanceType", "")
#         allowed = ["t3.micro","t3.small","t3.medium","t3.large",
#                    "c6i.large","c6i.xlarge","c6i.2xlarge"]
#         if instance_type not in allowed:
#             report_violation(
#                 f"Instance type {instance_type} not allowed. "
#                 f"Use one of: {allowed}")
#
# def require_tags(args, report_violation):
#     if args.resource_type.startswith("aws:"):
#         tags = args.props.get("tags", {})
#         required = ["Environment", "Team", "CostCenter"]
#         for tag in required:
#             if tag not in tags:
#                 report_violation(f"Missing required tag: {tag}")
#
# PolicyPack("cost-policies", [
#     ResourceValidationPolicy("max-instance-size", max_instance_size),
#     ResourceValidationPolicy("require-tags", require_tags),
# ])
@dataclass
class CostStrategy:
    """One cost-optimization strategy, stored as display-ready text."""

    strategy: str  # name of the strategy, e.g. "Right-sizing"
    saving: str  # description of the expected saving
    implementation: str  # how the strategy is put into practice
    risk: str  # risk description
# Cost-optimization strategies: name, expected saving, implementation, risk.
# The descriptive strings are user-facing report text and are kept verbatim.
strategies = [
    CostStrategy(
        "Reserved Instances (1yr)",
        "30-40% ลดจาก On-demand",
        "Pulumi config กำหนด RI สำหรับ baseline instances",
        "ต่ำ — commit 1 ปี ต้องแน่ใจว่าใช้",
    ),
    CostStrategy(
        "Spot Instances (Workers)",
        "60-90% ลดจาก On-demand",
        "Pulumi Mixed Instances Policy: 70% Spot + 30% On-demand",
        "กลาง — อาจถูก reclaim ต้อง handle gracefully",
    ),
    CostStrategy(
        "Auto-scaling Down",
        "20-40% ลดค่า compute นอกเวลา",
        "Scheduled scaling: scale down 22:00-06:00 weekends",
        "ต่ำ — ตั้ง min ให้เพียงพอสำหรับ baseline",
    ),
    CostStrategy(
        "Dev Environment Destroy",
        "100% ลดค่า dev นอกเวลาทำงาน",
        "pulumi destroy ทุก 19:00, pulumi up ทุก 08:00",
        "ต่ำ — Dev only, ไม่กระทบ Production",
    ),
    CostStrategy(
        "Right-sizing",
        "10-30% ลดจากการลด Instance Size",
        "ดู CloudWatch CPU < 30% ลด instance type ลง",
        "ต่ำ — ทดสอบ Performance ก่อนลด",
    ),
]

# Print a short multi-line report for every strategy.
print("=== Cost Strategies ===")
for s in strategies:
    print(f" [{s.strategy}] Saving: {s.saving}")
    print(f" How: {s.implementation}")
    print(f" Risk: {s.risk}")
Monitoring and Alerts
# === Monitoring Setup ===
@dataclass
class MonitorAlert:
    """One monitoring alert definition, stored as display-ready text."""

    metric: str  # metric being watched, e.g. "CPU Utilization"
    threshold: str  # condition that triggers the alert
    action: str  # what to do when the alert fires
    pulumi_resource: str  # Pulumi resource type used to implement the alert
# Alert definitions: metric, trigger threshold, response action, and the
# Pulumi resource type used to implement the alert.
alerts = [
    MonitorAlert(
        "CPU Utilization",
        "> 80% for 5 min", "Scale up + Alert Slack",
        "aws.cloudwatch.MetricAlarm",
    ),
    MonitorAlert(
        "Memory Utilization",
        "> 85% for 5 min", "Scale up + Alert PagerDuty",
        "aws.cloudwatch.MetricAlarm + CW Agent",
    ),
    MonitorAlert(
        "Request Latency p99",
        "> 500ms for 3 min", "Investigate + Scale up if needed",
        "aws.cloudwatch.MetricAlarm (ALB)",
    ),
    MonitorAlert(
        "Error Rate",
        "> 1% for 2 min", "Alert PagerDuty + investigate",
        "aws.cloudwatch.MetricAlarm (ALB 5xx)",
    ),
    MonitorAlert(
        "Queue Depth",
        "> 1000 messages for 5 min", "Scale workers up",
        "aws.cloudwatch.MetricAlarm (SQS)",
    ),
    MonitorAlert(
        "Monthly Cost",
        "> 120% of budget", "Alert Finance + review resources",
        "aws.budgets.Budget",
    ),
]

# Print a short multi-line report for every alert.
print("=== Monitoring Alerts ===")
for a in alerts:
    print(f" [{a.metric}] Threshold: {a.threshold}")
    print(f" Action: {a.action}")
    print(f" Pulumi: {a.pulumi_resource}")
เคล็ดลับ
- Stack: ใช้ Pulumi Stack แยก Dev, Staging และ Prod โดยแต่ละ Stack มี Config ต่างกัน
- Policy: ใช้ CrossGuard บังคับ Tag, Instance Type และ Budget
- Spot: ใช้ Spot Instance สำหรับ Worker ลด Cost ได้ 60-90%
- Right-size: ตรวจ CPU และ Memory ทุกเดือน แล้วลด Instance Type ของเครื่องที่ใช้งานน้อย
- Destroy: Destroy Dev Environment นอกเวลาทำงาน ลด Cost ของ Dev ช่วงนอกเวลาทำงานได้ 100%
การนำไปใช้งานจริงในองค์กร
สำหรับองค์กรขนาดกลางถึงใหญ่ แนะนำให้ใช้หลัก Three-Tier Architecture คือ Core Layer ที่เป็นแกนกลางของระบบ Distribution Layer ที่ทำหน้าที่กระจาย Traffic และ Access Layer ที่เชื่อมต่อกับผู้ใช้โดยตรง การแบ่ง Layer ชัดเจนช่วยให้การ Troubleshoot ง่ายขึ้นและสามารถ Scale ระบบได้ตามความต้องการ
เรื่อง Network Security ก็สำคัญไม่แพ้กัน ควรติดตั้ง Next-Generation Firewall ที่สามารถ Deep Packet Inspection ได้ ใช้ Network Segmentation แยก VLAN สำหรับแต่ละแผนก ติดตั้ง IDS/IPS เพื่อตรวจจับการโจมตี และทำ Regular Security Audit อย่างน้อยปีละ 2 ครั้ง
Pulumi คืออะไร
Pulumi คือ IaC Platform ที่ใช้ภาษาโปรแกรมจริง เช่น Python, TypeScript, Go และ C# รองรับ AWS, Azure, GCP และ Kubernetes เก็บ State บน Pulumi Cloud และมี CrossGuard Policy, Automation API และ Component ให้ใช้งาน
Capacity Planning ทำอย่างไร
เริ่มจากวิเคราะห์ Workload (CPU, RAM, Disk) ประเมิน Growth Rate และกำหนด Performance Target จากนั้นเลือก Instance Type และตั้งค่า Auto-scaling Min/Max โดยใช้ Pulumi Stack Config แยกตาม Environment (Dev, Staging, Prod)
Cost Optimization ทำอย่างไร
ใช้ CrossGuard Policy บังคับ Instance Type และ Tag ใช้ Reserved Instance และ Spot Instance ตั้ง Auto-scaling Down ทำ Dev Destroy นอกเวลาทำงาน ทำ Right-sizing จากข้อมูล CloudWatch และตั้ง Budget Alert
Pulumi ต่างจาก Terraform อย่างไร
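Pulumi ใช้ภาษาจริง เช่น Python และ TypeScript จึงเขียน Loop และ Condition ได้ตามปกติ ส่วน Terraform ใช้ HCL ซึ่งเป็นภาษาเฉพาะ และใช้ count กับ for_each แทน ด้าน State นั้น Pulumi เก็บได้ทั้งบน Pulumi Cloud และ S3 และมี Automation API นอกเหนือจาก CLI ควรเลือกเครื่องมือตามความถนัดของทีม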
สรุป
Pulumi ช่วยให้ทำ IaC Capacity Planning ได้ครบ ตั้งแต่ Auto-scaling และ Cost Optimization ด้วย CrossGuard Policy, Reserved/Spot Instance และ Right-sizing ไปจนถึง Monitoring สำหรับ Production
