Database Replication
Database Replication คือ Master-Slave Multi-Master Synchronous Asynchronous WAL Binary Log Failover HA Read Scaling Disaster Recovery
| Type | Write | Read | Consistency | Use Case |
|---|---|---|---|---|
| Master-Slave | Master only | Master + Slaves | Eventual (Async) | Read-heavy, Simple HA |
| Multi-Master | All nodes | All nodes | Eventual/Strong | Multi-region Write |
| Synchronous | Primary | Primary + Standby | Strong | Zero data loss |
| Semi-sync | Primary | Primary + Replicas | Near-strong | Balance Safety/Speed |
| Cascading | Primary | Primary + Chain | Eventual | Many replicas |
MySQL Replication
# === MySQL Master-Slave Replication Setup ===
# Master Configuration (my.cnf)
# [mysqld]
# server-id = 1
# log_bin = mysql-bin
# binlog_format = ROW
# binlog_do_db = myapp
# gtid_mode = ON
# enforce_gtid_consistency = ON
# sync_binlog = 1
# innodb_flush_log_at_trx_commit = 1
# Create Replication User on Master
# CREATE USER 'repl'@'10.0.0.%' IDENTIFIED BY 'strong_password';
# GRANT REPLICATION SLAVE ON *.* TO 'repl'@'10.0.0.%';
# FLUSH PRIVILEGES;
# Slave Configuration (my.cnf)
# [mysqld]
# server-id = 2
# relay_log = relay-bin
# read_only = ON
# gtid_mode = ON
# enforce_gtid_consistency = ON
# Start Replication on Slave
# CHANGE MASTER TO
# MASTER_HOST='10.0.0.1',
# MASTER_USER='repl',
# MASTER_PASSWORD='strong_password',
# MASTER_AUTO_POSITION=1;
# START SLAVE;
# SHOW SLAVE STATUS\G
from dataclasses import dataclass
@dataclass
class ReplicationConfig:
database: str
config_key: str
master_value: str
slave_value: str
purpose: str
configs = [
ReplicationConfig("MySQL",
"server-id", "1", "2 (unique per node)",
"ระบุตัวตน Node ต้องไม่ซ้ำ"),
ReplicationConfig("MySQL",
"log_bin", "mysql-bin", "ไม่ต้อง (ยกเว้น Chain)",
"เก็บ Binary Log สำหรับ Replication"),
ReplicationConfig("MySQL",
"binlog_format", "ROW", "ROW",
"ROW-based Replication ปลอดภัยที่สุด"),
ReplicationConfig("MySQL",
"gtid_mode", "ON", "ON",
"Global Transaction ID ง่ายต่อ Failover"),
ReplicationConfig("MySQL",
"sync_binlog", "1", "-",
"Sync binlog ทุก Transaction ไม่สูญหาย"),
ReplicationConfig("MySQL",
"read_only", "OFF", "ON",
"Slave อ่านอย่างเดียว ป้องกัน Write ผิดตัว"),
]
print("=== MySQL Replication Config ===")
for c in configs:
print(f" [{c.config_key}] Master: {c.master_value} | Slave: {c.slave_value}")
print(f" Purpose: {c.purpose}")
PostgreSQL Replication
# === PostgreSQL Streaming Replication ===
# Primary Configuration (postgresql.conf)
# wal_level = replica
# max_wal_senders = 5
# wal_keep_size = 1GB
# synchronous_commit = on # or 'remote_apply' for sync
# synchronous_standby_names = 'replica1'
# pg_hba.conf (Allow Replication)
# host replication repl_user 10.0.0.0/24 scram-sha-256
# Create Replication User
# CREATE ROLE repl_user WITH REPLICATION LOGIN PASSWORD 'strong_pass';
# SELECT * FROM pg_create_physical_replication_slot('replica1_slot');
# Standby Setup
# pg_basebackup -h primary -U repl_user -D /var/lib/postgresql/data -Fp -Xs -P -R
# # -R creates standby.signal + primary_conninfo automatically
# standby postgresql.conf
# primary_conninfo = 'host=10.0.0.1 port=5432 user=repl_user password=strong_pass'
# primary_slot_name = 'replica1_slot'
# hot_standby = on
# Check Replication Status
# SELECT * FROM pg_stat_replication;
# SELECT pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn(),
# pg_last_xact_replay_timestamp();
@dataclass
class PGReplication:
param: str
value: str
purpose: str
impact: str
pg_configs = [
PGReplication("wal_level",
"replica (or logical)",
"กำหนดระดับ WAL logging",
"replica สำหรับ Physical, logical สำหรับ Logical Replication"),
PGReplication("max_wal_senders",
"5 (ขึ้นกับจำนวน Replica)",
"จำนวน Connection สำหรับ Streaming",
"ตั้งมากกว่าจำนวน Replica + 1-2 สำหรับ Backup"),
PGReplication("synchronous_commit",
"on / remote_apply / remote_write",
"ระดับ Consistency",
"on=local, remote_apply=strong (ช้ากว่า 2-5x)"),
PGReplication("wal_keep_size",
"1GB-10GB",
"เก็บ WAL ไว้เผื่อ Replica หลุด",
"ใหญ่เกิน = ใช้ Disk มาก เล็กเกิน = Replica ต้อง Rebuild"),
PGReplication("hot_standby",
"on",
"อ่านข้อมูลจาก Standby ได้",
"ใช้ Standby สำหรับ Read Query ลด Load Primary"),
]
print("=== PostgreSQL Replication ===")
for p in pg_configs:
print(f" [{p.param}] = {p.value}")
print(f" Purpose: {p.purpose}")
print(f" Impact: {p.impact}")
Monitoring & Failover
# === Replication Monitoring & Automated Failover ===
@dataclass
class MonitorMetric:
metric: str
query: str
threshold: str
action: str
monitoring = [
MonitorMetric("Replication Lag (MySQL)",
"SHOW SLAVE STATUS → Seconds_Behind_Master",
"> 5 seconds → Warning, > 30s → Critical",
"Check slow queries on Slave, Network, Disk I/O"),
MonitorMetric("Replication Lag (PostgreSQL)",
"SELECT extract(epoch from now()-pg_last_xact_replay_timestamp())",
"> 5 seconds → Warning, > 30s → Critical",
"Check WAL throughput, Network, Slave resources"),
MonitorMetric("Slave IO/SQL Thread",
"SHOW SLAVE STATUS → Slave_IO_Running, Slave_SQL_Running",
"No → Critical",
"Check connectivity, Error log, SKIP counter"),
MonitorMetric("WAL Sender State (PG)",
"SELECT state FROM pg_stat_replication",
"NULL (disconnected) → Critical",
"Check network, Standby status, Slot"),
MonitorMetric("Replication Slot Lag",
"SELECT pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn)",
"> 1GB → Warning",
"Slot retained WAL ใช้ Disk มาก ลบ Slot ถ้า Replica ตาย"),
]
@dataclass
class FailoverTool:
tool: str
database: str
method: str
rpo_rto: str
failover_tools = [
FailoverTool("Orchestrator",
"MySQL",
"Auto-detect failure + Promote best Slave + Reroute traffic",
"RPO: ~0s (GTID), RTO: 10-30s"),
FailoverTool("Patroni + etcd",
"PostgreSQL",
"Leader election via etcd + Auto-promote Standby",
"RPO: 0s (sync) / ~1s (async), RTO: 10-30s"),
FailoverTool("PgBouncer",
"PostgreSQL",
"Connection pooling + Route to new Primary",
"ลด Connection overhead + Seamless failover"),
FailoverTool("ProxySQL",
"MySQL",
"Query routing + Read/Write split + Auto-failover",
"Application ไม่ต้องเปลี่ยน Connection String"),
]
print("=== Monitoring ===")
for m in monitoring:
print(f" [{m.metric}]")
print(f" Query: {m.query}")
print(f" Threshold: {m.threshold}")
print("\n=== Failover Tools ===")
for f in failover_tools:
print(f" [{f.tool}] {f.database}")
print(f" Method: {f.method}")
print(f" RPO/RTO: {f.rpo_rto}")
เคล็ดลับ
- GTID: ใช้ GTID (MySQL) สำหรับ Failover ง่ายกว่า Position-based
- Slot: ใช้ Replication Slot (PostgreSQL) ป้องกัน WAL ถูกลบ
- Monitor: ตั้ง Alert Replication Lag > 5 วินาที เสมอ
- Failover: ใช้ Orchestrator/Patroni Automated Failover
- Test: ทดสอบ Failover เป็นระยะ Disaster Recovery Drill
Database Replication คืออะไร
สำเนาข้อมูลอัตโนมัติ HA Read Scaling DR Synchronous Asynchronous Semi-sync Master-Slave Multi-Master WAL Binary Log Failover
Master-Slave Replication ทำอย่างไร
Master Write Slave Read Binary Log WAL server-id GTID CHANGE MASTER pg_basebackup primary_conninfo Replication Slot read_only
Multi-Master Replication ต่างอย่างไร
ทุก Node Write ได้ Conflict Resolution LWW MySQL Group Replication PostgreSQL BDR CockroachDB Galera Geographic Multi-region
ตั้งค่า Production อย่างไร
Monitoring Lag Alert Orchestrator Patroni PgBouncer ProxySQL Automated Failover SSL Backup Restore Test DR Drill Chaos Engineering
สรุป
Database Replication Master-Slave Multi-Master MySQL PostgreSQL WAL GTID Synchronous Asynchronous Failover Orchestrator Patroni Production
