ai

A/B Testing ML กับ MLOps Workflow — วิธีทำ A/B

A/B Testing ML กับ MLOps Workflow — วิธีทำ A/B

A/B Testing สำหรับ ML Models

A/B Testing ML กับ MLOps Workflow — วิธีทำ A/B

A/B Testing เป็นวิธีมาตรฐานในการเปรียบเทียบ ML Models ก่อน Deploy เต็มรูปแบบ แบ่ง Traffic ส่งไป Model เก่าและใหม่ วัดผลด้วย Statistical Tests ตัดสินใจจากข้อมูลจริง

เนื้อหาเกี่ยวข้อง — แนะนำให้อ่าน Data Lakehouse — ข้อมูลครบถ้วน 2026

MLOps Workflow ทำให้กระบวนการ A/B Testing อัตโนมัติ ตั้งแต่ Training, Registry, Deployment, Traffic Splitting, Monitoring ไปจนถึง Automated Rollback

แนะนำเพิ่มเติม — สัญญาณเทรดรายวัน XM Signal

เนื้อหาเกี่ยวข้อง — ทำความเข้าใจ Prometheus PromQL AR VR Development

เนื้อหาเกี่ยวข้อง — บทความที่เกี่ยวข้อง: Th Html คืออะไร — ข้อมูลครบถ้วน 2026

A/B Testing Framework

# ab_testing_ml.py — A/B Testing Framework สำหรับ ML

# pip install scipy numpy pandas



import numpy as np

import pandas as pd

from scipy import stats

from dataclasses import dataclass, field

from typing import List, Dict, Optional, Tuple

from datetime import datetime, timedelta

import json



@dataclass
class ABExperiment:
    """Configuration of one A/B experiment comparing two model versions.

    Field values are validated on construction so that a percent passed
    where a fraction is expected (e.g. ``traffic_split=10``) fails loudly
    instead of silently configuring a nonsensical experiment.

    Raises:
        ValueError: if ``traffic_split`` is outside [0, 1], if
            ``significance_level`` or ``power`` is not strictly between
            0 and 1, or if ``min_sample_size`` is not positive.
    """

    name: str
    model_a: str  # Control
    model_b: str  # Treatment
    metric: str   # Primary metric
    # Fraction (not percent) of traffic routed to model B: 0.1 == 10%.
    traffic_split: float = 0.1
    min_sample_size: int = 1000
    significance_level: float = 0.05
    power: float = 0.80
    start_date: Optional[str] = None
    status: str = "draft"  # draft, running, completed, rolled_back

    def __post_init__(self):
        if not 0 <= self.traffic_split <= 1:
            raise ValueError(
                f"traffic_split must be a fraction in [0, 1], got {self.traffic_split!r}"
            )
        if not 0 < self.significance_level < 1:
            raise ValueError(
                f"significance_level must be in (0, 1), got {self.significance_level!r}"
            )
        if not 0 < self.power < 1:
            raise ValueError(f"power must be in (0, 1), got {self.power!r}")
        if self.min_sample_size <= 0:
            raise ValueError(
                f"min_sample_size must be positive, got {self.min_sample_size!r}"
            )



@dataclass

class ABResult:

    """Outcome of analysing one A/B experiment (control A vs. treatment B)."""

    # Mean of the control group's observations.
    metric_a: float

    # Mean of the treatment group's observations.
    metric_b: float

    # Number of observations in the control group.
    sample_a: int

    # Number of observations in the treatment group.
    sample_b: int

    # p-value of the two-sample t-test comparing the groups.
    p_value: float

    # (lower, upper) bounds for the difference metric_b - metric_a.
    confidence_interval: Tuple[float, float]

    # True when p_value is below the experiment's significance level.
    is_significant: bool

    lift: float  # Relative change of B over A, in percent



class MLABTestingFramework:
    """A/B testing framework for comparing two ML models.

    Experiments are kept by name in ``experiments``; the latest analysis of
    each experiment is kept under the same name in ``results``.
    """

    def __init__(self):
        self.experiments: Dict[str, "ABExperiment"] = {}
        self.results: Dict[str, "ABResult"] = {}

    def create_experiment(self, experiment: "ABExperiment"):
        """Register ``experiment`` under its name and print a short summary.

        An existing experiment with the same name is replaced.
        """
        self.experiments[experiment.name] = experiment
        print(f"Created experiment: {experiment.name}")
        print(f"  Control: {experiment.model_a}")
        print(f"  Treatment: {experiment.model_b}")
        print(f"  Traffic Split: {experiment.traffic_split:.0%}")

    def calculate_sample_size(self, baseline_rate, mde, alpha=0.05, power=0.80):
        """Return the per-group sample size for a two-sided test of two proportions.

        Args:
            baseline_rate: Rate of the control group (p1), within [0, 1].
            mde: Minimum detectable effect as a *relative* change of the
                baseline (0.10 means p2 = p1 * 1.10). Must not be zero.
            alpha: Two-sided significance level, strictly between 0 and 1.
            power: Desired statistical power, strictly between 0 and 1.

        Returns:
            The required number of observations per group, rounded up.

        Raises:
            ValueError: if a rate falls outside [0, 1], if the two rates are
                equal (no effect to detect), or if ``alpha``/``power`` are
                not strictly between 0 and 1.
        """
        if not 0 < alpha < 1:
            raise ValueError(f"alpha must be in (0, 1), got {alpha!r}")
        if not 0 < power < 1:
            raise ValueError(f"power must be in (0, 1), got {power!r}")

        p1 = baseline_rate
        p2 = baseline_rate * (1 + mde)
        if not 0 <= p1 <= 1:
            raise ValueError(f"baseline_rate must be in [0, 1], got {baseline_rate!r}")
        if not 0 <= p2 <= 1:
            raise ValueError(
                f"baseline_rate * (1 + mde) must be in [0, 1], got {p2!r}"
            )
        if p1 == p2:
            # A zero effect would need an infinite sample; previously this
            # surfaced as an OverflowError from int(inf).
            raise ValueError("mde must produce a non-zero difference between rates")

        z_alpha = stats.norm.ppf(1 - alpha / 2)
        z_beta = stats.norm.ppf(power)

        numerator = (
            z_alpha * np.sqrt(2 * p1 * (1 - p1))
            + z_beta * np.sqrt(p1 * (1 - p1) + p2 * (1 - p2))
        ) ** 2
        n = numerator / ((p2 - p1) ** 2)

        return int(np.ceil(n))

    def analyze(self, name, data_a, data_b):
        """Analyse an experiment and store/return its ``ABResult``.

        The p-value comes from Welch's two-sample t-test and the confidence
        interval uses the matching unpooled standard error, so both describe
        the same model. The interval's critical value is derived from the
        experiment's ``significance_level`` (normal approximation).

        Args:
            name: Name of a previously created experiment.
            data_a: Observations of the control group (1-D, numeric).
            data_b: Observations of the treatment group (1-D, numeric).

        Returns:
            The ``ABResult``; it is also stored in ``self.results[name]``.

        Raises:
            KeyError: if no experiment called ``name`` exists.
            ValueError: if either group has fewer than two observations.
        """
        exp = self.experiments[name]

        sample_a = np.asarray(data_a, dtype=float)
        sample_b = np.asarray(data_b, dtype=float)
        n_a = len(sample_a)
        n_b = len(sample_b)
        if n_a < 2 or n_b < 2:
            raise ValueError(
                "each group needs at least 2 observations "
                f"(got {n_a} and {n_b})"
            )

        mean_a = float(sample_a.mean())
        mean_b = float(sample_b.mean())

        # Welch's t-test: does not assume equal variances in both groups.
        _, p_value = stats.ttest_ind(sample_a, sample_b, equal_var=False)
        p_value = float(p_value)

        # Unpooled standard error from sample variances (ddof=1).
        se = float(
            np.sqrt(sample_a.var(ddof=1) / n_a + sample_b.var(ddof=1) / n_b)
        )
        diff = mean_b - mean_a
        z_crit = float(stats.norm.ppf(1 - exp.significance_level / 2))
        ci = (diff - z_crit * se, diff + z_crit * se)

        # Relative lift is undefined for a zero baseline.
        lift = diff / mean_a * 100 if mean_a != 0 else float("nan")

        is_significant = bool(p_value < exp.significance_level)

        result = ABResult(
            metric_a=mean_a, metric_b=mean_b,
            sample_a=n_a, sample_b=n_b,
            p_value=p_value, confidence_interval=ci,
            is_significant=is_significant, lift=lift,
        )
        self.results[name] = result

        return result

    def report(self, name):
        """Print the stored result of experiment ``name`` with a recommendation.

        The recommendation assumes a higher metric is better and is based on
        the sign of ``metric_b - metric_a``, which stays meaningful when the
        baseline is zero or negative (where the relative lift is not).

        Raises:
            KeyError: if the experiment or its result does not exist.
        """
        exp = self.experiments[name]
        result = self.results[name]
        ci_low, ci_high = result.confidence_interval
        ci_level = 1 - exp.significance_level

        print(f"\n{'='*55}")
        print(f"A/B Test Report: {name}")
        print(f"{'='*55}")
        print(f"  Control ({exp.model_a}):   {result.metric_a:.4f} (n={result.sample_a:,})")
        print(f"  Treatment ({exp.model_b}): {result.metric_b:.4f} (n={result.sample_b:,})")
        print(f"  Lift: {result.lift:+.2f}%")
        print(f"  P-value: {result.p_value:.4f}")
        print(f"  {ci_level:.0%} CI: [{ci_low:.4f}, {ci_high:.4f}]")
        print(f"  Significant: {'YES' if result.is_significant else 'NO'}")

        diff = result.metric_b - result.metric_a
        if result.is_significant and diff > 0:
            print(f"\n  Recommendation: DEPLOY Model B ({exp.model_b})")
        elif result.is_significant and diff < 0:
            print(f"\n  Recommendation: KEEP Model A ({exp.model_a})")
        else:
            print(f"\n  Recommendation: Continue testing (not significant)")



# === Example usage ===
framework = MLABTestingFramework()

# Describe the experiment: v1.2 is the control, v2.0 the treatment that
# receives 10% of the traffic.
exp = ABExperiment(
    "recommendation-v2",
    metric="click_through_rate",
    model_a="rec-model-v1.2",
    model_b="rec-model-v2.0",
    traffic_split=0.10,
)
framework.create_experiment(exp)

# Sample size needed to detect a 10% relative change on a 5% baseline.
n = framework.calculate_sample_size(0.05, 0.10)
print(f"\nRequired sample size per group: {n:,}")

# Simulate binary click outcomes for both groups (seeded for repeatability).
np.random.seed(42)
data_a = np.random.binomial(n=1, p=0.050, size=5000)  # CTR 5.0%
data_b = np.random.binomial(n=1, p=0.055, size=5000)  # CTR 5.5%

# Analyse the simulated data and print the report.
result = framework.analyze(exp.name, data_a, data_b)
framework.report(exp.name)

MLOps Pipeline

# mlops_pipeline.py — MLOps Workflow สำหรับ A/B Testing

# pip install mlflow boto3



from dataclasses import dataclass

from typing import Dict, List, Optional

from enum import Enum

import json



class ModelStage(Enum):

    """Stages a registered model version can be assigned to."""

    DEVELOPMENT = "development"

    STAGING = "staging"

    CANARY = "canary"

    PRODUCTION = "production"

    ARCHIVED = "archived"



@dataclass

class ModelVersion:

    """One version of a named model as tracked by the workflow registry."""

    # Model name; versions of the same model share this name.
    name: str

    # Version identifier, kept as a string (e.g. "1.2").
    version: str

    # Stage this version is currently assigned to.
    stage: ModelStage

    # Metric name -> metric value recorded for this version.
    metrics: Dict[str, float]

    # Location of the stored model artifact.
    artifact_path: str

    # Creation date as a string; no format is enforced here.
    created_at: str



class MLOpsWorkflow:
    """In-memory MLOps workflow covering the model lifecycle.

    ``models`` maps a model name to the list of its registered versions;
    ``deployments`` maps a deployment name to a dict describing it.
    """

    def __init__(self):
        self.models: Dict[str, List["ModelVersion"]] = {}
        self.deployments: Dict[str, Dict] = {}

    def register_model(self, model: "ModelVersion"):
        """Append ``model`` to the registry under its name and print a notice."""
        self.models.setdefault(model.name, []).append(model)
        print(f"Registered: {model.name} v{model.version} ({model.stage.value})")

    def promote(self, name, version, target_stage: "ModelStage"):
        """Move the first version of ``name`` matching ``version`` to ``target_stage``.

        Returns:
            True if a matching version was found and updated, otherwise False.
        """
        for candidate in self.models.get(name, []):
            if candidate.version == version:
                old_stage = candidate.stage
                candidate.stage = target_stage
                print(f"Promoted: {name} v{version} "
                      f"{old_stage.value} -> {target_stage.value}")
                return True
        return False

    def deploy_ab(self, name, version_a, version_b, split=0.10):
        """Record a running A/B deployment and print its traffic shares.

        Args:
            name: Deployment name; an existing entry is replaced.
            version_a: Control version.
            version_b: Treatment version.
            split: *Fraction* of traffic for the treatment, within [0, 1]
                (unlike ``canary_deploy``, which takes a percentage).

        Raises:
            ValueError: if ``split`` is outside [0, 1].
        """
        if not 0 <= split <= 1:
            raise ValueError(f"split must be a fraction in [0, 1], got {split!r}")

        self.deployments[name] = {
            "type": "ab_test",
            "control": version_a,
            "treatment": version_b,
            "split": split,
            "status": "running",
        }

        print(f"\nA/B Deployment: {name}")
        print(f"  Control: v{version_a} ({1-split:.0%} traffic)")
        print(f"  Treatment: v{version_b} ({split:.0%} traffic)")

    def canary_deploy(self, name, version, initial_pct=5):
        """Record a running canary deployment and print its initial traffic.

        Args:
            name: Deployment name; an existing entry is replaced.
            version: Version receiving the canary traffic.
            initial_pct: Initial traffic share as a *percentage*, within
                [0, 100] (unlike ``deploy_ab``, which takes a fraction).

        Raises:
            ValueError: if ``initial_pct`` is outside [0, 100].
        """
        if not 0 <= initial_pct <= 100:
            raise ValueError(
                f"initial_pct must be a percentage in [0, 100], got {initial_pct!r}"
            )

        self.deployments[name] = {
            "type": "canary",
            "version": version,
            "traffic_pct": initial_pct,
            "status": "running",
        }

        print(f"\nCanary Deployment: {name} v{version}")
        print(f"  Initial traffic: {initial_pct}%")

    def rollback(self, name):
        """Mark deployment ``name`` as rolled back.

        Returns:
            True if the deployment existed and was updated, False otherwise
            (mirrors ``promote`` so callers can detect an unknown name).
        """
        deployment = self.deployments.get(name)
        if deployment is None:
            return False
        deployment["status"] = "rolled_back"
        print(f"Rolled back: {name}")
        return True

    def pipeline_status(self):
        """Print every registered model version and every recorded deployment."""
        print(f"\n{'='*55}")
        print("MLOps Pipeline Status")
        print(f"{'='*55}")

        for name, versions in self.models.items():
            print(f"\n  Model: {name}")
            for mv in versions:
                # Distinct names for the metric pair avoid shadowing the
                # version variable inside the generator.
                metrics_str = ", ".join(
                    f"{metric}={value:.3f}" for metric, value in mv.metrics.items()
                )
                print(f"    v{mv.version} [{mv.stage.value}] {metrics_str}")

        if self.deployments:
            print(f"\n  Active Deployments:")
            for name, deploy in self.deployments.items():
                print(f"    {name}: {deploy['type']} ({deploy['status']})")



# === Example usage ===
mlops = MLOpsWorkflow()

# Register the current production version and the staged candidate.
mlops.register_model(ModelVersion(
    name="recommender",
    version="1.2",
    stage=ModelStage.PRODUCTION,
    metrics={"accuracy": 0.85, "latency_p95": 45.0},
    artifact_path="s3://models/rec/1.2",
    created_at="2024-01-01",
))
mlops.register_model(ModelVersion(
    name="recommender",
    version="2.0",
    stage=ModelStage.STAGING,
    metrics={"accuracy": 0.88, "latency_p95": 42.0},
    artifact_path="s3://models/rec/2.0",
    created_at="2024-02-01",
))

# Start an A/B deployment that sends 10% of the traffic to v2.0.
mlops.deploy_ab("recommender", version_a="1.2", version_b="2.0", split=0.10)

# Print the registry and deployment overview.
mlops.pipeline_status()

Kubernetes Deployment สำหรับ A/B Testing

# === Kubernetes + Istio A/B Testing ===



# 1. Model Deployments

# apiVersion: apps/v1

# kind: Deployment

# metadata:

#   name: ml-model-v1

#   labels:

#     app: ml-model

#     version: v1

# spec:

#   replicas: 3

#   selector:

#     matchLabels:

#       app: ml-model

#       version: v1

#   template:

#     metadata:

#       labels:

#         app: ml-model

#         version: v1

#     spec:

#       containers:

#         - name: model

#           image: ml-model:v1.2

#           ports:

#             - containerPort: 8080

#           resources:

#             requests:

#               cpu: "500m"

#               memory: "1Gi"

#             limits:

#               nvidia.com/gpu: "1"

# ---

# apiVersion: apps/v1

# kind: Deployment

# metadata:

#   name: ml-model-v2

#   labels:

#     app: ml-model

#     version: v2

# spec:

#   replicas: 1

#   selector:

#     matchLabels:

#       app: ml-model

#       version: v2

#   template:

#     metadata:

#       labels:

#         app: ml-model

#         version: v2

#     spec:

#       containers:

#         - name: model

#           image: ml-model:v2.0

#           ports:

#             - containerPort: 8080



# 2. Istio VirtualService — Traffic Splitting

# apiVersion: networking.istio.io/v1beta1

# kind: VirtualService

# metadata:

#   name: ml-model-vs

# spec:

#   hosts:

#     - ml-model

#   http:

#     - route:

#         - destination:

#             host: ml-model

#             subset: v1

#           weight: 90

#         - destination:

#             host: ml-model

#             subset: v2

#           weight: 10



# 3. DestinationRule

# apiVersion: networking.istio.io/v1beta1

# kind: DestinationRule

# metadata:

#   name: ml-model-dr

# spec:

#   host: ml-model

#   subsets:

#     - name: v1

#       labels:

#         version: v1

#     - name: v2

#       labels:

#         version: v2



# 4. Progressive Traffic Increase

# kubectl patch virtualservice ml-model-vs --type=json \

#   -p='[{"op":"replace","path":"/spec/http/0/route/0/weight","value":80},

#        {"op":"replace","path":"/spec/http/0/route/1/weight","value":20}]'



# 5. Full Rollout

# kubectl patch virtualservice ml-model-vs --type=json \

#   -p='[{"op":"replace","path":"/spec/http/0/route/0/weight","value":0},

#        {"op":"replace","path":"/spec/http/0/route/1/weight","value":100}]'



# Print a summary of the A/B rollout described above, one line per item.
printf '%s\n' \
    "A/B Testing Deployment:" \
    "  Model v1: 90% traffic (Control)" \
    "  Model v2: 10% traffic (Treatment)" \
    "  Istio VirtualService: Traffic splitting" \
    "  Progressive: 90/10 -> 80/20 -> 50/50 -> 0/100"

Best Practices

A/B Testing ML กับ MLOps Workflow — วิธีทำ A/B
  • Sample Size: คำนวณ Sample Size ก่อนเริ่ม และอย่าหยุด Test ก่อนเก็บข้อมูลครบตามจำนวนที่คำนวณไว้
  • One Metric: กำหนด Primary Metric เดียว ป้องกัน Multiple Testing Problem
  • Guardrail Metrics: ตั้ง Guardrail Metrics (Latency, Error Rate) ถ้าเกิน Rollback ทันที
  • Progressive Rollout: เริ่มจาก 5-10% ค่อยเพิ่มทีละ Step ไม่ Switch 100% ทีเดียว
  • Automated Rollback: ตั้ง Automated Rollback ถ้า Error Rate เกิน Threshold
  • Model Registry: เก็บทุก Model Version ใน Registry พร้อม Metrics สำหรับ Comparison

A/B Testing สำหรับ ML คืออะไร

เปรียบเทียบ Model ใหม่กับ Model เก่า โดยแบ่ง Traffic ไปยังกลุ่ม Control และ Treatment วัด Accuracy, Latency และ Business KPIs แล้วใช้ Statistical Tests ตัดสินว่า Model ใหม่ดีกว่าจริงหรือไม่

แนะนำเพิ่มเติม — อีบุ๊กการลงทุน SiamCafeBook

เนื้อหาเกี่ยวข้อง — แนะนำให้อ่าน ModSecurity WAF Infrastructure as Code

XM Legend · เทรดเดอร์ & ผู้สอน Forex 13 ปี

ผู้ก่อตั้ง SiamCafe ตั้งแต่ปี 1997 · เทรดเดอร์สาย Forex มากกว่า 13 ปี ได้รับการยกย่องเป็น XM Legend · แบ่งปันความรู้ Forex, ไอที, AI และการเทรด จากประสบการณ์จริงในตลาดจริง