Weights & Biases Audit Trail
W&B wandb MLOps Experiment Tracking Hyperparameters Metrics Artifacts Dashboard Model Registry Sweeps Audit Trail Compliance Reproducibility
| Feature | W&B | MLflow | Neptune |
|---|---|---|---|
| Hosting | Cloud / Self-hosted | Self-hosted | Cloud |
| UI | สวย Interactive | พื้นฐาน | ดี |
| Experiment Tracking | ดีมาก | ดี | ดีมาก |
| Model Registry | มี | มี | มี |
| Collaboration | ดีมาก Team | พื้นฐาน | ดี |
| ราคา | ฟรี Individual | ฟรี Open Source | ฟรี Individual |
W&B Setup และ Tracking
# === Weights & Biases Setup ===
# pip install wandb
# wandb login
# experiment_tracking.py
# import wandb
# import torch
# import torch.nn as nn
# from datetime import datetime
#
# # Initialize W&B Run
# run = wandb.init(
# project="image-classifier",
# name=f"resnet50-{datetime.now():%Y%m%d-%H%M}",
# config={
# "model": "ResNet50",
# "dataset": "CIFAR-10",
# "epochs": 50,
# "batch_size": 64,
# "learning_rate": 0.001,
# "optimizer": "Adam",
# "weight_decay": 1e-4,
# "scheduler": "CosineAnnealing",
# "augmentation": ["RandomCrop", "HorizontalFlip", "Normalize"],
# },
# tags=["production", "v2", "resnet"],
# notes="ResNet50 with CosineAnnealing scheduler",
# )
#
# # Log Metrics (hyperparameters are read back from the run's config)
# best_acc = 0.0  # needed by the checkpoint comparison below
# for epoch in range(run.config.epochs):
# train_loss, train_acc = train_one_epoch(model, train_loader)
# val_loss, val_acc = validate(model, val_loader)
#
# wandb.log({
# "epoch": epoch,
# "train/loss": train_loss,
# "train/accuracy": train_acc,
# "val/loss": val_loss,
# "val/accuracy": val_acc,
# "learning_rate": scheduler.get_last_lr()[0],
# })
#
# # Log Model Checkpoint
# if val_acc > best_acc:
# best_acc = val_acc
# torch.save(model.state_dict(), "best_model.pth")
# artifact = wandb.Artifact(
# name="model-checkpoint",
# type="model",
# metadata={"accuracy": val_acc, "epoch": epoch},
# )
# artifact.add_file("best_model.pth")
# run.log_artifact(artifact)
#
# # Log Dataset
# dataset_artifact = wandb.Artifact("cifar10-processed", type="dataset")
# dataset_artifact.add_dir("data/processed/")
# run.log_artifact(dataset_artifact)
#
# wandb.finish()
from dataclasses import dataclass, field
from typing import List, Dict
from datetime import datetime
@dataclass
class ExperimentRun:
    """Lightweight record of one tracked experiment run (mirrors W&B run metadata)."""
    run_id: str      # unique run identifier, e.g. "run-abc123"
    project: str     # project the run belongs to
    model: str       # architecture name, e.g. "ResNet50"
    config: Dict     # hyperparameters logged for the run
    metrics: Dict    # final metrics, e.g. {"val_acc": ..., "val_loss": ...}
    user: str        # who launched the run
    timestamp: str   # start time as a plain "YYYY-MM-DD HH:MM" string, not a datetime
    tags: List[str]  # free-form labels, e.g. ["production", "v2"]
    status: str      # run state, e.g. "completed"
# Example runs as they might come back from the tracking backend.
runs = [
    ExperimentRun(
        run_id="run-abc123",
        project="image-classifier",
        model="ResNet50",
        config={"lr": 0.001, "epochs": 50, "batch": 64},
        metrics={"val_acc": 0.942, "val_loss": 0.18},
        user="สมชาย",
        timestamp="2024-01-15 10:30",
        tags=["production", "v2"],
        status="completed",
    ),
    ExperimentRun(
        run_id="run-def456",
        project="image-classifier",
        model="EfficientNet-B0",
        config={"lr": 0.0005, "epochs": 100, "batch": 32},
        metrics={"val_acc": 0.956, "val_loss": 0.14},
        user="สมหญิง",
        timestamp="2024-01-16 14:20",
        tags=["experiment"],
        status="completed",
    ),
    ExperimentRun(
        run_id="run-ghi789",
        project="image-classifier",
        model="ViT-B/16",
        config={"lr": 0.0001, "epochs": 30, "batch": 16},
        metrics={"val_acc": 0.961, "val_loss": 0.12},
        user="สมศักดิ์",
        timestamp="2024-01-17 09:15",
        tags=["experiment", "transformer"],
        status="completed",
    ),
]
print("=== W&B Experiment Runs ===")
for r in runs:
print(f"\n [{r.run_id}] {r.model}")
print(f" User: {r.user} | {r.timestamp}")
print(f" Accuracy: {r.metrics['val_acc']:.1%} | Loss: {r.metrics['val_loss']:.3f}")
print(f" Tags: {', '.join(r.tags)}")
Audit Trail System
# audit_trail.py — ML Audit Trail System
from dataclasses import dataclass, field
from typing import List, Dict, Optional
from datetime import datetime
from enum import Enum
import json
class AuditAction(Enum):
    """Kinds of auditable events in the ML lifecycle."""
    MODEL_TRAINED = "model_trained"        # a training job finished
    MODEL_EVALUATED = "model_evaluated"    # offline evaluation recorded
    MODEL_DEPLOYED = "model_deployed"      # model promoted to an environment
    MODEL_ROLLBACK = "model_rollback"      # deployment reverted
    DATA_UPDATED = "data_updated"          # dataset version changed
    CONFIG_CHANGED = "config_changed"      # pipeline/training config edited
    APPROVAL_GRANTED = "approval_granted"  # human sign-off for promotion
@dataclass
class AuditEntry:
    """A single audit-log record: who did what, when, against which versions."""
    timestamp: str    # event time as "YYYY-MM-DD HH:MM:SS" (local wall clock)
    action: AuditAction
    user: str         # actor responsible for the event
    details: Dict     # free-form event payload
    model_version: Optional[str] = None   # model version touched, if any
    data_version: Optional[str] = None    # dataset version involved, if any
@dataclass
class AuditTrail:
    """Append-only audit log for one ML project.

    Records who did what, when, and against which model/data versions —
    the raw material for compliance reporting and lineage queries.
    """

    project: str  # project this trail belongs to
    entries: List[AuditEntry] = field(default_factory=list)

    def log(self, action: AuditAction, user: str, details: Dict,
            model_version: Optional[str] = None,
            data_version: Optional[str] = None) -> AuditEntry:
        """Append a timestamped entry and return it.

        Fix: the version parameters were annotated ``str = None``;
        they are optional, so the correct annotation is ``Optional[str]``.
        """
        entry = AuditEntry(
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            action=action,
            user=user,
            details=details,
            model_version=model_version,
            data_version=data_version,
        )
        self.entries.append(entry)
        return entry

    def get_by_user(self, user: str) -> List[AuditEntry]:
        """All entries recorded by *user* (accountability view)."""
        return [e for e in self.entries if e.user == user]

    def get_by_action(self, action: AuditAction) -> List[AuditEntry]:
        """All entries of one action type (e.g. every deployment)."""
        return [e for e in self.entries if e.action == action]

    def get_model_lineage(self, model_version: str) -> List[AuditEntry]:
        """Every entry touching *model_version*, in insertion order."""
        return [e for e in self.entries if e.model_version == model_version]

    def export_json(self) -> str:
        """Serialize the trail as pretty-printed, UTF-8-friendly JSON.

        Fix: ``data_version`` is now included in the export — dropping it
        made data lineage unrecoverable from exported trails.
        """
        return json.dumps([{
            "timestamp": e.timestamp,
            "action": e.action.value,
            "user": e.user,
            "details": e.details,
            "model_version": e.model_version,
            "data_version": e.data_version,  # previously omitted
        } for e in self.entries], indent=2, ensure_ascii=False)
# Build the project's audit trail from a data-driven event list:
# (action, user, details, model_version, data_version)
trail = AuditTrail("image-classifier")
audit_events = [
    (AuditAction.DATA_UPDATED, "สมชาย",
     {"dataset": "cifar10-v3", "records": 60000, "changes": "Added augmented data"},
     None, "v3"),
    (AuditAction.MODEL_TRAINED, "สมหญิง",
     {"model": "EfficientNet-B0", "epochs": 100, "accuracy": 0.956},
     "v2.1", "v3"),
    (AuditAction.MODEL_EVALUATED, "สมศักดิ์",
     {"test_accuracy": 0.951, "test_loss": 0.15, "bias_check": "passed"},
     "v2.1", None),
    (AuditAction.APPROVAL_GRANTED, "ผู้จัดการ",
     {"approved_for": "production", "reason": "Accuracy > 95% threshold"},
     "v2.1", None),
    (AuditAction.MODEL_DEPLOYED, "DevOps",
     {"environment": "production", "replicas": 3, "endpoint": "/api/predict"},
     "v2.1", None),
]
for action, actor, payload, model_v, data_v in audit_events:
    trail.log(action, actor, payload, model_version=model_v, data_version=data_v)
print("=== ML Audit Trail ===")
for entry in trail.entries:
print(f" [{entry.timestamp}] {entry.action.value}")
print(f" User: {entry.user}")
print(f" Model: {entry.model_version or '-'}")
print(f" Details: {entry.details}\n")
# Compliance regimes the audit trail has to satisfy, built incrementally
# so each regulation reads as its own statement (insertion order kept).
compliance = {}
compliance["GDPR"] = "ต้องบันทึกว่าใช้ Data อะไร ลบได้เมื่อร้องขอ"
compliance["HIPAA"] = "ต้อง Encrypt Data at Rest/Transit Audit Access"
compliance["SOX"] = "ต้องมี Approval Workflow Change Management"
compliance["ISO 27001"] = "Information Security Management System"
compliance["AI Act (EU)"] = "High-risk AI ต้อง Transparency Explainability"
print("Compliance Requirements:")
for regulation, requirement in compliance.items():
    print(f"  [{regulation}]: {requirement}")
Model Registry
# model_registry.py — Model Registry & Versioning
from dataclasses import dataclass
from typing import Optional
from enum import Enum
class ModelStage(Enum):
    """Lifecycle stage of a registered model version."""
    DEVELOPMENT = "Development"  # still being iterated on
    STAGING = "Staging"          # candidate awaiting promotion
    PRODUCTION = "Production"    # serving live traffic
    ARCHIVED = "Archived"        # superseded, kept for lineage
@dataclass
class RegisteredModel:
    """One versioned entry in the model registry."""
    name: str                    # registry name shared across versions
    version: str                 # version tag, e.g. "v2.1"
    stage: ModelStage            # current lifecycle stage
    accuracy: float              # validation accuracy recorded at registration
    created_by: str              # who trained/registered this version
    approved_by: Optional[str]   # approver; None while approval is pending
    artifact_path: str           # storage location of the model artifact
# Registry contents: every version ever registered, oldest first.
models = [
    RegisteredModel(name="image-classifier", version="v1.0", stage=ModelStage.ARCHIVED,
                    accuracy=0.920, created_by="สมชาย", approved_by="ผู้จัดการ",
                    artifact_path="s3://models/v1.0/"),
    RegisteredModel(name="image-classifier", version="v2.0", stage=ModelStage.ARCHIVED,
                    accuracy=0.942, created_by="สมชาย", approved_by="ผู้จัดการ",
                    artifact_path="s3://models/v2.0/"),
    RegisteredModel(name="image-classifier", version="v2.1", stage=ModelStage.PRODUCTION,
                    accuracy=0.956, created_by="สมหญิง", approved_by="ผู้จัดการ",
                    artifact_path="s3://models/v2.1/"),
    RegisteredModel(name="image-classifier", version="v3.0", stage=ModelStage.STAGING,
                    accuracy=0.961, created_by="สมศักดิ์", approved_by=None,
                    artifact_path="s3://models/v3.0/"),
]
print("=== Model Registry ===")
for m in models:
approved = m.approved_by or "Pending"
print(f" {m.name} {m.version} [{m.stage.value}]")
print(f" Accuracy: {m.accuracy:.1%} | By: {m.created_by} | Approved: {approved}")
# W&B Model Registry Commands
# import wandb
# run = wandb.init()
#
# # Log Model as Artifact
# artifact = wandb.Artifact("image-classifier", type="model",
# metadata={"accuracy": 0.956})
# artifact.add_file("model.pth")
# run.log_artifact(artifact)
#
# # Link to Registry
# run.link_artifact(artifact, "model-registry/image-classifier",
# aliases=["latest", "v2.1"])
#
# # Download from Registry
# api = wandb.Api()
# artifact = api.artifact("myproject/model-registry/image-classifier:v2.1")
# artifact.download()
# Canonical promotion path for a model, from training to live monitoring.
registry_workflow = [
    "1. Train Model — บันทึก Metrics และ Artifacts",
    "2. Evaluate — ทดสอบ Test Set, Bias Check, A/B Test",
    "3. Register — บันทึกใน Model Registry พร้อม Metadata",
    "4. Review — ส่งให้ Reviewer ตรวจสอบ",
    "5. Approve — ผู้มีอำนาจ Approve เพื่อ Deploy",
    "6. Deploy — GitOps Deploy ไป Staging -> Production",
    "7. Monitor — ตรวจ Performance, Data Drift, Model Drift",
]
print("\n\nModel Registry Workflow:")
print("\n".join(f"  {stage}" for stage in registry_workflow))
เคล็ดลับ
- Log Everything: บันทึกทุก Hyperparameter, Metric, Artifact
- Tags: ใช้ Tags จัดกลุ่ม Runs (production, experiment, baseline)
- Artifacts: เก็บ Dataset และ Model เป็น Versioned Artifacts
- Approval: ใส่ Approval Workflow ก่อน Deploy Production
- Lineage: ติดตาม Data -> Model -> Deployment Chain
Weights & Biases คืออะไร
MLOps Experiment Tracking Hyperparameters Metrics Artifacts Dashboard Model Registry Sweeps PyTorch TensorFlow Keras
Audit Trail ใน ML สำคัญอย่างไร
บันทึกทุกขั้นตอน ใคร ทำอะไร เมื่อไหร่ Data Model Compliance GDPR HIPAA Reproducibility Debugging Accountability
W&B ต่างจาก MLflow อย่างไร
W&B Cloud UI สวย Collaboration Team MLflow Open Source Self-hosted ยืดหยุ่น ฟรี W&B เหมาะทีม MLflow เหมาะ Self-hosted
Model Registry คืออะไร
เก็บจัดการ Model Versions Artifacts Metadata Lineage Stage Management Staging Production Approval Workflow
สรุป
Weights Biases W&B MLOps Experiment Tracking Audit Trail Logging Model Registry Versioning Compliance GDPR HIPAA Reproducibility Lineage Approval Workflow Artifacts Sweeps
