Weights & Biases Audit Trail Logging — ระบบ
Weights & Biases Audit Trail
W&B (wandb) เป็นแพลตฟอร์ม MLOps สำหรับ Experiment Tracking: บันทึก Hyperparameters, Metrics และ Artifacts, แสดงผลบน Dashboard, มี Model Registry และ Sweeps และช่วยเรื่อง Audit Trail, Compliance และ Reproducibility
| Feature | W&B | MLflow | Neptune |
|---|---|---|---|
| Hosting | Cloud / Self-hosted | Self-hosted | Cloud |
| UI | สวย Interactive | พื้นฐาน | ดี |
| Experiment Tracking | ดีมาก | ดี | ดีมาก |
| Model Registry | มี | มี | มี |
| Collaboration | ดีมาก Team | พื้นฐาน | ดี |
| ราคา | ฟรี Individual | ฟรี Open Source | ฟรี Individual |
W&B Setup และ Tracking
# === Weights & Biases Setup ===
# pip install wandb
# wandb login
# experiment_tracking.py
# import wandb
# import torch
# import torch.nn as nn
# from datetime import datetime
#
# # Initialize W&B Run
# run = wandb.init(
# project="image-classifier",
# name=f"resnet50-{datetime.now():%Y%m%d-%H%M}",
# config={
# "model": "ResNet50",
# "dataset": "CIFAR-10",
# "epochs": 50,
# "batch_size": 64,
# "learning_rate": 0.001,
# "optimizer": "Adam",
# "weight_decay": 1e-4,
# "scheduler": "CosineAnnealing",
# "augmentation": ["RandomCrop", "HorizontalFlip", "Normalize"],
# },
# tags=["production", "v2", "resnet"],
# notes="ResNet50 with CosineAnnealing scheduler",
# )
#
# # Log Metrics
# for epoch in range(wandb.config.epochs):
# train_loss, train_acc = train_one_epoch(model, train_loader)
# val_loss, val_acc = validate(model, val_loader)
#
# wandb.log({
# "epoch": epoch,
# "train/loss": train_loss,
# "train/accuracy": train_acc,
# "val/loss": val_loss,
# "val/accuracy": val_acc,
# "learning_rate": scheduler.get_last_lr()[0],
# })
#
# # Log Model Checkpoint
# if val_acc > best_acc:
# best_acc = val_acc
# torch.save(model.state_dict(), "best_model.pth")
# artifact = wandb.Artifact(
# name="model-checkpoint",
# type="model",
# metadata={"accuracy": val_acc, "epoch": epoch},
# )
# artifact.add_file("best_model.pth")
# run.log_artifact(artifact)
#
# # Log Dataset
# dataset_artifact = wandb.Artifact("cifar10-processed", type="dataset")
# dataset_artifact.add_dir("data/processed/")
# run.log_artifact(dataset_artifact)
#
# wandb.finish()
from dataclasses import dataclass, field
from typing import List, Dict
from datetime import datetime
@dataclass
class ExperimentRun:
    """Record describing one tracked experiment run.

    A plain data holder: it stores what was run (project, model, config),
    the resulting metrics, and who ran it and when. All fields are
    required and positional, in the order declared below.
    """

    run_id: str       # run identifier, e.g. "run-abc123"
    project: str      # project the run belongs to
    model: str        # model architecture name, e.g. "ResNet50"
    config: Dict      # hyperparameters, e.g. {"lr": 0.001, "epochs": 50}
    metrics: Dict     # result metrics, e.g. {"val_acc": 0.942, "val_loss": 0.18}
    user: str         # name of the person who launched the run
    timestamp: str    # start time as text, e.g. "2024-01-15 10:30"
    tags: List[str]   # free-form labels used to group runs
    status: str       # run state, e.g. "completed"
# Sample runs for the listing printed below (three models, same project).
runs = [
    ExperimentRun(
        "run-abc123", "image-classifier", "ResNet50",
        {"lr": 0.001, "epochs": 50, "batch": 64},
        {"val_acc": 0.942, "val_loss": 0.18},
        "สมชาย", "2024-01-15 10:30", ["production", "v2"], "completed",
    ),
    ExperimentRun(
        "run-def456", "image-classifier", "EfficientNet-B0",
        {"lr": 0.0005, "epochs": 100, "batch": 32},
        {"val_acc": 0.956, "val_loss": 0.14},
        "สมหญิง", "2024-01-16 14:20", ["experiment"], "completed",
    ),
    ExperimentRun(
        "run-ghi789", "image-classifier", "ViT-B/16",
        {"lr": 0.0001, "epochs": 30, "batch": 16},
        {"val_acc": 0.961, "val_loss": 0.12},
        "สมศักดิ์", "2024-01-17 09:15", ["experiment", "transformer"], "completed",
    ),
]

# Print a short summary per run: id/model, user/time, metrics, tags.
print("=== W&B Experiment Runs ===")
for r in runs:
    print(f"\n [{r.run_id}] {r.model}")
    print(f" User: {r.user} | {r.timestamp}")
    # val_acc is shown as a percentage with one decimal, val_loss with three.
    print(f" Accuracy: {r.metrics['val_acc']:.1%} | Loss: {r.metrics['val_loss']:.3f}")
    print(f" Tags: {', '.join(r.tags)}")
Audit Trail System
# audit_trail.py — ML Audit Trail System
from dataclasses import dataclass, field
from typing import List, Dict, Optional
from datetime import datetime
from enum import Enum
import json
class AuditAction(Enum):
    """Kinds of events that can be recorded in the audit trail.

    Each member's value is the snake_case string written out when an
    entry is printed or exported.
    """

    MODEL_TRAINED = "model_trained"
    MODEL_EVALUATED = "model_evaluated"
    MODEL_DEPLOYED = "model_deployed"
    MODEL_ROLLBACK = "model_rollback"
    DATA_UPDATED = "data_updated"
    CONFIG_CHANGED = "config_changed"
    APPROVAL_GRANTED = "approval_granted"
@dataclass
class AuditEntry:
    """One recorded event in the audit trail.

    The two version fields are optional because not every event relates
    to both a model and a dataset.
    """

    timestamp: str                       # when the event was recorded, as text
    action: AuditAction                  # what kind of event this is
    user: str                            # who performed the action
    details: Dict                        # free-form, event-specific payload
    model_version: Optional[str] = None  # model version involved, if any
    data_version: Optional[str] = None   # dataset version involved, if any
@dataclass
class AuditTrail:
    """In-memory log of audit entries for a single project.

    Entries are kept in the order they were logged. Query helpers filter
    that list by user, action or model version, and ``export_json``
    serializes the whole trail.
    """

    project: str
    entries: List[AuditEntry] = field(default_factory=list)

    def log(self, action: AuditAction, user: str, details: Dict,
            model_version: Optional[str] = None,
            data_version: Optional[str] = None) -> AuditEntry:
        """Record a new event and return the entry that was stored.

        Args:
            action: Kind of event being recorded.
            user: Name of whoever performed the action.
            details: Free-form, event-specific payload.
            model_version: Model version the event relates to, if any.
            data_version: Dataset version the event relates to, if any.

        Returns:
            The newly created entry, already appended to ``entries``.
        """
        entry = AuditEntry(
            # datetime.now() without a tz argument: local, naive wall-clock time.
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            action=action,
            user=user,
            details=details,
            model_version=model_version,
            data_version=data_version,
        )
        self.entries.append(entry)
        return entry

    def get_by_user(self, user: str) -> List[AuditEntry]:
        """Return all entries logged by exactly this user name."""
        return [e for e in self.entries if e.user == user]

    def get_by_action(self, action: AuditAction) -> List[AuditEntry]:
        """Return all entries whose action equals ``action``."""
        return [e for e in self.entries if e.action == action]

    def get_model_lineage(self, model_version: str) -> List[AuditEntry]:
        """Return all entries tagged with the given model version."""
        return [e for e in self.entries if e.model_version == model_version]

    def export_json(self) -> str:
        """Serialize every entry to a JSON array string.

        Each element carries the timestamp, the action's string value, the
        user, the details payload and both version fields. ``data_version``
        is included so the export does not lose dataset lineage.
        ``ensure_ascii=False`` keeps non-ASCII text (e.g. Thai user names)
        readable in the output.
        """
        return json.dumps([{
            "timestamp": e.timestamp,
            "action": e.action.value,
            "user": e.user,
            "details": e.details,
            "model_version": e.model_version,
            "data_version": e.data_version,
        } for e in self.entries], indent=2, ensure_ascii=False)
# Build the audit trail for the demo project and record a typical lifecycle:
# data update -> training -> evaluation -> approval -> deployment.
trail = AuditTrail("image-classifier")
trail.log(
    AuditAction.DATA_UPDATED, "สมชาย",
    {"dataset": "cifar10-v3", "records": 60000, "changes": "Added augmented data"},
    data_version="v3",
)
trail.log(
    AuditAction.MODEL_TRAINED, "สมหญิง",
    {"model": "EfficientNet-B0", "epochs": 100, "accuracy": 0.956},
    model_version="v2.1", data_version="v3",
)
trail.log(
    AuditAction.MODEL_EVALUATED, "สมศักดิ์",
    {"test_accuracy": 0.951, "test_loss": 0.15, "bias_check": "passed"},
    model_version="v2.1",
)
trail.log(
    AuditAction.APPROVAL_GRANTED, "ผู้จัดการ",
    {"approved_for": "production", "reason": "Accuracy > 95% threshold"},
    model_version="v2.1",
)
trail.log(
    AuditAction.MODEL_DEPLOYED, "DevOps",
    {"environment": "production", "replicas": 3, "endpoint": "/api/predict"},
    model_version="v2.1",
)

# Dump every entry in logging order.
print("=== ML Audit Trail ===")
for entry in trail.entries:
    print(f" [{entry.timestamp}] {entry.action.value}")
    print(f" User: {entry.user}")
    # Entries without a model version (e.g. data updates) show "-".
    print(f" Model: {entry.model_version or '-'}")
    print(f" Details: {entry.details}\n")
# Compliance Requirements
# Maps a regulation/standard name to a one-line summary of what it demands.
compliance = {
    "GDPR": "ต้องบันทึกว่าใช้ Data อะไร ลบได้เมื่อร้องขอ",
    "HIPAA": "ต้อง Encrypt Data at Rest/Transit Audit Access",
    "SOX": "ต้องมี Approval Workflow Change Management",
    "ISO 27001": "Information Security Management System",
    "AI Act (EU)": "High-risk AI ต้อง Transparency Explainability",
}

# Print one "[name]: summary" line per regulation, in insertion order.
print("Compliance Requirements:")
for reg, desc in compliance.items():
    print(f" [{reg}]: {desc}")
Model Registry
# model_registry.py — Model Registry & Versioning
from dataclasses import dataclass
from typing import Optional
from enum import Enum
class ModelStage(Enum):
    """Lifecycle stage of a registered model version.

    The member value is the capitalized label shown when a model is listed.
    """

    DEVELOPMENT = "Development"
    STAGING = "Staging"
    PRODUCTION = "Production"
    ARCHIVED = "Archived"
@dataclass
class RegisteredModel:
    """One version of a model as stored in the registry.

    All fields are required; pass ``None`` for ``approved_by`` when the
    version has not been approved yet.
    """

    name: str                   # registry name, e.g. "image-classifier"
    version: str                # version label, e.g. "v2.1"
    stage: ModelStage           # current lifecycle stage
    accuracy: float             # accuracy as a fraction (0.956 prints as 95.6%)
    created_by: str             # who produced this version
    approved_by: Optional[str]  # approver's name, or None while pending
    artifact_path: str          # where the artifact lives, e.g. "s3://models/v2.1/"
# Sample registry contents: two archived versions, the current production
# version, and a staging candidate that has no approver yet.
models = [
    RegisteredModel(
        "image-classifier", "v1.0", ModelStage.ARCHIVED, 0.920,
        "สมชาย", "ผู้จัดการ", "s3://models/v1.0/",
    ),
    RegisteredModel(
        "image-classifier", "v2.0", ModelStage.ARCHIVED, 0.942,
        "สมชาย", "ผู้จัดการ", "s3://models/v2.0/",
    ),
    RegisteredModel(
        "image-classifier", "v2.1", ModelStage.PRODUCTION, 0.956,
        "สมหญิง", "ผู้จัดการ", "s3://models/v2.1/",
    ),
    RegisteredModel(
        "image-classifier", "v3.0", ModelStage.STAGING, 0.961,
        "สมศักดิ์", None, "s3://models/v3.0/",
    ),
]

# List every registered version with its stage, accuracy and approval state.
print("=== Model Registry ===")
for m in models:
    # A missing approver is displayed as "Pending".
    approved = m.approved_by or "Pending"
    print(f" {m.name} {m.version} [{m.stage.value}]")
    print(f" Accuracy: {m.accuracy:.1%} | By: {m.created_by} | Approved: {approved}")
# W&B Model Registry Commands
# import wandb
# run = wandb.init()
#
# # Log Model as Artifact
# artifact = wandb.Artifact("image-classifier", type="model",
# metadata={"accuracy": 0.956})
# artifact.add_file("model.pth")
# run.log_artifact(artifact)
#
# # Link to Registry
# run.link_artifact(artifact, "model-registry/image-classifier",
# aliases=["latest", "v2.1"])
#
# # Download from Registry
# api = wandb.Api()
# artifact = api.artifact("myproject/model-registry/image-classifier:v2.1")
# artifact.download()
# Ordered steps of the model release workflow, from training to monitoring.
registry_workflow = [
    "1. Train Model — บันทึก Metrics และ Artifacts",
    "2. Evaluate — ทดสอบ Test Set, Bias Check, A/B Test",
    "3. Register — บันทึกใน Model Registry พร้อม Metadata",
    "4. Review — ส่งให้ Reviewer ตรวจสอบ",
    "5. Approve — ผู้มีอำนาจ Approve เพื่อ Deploy",
    "6. Deploy — GitOps Deploy ไป Staging -> Production",
    "7. Monitor — ตรวจ Performance, Data Drift, Model Drift",
]

# Two leading newlines separate this section from the previous output.
# Plain string: the original f-prefix had no placeholders to format.
print("\n\nModel Registry Workflow:")
for step in registry_workflow:
    print(f" {step}")
เคล็ดลับ
- Log Everything: บันทึกทุก Hyperparameter, Metric, Artifact
- Tags: ใช้ Tags จัดกลุ่ม Runs (production, experiment, baseline)
- Artifacts: เก็บ Dataset และ Model เป็น Versioned Artifacts
- Approval: ใส่ Approval Workflow ก่อน Deploy Production
- Lineage: ติดตาม Data -> Model -> Deployment Chain
Weights & Biases คืออะไร
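Weights & Biases (W&B) คือแพลตฟอร์ม MLOps สำหรับ Experiment Tracking: บันทึก Hyperparameters, Metrics และ Artifacts, แสดงผลบน Dashboard, มี Model Registry และ Sweeps และใช้งานร่วมกับ PyTorch, TensorFlow และ Keras ได้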