MLflow Experiment Tech Conference 2026 — จัดการ
MLflow Experiment Tech Conference
MLflow เป็นแพลตฟอร์ม Open Source สำหรับจัดการ ML Lifecycle ประกอบด้วย Tracking, Projects, Models และ Registry ใช้บันทึก Experiment, Parameters, Metrics และ Artifacts บทความนี้ครอบคลุมการเตรียม Presentation และ Demo สำหรับ Tech Conference
| MLflow Component | Function | Use Case |
|---|---|---|
| Tracking | Log Parameters, Metrics | Experiment Comparison |
| Projects | Package Code + Environment | Reproducibility |
| Models | Standard Model Format | Multi-framework Deploy |
| Registry | Model Versioning | Stage Management |
MLflow Tracking Setup
# === MLflow Experiment Tracking ===
อ่านเพิ่ม: LLM Fine-tuning LoRA — วิธีตั้งค่าและใช้งานจริงพร้อมตัวอย่าง · อ่านเพิ่ม: LocalAI Self-hosted Compliance Automation — วิธีตั้งค่าและใช้งาน · อ่านเพิ่ม: deep learning vs machine learning คือ — วิธีตั้งค่าและใช้งาน
pip install mlflow scikit-learn pandas
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score
import pandas as pd
# Point the MLflow client at the tracking server and select the experiment.
# NOTE(review): assumes a tracking server is already listening on this URI
# (e.g. started with `mlflow ui --port 5000`) — confirm before running.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("image-classifier-v2")

# Load the training data and hold out 20% of the rows for evaluation.
df = pd.read_csv("data/train.csv")
X_train, X_test, y_train, y_test = train_test_split(
    df.drop("label", axis=1), df["label"],
    test_size=0.2, random_state=42
)

# Train and evaluate inside one tracked run. Binding the run with `as run`
# lets the run ID be read from the object itself instead of querying the
# global active-run state.
with mlflow.start_run(run_name="rf-baseline") as run:
    # Log the hyperparameters before training so they are recorded even if
    # fitting fails part-way through.
    params = {
        "n_estimators": 100,
        "max_depth": 10,
        "min_samples_split": 5,
        "random_state": 42,
    }
    mlflow.log_params(params)

    # Train the model with exactly the parameters that were logged.
    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = model.predict(X_test)
    metrics = {
        "accuracy": accuracy_score(y_test, y_pred),
        "f1_score": f1_score(y_test, y_pred, average="weighted"),
        "precision": precision_score(y_test, y_pred, average="weighted"),
    }
    mlflow.log_metrics(metrics)

    # Store the fitted model under the artifact path "model".
    mlflow.sklearn.log_model(model, "model")

    # Attach the training data file to the run as an artifact.
    mlflow.log_artifact("data/train.csv")

    print(f"Run ID: {run.info.run_id}")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
mlflow ui --port 5000
from dataclasses import dataclass, field
from typing import List, Dict
@dataclass
class ExperimentRun:
    """Summary record of one experiment run shown in the comparison table."""

    run_id: str    # run identifier, e.g. "run-001"
    name: str      # human-readable run name, e.g. "rf-baseline"
    params: Dict   # hyperparameter name -> value
    metrics: Dict  # metric name -> value (the table reads "accuracy" and "f1")
    model: str     # model family label, e.g. "RandomForest"
    status: str    # run status label, e.g. "completed"
# Hard-coded example runs used to render the comparison table below.
runs = [
    ExperimentRun("run-001", "rf-baseline",
                  {"n_estimators": 100, "max_depth": 10},
                  {"accuracy": 0.892, "f1": 0.887}, "RandomForest", "completed"),
    ExperimentRun("run-002", "rf-tuned",
                  {"n_estimators": 500, "max_depth": 20},
                  {"accuracy": 0.921, "f1": 0.918}, "RandomForest", "completed"),
    ExperimentRun("run-003", "xgb-baseline",
                  {"n_estimators": 200, "learning_rate": 0.1},
                  {"accuracy": 0.935, "f1": 0.932}, "XGBoost", "completed"),
    ExperimentRun("run-004", "xgb-tuned",
                  {"n_estimators": 500, "learning_rate": 0.05},
                  {"accuracy": 0.948, "f1": 0.945}, "XGBoost", "completed"),
    ExperimentRun("run-005", "neural-net",
                  {"hidden_layers": 3, "learning_rate": 0.001},
                  {"accuracy": 0.952, "f1": 0.949}, "PyTorch", "completed"),
]

print("=== MLflow Experiment Runs ===")
# The header carries the same one-space indent as the data rows so that the
# column titles line up with the values printed beneath them.
print(f" {'Name':<16} {'Model':<14} {'Accuracy':>8} {'F1':>6} {'Status'}")
for r in runs:
    print(f" {r.name:<16} {r.model:<14} {r.metrics['accuracy']:>8.3f} "
          f"{r.metrics['f1']:>6.3f} {r.status}")
Model Registry
# === MLflow Model Registry ===
import mlflow
from mlflow.tracking import MlflowClient
client = MlflowClient()

# Resolve the run whose logged model will be registered. Nothing else in this
# snippet defines `run_id`, so take it from the most recent run of the current
# session and fail with a clear message when there is none.
last_run = mlflow.last_active_run()
if last_run is None:
    raise RuntimeError(
        "No MLflow run found; log a model inside a run before registering it"
    )
run_id = last_run.info.run_id

# Register the model stored under the run's "model" artifact path.
result = mlflow.register_model(
    f"runs:/{run_id}/model",
    "image-classifier"
)

# NOTE(review): stage transitions are reported as deprecated in recent MLflow
# releases in favour of model version aliases — confirm against the installed
# MLflow version before relying on them.

# Transition the new version to Staging.
client.transition_model_version_stage(
    name="image-classifier",
    version=result.version,
    stage="Staging",
)

# Transition the same version to Production.
client.transition_model_version_stage(
    name="image-classifier",
    version=result.version,
    stage="Production",
)

# Load whichever version is currently in the Production stage and predict.
# X_test is expected to come from the train/test split made earlier.
model = mlflow.pyfunc.load_model(
    "models:/image-classifier/Production"
)
predictions = model.predict(X_test)
Hyperparameter Tuning with MLflow
import optuna
def objective(trial):
    """Train one XGBoost candidate and return its accuracy for Optuna.

    Each trial runs inside its own nested MLflow run, so the sampled
    hyperparameters and the resulting accuracy are logged per trial.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Accuracy of the fitted model on ``X_test`` / ``y_test``.
    """
    # NOTE(review): XGBClassifier is not imported anywhere in this snippet
    # (presumably `from xgboost import XGBClassifier` — confirm), and
    # accuracy_score plus the X_*/y_* splits must already be in scope.
    with mlflow.start_run(nested=True):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
            "max_depth": trial.suggest_int("max_depth", 5, 30),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        }
        mlflow.log_params(params)

        model = XGBClassifier(**params)
        model.fit(X_train, y_train)

        accuracy = accuracy_score(y_test, model.predict(X_test))
        mlflow.log_metric("accuracy", accuracy)
    return accuracy
# Parent run for the whole search: every trial started by `objective` opens a
# nested run beneath it, and the best result is logged on the parent.
with mlflow.start_run(run_name="optuna-tuning"):
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=50)

    mlflow.log_params(study.best_params)
    mlflow.log_metric("best_accuracy", study.best_value)
from enum import Enum
class ModelStage(Enum):
    """Registry stage of a model version; each value is the stage's display string."""

    NONE = "None"
    STAGING = "Staging"
    PRODUCTION = "Production"
    ARCHIVED = "Archived"
@dataclass
class RegisteredModel:
    """One version of a registered model as listed in the registry summary."""

    name: str          # registered model name, e.g. "image-classifier"
    version: int       # version number of this entry
    stage: ModelStage  # stage this version is in
    accuracy: float    # accuracy recorded for this version
    run_id: str        # identifier of the run associated with this version
# Hard-coded example registry entries used to render the summary below.
models = [
    RegisteredModel("image-classifier", 1, ModelStage.ARCHIVED, 0.892, "run-001"),
    RegisteredModel("image-classifier", 2, ModelStage.ARCHIVED, 0.935, "run-003"),
    RegisteredModel("image-classifier", 3, ModelStage.PRODUCTION, 0.952, "run-005"),
    RegisteredModel("image-classifier", 4, ModelStage.STAGING, 0.961, "run-006"),
]

print("\n=== Model Registry ===")
for m in models:
    # m.stage.value is the display string of the stage (e.g. "Production").
    print(f" {m.name} v{m.version} [{m.stage.value}] "
          f"Accuracy: {m.accuracy:.3f} | Run: {m.run_id}")
Conference Preparation
# === Tech Conference Preparation ===
@dataclass
class ConferenceTalk:
    """Headline facts about a conference talk."""

    title: str         # talk title
    abstract: str      # short description of the talk
    duration_min: int  # talk length in minutes
    slides: int        # number of slides
    demo: bool         # whether the talk includes a demo
    audience: str      # intended audience
@dataclass
class SlideOutline:
    """One section of the talk's slide outline."""

    section: str  # section title
    slides: int   # number of slides in this section
    content: str  # comma-separated topics covered by the section
# The talk being prepared and its section-by-section slide plan.
talk = ConferenceTalk(
    title="Scaling ML Experiments with MLflow: From Notebook to Production",
    abstract="How we scaled from ad-hoc notebooks to a systematic ML experimentation platform using MLflow",
    duration_min=30,
    slides=25,
    demo=True,
    audience="ML Engineers, Data Scientists",
)

outline = [
    SlideOutline("Introduction", 3, "Problem Statement, Why Experiment Tracking"),
    SlideOutline("MLflow Overview", 4, "Components, Architecture, Setup"),
    SlideOutline("Live Demo", 5, "Tracking, Compare Runs, Registry"),
    SlideOutline("Production Setup", 5, "Docker, Auth, Storage, CI/CD"),
    SlideOutline("Results", 4, "Before/After Metrics, Team Productivity"),
    SlideOutline("Lessons Learned", 2, "Pitfalls, Best Practices"),
    SlideOutline("Q&A", 2, "Questions and Discussion"),
]

# Strings without placeholders are plain literals; only the lines that
# interpolate values are f-strings.
print("=== Conference Talk ===")
print(f" Title: {talk.title}")
print(f" Duration: {talk.duration_min} min | Slides: {talk.slides}")
print(f" Demo: {'Yes' if talk.demo else 'No'} | Audience: {talk.audience}")

print("\n Slide Outline:")
for section in outline:
    print(f" [{section.section}] {section.slides} slides — {section.content}")

# Total slide count across all outline sections.
total_slides = sum(section.slides for section in outline)
print(f"\n Total: {total_slides} slides")
# Presentation tips grouped by category; printed as a bulleted list per group.
tips = {
    "Content": [
        "เริ่มด้วย Problem ที่ผู้ฟังเข้าใจ",
        "แสดง Before/After ชัดเจน",
        "Demo สดที่ทำงานจริง มี Backup Video",
        "แชร์ตัวเลข Impact ที่วัดได้",
    ],
    "Delivery": [
        "ซ้อมพูด 3-5 รอบ จับเวลา",
        "พูดช้าลง หยุดหายใจ",
        "สบตาผู้ฟัง ไม่อ่าน Slides",
        "เตรียม Q&A รู้จุดอ่อนของงาน",
    ],
    "Slides": [
        "1 Slide = 1 Idea ไม่ยัดเยียด",
        "ใช้ภาพและ Diagram มากกว่าข้อความ",
        "Font ใหญ่ อ่านได้จากหลังห้อง",
        "Code Snippet สั้น Highlight จุดสำคัญ",
    ],
}

# No placeholders here, so a plain string literal is used instead of an f-string.
print("\n\nPresentation Tips:")
for category, items in tips.items():
    print(f"\n [{category}]")
    for tip in items:
        print(f" - {tip}")
เคล็ดลับ
- Tracking: Log ทุก Parameter และ Metric ตั้งแต่เริ่ม
- Naming: ตั้งชื่อ Run ที่สื่อความหมาย ใช้ Tags จัดกลุ่ม
- Registry: ใช้ Stage Management Staging -> Production
- Demo: เตรียม Demo ที่ทำงานจริง มี Backup Video
- GitHub: แชร์ Code ที่ Reproducible บน GitHub
MLflow คืออะไร
MLflow เป็นแพลตฟอร์ม Open Source จาก Databricks สำหรับจัดการ ML Lifecycle ประกอบด้วย Tracking, Projects, Models และ Registry ใช้บันทึก Parameters, Metrics และ Artifacts และรองรับทุก Framework