YOLO Object Detection
YOLO (You Only Look Once) เป็น Object Detection Algorithm ที่ได้รับความนิยมสูงสุดสำหรับ Real-time Applications ทำงานโดยแบ่งรูปภาพเป็น Grid แล้วทำนาย Bounding Boxes และ Class Probabilities พร้อมกันในครั้งเดียว ทำให้เร็วมากเหมาะกับ Video Processing
YOLOv8 จาก Ultralytics เป็นเวอร์ชันล่าสุดที่ใช้งานง่ายที่สุด รองรับทั้ง Object Detection, Instance Segmentation, Pose Estimation และ Classification มี Pre-trained Models หลายขนาด ตั้งแต่ Nano (เร็วสุด) ถึง Extra Large (แม่นยำสุด)
YOLOv8 — Training และ Inference
# yolo_project.py — YOLOv8 Object Detection Pipeline
# pip install ultralytics opencv-python
from ultralytics import YOLO
import cv2
import numpy as np
from pathlib import Path
import json
import logging
from datetime import datetime
# Configure the root logger at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class YOLODetector:
    """YOLOv8 object-detection pipeline: train, predict, evaluate, export, benchmark.

    Wraps an Ultralytics ``YOLO`` model object and records one history entry
    per :meth:`predict` call in ``self.history``.
    """

    def __init__(self, model_name="yolov8n.pt"):
        """Load the model identified by *model_name* via ``YOLO(model_name)``."""
        self.model = YOLO(model_name)
        self.model_name = model_name
        # One dict per predict() call: timestamp, source and detection count.
        self.history = []
        logger.info("Loaded model: %s", model_name)

    def train(self, data_yaml, epochs=100, imgsz=640, batch=16,
              project="runs/train", name="exp", **overrides):
        """Train the model with a default augmentation/optimizer configuration.

        Args:
            data_yaml: Dataset description passed as ``data=`` to the trainer.
            epochs, imgsz, batch, project, name: Forwarded unchanged to
                ``self.model.train``.
            **overrides: Extra keyword arguments that replace or extend the
                built-in defaults below (for example ``lr0=0.01``), so callers
                are not forced to edit this method to tune a run.

        Returns:
            Whatever ``self.model.train`` returns.
        """
        train_args = {
            "data": data_yaml,
            "epochs": epochs,
            "imgsz": imgsz,
            "batch": batch,
            "project": project,
            "name": name,
            # Augmentation
            "hsv_h": 0.015,
            "hsv_s": 0.7,
            "hsv_v": 0.4,
            "degrees": 10.0,
            "translate": 0.1,
            "scale": 0.5,
            "fliplr": 0.5,
            "mosaic": 1.0,
            "mixup": 0.1,
            # Optimization
            "optimizer": "AdamW",
            "lr0": 0.001,
            "lrf": 0.01,
            "weight_decay": 0.0005,
            "warmup_epochs": 3,
            # Early stopping
            "patience": 20,
            # Saving
            "save": True,
            "save_period": 10,
            "plots": True,
        }
        # Caller-supplied values win over the defaults above.
        train_args.update(overrides)
        results = self.model.train(**train_args)
        logger.info("Training completed: %s", name)
        return results

    def predict(self, source, conf=0.5, iou=0.45, save=False):
        """Run inference and flatten the results into plain dictionaries.

        Args:
            source: Input passed as ``source=`` to ``self.model.predict``.
            conf: Confidence threshold forwarded to the model.
            iou: IoU threshold forwarded to the model.
            save: Forwarded to the model as ``save=``.

        Returns:
            A list with one dict per detected box, holding the keys
            ``class``, ``confidence``, ``bbox`` and ``class_id``.
        """
        results = self.model.predict(
            source=source,
            conf=conf,
            iou=iou,
            save=save,
            verbose=False,
        )
        detections = []
        for result in results:
            boxes = result.boxes
            # A result without box output would make the inner loop raise
            # TypeError; treat it as "no detections" instead.
            if boxes is None:
                continue
            for box in boxes:
                # .item() makes the single-element -> scalar conversion explicit.
                class_id = int(box.cls.item())
                detections.append({
                    "class": result.names[class_id],
                    "confidence": float(box.conf.item()),
                    "bbox": box.xyxy[0].tolist(),
                    "class_id": class_id,
                })
        self.history.append({
            "timestamp": datetime.now().isoformat(),
            "source": str(source),
            "detections": len(detections),
        })
        return detections

    def evaluate(self, data_yaml):
        """Validate the model on *data_yaml* and return the headline box metrics.

        Returns:
            Dict with float values under ``mAP50``, ``mAP50-95``,
            ``precision`` and ``recall``, read from ``results.box``.
        """
        results = self.model.val(data=data_yaml, verbose=True)
        box_metrics = results.box
        metrics = {
            "mAP50": float(box_metrics.map50),
            "mAP50-95": float(box_metrics.map),
            "precision": float(box_metrics.mp),
            "recall": float(box_metrics.mr),
        }
        logger.info("Evaluation: mAP50=%.4f mAP50-95=%.4f",
                    metrics["mAP50"], metrics["mAP50-95"])
        return metrics

    def export(self, format="onnx", imgsz=640, half=False):
        """Export the model for deployment and return the value from ``model.export``.

        ``format`` shadows the builtin but is kept because it is part of the
        existing public signature.
        """
        path = self.model.export(
            format=format,
            imgsz=imgsz,
            half=half,
            simplify=True,
        )
        logger.info("Exported to: %s", path)
        return path

    def benchmark(self, imgsz=640, n_runs=100, warmup=10):
        """Time repeated inference on a random image and print a summary.

        Args:
            imgsz: Side length of the square random ``uint8`` test image.
            n_runs: Number of timed predictions; must be at least 1.
            warmup: Number of untimed predictions executed first.

        Returns:
            Dict with ``avg_ms`` and ``fps`` (as before) plus ``p50_ms`` and
            ``p99_ms``.

        Raises:
            ValueError: If *n_runs* is smaller than 1, because every statistic
                would otherwise be NaN.
        """
        import time

        if n_runs < 1:
            raise ValueError("n_runs must be >= 1")

        # randint's upper bound is exclusive, so 256 covers the full uint8 range.
        dummy = np.random.randint(0, 256, (imgsz, imgsz, 3), dtype=np.uint8)

        # Warmup
        for _ in range(warmup):
            self.model.predict(dummy, verbose=False)

        # Benchmark
        times = []
        for _ in range(n_runs):
            start = time.perf_counter()
            self.model.predict(dummy, verbose=False)
            times.append((time.perf_counter() - start) * 1000)

        avg = float(np.mean(times))
        # Guard against a zero average on very coarse timers.
        fps = 1000 / avg if avg > 0 else float("inf")
        p50 = float(np.percentile(times, 50))
        p99 = float(np.percentile(times, 99))

        print(f"Benchmark ({self.model_name}, {imgsz}x{imgsz}):")
        print(f" Avg: {avg:.1f}ms | FPS: {fps:.0f}")
        print(f" P50: {p50:.1f}ms")
        print(f" P99: {p99:.1f}ms")
        return {"avg_ms": avg, "fps": fps, "p50_ms": p50, "p99_ms": p99}
# === Dataset YAML Example ===
# dataset.yaml:
# path: /data/custom_dataset
# train: images/train
# val: images/val
# test: images/test
# names:
#   0: person
#   1: car
#   2: bicycle
# ตัวอย่าง
# detector = YOLODetector("yolov8n.pt")
# detector.train("dataset.yaml", epochs=100)
# dets = detector.predict("test.jpg", conf=0.5)
# detector.export("onnx")
Technical Debt Audit สำหรับ CV Projects
# cv_debt_audit.py — Technical Debt Audit สำหรับ Computer Vision Projects
import os
import ast
import json
from pathlib import Path
from dataclasses import dataclass, field
from typing import List, Dict
from collections import Counter
@dataclass
class DebtItem:
    """One technical-debt finding produced by an audit check."""

    category: str
    severity: str  # critical, high, medium, low
    description: str
    file: str = ""  # path relative to the project root; empty for project-wide findings
    effort: str = ""  # hours estimate, e.g. "4h"
    recommendation: str = ""


class CVTechDebtAuditor:
    """Audit a Computer Vision project directory for common technical debt.

    Each ``check_*`` method appends :class:`DebtItem` findings to
    ``self.debts``; :meth:`audit_all` runs every check from a clean slate.
    """

    def __init__(self, project_path):
        """Remember the project root and start with an empty findings list."""
        self.project_path = Path(project_path)
        self.debts: List[DebtItem] = []

    def audit_all(self):
        """Run every check and return the list of findings.

        The findings list is replaced first, so calling this method more than
        once does not duplicate earlier results.
        """
        self.debts = []
        for check in (
            self.check_data_management,
            self.check_code_quality,
            self.check_model_management,
            self.check_testing,
            self.check_deployment,
            self.check_documentation,
        ):
            check()
        return self.debts

    def _python_sources(self):
        """Return ``(path, text)`` for every readable ``*.py`` file under the project."""
        sources = []
        for path in self.project_path.rglob("*.py"):
            # rglob also matches directories whose name ends in ".py".
            if not path.is_file():
                continue
            try:
                text = path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                # Best-effort scan: an unreadable file must not abort the audit.
                continue
            sources.append((path, text))
        return sources

    @staticmethod
    def _effort_hours(effort, default=0):
        """Parse an effort string such as ``"4h"``; return *default* if not an integer."""
        try:
            return int(str(effort).replace("h", ""))
        except ValueError:
            return default

    def check_data_management(self):
        """Check for data version control and a data-validation library."""
        # DVC (Data Version Control)
        if not (self.project_path / ".dvc").exists():
            self.debts.append(DebtItem(
                category="Data", severity="high",
                description="No Data Version Control (DVC)",
                recommendation="Initialize DVC: dvc init",
                effort="4h",
            ))

        # Data validation: look for a known validation library name in any source.
        markers = ("great_expectations", "pandera")
        has_validation = any(
            marker in text
            for _, text in self._python_sources()
            for marker in markers
        )
        if not has_validation:
            self.debts.append(DebtItem(
                category="Data", severity="medium",
                description="No Data Validation Pipeline",
                recommendation="Add data validation with Great Expectations",
                effort="8h",
            ))

    def check_code_quality(self):
        """Check for hardcoded paths, hardcoded hyperparameters and missing type hints."""
        sources = self._python_sources()  # read every file exactly once
        for path, content in sources:
            rel_path = str(path.relative_to(self.project_path))

            # Hardcoded paths
            if "/home/" in content or "C:\\" in content:
                self.debts.append(DebtItem(
                    category="Code", severity="medium",
                    description="Hardcoded file paths",
                    file=rel_path,
                    recommendation="Use config files or environment variables",
                    effort="2h",
                ))

            # Magic numbers in training code. The "config" exemption is tested
            # on the project-relative path so that the location of the project
            # itself cannot suppress the finding.
            if ("lr=" in content and "0.001" in content
                    and "config" not in rel_path.lower()):
                self.debts.append(DebtItem(
                    category="Code", severity="low",
                    description="Hardcoded hyperparameters",
                    file=rel_path,
                    recommendation="Move hyperparameters to config file",
                    effort="2h",
                ))

        # Type hints: a file without any "->" is counted as unannotated.
        no_hints = sum(1 for _, content in sources if "->" not in content)
        if no_hints > len(sources) * 0.5:
            self.debts.append(DebtItem(
                category="Code", severity="low",
                description=f"{no_hints}/{len(sources)} files without type hints",
                recommendation="Add type hints progressively",
                effort="16h",
            ))

    def check_model_management(self):
        """Check for experiment tracking and model cards."""
        # MLflow or similar, searched per file instead of in one concatenated string.
        has_tracking = any(
            "mlflow" in text or "wandb" in text
            for _, text in self._python_sources()
        )
        if not has_tracking:
            self.debts.append(DebtItem(
                category="Model", severity="high",
                description="No Experiment Tracking (MLflow/W&B)",
                recommendation="Integrate MLflow for experiment tracking",
                effort="8h",
            ))

        # Model cards: stop at the first match instead of listing them all.
        if next(self.project_path.rglob("MODEL_CARD*"), None) is None:
            self.debts.append(DebtItem(
                category="Model", severity="medium",
                description="No Model Cards",
                recommendation="Create Model Cards for each model version",
                effort="4h",
            ))

    def check_testing(self):
        """Check for test files and a CI/CD configuration."""
        # Accept both common test-file naming patterns.
        has_tests = any(
            next(self.project_path.rglob(pattern), None) is not None
            for pattern in ("test_*.py", "*_test.py")
        )
        if not has_tests:
            self.debts.append(DebtItem(
                category="Testing", severity="critical",
                description="No test files found",
                recommendation="Add unit tests for data pipeline and model inference",
                effort="24h",
            ))

        # CI/CD
        ci_files = [".github/workflows", ".gitlab-ci.yml", "Jenkinsfile"]
        has_ci = any((self.project_path / name).exists() for name in ci_files)
        if not has_ci:
            self.debts.append(DebtItem(
                category="Testing", severity="high",
                description="No CI/CD Pipeline",
                recommendation="Set up GitHub Actions for automated testing",
                effort="8h",
            ))

    def check_deployment(self):
        """Check that the project root contains a Dockerfile."""
        if not (self.project_path / "Dockerfile").exists():
            self.debts.append(DebtItem(
                category="Deployment", severity="medium",
                description="No Dockerfile",
                recommendation="Create Dockerfile for reproducible deployment",
                effort="4h",
            ))

    def check_documentation(self):
        """Check that the project root contains a README.md."""
        readme = self.project_path / "README.md"
        if not readme.exists():
            self.debts.append(DebtItem(
                category="Docs", severity="medium",
                description="No README.md",
                recommendation="Create comprehensive README",
                effort="4h",
            ))

    def generate_report(self):
        """Print a summary of the findings grouped by severity."""
        print(f"\n{'='*60}")
        print(f"Technical Debt Audit Report")
        print(f"Project: {self.project_path}")
        print(f"{'='*60}")

        by_severity = Counter(d.severity for d in self.debts)
        print(f"\nTotal: {len(self.debts)} items")
        print(f" Critical: {by_severity.get('critical', 0)}")
        print(f" High: {by_severity.get('high', 0)}")
        print(f" Medium: {by_severity.get('medium', 0)}")
        print(f" Low: {by_severity.get('low', 0)}")

        # Unparseable or empty estimates count as 0h instead of crashing the report.
        total_effort = sum(self._effort_hours(d.effort) for d in self.debts)
        print(f" Estimated Total Effort: {total_effort}h")

        for severity in ["critical", "high", "medium", "low"]:
            items = [d for d in self.debts if d.severity == severity]
            if not items:
                continue
            print(f"\n--- {severity.upper()} ---")
            for d in items:
                print(f" [{d.category}] {d.description}")
                if d.file:
                    print(f" File: {d.file}")
                print(f" Fix: {d.recommendation} ({d.effort})")
# ตัวอย่าง
# auditor = CVTechDebtAuditor("/path/to/cv-project")
# auditor.audit_all()
# auditor.generate_report()
Refactoring Strategy
# refactoring_plan.py — แผนจัดการ Technical Debt
import json
from datetime import datetime, timedelta
class RefactoringPlanner:
    """Pack technical-debt items into capacity-limited refactoring sprints."""

    def __init__(self):
        """Start with an empty plan."""
        self.sprints = []

    @staticmethod
    def _effort_hours(effort, default=4):
        """Parse an effort string such as ``"8h"``; return *default* if missing or invalid."""
        try:
            return int(str(effort).replace("h", ""))
        except ValueError:
            return default

    def create_plan(self, debt_items, sprint_capacity_hours=40):
        """Build the sprint plan from *debt_items*.

        Items are ordered critical > high > medium > low (unknown severities
        last) and filled into sprints in that order. A new sprint is opened
        when the next item would exceed *sprint_capacity_hours*. An item
        larger than the capacity gets a sprint of its own rather than leaving
        an empty sprint behind.

        Any previously built plan is discarded, so repeated calls do not
        accumulate sprints or repeat sprint numbers.

        Args:
            debt_items: Iterable of objects exposing ``category``,
                ``severity``, ``description``, ``effort`` and
                ``recommendation`` attributes.
            sprint_capacity_hours: Hour budget per sprint.

        Returns:
            The list of sprint dicts, also stored in ``self.sprints``.
        """
        priority_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
        ordered = sorted(debt_items,
                         key=lambda d: priority_order.get(d.severity, 4))

        self.sprints = []
        current = {"items": [], "hours": 0, "number": 1}
        for item in ordered:
            effort = self._effort_hours(item.effort)
            # Only roll over when the current sprint already holds work;
            # otherwise an oversized first item would emit an empty sprint.
            if current["items"] and current["hours"] + effort > sprint_capacity_hours:
                self.sprints.append(current)
                current = {
                    "items": [], "hours": 0,
                    "number": len(self.sprints) + 1,
                }
            current["items"].append({
                "category": item.category,
                "description": item.description,
                "severity": item.severity,
                "effort": effort,
                "recommendation": item.recommendation,
            })
            current["hours"] += effort

        if current["items"]:
            self.sprints.append(current)
        return self.sprints

    def print_plan(self, start_date=None, sprint_weeks=2):
        """Print the plan with a start date for every sprint.

        Args:
            start_date: ``datetime`` of the first sprint; defaults to now.
            sprint_weeks: Length of one sprint in weeks (default 2).
        """
        if start_date is None:
            start_date = datetime.now()

        print(f"\n{'='*60}")
        print(f"Refactoring Plan")
        print(f"Total Sprints: {len(self.sprints)}")
        print(f"{'='*60}")

        for sprint in self.sprints:
            offset = timedelta(weeks=(sprint["number"] - 1) * sprint_weeks)
            sprint_start = start_date + offset
            print(f"\n--- Sprint {sprint['number']} "
                  f"(Week of {sprint_start:%Y-%m-%d}) "
                  f"[{sprint['hours']}h] ---")
            for item in sprint["items"]:
                print(f" [{item['severity'].upper():8}] "
                      f"[{item['category']:10}] "
                      f"{item['description']} ({item['effort']}h)")
# ตัวอย่าง
# auditor = CVTechDebtAuditor("/path/to/project")
# debts = auditor.audit_all()
# planner = RefactoringPlanner()
# planner.create_plan(debts)
# planner.print_plan()
Best Practices ลด Technical Debt
- Data Versioning: ใช้ DVC หรือ LakeFS Version Control สำหรับ Dataset ทุก Version
- Experiment Tracking: ใช้ MLflow หรือ W&B Track ทุก Experiment ไม่ใช่แค่ Best Model
- Config Management: แยก Hyperparameters ไปไฟล์ Config (YAML/JSON) ไม่ Hardcode ใน Code
- Automated Testing: เขียน Tests สำหรับ Data Pipeline, Model Inference และ API Endpoints
- CI/CD Pipeline: Automate Training, Testing, Deployment ด้วย GitHub Actions หรือ GitLab CI
- Model Cards: สร้าง Model Card ทุก Model Version อธิบาย Performance, Limitations, Bias
- Boy Scout Rule: ทำให้ Code ดีขึ้นเล็กน้อยทุกครั้งที่แตะ ไม่สะสม Debt เพิ่ม
YOLO คืออะไร
Object Detection Algorithm แบบ Real-time ตรวจจับวัตถุในรูปภาพหรือวิดีโอในครั้งเดียว เร็วกว่า Two-stage Detectors เวอร์ชันล่าสุดคือ YOLOv8 จาก Ultralytics รองรับ Detection, Segmentation, Pose Estimation
Technical Debt ในโปรเจค Computer Vision คืออะไร
ปัญหาสะสมจากการตัดสินใจทางเทคนิคที่ไม่ดี เช่น Data Pipeline ไม่มี Version Control, Hardcode Parameters, ไม่มี Tests, ไม่มี Model Monitoring, Deprecated Libraries, Documentation ไม่ครบ
วิธีลด Technical Debt ทำอย่างไร
เริ่มจาก Audit หา Debt ทั้งหมด จัดลำดับตาม Impact/Effort ใช้ Boy Scout Rule จัด Refactoring Sprint เขียน Tests ตั้ง CI/CD Pipeline ทำ Data Versioning ด้วย DVC และตั้ง Code Quality Standards
YOLOv8 ใช้งานอย่างไร
ติดตั้งด้วย pip install ultralytics แล้วใช้งานผ่าน Python API หรือ CLI — Training: model.train(data='dataset.yaml'), Inference: model.predict(source='image.jpg') และ Export เป็น ONNX, TensorRT หรือ CoreML สำหรับ Production
สรุป
YOLO เป็น Object Detection Algorithm ที่ทรงพลังสำหรับ Real-time Applications แต่โปรเจค Computer Vision มักมี Technical Debt สะสม เช่น ไม่มี Data Versioning, ไม่มี Tests, Hardcoded Config การจัดการ Debt ต้องเริ่มจาก Audit หาปัญหา จัดลำดับความสำคัญ วางแผน Refactoring Sprint และใช้ Best Practices เช่น DVC, MLflow, CI/CD, Model Cards เพื่อป้องกันไม่ให้ Debt สะสมเพิ่ม
