Computer Vision YOLO กับ Technical Debt

YOLO Object Detection

YOLO (You Only Look Once) เป็น Object Detection Algorithm ที่ได้รับความนิยมสูงสุดสำหรับ Real-time Applications ทำงานโดยแบ่งรูปภาพเป็น Grid แล้วทำนาย Bounding Boxes และ Class Probabilities พร้อมกันในครั้งเดียว ทำให้เร็วมากเหมาะกับ Video Processing

เนื้อหาเกี่ยวข้อง — ทำความเข้าใจ Proxmox VE Cluster ทำ High Availability สำหรับ Home Lab

YOLOv8 จาก Ultralytics เป็นเวอร์ชันล่าสุดที่ใช้งานง่ายที่สุด รองรับทั้ง Object Detection, Instance Segmentation, Pose Estimation และ Classification มี Pre-trained Models หลายขนาด ตั้งแต่ Nano (เร็วสุด) ถึง Extra Large (แม่นยำสุด)

เนื้อหาเกี่ยวข้อง — LocalAI Self-hosted Scaling Strategy วิธี Scale

YOLOv8 — Training และ Inference

# yolo_project.py — YOLOv8 Object Detection Pipeline

# pip install ultralytics opencv-python



from ultralytics import YOLO

import cv2

import numpy as np

from pathlib import Path

import json

import logging

from datetime import datetime



# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)



class YOLODetector:
    """Convenience wrapper around an Ultralytics ``YOLO`` model.

    Groups training, inference, evaluation, export and a latency benchmark
    behind one object. Every ``predict`` call is summarised in
    ``self.history``.
    """

    def __init__(self, model_name="yolov8n.pt"):
        """Load the model.

        Args:
            model_name: Value passed unchanged to ``YOLO(...)``.
        """
        self.model = YOLO(model_name)
        self.model_name = model_name
        # One summary dict (timestamp, source, detection count) per predict().
        self.history = []
        logger.info("Loaded model: %s", model_name)

    def train(self, data_yaml, epochs=100, imgsz=640, batch=16,
              project="runs/train", name="exp"):
        """Train the model with a fixed augmentation/optimizer configuration.

        Args:
            data_yaml: Dataset description passed as ``data=``.
            epochs: Number of training epochs.
            imgsz: Training image size.
            batch: Batch size.
            project: Output directory passed as ``project=``.
            name: Run name passed as ``name=``.

        Returns:
            Whatever ``self.model.train(...)`` returns.
        """
        results = self.model.train(
            data=data_yaml,
            epochs=epochs,
            imgsz=imgsz,
            batch=batch,
            project=project,
            name=name,
            # Augmentation
            hsv_h=0.015,
            hsv_s=0.7,
            hsv_v=0.4,
            degrees=10.0,
            translate=0.1,
            scale=0.5,
            fliplr=0.5,
            mosaic=1.0,
            mixup=0.1,
            # Optimization
            optimizer="AdamW",
            lr0=0.001,
            lrf=0.01,
            weight_decay=0.0005,
            warmup_epochs=3,
            # Early stopping
            patience=20,
            # Saving
            save=True,
            save_period=10,
            plots=True,
        )

        logger.info("Training completed: %s", name)
        return results

    def predict(self, source, conf=0.5, iou=0.45, save=False):
        """Run inference and flatten the detections into plain dicts.

        Args:
            source: Input passed as ``source=`` to ``self.model.predict``.
            conf: Confidence threshold passed as ``conf=``.
            iou: IoU threshold passed as ``iou=``.
            save: Passed as ``save=``.

        Returns:
            A list of dicts with keys ``class``, ``confidence``, ``bbox``
            and ``class_id``, one per detected box across all results.
        """
        results = self.model.predict(
            source=source,
            conf=conf,
            iou=iou,
            save=save,
            verbose=False,
        )

        detections = []
        for result in results:
            boxes = result.boxes
            # A result may carry no box output at all (boxes is None);
            # iterating None would raise TypeError, so skip such results.
            if boxes is None:
                continue
            for box in boxes:
                class_id = int(box.cls)
                detections.append({
                    "class": result.names[class_id],
                    "confidence": float(box.conf),
                    "bbox": box.xyxy[0].tolist(),
                    "class_id": class_id,
                })

        self.history.append({
            "timestamp": datetime.now().isoformat(),
            "source": str(source),
            "detections": len(detections),
        })

        return detections

    def evaluate(self, data_yaml):
        """Validate the model and collect the headline box metrics.

        Args:
            data_yaml: Dataset description passed as ``data=``.

        Returns:
            Dict with float values under ``mAP50``, ``mAP50-95``,
            ``precision`` and ``recall``.
        """
        results = self.model.val(data=data_yaml, verbose=True)

        metrics = {
            "mAP50": float(results.box.map50),
            "mAP50-95": float(results.box.map),
            "precision": float(results.box.mp),
            "recall": float(results.box.mr),
        }

        logger.info(
            "Evaluation: mAP50=%.4f mAP50-95=%.4f",
            metrics["mAP50"],
            metrics["mAP50-95"],
        )
        return metrics

    def export(self, format="onnx", imgsz=640, half=False):
        """Export the model for deployment.

        Args:
            format: Target format passed as ``format=``.
            imgsz: Image size passed as ``imgsz=``.
            half: Passed as ``half=``.

        Returns:
            Whatever ``self.model.export(...)`` returns (logged as the path).
        """
        path = self.model.export(
            format=format,
            imgsz=imgsz,
            half=half,
            simplify=True,
        )
        logger.info("Exported to: %s", path)
        return path

    def benchmark(self, imgsz=640, n_runs=100, warmup=10):
        """Measure inference latency on a random square image.

        Args:
            imgsz: Side length of the random ``uint8`` test image.
            n_runs: Number of timed predictions; must be at least 1.
            warmup: Number of untimed predictions run first.

        Returns:
            Dict with plain-float ``avg_ms`` and ``fps``.

        Raises:
            ValueError: If ``n_runs`` is less than 1 (no samples to average).
        """
        import time

        # Fail before any model work: statistics over zero samples are NaN.
        if n_runs < 1:
            raise ValueError("n_runs must be at least 1")

        dummy = np.random.randint(0, 255, (imgsz, imgsz, 3), dtype=np.uint8)

        # Warmup
        for _ in range(warmup):
            self.model.predict(dummy, verbose=False)

        # Benchmark
        times = []
        for _ in range(n_runs):
            start = time.perf_counter()
            self.model.predict(dummy, verbose=False)
            times.append((time.perf_counter() - start) * 1000)

        avg = np.mean(times)
        fps = 1000 / avg

        print(f"Benchmark ({self.model_name}, {imgsz}x{imgsz}):")
        print(f"  Avg: {avg:.1f}ms | FPS: {fps:.0f}")
        print(f"  P50: {np.percentile(times, 50):.1f}ms")
        print(f"  P99: {np.percentile(times, 99):.1f}ms")

        # Plain floats keep the result independent of numpy scalar types.
        return {"avg_ms": float(avg), "fps": float(fps)}



# === Dataset YAML Example ===

# dataset.yaml:

# path: /data/custom_dataset

# train: images/train

# val: images/val

# test: images/test

# names:

#   0: person

#   1: car

#   2: bicycle



# ตัวอย่าง

# detector = YOLODetector("yolov8n.pt")

# detector.train("dataset.yaml", epochs=100)

# dets = detector.predict("test.jpg", conf=0.5)

# detector.export("onnx")

Technical Debt Audit สำหรับ CV Projects

# cv_debt_audit.py — Technical Debt Audit สำหรับ Computer Vision Projects

import os

import ast

import json

from pathlib import Path

from dataclasses import dataclass, field

from typing import List, Dict

from collections import Counter



@dataclass

class DebtItem:

    """A single technical-debt finding recorded by an audit check."""

    # Area of the project the finding belongs to, e.g. "Data", "Code", "Model".
    category: str

    # Priority bucket; one of: critical, high, medium, low.
    severity: str

    # Short human-readable statement of the problem.
    description: str

    # Project-relative path of the offending file; empty for project-wide findings.
    file: str = ""

    # Estimated fix effort in hours, written like "4h"; empty when not estimated.
    effort: str = ""

    # Suggested remediation.
    recommendation: str = ""



class CVTechDebtAuditor:
    """Scan a project directory for common technical-debt signals.

    Each ``check_*`` method inspects the tree rooted at ``project_path`` and
    appends a ``DebtItem`` to ``self.debts`` for every problem it detects.
    The checks are substring / file-presence heuristics, not static analysis.
    """

    # Glob patterns treated as test modules (both pytest naming styles).
    _TEST_PATTERNS = ("test_*.py", "*_test.py")

    def __init__(self, project_path):
        """Remember the project root and start with no findings.

        Args:
            project_path: Root directory of the project to audit.
        """
        self.project_path = Path(project_path)
        self.debts: List[DebtItem] = []

    def _python_sources(self):
        """Return ``(path, text)`` for every regular ``*.py`` file.

        Directories and broken symlinks whose names match ``*.py`` are
        skipped because they cannot be read as text. Files are decoded as
        UTF-8 with undecodable bytes ignored.
        """
        return [
            (path, path.read_text(encoding="utf-8", errors="ignore"))
            for path in self.project_path.rglob("*.py")
            if path.is_file()
        ]

    @staticmethod
    def _effort_hours(effort):
        """Parse an effort string such as ``"4h"`` into whole hours.

        Returns 0 for an empty or non-numeric estimate so that one free-form
        value cannot abort the whole report.
        """
        if not effort:
            return 0
        try:
            return int(effort.replace("h", ""))
        except ValueError:
            return 0

    def audit_all(self):
        """Run every check and return the collected findings.

        Findings from a previous run are discarded first, so calling this
        method repeatedly does not duplicate items.

        Returns:
            The list stored in ``self.debts``.
        """
        # Rebind rather than clear() so a list returned earlier is untouched.
        self.debts = []
        self.check_data_management()
        self.check_code_quality()
        self.check_model_management()
        self.check_testing()
        self.check_deployment()
        self.check_documentation()
        return self.debts

    def check_data_management(self):
        """Flag missing data versioning (``.dvc``) and data validation."""
        # DVC or other data version control
        if not (self.project_path / ".dvc").exists():
            self.debts.append(DebtItem(
                category="Data", severity="high",
                description="No Data Version Control (DVC)",
                recommendation="Initialize DVC: dvc init",
                effort="4h",
            ))

        # Data validation: any mention of a known validation library counts.
        has_validation = any(
            "great_expectations" in text or "pandera" in text
            for _, text in self._python_sources()
        )
        if not has_validation:
            self.debts.append(DebtItem(
                category="Data", severity="medium",
                description="No Data Validation Pipeline",
                recommendation="Add data validation with Great Expectations",
                effort="8h",
            ))

    def check_code_quality(self):
        """Flag hardcoded paths, hardcoded hyperparameters and missing hints."""
        sources = self._python_sources()
        files_without_hints = 0

        for path, content in sources:
            relative = str(path.relative_to(self.project_path))

            # Hardcoded paths
            if "/home/" in content or "C:\\" in content:
                self.debts.append(DebtItem(
                    category="Code", severity="medium",
                    description="Hardcoded file paths",
                    file=relative,
                    recommendation="Use config files or environment variables",
                    effort="2h",
                ))

            # Magic numbers in training code; files whose path mentions
            # "config" are exempt because hyperparameters belong there.
            if ("lr=" in content and "0.001" in content
                    and "config" not in str(path).lower()):
                self.debts.append(DebtItem(
                    category="Code", severity="low",
                    description="Hardcoded hyperparameters",
                    file=relative,
                    recommendation="Move hyperparameters to config file",
                    effort="2h",
                ))

            # A file with no "->" has no annotated return type anywhere.
            if "->" not in content:
                files_without_hints += 1

        # Type hints: report only when more than half of the files lack them.
        if files_without_hints > len(sources) * 0.5:
            self.debts.append(DebtItem(
                category="Code", severity="low",
                description=f"{files_without_hints}/{len(sources)} files without type hints",
                recommendation="Add type hints progressively",
                effort="16h",
            ))

    def check_model_management(self):
        """Flag missing experiment tracking and missing model cards."""
        # MLflow or similar
        uses_tracking = any(
            "mlflow" in text or "wandb" in text
            for _, text in self._python_sources()
        )
        if not uses_tracking:
            self.debts.append(DebtItem(
                category="Model", severity="high",
                description="No Experiment Tracking (MLflow/W&B)",
                recommendation="Integrate MLflow for experiment tracking",
                effort="8h",
            ))

        # Model cards
        if next(self.project_path.rglob("MODEL_CARD*"), None) is None:
            self.debts.append(DebtItem(
                category="Model", severity="medium",
                description="No Model Cards",
                recommendation="Create Model Cards for each model version",
                effort="4h",
            ))

    def check_testing(self):
        """Flag a project without test modules or without CI configuration."""
        has_tests = any(
            next(self.project_path.rglob(pattern), None) is not None
            for pattern in self._TEST_PATTERNS
        )
        if not has_tests:
            self.debts.append(DebtItem(
                category="Testing", severity="critical",
                description="No test files found",
                recommendation="Add unit tests for data pipeline and model inference",
                effort="24h",
            ))

        # CI/CD
        ci_files = [".github/workflows", ".gitlab-ci.yml", "Jenkinsfile"]
        has_ci = any((self.project_path / f).exists() for f in ci_files)
        if not has_ci:
            self.debts.append(DebtItem(
                category="Testing", severity="high",
                description="No CI/CD Pipeline",
                recommendation="Set up GitHub Actions for automated testing",
                effort="8h",
            ))

    def check_deployment(self):
        """Flag a project root without a ``Dockerfile``."""
        if not (self.project_path / "Dockerfile").exists():
            self.debts.append(DebtItem(
                category="Deployment", severity="medium",
                description="No Dockerfile",
                recommendation="Create Dockerfile for reproducible deployment",
                effort="4h",
            ))

    def check_documentation(self):
        """Flag a project root without a ``README.md``."""
        readme = self.project_path / "README.md"
        if not readme.exists():
            self.debts.append(DebtItem(
                category="Docs", severity="medium",
                description="No README.md",
                recommendation="Create comprehensive README",
                effort="4h",
            ))

    def generate_report(self):
        """Print a summary of ``self.debts`` grouped by severity."""
        print(f"\n{'='*60}")
        print("Technical Debt Audit Report")
        print(f"Project: {self.project_path}")
        print(f"{'='*60}")

        by_severity = Counter(d.severity for d in self.debts)
        print(f"\nTotal: {len(self.debts)} items")
        print(f"  Critical: {by_severity.get('critical', 0)}")
        print(f"  High: {by_severity.get('high', 0)}")
        print(f"  Medium: {by_severity.get('medium', 0)}")
        print(f"  Low: {by_severity.get('low', 0)}")

        # Non-numeric estimates count as 0h instead of raising ValueError.
        total_effort = sum(self._effort_hours(d.effort) for d in self.debts)
        print(f"  Estimated Total Effort: {total_effort}h")

        for severity in ["critical", "high", "medium", "low"]:
            items = [d for d in self.debts if d.severity == severity]
            if items:
                print(f"\n--- {severity.upper()} ---")
                for d in items:
                    print(f"  [{d.category}] {d.description}")
                    if d.file:
                        print(f"    File: {d.file}")
                    print(f"    Fix: {d.recommendation} ({d.effort})")



# ตัวอย่าง

# auditor = CVTechDebtAuditor("/path/to/cv-project")

# auditor.audit_all()

# auditor.generate_report()

Refactoring Strategy

# refactoring_plan.py — แผนจัดการ Technical Debt

import json

from datetime import datetime, timedelta



class RefactoringPlanner:
    """Pack debt items into fixed-capacity sprints, highest severity first."""

    # Sort rank per severity; unknown severities sort last (rank 4).
    _SEVERITY_RANK = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    # Hours assumed for an item whose estimate is missing or unparseable.
    _DEFAULT_EFFORT_HOURS = 4

    def __init__(self):
        """Start with an empty plan."""
        self.sprints = []

    @classmethod
    def _effort_hours(cls, effort):
        """Parse an effort string such as ``"8h"`` into whole hours.

        Falls back to ``_DEFAULT_EFFORT_HOURS`` when the estimate is empty
        or not a number, so one free-form value cannot abort planning.
        """
        if not effort:
            return cls._DEFAULT_EFFORT_HOURS
        try:
            return int(effort.replace("h", ""))
        except ValueError:
            return cls._DEFAULT_EFFORT_HOURS

    def create_plan(self, debt_items, sprint_capacity_hours=40):
        """Build the sprint plan from scratch.

        Items are ordered critical > high > medium > low (stable within a
        severity) and appended to the current sprint until adding the next
        one would exceed the capacity. An item larger than the capacity gets
        a sprint of its own.

        Args:
            debt_items: Objects exposing ``category``, ``description``,
                ``severity``, ``effort`` and ``recommendation`` attributes.
            sprint_capacity_hours: Maximum planned hours per sprint.

        Returns:
            The list stored in ``self.sprints``.
        """
        # Rebuild rather than extend: a second call must not leave stale
        # sprints with duplicate numbers in the plan.
        self.sprints = []
        ordered = sorted(
            debt_items,
            key=lambda d: self._SEVERITY_RANK.get(d.severity, 4),
        )

        current_sprint = {"items": [], "hours": 0, "number": 1}

        for item in ordered:
            effort = self._effort_hours(item.effort)

            # Roll over only when the sprint already holds work; otherwise
            # an oversized first item would leave an empty sprint behind.
            if (current_sprint["items"]
                    and current_sprint["hours"] + effort > sprint_capacity_hours):
                self.sprints.append(current_sprint)
                current_sprint = {
                    "items": [], "hours": 0,
                    "number": len(self.sprints) + 1,
                }

            current_sprint["items"].append({
                "category": item.category,
                "description": item.description,
                "severity": item.severity,
                "effort": effort,
                "recommendation": item.recommendation,
            })
            current_sprint["hours"] += effort

        if current_sprint["items"]:
            self.sprints.append(current_sprint)

        return self.sprints

    def print_plan(self):
        """Print every sprint with its start date, hours and items.

        Sprint start dates are spaced two weeks apart, counted from now.
        """
        start_date = datetime.now()
        print(f"\n{'='*60}")
        print("Refactoring Plan")
        print(f"Total Sprints: {len(self.sprints)}")
        print(f"{'='*60}")

        for sprint in self.sprints:
            sprint_start = start_date + timedelta(weeks=(sprint["number"] - 1) * 2)
            print(f"\n--- Sprint {sprint['number']} "
                  f"(Week of {sprint_start:%Y-%m-%d}) "
                  f"[{sprint['hours']}h] ---")

            for item in sprint["items"]:
                print(f"  [{item['severity'].upper():8}] "
                      f"[{item['category']:10}] "
                      f"{item['description']} ({item['effort']}h)")



# ตัวอย่าง

# auditor = CVTechDebtAuditor("/path/to/project")

# debts = auditor.audit_all()

# planner = RefactoringPlanner()

# planner.create_plan(debts)

# planner.print_plan()

Best Practices ลด Technical Debt

Data Versioning: ใช้ DVC หรือ LakeFS Version Control สำหรับ Dataset ทุก Version
Experiment Tracking: ใช้ MLflow หรือ W&B Track ทุก Experiment ไม่ใช่แค่ Best Model
Config Management: แยก Hyperparameters ไปไฟล์ Config (YAML/JSON) ไม่ Hardcode ใน Code
Automated Testing: เขียน Tests สำหรับ Data Pipeline, Model Inference และ API Endpoints
CI/CD Pipeline: Automate Training, Testing, Deployment ด้วย GitHub Actions หรือ GitLab CI
Model Cards: สร้าง Model Card ทุก Model Version อธิบาย Performance, Limitations, Bias
Boy Scout Rule: ทำให้ Code ดีขึ้นเล็กน้อยทุกครั้งที่แตะ ไม่สะสม Debt เพิ่ม

YOLO คืออะไร

YOLO เป็น Object Detection Algorithm แบบ Real-time ที่ตรวจจับวัตถุในรูปภาพหรือวิดีโอด้วยการประมวลผลผ่าน Neural Network เพียงครั้งเดียว (Single Forward Pass) จึงเร็วกว่า Two-stage Detectors เวอร์ชันล่าสุดคือ YOLOv8 จาก Ultralytics รองรับ Detection, Segmentation, Pose Estimation

แนะนำเพิ่มเติม — อ่านเพิ่มเติมที่ SiamCafeBook

เนื้อหาเกี่ยวข้อง — บทความที่เกี่ยวข้อง: Linkerd Service Mesh Community Building