Computer Vision YOLO Testing Strategy QA คืออะไร

YOLO (You Only Look Once) เป็น real-time object detection model ที่ได้รับความนิยมสูงสุดในด้าน computer vision สามารถตรวจจับวัตถุหลายชนิดในภาพเดียวด้วยความเร็วสูง Testing Strategy QA สำหรับ YOLO คือกระบวนการทดสอบและประกันคุณภาพของ model ตั้งแต่ data quality, model accuracy, inference performance ไปจนถึง edge cases และ production monitoring เพื่อให้มั่นใจว่า model ทำงานได้ถูกต้องและเชื่อถือได้ในสภาพแวดล้อมจริง

YOLO Testing Framework

# yolo_testing.py — YOLO testing framework
import json

class YOLOTestingFramework:
    """Catalogue of the QA test levels that apply to a YOLO detection model.

    ``TEST_LEVELS`` maps a level key to a dict holding the level's display
    name, a one-line description, and the list of checks in that level.
    """

    TEST_LEVELS = {
        "data_quality": {
            "name": "Data Quality Tests",
            "description": "ตรวจสอบคุณภาพ training/test data",
            "tests": [
                "Label correctness (bounding box accuracy)",
                "Class distribution balance",
                "Image quality (resolution, blur, lighting)",
                "Duplicate detection",
                "Annotation format validation (YOLO format)",
            ],
        },
        "model_accuracy": {
            "name": "Model Accuracy Tests",
            "description": "ตรวจสอบ accuracy ของ model",
            "tests": [
                "mAP@50 (Mean Average Precision)",
                "mAP@50:95 (stricter metric)",
                "Per-class precision and recall",
                "Confusion matrix analysis",
                "IoU (Intersection over Union) distribution",
            ],
        },
        "performance": {
            "name": "Performance Tests",
            "description": "ตรวจสอบ inference speed และ resource usage",
            "tests": [
                "FPS (Frames Per Second) on target hardware",
                "Latency (ms per inference)",
                "GPU/CPU memory usage",
                "Batch processing throughput",
                "Model size (MB) and load time",
            ],
        },
        "robustness": {
            "name": "Robustness Tests",
            "description": "ตรวจสอบ model ใน edge cases",
            "tests": [
                "Low light / overexposure images",
                "Occluded objects (บังบางส่วน)",
                "Small objects detection",
                "Different angles / perspectives",
                "Adversarial examples",
            ],
        },
        "integration": {
            "name": "Integration Tests",
            "description": "ตรวจสอบ model ใน production pipeline",
            "tests": [
                "API endpoint response format",
                "Input validation (image format, size)",
                "Error handling (corrupt image, timeout)",
                "Concurrent request handling",
                "Model versioning and rollback",
            ],
        },
    }

    def show_framework(self, max_tests=3):
        """Print each test level followed by its first ``max_tests`` checks.

        Args:
            max_tests: Number of checks listed per level. The default of 3
                keeps the short preview this method has always printed;
                pass ``None`` to list every check.
        """
        print("=== YOLO Testing Framework ===\n")
        # Only the level records are needed; the dict keys are not printed.
        for level in self.TEST_LEVELS.values():
            print(f"[{level['name']}]")
            print(f" {level['description']}")
            for test in level["tests"][:max_tests]:
                print(f" • {test}")
            print()

# Demo driver: build the test-level catalogue and print its overview.
framework = YOLOTestingFramework()
framework.show_framework()

Data Quality Testing

# data_quality.py — YOLO data quality tests
import json
import random
import os

class DataQualityTests:
    """Reference material for validating a YOLO-format dataset.

    ``YOLO_FORMAT`` is a plain-text description of the annotation format.
    ``TEST_SCRIPT`` is the source of a standalone validator script; it is
    stored as text and only printed here, never executed by this class.
    """

    # The field line uses angle-bracket placeholders, one per column of a
    # label row.
    YOLO_FORMAT = """
 YOLO Annotation Format:
 <class_id> <x_center> <y_center> <width> <height>

 Example (person at center):
 0 0.5 0.5 0.3 0.6

 Rules:
 - All values normalized (0-1)
 - x_center, y_center = center of bounding box
 - width, height = relative to image size
 - One .txt file per image (same filename)
 """

    TEST_SCRIPT = """
# test_data_quality.py — Data quality validation
import os
import cv2
from pathlib import Path
from collections import Counter

# Compared against the lower-cased suffix so "IMG.JPG" is still an image
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png"}

class YOLODataValidator:
    def __init__(self, images_dir, labels_dir, classes):
        self.images_dir = Path(images_dir)
        self.labels_dir = Path(labels_dir)
        self.classes = classes
        self.errors = []

    def validate_labels(self):
        label_files = list(self.labels_dir.glob("*.txt"))
        print(f"Validating {len(label_files)} label files...")

        class_counts = Counter()
        for lf in label_files:
            with open(lf, encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    parts = line.strip().split()
                    if len(parts) != 5:
                        self.errors.append(f"{lf.name}:{line_num} Invalid format")
                        continue

                    try:
                        class_id = int(parts[0])
                        coords = [float(x) for x in parts[1:]]
                    except ValueError:
                        self.errors.append(f"{lf.name}:{line_num} Non-numeric value")
                        continue

                    if any(c < 0 or c > 1 for c in coords):
                        self.errors.append(f"{lf.name}:{line_num} Out of range coords")

                    if not 0 <= class_id < len(self.classes):
                        self.errors.append(f"{lf.name}:{line_num} Invalid class {class_id}")
                        # No class name exists for this id, so it cannot be counted
                        continue

                    class_counts[self.classes[class_id]] += 1

        return class_counts

    def check_image_label_pairs(self):
        images = {
            p.stem
            for p in self.images_dir.glob("*.*")
            if p.suffix.lower() in IMAGE_SUFFIXES
        }
        labels = {p.stem for p in self.labels_dir.glob("*.txt")}

        missing_labels = images - labels
        orphan_labels = labels - images

        if missing_labels:
            self.errors.append(f"{len(missing_labels)} images without labels")
        if orphan_labels:
            self.errors.append(f"{len(orphan_labels)} labels without images")

        return len(images), len(labels), len(missing_labels)

    def report(self):
        counts = self.validate_labels()
        imgs, lbls, missing = self.check_image_label_pairs()

        print(f"Images: {imgs} | Labels: {lbls} | Missing: {missing}")
        print(f"Class distribution: {dict(counts)}")
        print(f"Errors: {len(self.errors)}")
        for err in self.errors[:5]:
            print(f" ⚠ {err}")

validator = YOLODataValidator(
    "data/images",
    "data/labels",
    ["person", "car", "truck", "bicycle"],
)
validator.report()
"""

    def show_format(self):
        """Print the YOLO annotation format description."""
        print("=== YOLO Format ===")
        print(self.YOLO_FORMAT)

    def show_script(self, max_chars=600):
        """Print a preview of the validator script source.

        Args:
            max_chars: Number of leading characters of ``TEST_SCRIPT`` to
                print. Defaults to 600; pass ``None`` for the full script.
        """
        print("=== Data Quality Script ===")
        print(self.TEST_SCRIPT[:max_chars])

# Demo driver: print the annotation format, then the validator script preview.
dq = DataQualityTests()
dq.show_format()
dq.show_script()

Model Accuracy Testing

# accuracy.py — YOLO model accuracy testing
import json
import random

class AccuracyTesting:
    """Accuracy metric reference, an evaluation script, and a result mock-up.

    ``METRICS`` lists each metric with its description and the thresholds
    this article labels "good" and "excellent". ``EVAL_SCRIPT`` is the
    source of a standalone Ultralytics evaluation script, kept as text and
    only printed by this class.
    """

    METRICS = {
        "map50": {"name": "mAP@50", "description": "Mean AP at IoU=0.50", "good": "> 0.70", "excellent": "> 0.85"},
        "map50_95": {"name": "mAP@50:95", "description": "Mean AP averaged over IoU 0.50-0.95", "good": "> 0.45", "excellent": "> 0.60"},
        "precision": {"name": "Precision", "description": "TP / (TP + FP)", "good": "> 0.80", "excellent": "> 0.90"},
        "recall": {"name": "Recall", "description": "TP / (TP + FN)", "good": "> 0.75", "excellent": "> 0.85"},
        "f1": {"name": "F1 Score", "description": "Harmonic mean of Precision and Recall", "good": "> 0.75", "excellent": "> 0.85"},
    }

    EVAL_SCRIPT = """
# eval_yolo.py — YOLO evaluation script
from ultralytics import YOLO

# Load model
model = YOLO("runs/detect/train/weights/best.pt")

# Evaluate on test set
metrics = model.val(data="data.yaml", split="test")

# Key metrics
print(f"mAP@50: {metrics.box.map50:.4f}")
print(f"mAP@50:95: {metrics.box.map:.4f}")
print(f"Precision: {metrics.box.mp:.4f}")
print(f"Recall: {metrics.box.mr:.4f}")

# Per-class metrics
# ap50/ap hold one entry per class present in the evaluated split, so map
# each position back to its class id instead of assuming it matches names
for i, cls_id in enumerate(metrics.box.ap_class_index):
    cls_name = model.names[int(cls_id)]
    ap50 = metrics.box.ap50[i]
    ap = metrics.box.ap[i]
    print(f" [{cls_name}] AP@50={ap50:.3f} AP@50:95={ap:.3f}")

# Confusion matrix
metrics.confusion_matrix.plot(save_dir="eval_results/")
"""

    def show_metrics(self):
        """Print one line per metric with its description and thresholds."""
        print("=== Accuracy Metrics ===\n")
        for m in self.METRICS.values():
            print(f" [{m['name']}] {m['description']} | Good: {m['good']} | Excellent: {m['excellent']}")

    def show_eval(self, max_chars=500):
        """Print a preview of the evaluation script source.

        Args:
            max_chars: Number of leading characters of ``EVAL_SCRIPT`` to
                print. Defaults to 500; pass ``None`` for the full script.
        """
        print("\n=== Evaluation Script ===")
        print(self.EVAL_SCRIPT[:max_chars])

    def simulate_results(self):
        """Print randomly generated metric values for illustration.

        The numbers are drawn with ``random.uniform`` and do not come from
        any model; they vary between calls unless the caller seeds
        ``random`` first.
        """
        print("\n=== Simulated Results ===")
        classes = ["person", "car", "truck", "bicycle"]
        overall_map50 = random.uniform(0.75, 0.92)
        overall_map = random.uniform(0.45, 0.65)
        print(f" Overall mAP@50: {overall_map50:.4f}")
        print(f" Overall mAP@50:95: {overall_map:.4f}")
        for cls in classes:
            ap50 = random.uniform(0.65, 0.95)
            print(f" [{cls}] AP@50={ap50:.3f}")

# Demo driver: print the metric table, the eval script preview, and mock results.
acc = AccuracyTesting()
acc.show_metrics()
acc.show_eval()
acc.simulate_results()

Performance & Robustness Testing

# performance.py — YOLO performance and robustness
import json
import random
import time

class PerformanceTesting:
    """Benchmark and robustness script sources plus a model comparison table.

    ``BENCHMARK_SCRIPT`` and ``ROBUSTNESS_TESTS`` are the sources of
    standalone scripts, kept as text and only printed by this class.
    """

    BENCHMARK_SCRIPT = """
# benchmark_yolo.py — Performance benchmark
from ultralytics import YOLO
import time
import torch
import numpy as np

model = YOLO("yolov8n.pt") # nano model

# Warmup
for _ in range(10):
    model.predict("test.jpg", verbose=False)

# Benchmark
times = []
for _ in range(100):
    start = time.perf_counter()
    results = model.predict("test.jpg", verbose=False)
    elapsed = (time.perf_counter() - start) * 1000
    times.append(elapsed)

print(f"Avg latency: {np.mean(times):.1f}ms")
print(f"P95 latency: {np.percentile(times, 95):.1f}ms")
print(f"P99 latency: {np.percentile(times, 99):.1f}ms")
print(f"FPS: {1000 / np.mean(times):.1f}")
print(f"GPU Memory: {torch.cuda.memory_allocated() / 1024**2:.0f} MB")
"""

    ROBUSTNESS_TESTS = """
# robustness_tests.py — Robustness testing
import cv2
import numpy as np
from ultralytics import YOLO

model = YOLO("best.pt")

def test_augmentations(image_path):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable path by returning None
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    tests = {}

    # Low light
    dark = cv2.convertScaleAbs(img, alpha=0.3, beta=0)
    tests["low_light"] = model.predict(dark, verbose=False)

    # Overexposure
    bright = cv2.convertScaleAbs(img, alpha=2.0, beta=50)
    tests["overexposed"] = model.predict(bright, verbose=False)

    # Blur
    blurred = cv2.GaussianBlur(img, (15, 15), 0)
    tests["blurred"] = model.predict(blurred, verbose=False)

    # Noise
    # Add the signed noise in float space and clip afterwards: casting the
    # negative samples straight to uint8 would wrap them to large values
    noise = np.random.normal(0, 25, img.shape)
    noisy = np.clip(img.astype(np.float32) + noise, 0, 255).astype(np.uint8)
    tests["noisy"] = model.predict(noisy, verbose=False)

    # Rotation
    h, w = img.shape[:2]
    M = cv2.getRotationMatrix2D((w//2, h//2), 15, 1.0)
    rotated = cv2.warpAffine(img, M, (w, h))
    tests["rotated_15deg"] = model.predict(rotated, verbose=False)

    for name, results in tests.items():
        detections = len(results[0].boxes)
        conf = results[0].boxes.conf.mean().item() if detections > 0 else 0
        print(f" [{name}] Detections: {detections} | Avg conf: {conf:.3f}")

test_augmentations("test_image.jpg")
"""

    def show_benchmark(self, max_chars=500):
        """Print a preview of the benchmark script source.

        Args:
            max_chars: Number of leading characters of ``BENCHMARK_SCRIPT``
                to print. Defaults to 500; pass ``None`` for the full script.
        """
        print("=== Performance Benchmark ===")
        print(self.BENCHMARK_SCRIPT[:max_chars])

    def show_robustness(self, max_chars=500):
        """Print a preview of the robustness script source.

        Args:
            max_chars: Number of leading characters of ``ROBUSTNESS_TESTS``
                to print. Defaults to 500; pass ``None`` for the full script.
        """
        print("\n=== Robustness Tests ===")
        print(self.ROBUSTNESS_TESTS[:max_chars])

    def model_comparison(self):
        """Print a comparison table of the five YOLOv8 model sizes.

        Parameter counts, mAP values and file sizes are fixed literals; the
        FPS column is drawn with ``random.randint`` on every call, so it is
        illustrative rather than measured.
        """
        print("\n=== YOLO Model Comparison ===")
        models = [
            {"name": "YOLOv8n", "params": "3.2M", "map": 37.3, "fps": random.randint(400, 600), "size": "6.2 MB"},
            {"name": "YOLOv8s", "params": "11.2M", "map": 44.9, "fps": random.randint(200, 350), "size": "21.5 MB"},
            {"name": "YOLOv8m", "params": "25.9M", "map": 50.2, "fps": random.randint(100, 200), "size": "49.7 MB"},
            {"name": "YOLOv8l", "params": "43.7M", "map": 52.9, "fps": random.randint(60, 120), "size": "83.7 MB"},
            {"name": "YOLOv8x", "params": "68.2M", "map": 53.9, "fps": random.randint(40, 80), "size": "131 MB"},
        ]
        for m in models:
            print(f" [{m['name']}] Params: {m['params']} | mAP: {m['map']} | FPS: {m['fps']} | Size: {m['size']}")

# Demo driver: print both script previews, then the model comparison table.
perf = PerformanceTesting()
perf.show_benchmark()
perf.show_robustness()
perf.model_comparison()

CI/CD & Production QA

# cicd_qa.py — CI/CD and production QA
import json
import random

class ProductionQA:
    """CI pipeline definition plus mock production-monitoring output.

    ``PIPELINE`` is the text of a GitHub Actions workflow file; it is kept
    as a string and only printed by this class.
    """

    PIPELINE = """
# .github/workflows/yolo-qa.yml
name: YOLO Model QA
on:
  push:
    paths: ['models/**', 'data/**', 'tests/**']

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install ultralytics pytest numpy opencv-python

      - name: Data quality tests
        run: pytest tests/test_data_quality.py -v

      - name: Model accuracy tests
        run: |
          python eval_model.py --model models/best.pt --data data.yaml
          python check_metrics.py --min-map50 0.75 --min-precision 0.80

      - name: Performance benchmark
        run: python benchmark.py --model models/best.pt --target-fps 30

      - name: Robustness tests
        run: pytest tests/test_robustness.py -v
"""

    def show_pipeline(self, max_chars=500):
        """Print a preview of the workflow definition.

        Args:
            max_chars: Number of leading characters of ``PIPELINE`` to
                print. Defaults to 500; pass ``None`` for the full workflow.
        """
        print("=== CI/CD Pipeline ===")
        print(self.PIPELINE[:max_chars])

    def monitoring(self):
        """Print a sample monitoring dashboard.

        Every value is generated with ``random`` on each call; nothing is
        read from a running system.
        """
        print("\n=== Production Monitoring ===")
        metrics = {
            "Inference latency (P95)": f"{random.randint(20, 100)}ms",
            "Detection accuracy (sample)": f"{random.uniform(85, 98):.1f}%",
            "False positive rate": f"{random.uniform(1, 8):.1f}%",
            "Throughput": f"{random.randint(20, 100)} fps",
            "Model drift detected": random.choice(["No", "No", "No", "Yes"]),
            "GPU utilization": f"{random.randint(40, 80)}%",
        }
        for m, v in metrics.items():
            print(f" {m}: {v}")

    def qa_checklist(self):
        """Print the release checklist with a PASS/FAIL tag per item.

        Two items ("Per-class AP balanced" and "Robustness tests passed")
        take a randomly chosen status; the rest are hard-coded to pass.
        """
        print("\n=== QA Checklist ===")
        checks = [
            ("Data quality validated", True),
            ("mAP@50 > threshold", True),
            ("Per-class AP balanced", random.choice([True, True, False])),
            ("FPS meets target", True),
            ("Robustness tests passed", random.choice([True, True, False])),
            ("Integration tests passed", True),
            ("Model versioned and tagged", True),
            ("Rollback plan documented", True),
        ]
        for name, status in checks:
            icon = "PASS" if status else "FAIL"
            print(f" [{icon:>4}] {name}")

# Demo driver: print the pipeline preview, mock monitoring values, and checklist.
qa = ProductionQA()
qa.show_pipeline()
qa.monitoring()
qa.qa_checklist()

FAQ - คำถามที่พบบ่อย

Q: mAP เท่าไหร่ถึงจะดีพอสำหรับ production?

A: ขึ้นอยู่กับ use case:
- Safety-critical (self-driving): mAP@50 > 95%
- General object detection: mAP@50 > 75%
- Prototype/MVP: mAP@50 > 60%
สิ่งที่สำคัญกว่า mAP คือ per-class performance, false positive rate และ edge cases

Q: YOLOv8 กับ YOLOv5 อันไหนดี?

A:
- YOLOv8: ใหม่กว่า, accuracy ดีกว่า, API ง่ายกว่า (Ultralytics), anchor-free
- YOLOv5: stable, community ใหญ่, documentation เยอะ
- แนะนำ: ใช้ YOLOv8 สำหรับ projects ใหม่ เว้นแต่มี legacy YOLOv5 ที่ทำงานดีอยู่แล้ว

Q: Test data ต้องเตรียมอย่างไร?

A:
- แยก train/val/test: 70/15/15 หรือ 80/10/10
- Test set ต้องไม่ overlap กับ train set เลย
- Test set ควรครอบคลุม edge cases (dark, blur, occlusion)
- ตรวจ class balance ใน test set
- ใช้ stratified split เพื่อให้ class distribution ของแต่ละ split ใกล้เคียงกัน

Q: Model drift ตรวจอย่างไร?

A:
- Compare accuracy metrics เป็นระยะ (weekly/monthly)
- Monitor confidence score distribution (ถ้าลดลง = สัญญาณของ drift)
- Sample production predictions → human review (spot check)
- Re-evaluate กับ fresh labeled data
- A/B test model versions ใน production