Computer Vision YOLO Testing Strategy QA คืออะไร
YOLO (You Only Look Once) เป็น real-time object detection model ที่ได้รับความนิยมสูงสุดในด้าน computer vision สามารถตรวจจับวัตถุหลายชนิดในภาพเดียวด้วยความเร็วสูง Testing Strategy QA สำหรับ YOLO คือกระบวนการทดสอบและประกันคุณภาพของ model ตั้งแต่ data quality, model accuracy, inference performance ไปจนถึง edge cases และ production monitoring เพื่อให้มั่นใจว่า model ทำงานได้ถูกต้องและเชื่อถือได้ในสภาพแวดล้อมจริง
YOLO Testing Framework
# yolo_testing.py — YOLO testing framework
import json
class YOLOTestingFramework:
    """Catalog of QA test levels for YOLO object-detection models.

    Each entry in TEST_LEVELS maps a category key to a display name,
    a short (Thai) description, and the concrete tests in that level.
    """

    TEST_LEVELS = {
        "data_quality": {
            "name": "Data Quality Tests",
            "description": "ตรวจสอบคุณภาพ training/test data",
            "tests": [
                "Label correctness (bounding box accuracy)",
                "Class distribution balance",
                "Image quality (resolution, blur, lighting)",
                "Duplicate detection",
                "Annotation format validation (YOLO format)",
            ],
        },
        "model_accuracy": {
            "name": "Model Accuracy Tests",
            "description": "ตรวจสอบ accuracy ของ model",
            "tests": [
                "mAP@50 (Mean Average Precision)",
                "mAP@50:95 (stricter metric)",
                "Per-class precision and recall",
                "Confusion matrix analysis",
                "IoU (Intersection over Union) distribution",
            ],
        },
        "performance": {
            "name": "Performance Tests",
            "description": "ตรวจสอบ inference speed และ resource usage",
            "tests": [
                "FPS (Frames Per Second) on target hardware",
                "Latency (ms per inference)",
                "GPU/CPU memory usage",
                "Batch processing throughput",
                "Model size (MB) and load time",
            ],
        },
        "robustness": {
            "name": "Robustness Tests",
            "description": "ตรวจสอบ model ใน edge cases",
            "tests": [
                "Low light / overexposure images",
                "Occluded objects (บังบางส่วน)",
                "Small objects detection",
                "Different angles / perspectives",
                "Adversarial examples",
            ],
        },
        "integration": {
            "name": "Integration Tests",
            "description": "ตรวจสอบ model ใน production pipeline",
            "tests": [
                "API endpoint response format",
                "Input validation (image format, size)",
                "Error handling (corrupt image, timeout)",
                "Concurrent request handling",
                "Model versioning and rollback",
            ],
        },
    }

    def show_framework(self):
        """Print every test level with its first three test items."""
        print("=== YOLO Testing Framework ===\n")
        # Category keys are not shown, so iterate over the specs directly.
        for spec in self.TEST_LEVELS.values():
            print(f"[{spec['name']}]")
            print(f" {spec['description']}")
            for item in spec["tests"][:3]:
                print(f" • {item}")
            print()
# Demo: build the framework catalog and print its overview.
framework = YOLOTestingFramework()
framework.show_framework()
Data Quality Testing
# data_quality.py — YOLO data quality tests
import json
import random
import os
class DataQualityTests:
    """Reference material for YOLO data-quality QA.

    Holds two teaching constants — a summary of the normalized YOLO label
    format and a runnable example validation script — plus helpers that
    print them.
    """

    # Summary of the normalized YOLO annotation format (one line per box).
    YOLO_FORMAT = """
YOLO Annotation Format:
Example (person at center):
0 0.5 0.5 0.3 0.6
Rules:
- All values normalized (0-1)
- x_center, y_center = center of bounding box
- width, height = relative to image size
- One .txt file per image (same filename)
"""

    # Example validation script. Bug fix: after recording an "Invalid class"
    # error we now `continue`, otherwise `self.classes[class_id]` below would
    # raise IndexError on the very line that was just flagged as invalid.
    TEST_SCRIPT = """
# test_data_quality.py — Data quality validation
import os
import cv2
from pathlib import Path
from collections import Counter

class YOLODataValidator:
    def __init__(self, images_dir, labels_dir, classes):
        self.images_dir = Path(images_dir)
        self.labels_dir = Path(labels_dir)
        self.classes = classes
        self.errors = []

    def validate_labels(self):
        label_files = list(self.labels_dir.glob("*.txt"))
        print(f"Validating {len(label_files)} label files...")
        class_counts = Counter()
        for lf in label_files:
            with open(lf) as f:
                for line_num, line in enumerate(f, 1):
                    parts = line.strip().split()
                    if len(parts) != 5:
                        self.errors.append(f"{lf.name}:{line_num} Invalid format")
                        continue
                    class_id = int(parts[0])
                    if class_id >= len(self.classes):
                        self.errors.append(f"{lf.name}:{line_num} Invalid class {class_id}")
                        continue
                    coords = [float(x) for x in parts[1:]]
                    if any(c < 0 or c > 1 for c in coords):
                        self.errors.append(f"{lf.name}:{line_num} Out of range coords")
                    class_counts[self.classes[class_id]] += 1
        return class_counts

    def check_image_label_pairs(self):
        images = {p.stem for p in self.images_dir.glob("*.*") if p.suffix in ['.jpg', '.png']}
        labels = {p.stem for p in self.labels_dir.glob("*.txt")}
        missing_labels = images - labels
        orphan_labels = labels - images
        if missing_labels:
            self.errors.append(f"{len(missing_labels)} images without labels")
        if orphan_labels:
            self.errors.append(f"{len(orphan_labels)} labels without images")
        return len(images), len(labels), len(missing_labels)

    def report(self):
        counts = self.validate_labels()
        imgs, lbls, missing = self.check_image_label_pairs()
        print(f"Images: {imgs} | Labels: {lbls} | Missing: {missing}")
        print(f"Class distribution: {dict(counts)}")
        print(f"Errors: {len(self.errors)}")
        for err in self.errors[:5]:
            print(f" ⚠ {err}")

validator = YOLODataValidator("data/images", "data/labels",
                              ["person", "car", "truck", "bicycle"])
validator.report()
"""

    def show_format(self):
        """Print the YOLO annotation format cheat-sheet."""
        print("=== YOLO Format ===")
        print(self.YOLO_FORMAT)

    def show_script(self):
        """Print the opening excerpt of the validation script."""
        print("=== Data Quality Script ===")
        print(self.TEST_SCRIPT[:600])
# Demo: print the label-format cheat-sheet and the script excerpt.
dq = DataQualityTests()
dq.show_format()
dq.show_script()
Model Accuracy Testing
# accuracy.py — YOLO model accuracy testing
import json
import random
class AccuracyTesting:
    """Accuracy-metric reference plus a sample YOLO evaluation script."""

    # Metric definitions with rough "good" / "excellent" thresholds.
    METRICS = {
        "map50": {"name": "mAP@50", "description": "Mean AP at IoU=0.50", "good": "> 0.70", "excellent": "> 0.85"},
        "map50_95": {"name": "mAP@50:95", "description": "Mean AP averaged over IoU 0.50-0.95", "good": "> 0.45", "excellent": "> 0.60"},
        "precision": {"name": "Precision", "description": "TP / (TP + FP)", "good": "> 0.80", "excellent": "> 0.90"},
        "recall": {"name": "Recall", "description": "TP / (TP + FN)", "good": "> 0.75", "excellent": "> 0.85"},
        "f1": {"name": "F1 Score", "description": "Harmonic mean of Precision and Recall", "good": "> 0.75", "excellent": "> 0.85"},
    }

    # Example evaluation using the Ultralytics API.
    EVAL_SCRIPT = """
# eval_yolo.py — YOLO evaluation script
from ultralytics import YOLO
# Load model
model = YOLO("runs/detect/train/weights/best.pt")
# Evaluate on test set
metrics = model.val(data="data.yaml", split="test")
# Key metrics
print(f"mAP@50: {metrics.box.map50:.4f}")
print(f"mAP@50:95: {metrics.box.map:.4f}")
print(f"Precision: {metrics.box.mp:.4f}")
print(f"Recall: {metrics.box.mr:.4f}")
# Per-class metrics
for i, cls_name in enumerate(model.names.values()):
    ap50 = metrics.box.ap50[i]
    ap = metrics.box.ap[i]
    print(f" [{cls_name}] AP@50={ap50:.3f} AP@50:95={ap:.3f}")
# Confusion matrix
metrics.confusion_matrix.plot(save_dir="eval_results/")
"""

    def show_metrics(self):
        """Print each metric with its description and thresholds."""
        print("=== Accuracy Metrics ===\n")
        # Keys are not displayed, so walk the metric specs directly.
        for metric in self.METRICS.values():
            print(f" [{metric['name']}] {metric['description']} | Good: {metric['good']} | Excellent: {metric['excellent']}")

    def show_eval(self):
        """Print the opening excerpt of the evaluation script."""
        print("\n=== Evaluation Script ===")
        print(self.EVAL_SCRIPT[:500])

    def simulate_results(self):
        """Print illustrative, randomly drawn accuracy numbers."""
        print("\n=== Simulated Results ===")
        overall_map50 = random.uniform(0.75, 0.92)
        overall_map = random.uniform(0.45, 0.65)
        print(f" Overall mAP@50: {overall_map50:.4f}")
        print(f" Overall mAP@50:95: {overall_map:.4f}")
        # One AP@50 draw per class, in a fixed class order.
        for label in ("person", "car", "truck", "bicycle"):
            print(f" [{label}] AP@50={random.uniform(0.65, 0.95):.3f}")
# Demo: show the metric reference, script excerpt, and simulated numbers.
acc = AccuracyTesting()
acc.show_metrics()
acc.show_eval()
acc.simulate_results()
Performance & Robustness Testing
# performance.py — YOLO performance and robustness
import json
import random
import time
class PerformanceTesting:
    """Sample benchmark/robustness scripts and a YOLO model-size comparison."""

    # Example latency/FPS benchmark using the Ultralytics API.
    BENCHMARK_SCRIPT = """
# benchmark_yolo.py — Performance benchmark
from ultralytics import YOLO
import time
import torch
import numpy as np
model = YOLO("yolov8n.pt") # nano model
# Warmup
for _ in range(10):
    model.predict("test.jpg", verbose=False)
# Benchmark
times = []
for _ in range(100):
    start = time.perf_counter()
    results = model.predict("test.jpg", verbose=False)
    elapsed = (time.perf_counter() - start) * 1000
    times.append(elapsed)
print(f"Avg latency: {np.mean(times):.1f}ms")
print(f"P95 latency: {np.percentile(times, 95):.1f}ms")
print(f"P99 latency: {np.percentile(times, 99):.1f}ms")
print(f"FPS: {1000 / np.mean(times):.1f}")
print(f"GPU Memory: {torch.cuda.memory_allocated() / 1024**2:.0f} MB")
"""

    # Example robustness checks via image degradations/transforms.
    ROBUSTNESS_TESTS = """
# robustness_tests.py — Robustness testing
import cv2
import numpy as np
from ultralytics import YOLO
model = YOLO("best.pt")
def test_augmentations(image_path):
    img = cv2.imread(image_path)
    tests = {}
    # Low light
    dark = cv2.convertScaleAbs(img, alpha=0.3, beta=0)
    tests["low_light"] = model.predict(dark, verbose=False)
    # Overexposure
    bright = cv2.convertScaleAbs(img, alpha=2.0, beta=50)
    tests["overexposed"] = model.predict(bright, verbose=False)
    # Blur
    blurred = cv2.GaussianBlur(img, (15, 15), 0)
    tests["blurred"] = model.predict(blurred, verbose=False)
    # Noise
    noise = np.random.normal(0, 25, img.shape).astype(np.uint8)
    noisy = cv2.add(img, noise)
    tests["noisy"] = model.predict(noisy, verbose=False)
    # Rotation
    h, w = img.shape[:2]
    M = cv2.getRotationMatrix2D((w//2, h//2), 15, 1.0)
    rotated = cv2.warpAffine(img, M, (w, h))
    tests["rotated_15deg"] = model.predict(rotated, verbose=False)
    for name, results in tests.items():
        detections = len(results[0].boxes)
        conf = results[0].boxes.conf.mean().item() if detections > 0 else 0
        print(f" [{name}] Detections: {detections} | Avg conf: {conf:.3f}")
test_augmentations("test_image.jpg")
"""

    def show_benchmark(self):
        """Print the opening excerpt of the benchmark script."""
        print("=== Performance Benchmark ===")
        print(self.BENCHMARK_SCRIPT[:500])

    def show_robustness(self):
        """Print the opening excerpt of the robustness-test script."""
        print("\n=== Robustness Tests ===")
        print(self.ROBUSTNESS_TESTS[:500])

    def model_comparison(self):
        """Print a size/accuracy/speed comparison of YOLOv8 variants.

        FPS figures are illustrative random draws from per-model ranges.
        """
        print("\n=== YOLO Model Comparison ===")
        # (name, params, COCO mAP, FPS range, on-disk size)
        specs = [
            ("YOLOv8n", "3.2M", 37.3, (400, 600), "6.2 MB"),
            ("YOLOv8s", "11.2M", 44.9, (200, 350), "21.5 MB"),
            ("YOLOv8m", "25.9M", 50.2, (100, 200), "49.7 MB"),
            ("YOLOv8l", "43.7M", 52.9, (60, 120), "83.7 MB"),
            ("YOLOv8x", "68.2M", 53.9, (40, 80), "131 MB"),
        ]
        for name, params, map_score, fps_range, size in specs:
            fps = random.randint(*fps_range)
            print(f" [{name}] Params: {params} | mAP: {map_score} | FPS: {fps} | Size: {size}")
# Demo: show benchmark and robustness excerpts, then the model comparison.
perf = PerformanceTesting()
perf.show_benchmark()
perf.show_robustness()
perf.model_comparison()
CI/CD & Production QA
# cicd_qa.py — CI/CD and production QA
import json
import random
class ProductionQA:
    """CI/CD pipeline example plus simulated production QA views."""

    # Example GitHub Actions workflow that gates model changes on QA steps.
    PIPELINE = """
# .github/workflows/yolo-qa.yml
name: YOLO Model QA
on:
  push:
    paths: ['models/**', 'data/**', 'tests/**']
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: pip install ultralytics pytest numpy opencv-python
      - name: Data quality tests
        run: pytest tests/test_data_quality.py -v
      - name: Model accuracy tests
        run: |
          python eval_model.py --model models/best.pt --data data.yaml
          python check_metrics.py --min-map50 0.75 --min-precision 0.80
      - name: Performance benchmark
        run: python benchmark.py --model models/best.pt --target-fps 30
      - name: Robustness tests
        run: pytest tests/test_robustness.py -v
"""

    def show_pipeline(self):
        """Print the opening excerpt of the CI/CD workflow."""
        print("=== CI/CD Pipeline ===")
        print(self.PIPELINE[:500])

    def monitoring(self):
        """Print a simulated snapshot of production monitoring metrics."""
        print("\n=== Production Monitoring ===")
        # Values are illustrative random draws, not real telemetry.
        metrics = {
            "Inference latency (P95)": f"{random.randint(20, 100)}ms",
            "Detection accuracy (sample)": f"{random.uniform(85, 98):.1f}%",
            "False positive rate": f"{random.uniform(1, 8):.1f}%",
            "Throughput": f"{random.randint(20, 100)} fps",
            "Model drift detected": random.choice(["No", "No", "No", "Yes"]),
            "GPU utilization": f"{random.randint(40, 80)}%",
        }
        for label, value in metrics.items():
            print(f" {label}: {value}")

    def qa_checklist(self):
        """Print a pre-release QA checklist with simulated pass/fail states."""
        print(f"\n=== QA Checklist ===")
        checks = [
            ("Data quality validated", True),
            ("mAP@50 > threshold", True),
            ("Per-class AP balanced", random.choice([True, True, False])),
            ("FPS meets target", True),
            ("Robustness tests passed", random.choice([True, True, False])),
            ("Integration tests passed", True),
            ("Model versioned and tagged", True),
            ("Rollback plan documented", True),
        ]
        for label, ok in checks:
            print(f" [{'PASS' if ok else 'FAIL':>4}] {label}")
# Demo: show the CI/CD pipeline, monitoring snapshot, and QA checklist.
qa = ProductionQA()
qa.show_pipeline()
qa.monitoring()
qa.qa_checklist()
FAQ - คำถามที่พบบ่อย
Q: mAP เท่าไหร่ถึงจะดีพอสำหรับ production?
A: ขึ้นอยู่กับ use case — Safety-critical (self-driving): mAP@50 > 95%; General object detection: mAP@50 > 75%; Prototype/MVP: mAP@50 > 60% อย่างไรก็ตาม สิ่งที่สำคัญกว่าตัวเลข mAP คือ per-class performance, false positive rate และ edge cases
Q: YOLOv8 กับ YOLOv5 อันไหนดี?
A: YOLOv8: ใหม่กว่า, accuracy ดีกว่า, API ง่ายกว่า (Ultralytics), anchor-free YOLOv5: stable, community ใหญ่, documentation เยอะ แนะนำ: ใช้ YOLOv8 สำหรับ projects ใหม่ เว้นแต่มี legacy YOLOv5 ที่ทำงานดีอยู่แล้ว
Q: Test data ต้องเตรียมอย่างไร?
A: แยก train/val/test เป็น 70/15/15 หรือ 80/10/10; test set ต้องไม่ overlap กับ train set เลย; test set ควรครอบคลุม edge cases (dark, blur, occlusion); ตรวจ class balance ใน test set; และใช้ stratified split เพื่อให้ class distribution ใกล้เคียงกันทุกชุด
Q: Model drift ตรวจอย่างไร?
A: เปรียบเทียบ accuracy metrics เป็นระยะ (weekly/monthly); monitor confidence score distribution (ถ้าค่าเฉลี่ยลดลงอาจเป็นสัญญาณของ drift); สุ่ม production predictions มาให้คนตรวจ (spot check); re-evaluate กับ fresh labeled data; และทำ A/B test ระหว่าง model versions ใน production
