Computer Vision YOLO Testing Strategy QA — คู่มือฉบับสมบูรณ์ 2026

Computer Vision YOLO Testing Strategy QA คืออะไร

YOLO (You Only Look Once) เป็น real-time object detection model ที่ได้รับความนิยมสูงสุดในด้าน computer vision สามารถตรวจจับวัตถุหลายชนิดในภาพเดียวด้วยความเร็วสูง Testing Strategy QA สำหรับ YOLO คือกระบวนการทดสอบและประกันคุณภาพของ model ตั้งแต่ data quality, model accuracy, inference performance ไปจนถึง edge cases และ production monitoring เพื่อให้มั่นใจว่า model ทำงานได้ถูกต้องและเชื่อถือได้ในสภาพแวดล้อมจริง

YOLO Testing Framework

# yolo_testing.py — YOLO testing framework

import json



class YOLOTestingFramework:
    """Catalogue of the QA test levels that apply to a YOLO detection model.

    ``TEST_LEVELS`` maps a level key to a dict holding a display ``name``, a
    ``description`` and the list of ``tests`` that belong to that level.
    """

    TEST_LEVELS = {
        "data_quality": {
            "name": "Data Quality Tests",
            "description": "ตรวจสอบคุณภาพ training/test data",
            "tests": [
                "Label correctness (bounding box accuracy)",
                "Class distribution balance",
                "Image quality (resolution, blur, lighting)",
                "Duplicate detection",
                "Annotation format validation (YOLO format)",
            ],
        },
        "model_accuracy": {
            "name": "Model Accuracy Tests",
            "description": "ตรวจสอบ accuracy ของ model",
            "tests": [
                "mAP@50 (Mean Average Precision)",
                "mAP@50:95 (stricter metric)",
                "Per-class precision and recall",
                "Confusion matrix analysis",
                "IoU (Intersection over Union) distribution",
            ],
        },
        "performance": {
            "name": "Performance Tests",
            "description": "ตรวจสอบ inference speed และ resource usage",
            "tests": [
                "FPS (Frames Per Second) on target hardware",
                "Latency (ms per inference)",
                "GPU/CPU memory usage",
                "Batch processing throughput",
                "Model size (MB) and load time",
            ],
        },
        "robustness": {
            "name": "Robustness Tests",
            "description": "ตรวจสอบ model ใน edge cases",
            "tests": [
                "Low light / overexposure images",
                "Occluded objects (บังบางส่วน)",
                "Small objects detection",
                "Different angles / perspectives",
                "Adversarial examples",
            ],
        },
        "integration": {
            "name": "Integration Tests",
            "description": "ตรวจสอบ model ใน production pipeline",
            "tests": [
                "API endpoint response format",
                "Input validation (image format, size)",
                "Error handling (corrupt image, timeout)",
                "Concurrent request handling",
                "Model versioning and rollback",
            ],
        },
    }

    def show_framework(self, max_tests=3):
        """Print every test level with its description and leading tests.

        Args:
            max_tests: Number of tests to list per level. Defaults to 3, the
                summary length the method has always printed; pass ``None``
                to list every test of each level.
        """
        print("=== YOLO Testing Framework ===\n")
        # Only the level payload is needed; the dict key is not displayed.
        for level in self.TEST_LEVELS.values():
            print(f"[{level['name']}]")
            print(f" {level['description']}")
            for test in level["tests"][:max_tests]:
                print(f" • {test}")
            print()



# Demo: build the framework catalogue and print its summary.
framework = YOLOTestingFramework()

framework.show_framework()

Data Quality Testing

# data_quality.py — YOLO data quality tests

import json

import random

import os



class DataQualityTests:
    """Reference material for validating a YOLO dataset before training.

    ``YOLO_FORMAT`` is a human-readable description of the label file format
    and ``TEST_SCRIPT`` is the source of a standalone validation script; both
    are only printed by this class, never executed.
    """

    # The field line had been lost from this text; without it the "Rules"
    # below refer to fields that are never introduced.
    YOLO_FORMAT = """
    YOLO Annotation Format:
    <class_id> <x_center> <y_center> <width> <height>

    Example (person at center):
    0 0.5 0.5 0.3 0.6

    Rules:
    - All values normalized (0-1)
    - x_center, y_center = center of bounding box
    - width, height = relative to image size
    - One .txt file per image (same filename)
    """

    TEST_SCRIPT = """
# test_data_quality.py — Data quality validation
import os
import cv2
from pathlib import Path
from collections import Counter


class YOLODataValidator:
    IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png"}

    def __init__(self, images_dir, labels_dir, classes):
        self.images_dir = Path(images_dir)
        self.labels_dir = Path(labels_dir)
        self.classes = classes
        self.errors = []

    def validate_labels(self):
        label_files = list(self.labels_dir.glob("*.txt"))
        print(f"Validating {len(label_files)} label files...")

        class_counts = Counter()
        for lf in label_files:
            with open(lf, encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    parts = line.strip().split()
                    if len(parts) != 5:
                        self.errors.append(f"{lf.name}:{line_num} Invalid format")
                        continue

                    try:
                        class_id = int(parts[0])
                        coords = [float(x) for x in parts[1:]]
                    except ValueError:
                        # Non-numeric field: report it instead of crashing.
                        self.errors.append(f"{lf.name}:{line_num} Invalid format")
                        continue

                    if not 0 <= class_id < len(self.classes):
                        self.errors.append(f"{lf.name}:{line_num} Invalid class {class_id}")
                        continue  # an unknown class cannot be counted

                    if any(c < 0 or c > 1 for c in coords):
                        self.errors.append(f"{lf.name}:{line_num} Out of range coords")

                    class_counts[self.classes[class_id]] += 1

        return class_counts

    def check_image_label_pairs(self):
        images = {
            p.stem
            for p in self.images_dir.glob("*.*")
            if p.suffix.lower() in self.IMAGE_SUFFIXES
        }
        labels = {p.stem for p in self.labels_dir.glob("*.txt")}

        missing_labels = images - labels
        orphan_labels = labels - images

        if missing_labels:
            self.errors.append(f"{len(missing_labels)} images without labels")
        if orphan_labels:
            self.errors.append(f"{len(orphan_labels)} labels without images")

        return len(images), len(labels), len(missing_labels)

    def report(self):
        counts = self.validate_labels()
        imgs, lbls, missing = self.check_image_label_pairs()

        print(f"Images: {imgs} | Labels: {lbls} | Missing: {missing}")
        print(f"Class distribution: {dict(counts)}")
        print(f"Errors: {len(self.errors)}")
        for err in self.errors[:5]:
            print(f" ⚠ {err}")


if __name__ == "__main__":
    validator = YOLODataValidator("data/images", "data/labels",
                                  ["person", "car", "truck", "bicycle"])
    validator.report()
"""

    def show_format(self):
        """Print the YOLO annotation format reference."""
        print("=== YOLO Format ===")
        print(self.YOLO_FORMAT)

    def show_script(self, max_chars=600):
        """Print the beginning of the data-quality validation script.

        Args:
            max_chars: Number of leading characters of ``TEST_SCRIPT`` to
                print. Defaults to 600; pass ``None`` for the whole script.
        """
        print("=== Data Quality Script ===")
        print(self.TEST_SCRIPT[:max_chars])



# Demo: print the annotation format reference, then the start of the validation script.
dq = DataQualityTests()

dq.show_format()

dq.show_script()

Model Accuracy Testing

# accuracy.py — YOLO model accuracy testing

import json

import random



class AccuracyTesting:
    """Accuracy metric thresholds and an example evaluation script for YOLO.

    ``METRICS`` maps a metric key to its display ``name``, a ``description``
    and the ``good`` / ``excellent`` threshold strings. ``EVAL_SCRIPT`` is the
    source of a standalone Ultralytics evaluation script; it is only printed.
    """

    METRICS = {
        "map50": {"name": "mAP@50", "description": "Mean AP at IoU=0.50", "good": "> 0.70", "excellent": "> 0.85"},
        "map50_95": {"name": "mAP@50:95", "description": "Mean AP averaged over IoU 0.50-0.95", "good": "> 0.45", "excellent": "> 0.60"},
        "precision": {"name": "Precision", "description": "TP / (TP + FP)", "good": "> 0.80", "excellent": "> 0.90"},
        "recall": {"name": "Recall", "description": "TP / (TP + FN)", "good": "> 0.75", "excellent": "> 0.85"},
        "f1": {"name": "F1 Score", "description": "Harmonic mean of Precision and Recall", "good": "> 0.75", "excellent": "> 0.85"},
    }

    EVAL_SCRIPT = """
# eval_yolo.py — YOLO evaluation script
from ultralytics import YOLO

# Load model
model = YOLO("runs/detect/train/weights/best.pt")

# Evaluate on test set
metrics = model.val(data="data.yaml", split="test")

# Key metrics
print(f"mAP@50: {metrics.box.map50:.4f}")
print(f"mAP@50:95: {metrics.box.map:.4f}")
print(f"Precision: {metrics.box.mp:.4f}")
print(f"Recall: {metrics.box.mr:.4f}")

# Per-class metrics
# ap50/ap only contain the classes present in the evaluated split, so map each
# row back to its class id through ap_class_index instead of model.names order.
for i, cls_id in enumerate(metrics.box.ap_class_index):
    cls_name = model.names[int(cls_id)]
    ap50 = metrics.box.ap50[i]
    ap = metrics.box.ap[i]
    print(f" [{cls_name}] AP@50={ap50:.3f} AP@50:95={ap:.3f}")

# Confusion matrix
metrics.confusion_matrix.plot(save_dir="eval_results/")
"""

    def show_metrics(self):
        """Print each metric with its description and quality thresholds."""
        print("=== Accuracy Metrics ===\n")
        for m in self.METRICS.values():
            print(f" [{m['name']}] {m['description']} | Good: {m['good']} | Excellent: {m['excellent']}")

    def show_eval(self, max_chars=500):
        """Print the beginning of the evaluation script.

        Args:
            max_chars: Number of leading characters of ``EVAL_SCRIPT`` to
                print. Defaults to 500; pass ``None`` for the whole script.
        """
        print("\n=== Evaluation Script ===")
        print(self.EVAL_SCRIPT[:max_chars])

    def simulate_results(self):
        """Print randomly generated example metrics (not a real evaluation)."""
        print("\n=== Simulated Results ===")
        classes = ["person", "car", "truck", "bicycle"]
        overall_map50 = random.uniform(0.75, 0.92)
        overall_map = random.uniform(0.45, 0.65)
        print(f" Overall mAP@50: {overall_map50:.4f}")
        print(f" Overall mAP@50:95: {overall_map:.4f}")
        for cls in classes:
            ap50 = random.uniform(0.65, 0.95)
            print(f" [{cls}] AP@50={ap50:.3f}")



# Demo: print the metric thresholds, the start of the evaluation script,
# and a set of randomly generated example results.
acc = AccuracyTesting()

acc.show_metrics()

acc.show_eval()

acc.simulate_results()

Performance & Robustness Testing

# performance.py — YOLO performance and robustness

import json

import random

import time



class PerformanceTesting:
    """Example benchmark and robustness scripts plus a model size comparison.

    ``BENCHMARK_SCRIPT`` and ``ROBUSTNESS_TESTS`` hold the source of standalone
    scripts; this class only prints them, it never executes them.
    """

    BENCHMARK_SCRIPT = """
# benchmark_yolo.py — Performance benchmark
from ultralytics import YOLO
import time
import torch
import numpy as np

model = YOLO("yolov8n.pt")  # nano model

# Warmup
for _ in range(10):
    model.predict("test.jpg", verbose=False)

# Benchmark
times = []
for _ in range(100):
    start = time.perf_counter()
    results = model.predict("test.jpg", verbose=False)
    elapsed = (time.perf_counter() - start) * 1000
    times.append(elapsed)

print(f"Avg latency: {np.mean(times):.1f}ms")
print(f"P95 latency: {np.percentile(times, 95):.1f}ms")
print(f"P99 latency: {np.percentile(times, 99):.1f}ms")
print(f"FPS: {1000 / np.mean(times):.1f}")
# Only report GPU memory when a CUDA device is actually in use.
if torch.cuda.is_available():
    print(f"GPU Memory: {torch.cuda.memory_allocated() / 1024**2:.0f} MB")
else:
    print("GPU Memory: n/a (CUDA not available)")
"""

    ROBUSTNESS_TESTS = """
# robustness_tests.py — Robustness testing
import cv2
import numpy as np
from ultralytics import YOLO

model = YOLO("best.pt")


def test_augmentations(image_path):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising on a missing/corrupt file.
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    tests = {}

    # Low light
    dark = cv2.convertScaleAbs(img, alpha=0.3, beta=0)
    tests["low_light"] = model.predict(dark, verbose=False)

    # Overexposure
    bright = cv2.convertScaleAbs(img, alpha=2.0, beta=50)
    tests["overexposed"] = model.predict(bright, verbose=False)

    # Blur
    blurred = cv2.GaussianBlur(img, (15, 15), 0)
    tests["blurred"] = model.predict(blurred, verbose=False)

    # Noise
    # Add the signed noise in float and clip: casting the noise itself to
    # uint8 would wrap negative samples around to large positive values.
    noise = np.random.normal(0, 25, img.shape)
    noisy = np.clip(img.astype(np.float32) + noise, 0, 255).astype(np.uint8)
    tests["noisy"] = model.predict(noisy, verbose=False)

    # Rotation
    h, w = img.shape[:2]
    M = cv2.getRotationMatrix2D((w//2, h//2), 15, 1.0)
    rotated = cv2.warpAffine(img, M, (w, h))
    tests["rotated_15deg"] = model.predict(rotated, verbose=False)

    for name, results in tests.items():
        detections = len(results[0].boxes)
        conf = results[0].boxes.conf.mean().item() if detections > 0 else 0
        print(f" [{name}] Detections: {detections} | Avg conf: {conf:.3f}")


test_augmentations("test_image.jpg")
"""

    def show_benchmark(self, max_chars=500):
        """Print the beginning of the benchmark script.

        Args:
            max_chars: Number of leading characters of ``BENCHMARK_SCRIPT``
                to print. Defaults to 500; pass ``None`` for the whole script.
        """
        print("=== Performance Benchmark ===")
        print(self.BENCHMARK_SCRIPT[:max_chars])

    def show_robustness(self, max_chars=500):
        """Print the beginning of the robustness test script.

        Args:
            max_chars: Number of leading characters of ``ROBUSTNESS_TESTS``
                to print. Defaults to 500; pass ``None`` for the whole script.
        """
        print("\n=== Robustness Tests ===")
        print(self.ROBUSTNESS_TESTS[:max_chars])

    def model_comparison(self):
        """Print a comparison table of the YOLOv8 model sizes.

        The FPS column is drawn at random within a per-model range on every
        call; the remaining columns are fixed values.
        """
        print("\n=== YOLO Model Comparison ===")
        models = [
            {"name": "YOLOv8n", "params": "3.2M", "map": 37.3, "fps": random.randint(400, 600), "size": "6.2 MB"},
            {"name": "YOLOv8s", "params": "11.2M", "map": 44.9, "fps": random.randint(200, 350), "size": "21.5 MB"},
            {"name": "YOLOv8m", "params": "25.9M", "map": 50.2, "fps": random.randint(100, 200), "size": "49.7 MB"},
            {"name": "YOLOv8l", "params": "43.7M", "map": 52.9, "fps": random.randint(60, 120), "size": "83.7 MB"},
            {"name": "YOLOv8x", "params": "68.2M", "map": 53.9, "fps": random.randint(40, 80), "size": "131 MB"},
        ]
        for m in models:
            print(f" [{m['name']}] Params: {m['params']} | mAP: {m['map']} | FPS: {m['fps']} | Size: {m['size']}")



# Demo: print the start of the benchmark and robustness scripts,
# then the YOLOv8 model comparison table.
perf = PerformanceTesting()

perf.show_benchmark()

perf.show_robustness()

perf.model_comparison()

CI/CD & Production QA

# cicd_qa.py — CI/CD and production QA

import json

import random



class ProductionQA:
    """CI/CD pipeline example, simulated monitoring values and a QA checklist.

    ``PIPELINE`` holds the text of a GitHub Actions workflow; it is only
    printed. The monitoring values and two of the checklist results are
    drawn at random on every call.
    """

    # YAML is indentation-significant: the nesting below is part of the
    # workflow's meaning and must be kept intact.
    PIPELINE = """
# .github/workflows/yolo-qa.yml
name: YOLO Model QA
on:
  push:
    paths: ['models/**', 'data/**', 'tests/**']

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install ultralytics pytest numpy opencv-python

      - name: Data quality tests
        run: pytest tests/test_data_quality.py -v

      - name: Model accuracy tests
        run: |
          python eval_model.py --model models/best.pt --data data.yaml
          python check_metrics.py --min-map50 0.75 --min-precision 0.80

      - name: Performance benchmark
        run: python benchmark.py --model models/best.pt --target-fps 30

      - name: Robustness tests
        run: pytest tests/test_robustness.py -v
"""

    def show_pipeline(self, max_chars=500):
        """Print the beginning of the CI/CD workflow definition.

        Args:
            max_chars: Number of leading characters of ``PIPELINE`` to print.
                Defaults to 500; pass ``None`` for the whole workflow.
        """
        print("=== CI/CD Pipeline ===")
        print(self.PIPELINE[:max_chars])

    def monitoring(self):
        """Print a set of randomly generated production monitoring values."""
        print("\n=== Production Monitoring ===")
        metrics = {
            "Inference latency (P95)": f"{random.randint(20, 100)}ms",
            "Detection accuracy (sample)": f"{random.uniform(85, 98):.1f}%",
            "False positive rate": f"{random.uniform(1, 8):.1f}%",
            "Throughput": f"{random.randint(20, 100)} fps",
            "Model drift detected": random.choice(["No", "No", "No", "Yes"]),
            "GPU utilization": f"{random.randint(40, 80)}%",
        }
        for m, v in metrics.items():
            print(f" {m}: {v}")

    def qa_checklist(self):
        """Print the release checklist with a PASS/FAIL marker per item.

        Two items ("Per-class AP balanced", "Robustness tests passed") are
        chosen at random; the others are always reported as passing.
        """
        print("\n=== QA Checklist ===")
        checks = [
            ("Data quality validated", True),
            ("mAP@50 > threshold", True),
            ("Per-class AP balanced", random.choice([True, True, False])),
            ("FPS meets target", True),
            ("Robustness tests passed", random.choice([True, True, False])),
            ("Integration tests passed", True),
            ("Model versioned and tagged", True),
            ("Rollback plan documented", True),
        ]
        for name, status in checks:
            icon = "PASS" if status else "FAIL"
            print(f" [{icon:>4}] {name}")



# Demo: print the start of the CI/CD workflow, the simulated monitoring
# values and the QA checklist.
qa = ProductionQA()

qa.show_pipeline()

qa.monitoring()

qa.qa_checklist()

FAQ - คำถามที่พบบ่อย

Q: mAP เท่าไหร่ถึงจะดีพอสำหรับ production?

A: ขึ้นอยู่กับ use case — Safety-critical (เช่น self-driving): mAP@50 > 95%; General object detection: mAP@50 > 75%; Prototype/MVP: mAP@50 > 60% ทั้งนี้สิ่งที่สำคัญกว่าค่า mAP โดยรวมคือ per-class performance, false positive rate และผลลัพธ์ใน edge cases

เนื้อหาเกี่ยวข้อง — แนะนำให้อ่าน PagerDuty Incident Pod Scheduling — จัดการ

Q: YOLOv8 กับ YOLOv5 อันไหนดี?

แนะนำเพิ่มเติม — iCafeForex

A: YOLOv8: ใหม่กว่า, accuracy ดีกว่า, API ง่ายกว่า (Ultralytics) และเป็น anchor-free; YOLOv5: stable, community ใหญ่, documentation เยอะ — แนะนำ: ใช้ YOLOv8 สำหรับ projects ใหม่ เว้นแต่มี legacy YOLOv5 ที่ทำงานดีอยู่แล้ว

เนื้อหาเกี่ยวข้อง — ดูเพิ่มเติมเรื่อง หุ้น บัวหลวง กับเทรนด์เทคโนโลยีใหม่: โอกาสการลงทุนในยุคดิจิทัล

Q: Test data ต้องเตรียมอย่างไร?

A: (1) แยก train/val/test เป็น 70/15/15 หรือ 80/10/10 (2) Test set ต้องไม่ overlap กับ train set เลย (3) Test set ควรครอบคลุม edge cases (dark, blur, occlusion) (4) ตรวจ class balance ใน test set (5) ใช้ stratified split เพื่อให้ class distribution ของแต่ละ split ใกล้เคียงกัน

แนะนำเพิ่มเติม — ติดตาม XM Signal

เนื้อหาเกี่ยวข้อง — ดูเพิ่มเติมเรื่อง market automation คือ

Q: Model drift ตรวจอย่างไร?

A: (1) เปรียบเทียบ accuracy metrics เป็นระยะ (weekly/monthly) (2) Monitor confidence score distribution (ถ้าค่าลดลงอย่างต่อเนื่อง อาจเป็นสัญญาณของ drift) (3) Sample production predictions แล้วให้ human review (spot check) (4) Re-evaluate กับ fresh labeled data (5) A/B test model versions ใน production

เนื้อหาเกี่ยวข้อง — ทำความเข้าใจ Prometheus Federation Cache Strategy Redis