Computer Vision ????????? YOLO ?????????????????????
Computer Vision ????????????????????????????????? AI ?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????? ????????????????????? object detection, face recognition, image segmentation ??????????????? AR/VR applications
YOLO (You Only Look Once) ???????????? real-time object detection algorithm ??????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? milliseconds ?????????????????????????????????????????? real-time ???????????? self-driving cars, security cameras, AR applications ???????????????????????? YOLOv8 ????????? Ultralytics ??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
AR (Augmented Reality) ??????????????????????????? 3D ?????????????????????????????????????????????????????? ????????? Computer Vision ????????????????????? surfaces, objects, faces ????????????????????? virtual content VR (Virtual Reality) ??????????????????????????????????????????????????????????????? ????????? Computer Vision ?????????????????? hand tracking, eye tracking, spatial mapping ???????????????????????? YOLO ????????? AR/VR ?????????????????????????????? interactive experiences ??????????????????????????????????????? real-world objects ?????????
????????????????????? YOLOv8 ?????????????????? Object Detection
Setup YOLOv8 environment
# === YOLOv8 Setup ===
# 1. Install Ultralytics YOLOv8
pip install ultralytics opencv-python torch torchvision

# 2. Verify installation — loads the nano checkpoint and lists its classes
python3 -c "
from ultralytics import YOLO
model = YOLO('yolov8n.pt')  # nano model (fastest)
print(f'Model loaded: {model.model_name}')
print(f'Classes: {len(model.names)} objects')
print('Sample classes:', list(model.names.values())[:10])
"

# 3. Quick Test — detect objects in a sample image
python3 -c "
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model('https://ultralytics.com/images/bus.jpg')
for r in results:
    boxes = r.boxes
    print(f'Detected {len(boxes)} objects')
    for box in boxes:
        cls = int(box.cls[0])
        conf = float(box.conf[0])
        print(f'  {model.names[cls]}: {conf:.2f}')
"

# 4. Model Variants — reference table of the YOLOv8 family
#    (indentation matters: each variant is nested under yolov8_variants)
cat > yolo_models.yaml << 'EOF'
yolov8_variants:
  yolov8n:
    name: "YOLOv8 Nano"
    params: "3.2M"
    mAP: "37.3"
    speed: "fastest (~1ms GPU)"
    best_for: "Mobile, Edge devices, Real-time"
  yolov8s:
    name: "YOLOv8 Small"
    params: "11.2M"
    mAP: "44.9"
    speed: "fast (~2ms GPU)"
    best_for: "Balance speed/accuracy"
  yolov8m:
    name: "YOLOv8 Medium"
    params: "25.9M"
    mAP: "50.2"
    speed: "moderate (~4ms GPU)"
    best_for: "General purpose"
  yolov8l:
    name: "YOLOv8 Large"
    params: "43.7M"
    mAP: "52.9"
    speed: "slower (~6ms GPU)"
    best_for: "High accuracy needed"
  yolov8x:
    name: "YOLOv8 Extra Large"
    params: "68.2M"
    mAP: "53.9"
    speed: "slowest (~8ms GPU)"
    best_for: "Maximum accuracy"
tasks:
  detection: "yolov8n.pt — Bounding box detection"
  segmentation: "yolov8n-seg.pt — Instance segmentation"
  classification: "yolov8n-cls.pt — Image classification"
  pose: "yolov8n-pose.pt — Pose estimation"
EOF
echo "YOLOv8 setup complete"
??????????????? Real-Time Detection Pipeline
Real-time object detection ????????????????????????
#!/usr/bin/env python3
# realtime_detection.py — Real-Time YOLO Detection
import json
import logging
from typing import Dict, List

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("detection")


class YOLODetectionPipeline:
    """Reference material for building real-time YOLOv8 detection pipelines.

    The methods return example code (as strings) and descriptive metadata;
    nothing here runs a model directly, so the class has no heavy deps.
    """

    def __init__(self):
        # Reserved for future pipeline configuration.
        self.config = {}

    def webcam_detection_code(self) -> str:
        """Return example code (as a string) for real-time webcam detection."""
        return """
from ultralytics import YOLO
import cv2

# Load model
model = YOLO('yolov8n.pt')

# Open webcam
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run detection
    results = model(frame, conf=0.5, verbose=False)

    # Draw results
    annotated = results[0].plot()

    # Display FPS (inference speed is reported in milliseconds)
    fps = 1000 / results[0].speed['inference']
    cv2.putText(annotated, f'FPS: {fps:.0f}', (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    cv2.imshow('YOLOv8 Detection', annotated)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
"""

    def video_processing(self) -> Dict[str, str]:
        """Return example code and notes for processing video files."""
        return {
            "batch_processing": """
from ultralytics import YOLO

model = YOLO('yolov8m.pt')

# Process video file
results = model(
    source='input_video.mp4',
    save=True,     # Save annotated video
    conf=0.5,      # Confidence threshold
    iou=0.7,       # NMS IoU threshold
    show=False,    # Don't display
    stream=True,   # Process frame by frame (memory efficient)
)

for r in results:
    boxes = r.boxes
    # Process each frame's detections
    for box in boxes:
        cls = int(box.cls[0])
        conf = float(box.conf[0])
        xyxy = box.xyxy[0].tolist()
        # Custom logic here
""",
            # NOTE(review): the two notes below were Thai text lost to
            # mojibake in the source; preserved as-is rather than guessed.
            "multi_stream": "????????? ThreadPoolExecutor process ???????????? streams ????????????????????????",
            "gpu_batch": "Batch frames ???????????? GPU ???????????????????????? ??????????????? throughput",
        }

    def detection_use_cases(self) -> Dict[str, str]:
        """Return a mapping of industry -> typical YOLO detection use case."""
        return {
            # NOTE(review): the security entry was Thai text lost to mojibake.
            "security": "???????????????????????????????????? ???????????????????????? ??????????????? ???????????????????????? CCTV",
            "retail": "People counting, Heatmap, Shelf monitoring",
            "manufacturing": "Defect detection, Safety compliance (PPE)",
            "traffic": "Vehicle counting, License plate, Speed estimation",
            "agriculture": "Crop disease, Weed detection, Fruit counting",
            "medical": "Cell detection, Tumor segmentation",
        }


pipeline = YOLODetectionPipeline()
cases = pipeline.detection_use_cases()
print("YOLO Use Cases:")
for case, desc in cases.items():
    print(f"  {case}: {desc}")
AR/VR Development ????????? Computer Vision
??????????????? AR experience ???????????? YOLO
#!/usr/bin/env python3
# ar_vr_pipeline.py — AR/VR with Computer Vision
import json
import logging
from typing import Dict, List

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("arvr")


class ARVRPipeline:
    """Reference architecture and integration notes for AR/VR + YOLO.

    Methods return static metadata dictionaries; no AR runtime is required.
    """

    def __init__(self):
        # Reserved for future pipeline configuration.
        self.config = {}

    def ar_architecture(self) -> Dict:
        """Return the AR processing pipeline steps and target platforms."""
        return {
            "pipeline": [
                {"step": 1, "name": "Camera Input", "tech": "OpenCV, ARKit, ARCore"},
                {"step": 2, "name": "Object Detection", "tech": "YOLOv8, MediaPipe"},
                {"step": 3, "name": "Pose Estimation", "tech": "YOLOv8-pose, OpenPose"},
                {"step": 4, "name": "Surface Detection", "tech": "ARKit/ARCore SLAM"},
                {"step": 5, "name": "3D Rendering", "tech": "Three.js, Unity, Unreal"},
                {"step": 6, "name": "Overlay on Camera", "tech": "WebXR, ARKit, ARCore"},
            ],
            "platforms": {
                "web_ar": {
                    "framework": "Three.js + WebXR + TensorFlow.js",
                    "pros": "No app install, cross-platform",
                    "cons": "Limited features, performance",
                },
                "mobile_ar": {
                    "ios": "ARKit + Core ML (YOLOv8 CoreML export)",
                    "android": "ARCore + TensorFlow Lite",
                    "cross_platform": "Unity AR Foundation",
                },
                "vr": {
                    "standalone": "Meta Quest (Unity/Unreal)",
                    "pc_vr": "SteamVR (Unity/Unreal)",
                    "web_vr": "A-Frame + WebXR",
                },
            },
        }

    def yolo_ar_integration(self) -> Dict:
        """Return example use cases and model export formats for AR overlays."""
        # NOTE(review): the "concept" and "description" values below were Thai
        # text lost to mojibake in the source; preserved as-is, not guessed.
        return {
            "concept": "???????????????????????????????????????????????????????????? YOLO ???????????????????????? AR content",
            "examples": [
                {
                    "name": "Product Info AR",
                    "description": "????????????????????????????????????????????????????????? YOLO ??????????????????????????????????????????????????? ???????????????????????? ??????????????? ????????????????????????????????????????????????????????? AR overlay",
                },
                {
                    "name": "AR Navigation",
                    "description": "????????????????????????????????? ????????????????????? ??????????????? ??????????????????????????????????????? AR ???????????????",
                },
                {
                    "name": "Interactive Learning",
                    "description": "???????????????????????????????????????????????????????????????????????? YOLO ??????????????????????????? ???????????? 3D model ??????????????????????????????????????? AR",
                },
                {
                    "name": "Safety Monitoring",
                    "description": "????????????????????? PPE (???????????? ???????????? ??????????????????????????????????????????) ???????????? warning AR ??????????????????????????????????????????",
                },
            ],
            "export_formats": {
                "CoreML": "iOS native (ARKit)",
                "TFLite": "Android native (ARCore)",
                "ONNX": "Cross-platform (Unity Barracuda)",
                "TensorRT": "NVIDIA Jetson (edge device)",
                "OpenVINO": "Intel devices",
            },
        }


arvr = ARVRPipeline()
arch = arvr.ar_architecture()
print("AR Pipeline:")
for step in arch["pipeline"]:
    print(f"  {step['step']}. {step['name']}: {step['tech']}")

integration = arvr.yolo_ar_integration()
print(f"\nAR Examples: {len(integration['examples'])}")
for ex in integration["examples"]:
    print(f"  {ex['name']}: {ex['description'][:50]}...")

print(f"\nExport Formats: {len(integration['export_formats'])}")
for fmt, desc in integration["export_formats"].items():
    print(f"  {fmt}: {desc}")
Training Custom YOLO Model
Train YOLO model ?????????????????? custom objects
# === Custom YOLO Training ===
# 1. Prepare Dataset
# Structure:
#   dataset/
#     train/
#       images/   (jpg/png files)
#       labels/   (txt files, YOLO format)
#     val/
#       images/
#       labels/
# Label format (YOLO): class_id x_center y_center width height (normalized 0-1)
# Example: 0 0.5 0.5 0.3 0.4

# 2. Dataset YAML — class ids must be nested under `names:`
cat > dataset.yaml << 'EOF'
path: /data/custom_dataset
train: train/images
val: val/images
nc: 3  # number of classes
names:
  0: helmet
  1: vest
  2: person
EOF

# 3. Train Model
cat > train.py << 'PYEOF'
from ultralytics import YOLO

# Load pretrained model
model = YOLO('yolov8n.pt')

# Train on custom dataset
results = model.train(
    data='dataset.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
    patience=20,   # Early stopping
    device=0,      # GPU 0
    workers=8,
    project='runs/train',
    name='ppe_detector',
    # Augmentation
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10,
    translate=0.1,
    scale=0.5,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1,
)
print(f"Best mAP50: {results.results_dict['metrics/mAP50(B)']:.3f}")
print(f"Best model: runs/train/ppe_detector/weights/best.pt")
PYEOF
python3 train.py

# 4. Evaluate on the validation split
python3 -c "
from ultralytics import YOLO
model = YOLO('runs/train/ppe_detector/weights/best.pt')
metrics = model.val(data='dataset.yaml')
print(f'mAP50: {metrics.box.map50:.3f}')
print(f'mAP50-95: {metrics.box.map:.3f}')
"

# 5. Export for Deployment
python3 -c "
from ultralytics import YOLO
model = YOLO('runs/train/ppe_detector/weights/best.pt')
# Export to various formats
model.export(format='onnx')    # Cross-platform
model.export(format='coreml')  # iOS
model.export(format='tflite')  # Android/Edge
model.export(format='engine')  # TensorRT (NVIDIA)
print('Export complete')
"
echo "Custom training complete"
Deployment ????????? Optimization
Deploy YOLO model ?????????????????? production
#!/usr/bin/env python3
# deployment.py — YOLO Deployment & Optimization
import json
import logging
from typing import Dict

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("deploy")


class YOLODeployment:
    """Reference catalog of YOLO deployment targets and optimization tips."""

    def __init__(self):
        pass

    def deployment_options(self) -> Dict:
        """Return deployment targets with rough performance/cost figures.

        The figures are ballpark guidance from the original tutorial, not
        measured benchmarks.
        """
        return {
            "cloud_api": {
                "architecture": "FastAPI + YOLO + Docker",
                "scaling": "Kubernetes with GPU nodes",
                "latency": "50-200ms per image",
                "cost": "$0.50-2.00/1000 images (GPU instance)",
            },
            "edge_device": {
                "nvidia_jetson": {
                    "model": "YOLOv8n TensorRT",
                    "fps": "30-60 FPS (Jetson Orin Nano)",
                    "power": "7-15W",
                    "cost": "$199-499",
                },
                "raspberry_pi": {
                    "model": "YOLOv8n NCNN/TFLite",
                    "fps": "5-10 FPS (Pi 5)",
                    "power": "5-10W",
                    "cost": "$60-80",
                },
                "coral_tpu": {
                    "model": "YOLOv8n TFLite + Edge TPU",
                    "fps": "20-40 FPS",
                    "power": "2-4W",
                    "cost": "$60-150",
                },
            },
            "mobile": {
                "ios": "CoreML + ARKit",
                "android": "TFLite + ARCore",
                "flutter": "tflite_flutter package",
                "fps_target": "15-30 FPS on modern phones",
            },
            "browser": {
                "framework": "TensorFlow.js + WebGL",
                "model": "YOLOv8n TFJS format",
                "fps": "10-20 FPS (desktop), 5-10 FPS (mobile)",
                "note": "No installation needed",
            },
        }

    def optimization_techniques(self) -> Dict:
        """Return model-optimization techniques and short notes.

        NOTE(review): several values below were Thai text lost to mojibake in
        the source; preserved as-is rather than guessed.
        """
        return {
            "quantization": {
                "int8": "?????????????????? model 4x ???????????????????????? 2-3x accuracy ??????????????????????????????",
                "fp16": "?????????????????? 2x ???????????????????????? 1.5-2x accuracy ????????????????????????",
            },
            "pruning": "????????? weights ?????????????????????????????????????????? ?????????????????? 30-50%",
            "knowledge_distillation": "Train model ????????????????????? model ????????????",
            "tensorrt": "NVIDIA optimization ???????????????????????? 2-5x",
            "batching": "Process ????????????????????????????????????????????? ??????????????? throughput",
            "resolution": "?????? input size (640???416) ????????????????????????????????? accuracy ??????????????????????????????",
        }


deploy = YOLODeployment()
options = deploy.deployment_options()
print("Deployment Options:")
for platform, info in options.items():
    # Only the cloud entry carries architecture/latency at the top level.
    if isinstance(info, dict) and "architecture" in info:
        print(f"  {platform}: {info['architecture']}, {info['latency']}")
    else:
        print(f"  {platform}")

opt = deploy.optimization_techniques()
print("\nOptimization:")
for technique, desc in opt.items():
    # "quantization" maps to a nested dict, so only print string entries.
    if isinstance(desc, str):
        print(f"  {technique}: {desc[:60]}...")
FAQ ??????????????????????????????????????????
Q: YOLOv8 ????????? YOLOv5 ???????????????????????????????????????????
A: YOLOv8 ???????????????????????????????????????????????????????????? ????????? Ultralytics ????????????????????? ?????????????????????????????? (mAP ????????????????????? 2-5%), API ??????????????????????????????????????????????????????, ?????????????????? tasks ????????????????????? (detection, segmentation, classification, pose), Training ????????????????????????, Export formats ????????????????????? YOLOv5 ????????????????????????????????? ?????? community ???????????? documentation ???????????? ???????????????????????? features ???????????????????????? ???????????????????????? YOLOv8 ?????????????????? project ???????????? ??????????????? YOLOv5 model ???????????????????????? ??????????????????????????????????????????????????????????????????
Q: GPU ????????????????????????????????????????????? YOLO?
A: Training RTX 3060 12GB ??????????????????????????????????????? YOLOv8n-m, RTX 4090 24GB ?????????????????????????????? cost-effective, A100 40/80GB ?????????????????? large models ????????? batch ???????????? Inference RTX 3060 ?????????????????? 30+ FPS (YOLOv8n), Jetson Orin Nano ?????????????????? edge device 30-60 FPS, Intel GPU + OpenVINO ?????????????????? budget option ??????????????? GPU ????????? CPU ??????????????????????????? YOLOv8n ?????? CPU ~50-100ms/frame (10-20 FPS) ??????????????????????????????????????? non-real-time tasks Cloud GPU RunPod, Lambda, AWS g5 instance ????????????????????????????????? training
Q: AR ????????? VR ???????????????????????????????????????????
A: AR (Augmented Reality) ???????????? content ???????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????? AR ?????????????????????????????????????????? + ??????????????? virtual ???????????????????????? Pokemon GO, IKEA Place, Google Lens VR (Virtual Reality) ??????????????????????????????????????????????????????????????? ????????? headset ??????????????? (Meta Quest, PS VR) ??????????????????????????????????????????????????????????????? ???????????????????????? Beat Saber, Half-Life Alyx MR (Mixed Reality) ????????? AR+VR ??????????????? virtual interact ??????????????????????????????????????? ???????????????????????? Meta Quest 3 passthrough, Apple Vision Pro ?????????????????? developer ???????????????????????? AR (???????????????????????? ????????????????????? VR headset) ????????? ARKit/ARCore + YOLO
Q: YOLO ?????????????????? CCTV security ???????????????????
A: ????????? ???????????? use case ?????????????????????????????? ????????? YOLOv8 ????????????????????? ??????????????? (person detection, counting), ???????????????????????? (vehicle detection, license plate), ????????????????????????????????????????????? (loitering, intrusion), PPE compliance (???????????? ??????????????????????????????????????????) Architecture ??????????????? RTSP ??? YOLO inference server ??? Alert system Setup ????????? YOLOv8n ?????????????????? real-time (30 FPS ????????????????????????), 1 GPU (RTX 3060) process ????????? 4-8 ???????????????????????????????????????, ????????? DeepSORT ?????????????????? object tracking, Alert ???????????? LINE, Telegram ???????????????????????????????????? event ????????????????????????????????? ???????????? comply ????????? PDPA (???????????????????????????????????????????????????????????????????????????????????????) ????????????????????????????????????????????????????????????????????????
