TensorFlow Serving Edge Computing
TensorFlow Serving Production ML Serving gRPC REST API Model Versioning Batching GPU Edge Computing ประมวลผลใกล้แหล่งข้อมูล ลด Latency IoT CCTV หุ่นยนต์ โรงงาน
| Platform | Target | Performance | Use Case |
|---|---|---|---|
| TF Serving | Server/Cloud | สูงมาก (GPU) | Production API |
| TF Lite | Mobile/Edge | ปานกลาง | Android iOS RPi |
| TF.js | Browser/Node | ปานกลาง | Web Application |
| TF Micro | Microcontroller | ต่ำ | TinyML IoT Sensor |
| NVIDIA Triton | Server/Edge | สูงมาก | Multi-Model Serving |
TensorFlow Serving Setup
# === TensorFlow Serving Setup ===
# 1. Docker (แนะนำ)
# docker pull tensorflow/serving:latest-gpu
# docker run -p 8501:8501 -p 8500:8500 \
# --mount type=bind,source=/models/my_model,target=/models/my_model \
# -e MODEL_NAME=my_model \
# -t tensorflow/serving:latest-gpu
# 2. SavedModel Format
# import tensorflow as tf
#
# model = tf.keras.applications.MobileNetV2(weights='imagenet')
# tf.saved_model.save(model, '/models/my_model/1/')
# # Directory structure:
# # /models/my_model/
# # 1/ (version 1)
# # saved_model.pb
# # variables/
# # 2/ (version 2)
# # saved_model.pb
# # variables/
# 3. REST API
# curl http://localhost:8501/v1/models/my_model
# curl -d '{"instances": [[1.0, 2.0, 3.0]]}' \
# http://localhost:8501/v1/models/my_model:predict
# 4. gRPC (faster)
# pip install tensorflow-serving-api
# import grpc
# from tensorflow_serving.apis import predict_pb2
# from tensorflow_serving.apis import prediction_service_pb2_grpc
#
# channel = grpc.insecure_channel('localhost:8500')
# stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
# request = predict_pb2.PredictRequest()
# request.model_spec.name = 'my_model'
# 5. Model Config (multiple models)
# model_config.config:
# model_config_list {
# config {
# name: 'classifier'
# base_path: '/models/classifier'
# model_platform: 'tensorflow'
# }
# config {
# name: 'detector'
# base_path: '/models/detector'
# model_platform: 'tensorflow'
# }
# }
# docker-compose.yml
# version: '3.8'
# services:
# tf-serving:
# image: tensorflow/serving:latest-gpu
# ports:
# - "8500:8500"
# - "8501:8501"
# volumes:
# - ./models:/models
# - ./model_config.config:/config
# command: --model_config_file=/config
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
from dataclasses import dataclass
from typing import List, Dict


@dataclass
class ServingConfig:
    """Deployment settings for a single model hosted by TensorFlow Serving."""

    model_name: str  # name the serving endpoint exposes
    version: int     # model version directory number
    platform: str    # serving platform identifier (e.g. "tensorflow")
    batch_size: int  # maximum batch size used for request batching
    gpu: bool        # True when the model is placed on a GPU


# Example fleet of served models with their batching/device settings.
configs = [
    ServingConfig("image_classifier", 3, "tensorflow", 32, True),
    ServingConfig("object_detector", 2, "tensorflow", 16, True),
    ServingConfig("text_classifier", 1, "tensorflow", 64, False),
]

print("TensorFlow Serving Models:")
for cfg in configs:
    device = "GPU" if cfg.gpu else "CPU"
    print(f" {cfg.model_name} v{cfg.version} | {cfg.platform} | "
          f"batch={cfg.batch_size} | {device}")
TensorFlow Lite Edge
# === TensorFlow Lite for Edge Devices ===
# 1. Convert Model to TFLite
# import tensorflow as tf
#
# model = tf.keras.applications.MobileNetV2(weights='imagenet')
#
# # Standard conversion
# converter = tf.lite.TFLiteConverter.from_keras_model(model)
# tflite_model = converter.convert()
# with open('model.tflite', 'wb') as f:
# f.write(tflite_model)
#
# # Quantized (INT8) — เล็กลง 4x เร็วขึ้น
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.representative_dataset = representative_data_gen
# converter.target_spec.supported_ops = [
# tf.lite.OpsSet.TFLITE_BUILTINS_INT8
# ]
# converter.inference_input_type = tf.uint8
# converter.inference_output_type = tf.uint8
# quantized_model = converter.convert()
# with open('model_quant.tflite', 'wb') as f:
# f.write(quantized_model)
# 2. Run on Raspberry Pi
# pip install tflite-runtime
#
# import tflite_runtime.interpreter as tflite
# import numpy as np
# from PIL import Image
#
# interpreter = tflite.Interpreter(model_path='model.tflite')
# interpreter.allocate_tensors()
#
# input_details = interpreter.get_input_details()
# output_details = interpreter.get_output_details()
#
# img = Image.open('test.jpg').resize((224, 224))
# input_data = np.expand_dims(np.array(img, dtype=np.float32) / 255.0, 0)
# interpreter.set_tensor(input_details[0]['index'], input_data)
# interpreter.invoke()
# output = interpreter.get_tensor(output_details[0]['index'])
# 3. Run with Coral Edge TPU
# pip install pycoral
#
# from pycoral.utils.edgetpu import make_interpreter
# interpreter = make_interpreter('model_edgetpu.tflite')
# interpreter.allocate_tensors()
# # 10x faster than CPU on Raspberry Pi
# 4. NVIDIA Jetson
# pip install jetson-inference
#
# import jetson.inference
# net = jetson.inference.detectNet("ssd-mobilenet-v2", threshold=0.5)
# camera = jetson.utils.videoSource("/dev/video0")
# while True:
# img = camera.Capture()
# detections = net.Detect(img)
# Spec sheet for common edge-inference hardware (prices/FPS as quoted in the article).
edge_devices = {
    "Raspberry Pi 5": {
        "cpu": "ARM Cortex-A76 2.4GHz",
        "ram": "4-8GB",
        "price": "$60-80",
        "tflite_fps": "5-15 FPS",
        "use": "Prototype, Camera, Sensor",
    },
    "Coral Edge TPU": {
        "cpu": "Edge TPU Coprocessor",
        "ram": "N/A (USB Accelerator)",
        "price": "$60",
        "tflite_fps": "50-100 FPS",
        "use": "TFLite Acceleration",
    },
    "NVIDIA Jetson Orin Nano": {
        "cpu": "ARM A78AE + 1024 CUDA",
        "ram": "8GB",
        "price": "$500",
        "tflite_fps": "100+ FPS",
        "use": "AI Robot, Drone, CCTV",
    },
    "Intel NUC": {
        "cpu": "Intel i5/i7",
        "ram": "8-32GB",
        "price": "$300-600",
        "tflite_fps": "30-60 FPS",
        "use": "Edge Server, Gateway",
    },
}

print("\nEdge Devices for TensorFlow:")
for device_name, spec in edge_devices.items():
    print(f"\n [{device_name}]")
    for field, value in spec.items():
        print(f" {field}: {value}")
Kubernetes Edge
# === Kubernetes Edge Deployment ===
# K3s — Lightweight Kubernetes for Edge
# curl -sfL https://get.k3s.io | sh -
# kubectl get nodes
# Edge Deployment
# apiVersion: apps/v1
# kind: Deployment
# metadata:
# name: edge-inference
# spec:
# replicas: 2
# selector:
# matchLabels:
# app: edge-inference
#   template:
#     metadata:
#       labels:
#         app: edge-inference
#     spec:
# containers:
# - name: tflite-server
# image: edge-inference:latest
# ports:
# - containerPort: 8080
# resources:
# limits:
# cpu: "2"
# memory: "2Gi"
# requests:
# cpu: "1"
# memory: "1Gi"
# volumeMounts:
# - name: models
# mountPath: /models
# volumes:
# - name: models
# hostPath:
# path: /opt/models
# KubeEdge — Kubernetes for Edge Computing
# keadm init --advertise-address=10.0.0.1
# keadm join --cloudcore-ipport=10.0.0.1:10000
# Three deployment topologies for splitting ML work between cloud and edge.
edge_architectures = {
    "Cloud-Edge": {
        "desc": "Train บน Cloud, Inference บน Edge",
        "latency": "10-50ms",
        "bandwidth": "ต่ำ (ส่งแค่ผลลัพธ์)",
        "tools": "TF Serving (Cloud) + TF Lite (Edge)",
    },
    "Edge-Only": {
        "desc": "ทำทุกอย่างบน Edge ไม่ต้อง Cloud",
        "latency": "1-10ms",
        "bandwidth": "ไม่ใช้",
        "tools": "TF Lite + Coral TPU / Jetson",
    },
    "Federated": {
        "desc": "Train บน Edge ส่ง Gradient กลับ Cloud",
        "latency": "10-50ms",
        "bandwidth": "ปานกลาง (ส่ง Gradient)",
        "tools": "TF Federated + TF Lite",
    },
}

print("Edge Computing Architectures:")
for arch_name, details in edge_architectures.items():
    print(f"\n [{arch_name}]")
    for field, value in details.items():
        print(f" {field}: {value}")
# Model Optimization Pipeline
# Ordered steps for shrinking a cloud-trained model down to an edge deployment.
optimization = [
    "1. Train Full Model (Cloud GPU)",
    "2. Prune — ตัด Neurons ที่ไม่สำคัญ (ลด 50%)",
    "3. Quantize — INT8 แทน FP32 (ลด 4x)",
    "4. Convert — SavedModel to TFLite",
    "5. Compile — Edge TPU Compiler (ถ้าใช้ Coral)",
    "6. Benchmark — วัด Latency/Accuracy บน Edge",
    "7. Deploy — OTA Update ไป Edge Device",
]

# Plain string literal: the original used an f-string with no placeholders (ruff F541).
print("\n\nModel Optimization Pipeline:")
for step in optimization:
    print(f" {step}")
เคล็ดลับ
- MobileNet: ใช้ MobileNetV2/V3 สำหรับ Edge เร็วและเล็ก
- Quantization: INT8 ลดขนาด 4x เร็วขึ้น 2-3x Accuracy ลดน้อยมาก
- Coral TPU: เร็วกว่า CPU 10x สำหรับ TFLite Model
- K3s: ใช้ K3s แทน K8s บน Edge Device ใช้ RAM น้อยกว่า
- OTA Update: อัปเดต Model ผ่าน Network ไม่ต้องไปเปลี่ยนที่อุปกรณ์
TensorFlow Serving คืออะไร
Production ML Serving Google gRPC REST API Model Versioning Batching GPU Acceleration Production Scale
Edge Computing คืออะไร
ประมวลผลใกล้แหล่งข้อมูล ลด Latency 10ms ไม่ต้อง Internet ประหยัด Bandwidth ปลอดภัย IoT CCTV หุ่นยนต์ โรงงาน
TensorFlow Lite ต่างจาก TensorFlow Serving อย่างไร
TF Lite Mobile Edge Quantized CPU GPU NPU Android iOS RPi TF Serving Server Production GPU Cluster Batching Versioning
Edge Device ที่นิยมมีอะไรบ้าง
Raspberry Pi 5 Coral Edge TPU NVIDIA Jetson Orin Intel NUC Arduino Nano 33 BLE TinyML แต่ละอุปกรณ์เหมาะงานต่างกัน
สรุป
TensorFlow Serving Production ML gRPC REST Batching Versioning Edge Computing TF Lite Quantization INT8 Coral TPU Jetson Raspberry Pi K3s KubeEdge MobileNet OTA Update Cloud-Edge Architecture
อ่านเพิ่มเติม: สอนเทรด Forex | XM Signal | IT Hardware | อาชีพ IT
