SiamCafe.net Blog
Technology

TensorFlow Serving Edge Computing

tensorflow serving edge computing
TensorFlow Serving Edge Computing | SiamCafe Blog
2025-06-25· อ. บอม — SiamCafe.net· 11,120 คำ

TensorFlow Serving Edge Computing

TensorFlow Serving Production ML Serving gRPC REST API Model Versioning Batching GPU Edge Computing ประมวลผลใกล้แหล่งข้อมูล ลด Latency IoT CCTV หุ่นยนต์ โรงงาน

PlatformTargetPerformanceUse Case
TF ServingServer/Cloudสูงมาก (GPU)Production API
TF LiteMobile/EdgeปานกลางAndroid iOS RPi
TF.jsBrowser/NodeปานกลางWeb Application
TF MicroMicrocontrollerต่ำTinyML IoT Sensor
NVIDIA TritonServer/EdgeสูงมากMulti-Model Serving

TensorFlow Serving Setup

# === TensorFlow Serving Setup ===

# 1. Docker (แนะนำ)
# docker pull tensorflow/serving:latest-gpu
# docker run -p 8501:8501 -p 8500:8500 \
#   --mount type=bind,source=/models/my_model,target=/models/my_model \
#   -e MODEL_NAME=my_model \
#   -t tensorflow/serving:latest-gpu

# 2. SavedModel Format
# import tensorflow as tf
#
# model = tf.keras.applications.MobileNetV2(weights='imagenet')
# tf.saved_model.save(model, '/models/my_model/1/')
# # Directory structure:
# # /models/my_model/
# #   1/  (version 1)
# #     saved_model.pb
# #     variables/
# #   2/  (version 2)
# #     saved_model.pb
# #     variables/

# 3. REST API
# curl http://localhost:8501/v1/models/my_model
# curl -d '{"instances": [[1.0, 2.0, 3.0]]}' \
#   http://localhost:8501/v1/models/my_model:predict

# 4. gRPC (faster)
# pip install tensorflow-serving-api
# import grpc
# from tensorflow_serving.apis import predict_pb2
# from tensorflow_serving.apis import prediction_service_pb2_grpc
#
# channel = grpc.insecure_channel('localhost:8500')
# stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
# request = predict_pb2.PredictRequest()
# request.model_spec.name = 'my_model'

# 5. Model Config (multiple models)
# model_config.config:
# model_config_list {
#   config {
#     name: 'classifier'
#     base_path: '/models/classifier'
#     model_platform: 'tensorflow'
#   }
#   config {
#     name: 'detector'
#     base_path: '/models/detector'
#     model_platform: 'tensorflow'
#   }
# }

# docker-compose.yml
# version: '3.8'
# services:
#   tf-serving:
#     image: tensorflow/serving:latest-gpu
#     ports:
#       - "8500:8500"
#       - "8501:8501"
#     volumes:
#       - ./models:/models
#       - ./model_config.config:/config
#     command: --model_config_file=/config
#     deploy:
#       resources:
#         reservations:
#           devices:
#             - driver: nvidia
#               count: 1
#               capabilities: [gpu]

from dataclasses import dataclass
from typing import List, Dict

@dataclass
class ServingConfig:
    """Deployment settings for a single model hosted by TensorFlow Serving."""
    model_name: str  # name exposed through the serving REST/gRPC endpoints
    version: int     # model version (numbered subdirectory under the base path)
    platform: str    # serving platform identifier, e.g. "tensorflow"
    batch_size: int  # maximum batch size used for server-side batching
    gpu: bool        # True when the model is to be placed on a GPU

# Example serving configurations for three models.
configs = [
    ServingConfig("image_classifier", 3, "tensorflow", 32, True),
    ServingConfig("object_detector", 2, "tensorflow", 16, True),
    ServingConfig("text_classifier", 1, "tensorflow", 64, False),
]

print("TensorFlow Serving Models:")
for cfg in configs:
    device = "GPU" if cfg.gpu else "CPU"
    print(f"  {cfg.model_name} v{cfg.version} | {cfg.platform} | "
          f"batch={cfg.batch_size} | {device}")

TensorFlow Lite Edge

# === TensorFlow Lite for Edge Devices ===

# 1. Convert Model to TFLite
# import tensorflow as tf
#
# model = tf.keras.applications.MobileNetV2(weights='imagenet')
#
# # Standard conversion
# converter = tf.lite.TFLiteConverter.from_keras_model(model)
# tflite_model = converter.convert()
# with open('model.tflite', 'wb') as f:
#     f.write(tflite_model)
#
# # Quantized (INT8) — เล็กลง 4x เร็วขึ้น
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.representative_dataset = representative_data_gen
# converter.target_spec.supported_ops = [
#     tf.lite.OpsSet.TFLITE_BUILTINS_INT8
# ]
# converter.inference_input_type = tf.uint8
# converter.inference_output_type = tf.uint8
# quantized_model = converter.convert()
# with open('model_quant.tflite', 'wb') as f:
#     f.write(quantized_model)

# 2. Run on Raspberry Pi
# pip install tflite-runtime
#
# import tflite_runtime.interpreter as tflite
# import numpy as np
# from PIL import Image
#
# interpreter = tflite.Interpreter(model_path='model.tflite')
# interpreter.allocate_tensors()
#
# input_details = interpreter.get_input_details()
# output_details = interpreter.get_output_details()
#
# img = Image.open('test.jpg').resize((224, 224))
# input_data = np.expand_dims(np.array(img, dtype=np.float32) / 255.0, 0)
# interpreter.set_tensor(input_details[0]['index'], input_data)
# interpreter.invoke()
# output = interpreter.get_tensor(output_details[0]['index'])

# 3. Run with Coral Edge TPU
# pip install pycoral
#
# from pycoral.utils.edgetpu import make_interpreter
# interpreter = make_interpreter('model_edgetpu.tflite')
# interpreter.allocate_tensors()
# # 10x faster than CPU on Raspberry Pi

# 4. NVIDIA Jetson
# pip install jetson-inference
#
# import jetson.inference
# net = jetson.inference.detectNet("ssd-mobilenet-v2", threshold=0.5)
# camera = jetson.utils.videoSource("/dev/video0")
# while True:
#     img = camera.Capture()
#     detections = net.Detect(img)

# Catalog of common edge hardware for running TensorFlow models.
# Each entry maps a device name to its key specs and typical use case.
edge_devices = {
    "Raspberry Pi 5": {
        "cpu": "ARM Cortex-A76 2.4GHz",
        "ram": "4-8GB",
        "price": "$60-80",
        "tflite_fps": "5-15 FPS",
        "use": "Prototype, Camera, Sensor",
    },
    "Coral Edge TPU": {
        "cpu": "Edge TPU Coprocessor",
        "ram": "N/A (USB Accelerator)",
        "price": "$60",
        "tflite_fps": "50-100 FPS",
        "use": "TFLite Acceleration",
    },
    "NVIDIA Jetson Orin Nano": {
        "cpu": "ARM A78AE + 1024 CUDA",
        "ram": "8GB",
        "price": "$500",
        "tflite_fps": "100+ FPS",
        "use": "AI Robot, Drone, CCTV",
    },
    "Intel NUC": {
        "cpu": "Intel i5/i7",
        "ram": "8-32GB",
        "price": "$300-600",
        "tflite_fps": "30-60 FPS",
        "use": "Edge Server, Gateway",
    },
}

# Print each device as a header followed by its indented spec lines.
print("\nEdge Devices for TensorFlow:")
for name, specs in edge_devices.items():
    print(f"\n  [{name}]")
    for field, value in specs.items():
        print(f"    {field}: {value}")

Kubernetes Edge

# === Kubernetes Edge Deployment ===

# K3s — Lightweight Kubernetes for Edge
# curl -sfL https://get.k3s.io | sh -
# kubectl get nodes

# Edge Deployment
# apiVersion: apps/v1
# kind: Deployment
# metadata:
#   name: edge-inference
# spec:
#   replicas: 2
#   selector:
#     matchLabels:
#       app: edge-inference
#   template:
#     spec:
#       containers:
#       - name: tflite-server
#         image: edge-inference:latest
#         ports:
#         - containerPort: 8080
#         resources:
#           limits:
#             cpu: "2"
#             memory: "2Gi"
#           requests:
#             cpu: "1"
#             memory: "1Gi"
#         volumeMounts:
#         - name: models
#           mountPath: /models
#       volumes:
#       - name: models
#         hostPath:
#           path: /opt/models

# KubeEdge — Kubernetes for Edge Computing
# keadm init --advertise-address=10.0.0.1
# keadm join --cloudcore-ipport=10.0.0.1:10000

# Three common cloud/edge deployment architectures and their trade-offs.
# (Values are display strings, some in Thai, printed verbatim below.)
edge_architectures = {
    "Cloud-Edge": {
        "desc": "Train บน Cloud, Inference บน Edge",
        "latency": "10-50ms",
        "bandwidth": "ต่ำ (ส่งแค่ผลลัพธ์)",
        "tools": "TF Serving (Cloud) + TF Lite (Edge)",
    },
    "Edge-Only": {
        "desc": "ทำทุกอย่างบน Edge ไม่ต้อง Cloud",
        "latency": "1-10ms",
        "bandwidth": "ไม่ใช้",
        "tools": "TF Lite + Coral TPU / Jetson",
    },
    "Federated": {
        "desc": "Train บน Edge ส่ง Gradient กลับ Cloud",
        "latency": "10-50ms",
        "bandwidth": "ปานกลาง (ส่ง Gradient)",
        "tools": "TF Federated + TF Lite",
    },
}

# Print each architecture as a header followed by its attributes.
print("Edge Computing Architectures:")
for arch, info in edge_architectures.items():
    print(f"\n  [{arch}]")
    for k, v in info.items():
        print(f"    {k}: {v}")

# Model Optimization Pipeline: ordered steps from cloud training to edge deploy.
optimization = [
    "1. Train Full Model (Cloud GPU)",
    "2. Prune — ตัด Neurons ที่ไม่สำคัญ (ลด 50%)",
    "3. Quantize — INT8 แทน FP32 (ลด 4x)",
    "4. Convert — SavedModel to TFLite",
    "5. Compile — Edge TPU Compiler (ถ้าใช้ Coral)",
    "6. Benchmark — วัด Latency/Accuracy บน Edge",
    "7. Deploy — OTA Update ไป Edge Device",
]

# Plain string literal: the original f-string had no placeholders (ruff F541).
print("\n\nModel Optimization Pipeline:")
for step in optimization:
    print(f"  {step}")

เคล็ดลับ

TensorFlow Serving คืออะไร

Production ML Serving Google gRPC REST API Model Versioning Batching GPU Acceleration Production Scale

Edge Computing คืออะไร

ประมวลผลใกล้แหล่งข้อมูล ลด Latency 10ms ไม่ต้อง Internet ประหยัด Bandwidth ปลอดภัย IoT CCTV หุ่นยนต์ โรงงาน

TensorFlow Lite ต่างจาก TensorFlow Serving อย่างไร

TF Lite Mobile Edge Quantized CPU GPU NPU Android iOS RPi TF Serving Server Production GPU Cluster Batching Versioning

Edge Device ที่นิยมมีอะไรบ้าง

Raspberry Pi 5 Coral Edge TPU NVIDIA Jetson Orin Intel NUC Arduino Nano 33 BLE TinyML แต่ละอุปกรณ์เหมาะงานต่างกัน

สรุป

TensorFlow Serving Production ML gRPC REST Batching Versioning Edge Computing TF Lite Quantization INT8 Coral TPU Jetson Raspberry Pi K3s KubeEdge MobileNet OTA Update Cloud-Edge Architecture

📖 บทความที่เกี่ยวข้อง