TensorRT Optimization กับ DevSecOps Integration

TensorRT Optimization

TensorRT เป็น High-performance Deep Learning Inference SDK จาก NVIDIA ทำหน้าที่ Optimize Neural Network Models ให้ทำงานเร็วที่สุดบน NVIDIA GPUs ด้วยเทคนิค Layer Fusion, Precision Calibration, Kernel Auto-tuning และ Dynamic Tensor Memory

เมื่อรวมกับ DevSecOps Pipeline ได้ระบบที่ทั้งเร็วและปลอดภัย ตรวจสอบ Security ตั้งแต่ Model Training ไปจนถึง Production Deployment ป้องกัน Model Poisoning, Adversarial Attacks และ Supply Chain Attacks

อ่านเพิ่ม: Parquet Format Batch Processing Pipeline | SiamCafe Blog · อ่านเพิ่ม: Hfm Thailand — คู่มือสมบูรณ์ 2026 | SiamCafe.net | SiamCafe · อ่านเพิ่ม: LocalAI Self-hosted Freelance IT Career — ทุกสิ่งที่ต้องรู้ใ

เนื้อหาเกี่ยวข้อง — บทความที่เกี่ยวข้อง: Parquet Format Load Testing Strategy

TensorRT Model Optimization

# tensorrt_optimize.py — Optimize ML Model ด้วย TensorRT
# pip install tensorrt onnx onnxruntime-gpu numpy

import tensorrt as trt
import numpy as np
import os
import time
import logging

# Module-level logger used by the optimizer below. basicConfig() sets the
# root logger to INFO (it only takes effect if no handler is configured yet).
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

class TensorRTOptimizer:
    """Build TensorRT engines from ONNX models and benchmark them.

    NOTE(review): ``benchmark`` relies on the binding-index execution API
    (``get_binding_shape`` / ``set_binding_shape`` / ``execute_async_v2``).
    Confirm the installed TensorRT release still provides it before
    upgrading.
    """

    def __init__(self, workspace_size_gb=4):
        """Create the TensorRT logger and record the builder workspace budget.

        Args:
            workspace_size_gb: Workspace memory-pool limit in GiB. Fractional
                values are accepted and truncated to whole bytes.
        """
        self.logger = trt.Logger(trt.Logger.WARNING)
        # The memory-pool limit is a byte count, so force an int even when
        # the caller passes a fractional number of GiB.
        self.workspace_size = int(workspace_size_gb * (1 << 30))

    def onnx_to_tensorrt(self, onnx_path, engine_path,
                         precision="fp16", batch_size=1,
                         dynamic_batch=False):
        """Convert an ONNX model into a serialized TensorRT engine file.

        Args:
            onnx_path: Path of the ONNX model to read.
            engine_path: Path the serialized engine is written to.
            precision: ``"fp16"``, ``"int8"`` or ``"fp32"``. A reduced
                precision the platform cannot run fast, or an unrecognised
                value, falls back to FP32 with a warning.
            batch_size: Optimal batch size of the optimization profile; only
                used when ``dynamic_batch`` is true.
            dynamic_batch: When true, add an optimization profile for the
                first network input with batch range
                ``[1, batch_size, batch_size * 4]``.

        Returns:
            ``engine_path`` on success, or ``None`` when the ONNX model fails
            to parse or the engine fails to build (details are logged).
        """
        builder = trt.Builder(self.logger)
        network = builder.create_network(
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        )
        parser = trt.OnnxParser(network, self.logger)

        # Parse ONNX
        with open(onnx_path, "rb") as f:
            parsed = parser.parse(f.read())
        if not parsed:
            for i in range(parser.num_errors):
                logger.error("ONNX Parse Error: %s", parser.get_error(i))
            return None

        # Builder config
        config = builder.create_builder_config()
        config.set_memory_pool_limit(
            trt.MemoryPoolType.WORKSPACE, self.workspace_size
        )

        # Precision: track what is actually enabled so the build log does not
        # claim a precision that was silently dropped.
        effective = "fp32"
        if precision == "fp16":
            if builder.platform_has_fast_fp16:
                config.set_flag(trt.BuilderFlag.FP16)
                effective = "fp16"
                logger.info("Using FP16 precision")
            else:
                logger.warning(
                    "FP16 requested but the platform has no fast FP16 "
                    "support; building FP32 instead"
                )
        elif precision == "int8":
            if builder.platform_has_fast_int8:
                config.set_flag(trt.BuilderFlag.INT8)
                effective = "int8"
                logger.info("Using INT8 precision")
                # No calibrator or dynamic ranges are configured here, so
                # the resulting engine's accuracy is not guaranteed.
                logger.warning(
                    "INT8 enabled without a calibrator; validate the "
                    "engine's accuracy before using it"
                )
            else:
                logger.warning(
                    "INT8 requested but the platform has no fast INT8 "
                    "support; building FP32 instead"
                )
        elif precision != "fp32":
            logger.warning(
                "Unknown precision %r; building FP32 instead", precision
            )

        # Dynamic batch size (profile covers the first network input only)
        if dynamic_batch:
            profile = builder.create_optimization_profile()
            net_input = network.get_input(0)
            tail_dims = list(net_input.shape[1:])
            profile.set_shape(
                net_input.name,
                [1] + tail_dims,
                [batch_size] + tail_dims,
                [batch_size * 4] + tail_dims,
            )
            config.add_optimization_profile(profile)

        # Build engine
        logger.info("Building TensorRT engine (%s)...", effective)
        start = time.time()
        engine = builder.build_serialized_network(network, config)
        elapsed = time.time() - start

        if engine is None:
            logger.error("Failed to build engine")
            return None

        # Save engine
        with open(engine_path, "wb") as f:
            f.write(engine)

        size_mb = os.path.getsize(engine_path) / 1024 / 1024
        logger.info("Engine saved: %s (%.1f MB)", engine_path, size_mb)
        logger.info("Build time: %.1fs", elapsed)

        return engine_path

    def benchmark(self, engine_path, input_shape, n_iterations=100):
        """Measure inference latency of a serialized TensorRT engine.

        Runs 10 warm-up iterations, then times ``n_iterations`` round trips
        (host-to-device copy, execution, device-to-host copy) and prints the
        average, P50 and P99 latency plus throughput.

        Assumes the engine has exactly one input (binding 0) and one output
        (binding 1), both float32 -- TODO confirm for other models.

        Args:
            engine_path: Path of the serialized engine file.
            input_shape: Full input shape, including the batch dimension.
            n_iterations: Number of timed iterations; must be at least 1.

        Returns:
            Dict with ``avg_ms``, ``p50_ms``, ``p99_ms`` and ``throughput``
            (iterations per second).

        Raises:
            ValueError: If ``n_iterations`` is below 1, or ``input_shape`` is
                rejected by a dynamic-shape engine.
            RuntimeError: If the engine file cannot be deserialized.
        """
        # Checked first: with no samples the statistics below are undefined.
        if n_iterations < 1:
            raise ValueError("n_iterations must be at least 1")

        import pycuda.driver as cuda
        import pycuda.autoinit  # noqa: F401 -- imported for its side effect

        runtime = trt.Runtime(self.logger)
        with open(engine_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        if engine is None:
            raise RuntimeError(f"Failed to deserialize engine: {engine_path}")

        context = engine.create_execution_context()

        # Engines built with dynamic_batch=True report -1 for the batch
        # dimension; the concrete input shape must be set on the context
        # before the output shape can be resolved.
        if -1 in tuple(engine.get_binding_shape(0)):
            if not context.set_binding_shape(0, tuple(input_shape)):
                raise ValueError(
                    f"input_shape {tuple(input_shape)} was rejected by the engine"
                )
        output_shape = tuple(context.get_binding_shape(1))

        # Allocate memory
        input_data = np.random.randn(*input_shape).astype(np.float32)
        output_data = np.empty(output_shape, dtype=np.float32)
        d_input = cuda.mem_alloc(input_data.nbytes)
        d_output = cuda.mem_alloc(output_data.nbytes)

        try:
            stream = cuda.Stream()
            bindings = [int(d_input), int(d_output)]

            # Warmup
            for _ in range(10):
                cuda.memcpy_htod_async(d_input, input_data, stream)
                context.execute_async_v2(bindings, stream.handle)
                stream.synchronize()

            # Benchmark
            latencies = []
            for _ in range(n_iterations):
                start = time.perf_counter()
                cuda.memcpy_htod_async(d_input, input_data, stream)
                context.execute_async_v2(bindings, stream.handle)
                cuda.memcpy_dtoh_async(output_data, d_output, stream)
                stream.synchronize()
                latencies.append((time.perf_counter() - start) * 1000)
        finally:
            # Release device buffers explicitly instead of waiting for GC.
            d_input.free()
            d_output.free()

        avg = np.mean(latencies)
        p50 = np.percentile(latencies, 50)
        p99 = np.percentile(latencies, 99)
        throughput = 1000 / avg

        print(f"\nTensorRT Benchmark ({n_iterations} iterations)")
        print(f"  Avg: {avg:.2f}ms | P50: {p50:.2f}ms | P99: {p99:.2f}ms")
        print(f"  Throughput: {throughput:.0f} inferences/sec")

        return {
            "avg_ms": float(avg),
            "p50_ms": float(p50),
            "p99_ms": float(p99),
            "throughput": float(throughput),
        }

# optimizer = TensorRTOptimizer()
# optimizer.onnx_to_tensorrt("model.onnx", "model.engine", precision="fp16")
# optimizer.benchmark("model.engine", (1, 3, 224, 224))

DevSecOps Pipeline สำหรับ ML

# === GitHub Actions — DevSecOps pipeline for ML models ===
# .github/workflows/ml-devsecops.yml
#
# Jobs form a chain through their `needs:` keys:
# code-security -> model-security -> tensorrt-optimize -> container-security -> deploy

name: ML DevSecOps Pipeline
# Runs on pushes to main that touch models/ or src/, and on pull requests
# targeting main.
on:
  push:
    branches: [main]
    paths: ["models/**", "src/**"]
  pull_request:
    branches: [main]

# Workflow-level variables; referenced as $REGISTRY / $IMAGE_NAME in the
# container build and deploy steps.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ml-inference

jobs:
  # 1. Code Security
  # Source-level checks: SAST, dependency audit, secret scan, license report.
  code-security:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Writes findings to bandit-report.json. The trailing "|| true" discards
      # Bandit's exit status, so findings never fail this step.
      - name: SAST — Bandit (Python Security)
        run: |
          pip install bandit
          bandit -r src/ -f json -o bandit-report.json || true

      # Same pattern: report goes to safety-report.json, exit status ignored.
      - name: Dependency Check — Safety
        run: |
          pip install safety
          safety check -r requirements.txt --json > safety-report.json || true

      # NOTE(review): the action is referenced by branch (@main), not by a
      # pinned release or commit.
      - name: Secret Scanning — TruffleHog
        uses: trufflesecurity/trufflehog@main
        with:
          extra_args: --only-verified

      # Dumps installed package licenses to licenses.json; nothing in this
      # job evaluates the file afterwards.
      - name: License Check
        run: |
          pip install pip-licenses
          pip-licenses --format=json > licenses.json

  # 2. Model Security
  # Checks on the model artifacts themselves; runs after code-security.
  model-security:
    runs-on: ubuntu-latest
    needs: code-security
    steps:
      - uses: actions/checkout@v4

      # Computes SHA-256 digests of models/*.onnx into model-checksums.txt and
      # prints them. No comparison against a known-good list happens here.
      - name: Model File Integrity
        run: |
          # Verify model checksums
          sha256sum models/*.onnx > model-checksums.txt
          echo "Model checksums:"
          cat model-checksums.txt

      # "|| true" discards fickling's exit status, so this step cannot fail
      # the job.
      - name: Pickle Scan (Anti-deserialization attack)
        run: |
          pip install fickling
          fickling --check models/*.pkl || true

      # Loads models/model.onnx and runs onnx.checker.check_model on it; an
      # exception from either call fails the step.
      - name: ONNX Model Validation
        run: |
          pip install onnx onnxruntime
          python -c "
          import onnx
          model = onnx.load('models/model.onnx')
          onnx.checker.check_model(model)
          print('ONNX model valid')
          "

  # 3. TensorRT Optimization
  # Builds and benchmarks the engine on a self-hosted runner labelled "gpu";
  # runs after model-security.
  tensorrt-optimize:
    runs-on: [self-hosted, gpu]
    needs: model-security
    steps:
      - uses: actions/checkout@v4

      # Converts models/model.onnx into models/model.engine (FP16, batch 8)
      # via the repository's scripts/optimize.py.
      - name: Optimize with TensorRT
        run: |
          python scripts/optimize.py \
            --input models/model.onnx \
            --output models/model.engine \
            --precision fp16 \
            --batch-size 8

      - name: Benchmark
        run: |
          python scripts/benchmark.py \
            --engine models/model.engine \
            --iterations 1000

      # Publishes the built engine as the "tensorrt-engine" artifact.
      - uses: actions/upload-artifact@v4
        with:
          name: tensorrt-engine
          path: models/model.engine

  # 4. Container Security
  container-security:
    runs-on: ubuntu-latest
    needs: tensorrt-optimize
    steps:
      - uses: actions/checkout@v4

      - name: Build Container
        run: docker build -t $IMAGE_NAME:test .

      - name: Trivy Container Scan
        uses: aquasecurity/trivy-action@master
        with:
          image-ref: }:test
          format: sarif
          output: trivy-results.sarif
          severity: CRITICAL, HIGH

      - name: Grype Vulnerability Scan
        uses: anchore/scan-action@v3
        with:
          image: }:test
          severity-cutoff: high

  # 5. Deploy with Security
  deploy:
    runs-on: ubuntu-latest
    needs: container-security
    if: github.ref == 'refs/heads/main'
    steps:
      - name: Deploy to Production
        run: |
          kubectl set image deployment/ml-inference \
            inference=$REGISTRY/$IMAGE_NAME:} \
            -n production

      - name: DAST — Runtime Security Test
        run: |
          python scripts/security_test.py \
            --endpoint https://ml-api.example.com \
            --test adversarial, injection, dos

Security Checklist สำหรับ ML Models

# ml_security_checklist.py — security checklist for ML production
#
# Each category maps to a list of (description, is_done) pairs. The script
# prints per-category progress, the overall completion rate, and a letter
# grade derived from that rate.

checklist = {
    "Model Security": [
        ("Model files integrity check (SHA256)", True),
        ("No pickle deserialization vulnerabilities", True),
        ("ONNX model validation", True),
        ("Model provenance tracking", False),
        ("Adversarial robustness testing", False),
    ],
    "Data Security": [
        ("Training data access control", True),
        ("PII detection and removal", True),
        ("Data encryption at rest", True),
        ("Data encryption in transit (TLS)", True),
        ("Data lineage tracking", False),
    ],
    "Infrastructure Security": [
        ("Container image scanning", True),
        ("No HIGH/CRITICAL vulnerabilities", True),
        ("Non-root container user", True),
        ("Network policies configured", False),
        ("GPU access control", False),
    ],
    "API Security": [
        ("Authentication (JWT/API Key)", True),
        ("Rate limiting", True),
        ("Input validation", True),
        ("Output sanitization", False),
        ("Audit logging", True),
    ],
    "Pipeline Security": [
        ("SAST scanning", True),
        ("Dependency scanning", True),
        ("Secret scanning", True),
        ("License compliance", True),
        ("Signed artifacts", False),
    ],
}

print("ML Security Checklist")
print("=" * 55)

# Running totals across all categories.
total = 0
done = 0
for section, entries in checklist.items():
    n_entries = len(entries)
    n_passed = sum(1 for _, flag in entries if flag)
    total += n_entries
    done += n_passed

    share = n_passed / n_entries * 100
    print(f"\n[{section}] {n_passed}/{n_entries} ({share:.0f}%)")
    for label, flag in entries:
        if flag:
            symbol = "v"
        else:
            symbol = "x"
        print(f"  [{symbol}] {label}")

overall = done / total * 100
print(f"\nOverall: {done}/{total} ({overall:.0f}%)")

# First cutoff the overall rate reaches decides the grade; below all -> "F".
grade = next(
    (letter for cutoff, letter in ((90, "A"), (75, "B"), (60, "C"))
     if overall >= cutoff),
    "F",
)
print(f"Grade: {grade}")

Best Practices

FP16 เป็น Default: ใช้ FP16 Precision เป็นค่าเริ่มต้น เร็วขึ้น 2x โดย Accuracy ลดน้อยมาก
INT8 Calibration: ใช้ INT8 เมื่อต้องการ Performance สูงสุด ต้องมี Calibration Dataset
Container Scanning: Scan Container Image ทุกครั้งก่อน Deploy หา Vulnerabilities
Model Integrity: ตรวจสอบ Checksum ของ Model Files ป้องกัน Tampering
Input Validation: ตรวจสอบ Input ก่อนส่งเข้า Model ป้องกัน Adversarial Attacks
Audit Logging: บันทึกทุก Inference Request สำหรับ Compliance และ Forensics

การนำไปใช้งานจริงในองค์กร

สำหรับองค์กรขนาดกลางถึงใหญ่ แนะนำให้ใช้หลัก Three-Tier Architecture คือ Core Layer ที่เป็นแกนกลางของระบบ Distribution Layer ที่ทำหน้าที่กระจาย Traffic และ Access Layer ที่เชื่อมต่อกับผู้ใช้โดยตรง การแบ่ง Layer ชัดเจนช่วยให้การ Troubleshoot ง่ายขึ้นและสามารถ Scale ระบบได้ตามความต้องการ

แนะนำเพิ่มเติม — iCafeForex

เรื่อง Network Security ก็สำคัญไม่แพ้กัน ควรติดตั้ง Next-Generation Firewall ที่สามารถ Deep Packet Inspection ได้ ใช้ Network Segmentation แยก VLAN สำหรับแต่ละแผนก ติดตั้ง IDS/IPS เพื่อตรวจจับการโจมตี และทำ Regular Security Audit อย่างน้อยปีละ 2 ครั้ง

เนื้อหาเกี่ยวข้อง — ดูเพิ่มเติมเรื่อง CSS Container Queries Load Testing Strategy

TensorRT คืออะไร

TensorRT เป็น SDK จาก NVIDIA สำหรับ Optimize Deep Learning Models ให้ทำงานเร็วบน GPU โดยใช้เทคนิค Layer Fusion, Precision Calibration (FP16/INT8) และ Kernel Auto-tuning ทำให้ Inference เร็วขึ้น 2-6 เท่า

DevSecOps คืออะไร

DevSecOps คือการผสาน Security เข้าไปในทุกขั้นตอนของ DevOps Pipeline ได้แก่ Development (SAST, Dependency Scanning), Build (Container Scanning), Deploy (DAST, Compliance) และ Runtime (Monitoring, Incident Response)

แนะนำเพิ่มเติม — บทวิเคราะห์จาก XM Signal

เนื้อหาเกี่ยวข้อง — ดูเพิ่มเติมเรื่อง Tailscale Mesh Hexagonal Architecture

ทำไมต้องใช้ TensorRT กับ DevSecOps

ML Models ใน Production มีความเสี่ยงจาก Model Poisoning, Adversarial Attacks และ Data Leakage การใช้ DevSecOps ช่วยตรวจสอบ Security ในทุกขั้นตอน ตั้งแต่ Training, Optimization, Container Scanning ไปจนถึง Runtime Protection

TensorRT รองรับ Framework อะไรบ้าง

รองรับ TensorFlow, PyTorch, ONNX, Keras และ MXNet โดยแปลง Model เป็น ONNX แล้วจึง Optimize หรือใช้ TF-TRT สำหรับ TensorFlow และรองรับ GPU ตั้งแต่ Jetson Nano ถึง A100/H100

เนื้อหาเกี่ยวข้อง — แนะนำให้อ่าน Etherium — คู่มือฉบับสมบูรณ์ 2026

สรุป

TensorRT ช่วย Optimize ML Models ให้เร็วขึ้น 2-6 เท่าบน NVIDIA GPUs เมื่อรวมกับ DevSecOps Pipeline ได้ระบบที่ทั้งเร็วและปลอดภัย ใช้ FP16 เป็น Default, Container Scanning ทุกครั้ง, Model Integrity Check, Input Validation และ Audit Logging สร้าง Pipeline อัตโนมัติด้วย GitHub Actions ครอบคลุมตั้งแต่ Code Security ถึง Runtime Protection