SiamCafe · Blog
Whisper Speech GitOps Workflow — จัดการ
บทความ

Whisper Speech GitOps Workflow — จัดการ

เผยแพร่ 28 พฤษภาคม 2569

Whisper GitOps Workflow

Whisper Speech-to-Text GitOps Argo CD Kubernetes Pipeline Transcription Translation faster-whisper GPU Production MLflow

Model | Parameters | VRAM | Speed (GPU) | Accuracy
tiny | 39M | ~1GB | ~32x realtime | ต่ำ
base | 74M | ~1GB | ~16x realtime | ปานกลาง
small | 244M | ~2GB | ~6x realtime | ดี
medium | 769M | ~5GB | ~2x realtime | ดีมาก
large-v3 | 1550M | ~10GB | ~1x realtime | ดีที่สุด
large-v3-turbo | 809M | ~6GB | ~3x realtime | ดีมาก (เร็วกว่า large)

Whisper API & Pipeline

# === Whisper Speech Pipeline ===

# pip install faster-whisper fastapi uvicorn

# from faster_whisper import WhisperModel
# from fastapi import FastAPI, UploadFile
# import tempfile, os
#
# app = FastAPI()
# model = WhisperModel("large-v3-turbo", device="cuda", compute_type="float16")
#
# @app.post("/transcribe")
# async def transcribe(file: UploadFile):
#     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
#         tmp.write(await file.read())
#         tmp_path = tmp.name
#     segments, info = model.transcribe(tmp_path, language="th", beam_size=5)
#     result = []
#     for seg in segments:
#         result.append({"start": seg.start, "end": seg.end, "text": seg.text})
#     os.unlink(tmp_path)
#     return {"language": info.language, "segments": result}

from dataclasses import dataclass

@dataclass
class PipelineStage:
    """Plain-text description of one stage in the speech pipeline.

    All fields are free-form strings; the report loop that follows the
    ``stages`` list prints them verbatim.
    """

    # Display name of the stage, printed inside square brackets.
    stage: str
    # Tool(s) used by the stage, printed after "Tool:".
    tool: str
    # What the stage consumes, printed after "Input:".
    input_format: str
    # What the stage produces, printed after "Output:".
    output: str
    # Settings or example command for the stage, printed after "Config:".
    config: str

# Pipeline stages in processing order, from audio input through storage.
stages = [
    PipelineStage(
        stage="Audio Input",
        tool="FastAPI Upload / S3 Event / Kafka",
        input_format="mp3 wav flac m4a (any format)",
        output="Raw Audio File",
        config="Max file size 500MB Timeout 300s",
    ),
    PipelineStage(
        stage="Pre-processing",
        tool="ffmpeg / pydub",
        input_format="Any Audio Format",
        output="16kHz Mono WAV",
        config="ffmpeg -i input -ar 16000 -ac 1 output.wav",
    ),
    PipelineStage(
        stage="Transcription",
        tool="faster-whisper (CTranslate2)",
        input_format="16kHz WAV",
        output="Text + Timestamps + Language",
        config="model=large-v3-turbo beam_size=5 language=th",
    ),
    PipelineStage(
        stage="Post-processing",
        tool="Custom Python / NLP",
        input_format="Raw Transcript",
        output="Formatted Text + Paragraphs",
        config="Remove duplicates Fix spacing Add punctuation",
    ),
    PipelineStage(
        stage="Storage",
        tool="PostgreSQL / Elasticsearch",
        input_format="Formatted Transcript",
        output="Searchable Database Entry",
        config="Full-text Search Timestamp Index",
    ),
]

# Print a three-line summary per stage under a section banner.
print("=== Pipeline Stages ===")
for s in stages:
    print(
        f"  [{s.stage}] Tool: {s.tool}",
        f"    Input: {s.input_format} → Output: {s.output}",
        f"    Config: {s.config}",
        sep="\n",
    )

GitOps Deployment

# === GitOps with Argo CD ===

# Git Repository Structure:
# whisper-gitops/
# ├── base/
# │   ├── deployment.yaml
# │   ├── service.yaml
# │   ├── hpa.yaml
# │   └── kustomization.yaml
# ├── overlays/
# │   ├── staging/
# │   │   ├── kustomization.yaml
# │   │   └── patch-replicas.yaml
# │   └── production/
# │       ├── kustomization.yaml
# │       └── patch-replicas.yaml
# └── argocd/
#     └── application.yaml

# Argo CD Application
# apiVersion: argoproj.io/v1alpha1
# kind: Application
# metadata:
#   name: whisper-api
#   namespace: argocd
# spec:
#   project: default
#   source:
#     repoURL: https://github.com/org/whisper-gitops
#     targetRevision: main
#     path: overlays/production
#   destination:
#     server: https://kubernetes.default.svc
#     namespace: whisper
#   syncPolicy:
#     automated:
#       selfHeal: true
#       prune: true

@dataclass
class GitOpsComponent:
    """Plain-text description of one item managed through the GitOps repo.

    All fields are free-form strings; the report loop that follows the
    ``components`` list prints them verbatim.
    """

    # Display name of the component, printed inside square brackets.
    component: str
    # Path of the manifest/file inside the Git repository, printed after "Path:".
    git_path: str
    # Tooling that applies the component, printed after "Tool:".
    tool: str
    # Free-form note about sync behaviour or managed settings, printed after "Sync:".
    sync: str

# Items tracked in the GitOps repository, each with its path, tool and sync note.
components = [
    GitOpsComponent(
        component="Whisper API Deployment",
        git_path="overlays/production/deployment.yaml",
        tool="Argo CD + Kustomize",
        sync="Auto Sync on Git Push",
    ),
    GitOpsComponent(
        component="Model Config",
        git_path="base/configmap.yaml",
        tool="Argo CD",
        sync="Model Version Language Beam Size",
    ),
    GitOpsComponent(
        component="HPA (Autoscaling)",
        git_path="base/hpa.yaml",
        tool="Argo CD",
        sync="Scale 2-10 Pods ตาม GPU Utilization",
    ),
    GitOpsComponent(
        component="Prometheus Rules",
        git_path="monitoring/prometheus-rules.yaml",
        tool="Argo CD",
        sync="Alert Rules สำหรับ Latency Error Rate",
    ),
    GitOpsComponent(
        component="Grafana Dashboard",
        git_path="monitoring/grafana-dashboard.json",
        tool="Argo CD + Grafana Operator",
        sync="Dashboard as Code Version Control",
    ),
]

# Print a four-line summary per component under a section banner.
print("=== GitOps Components ===")
for c in components:
    print(
        f"  [{c.component}]",
        f"    Path: {c.git_path}",
        f"    Tool: {c.tool}",
        f"    Sync: {c.sync}",
        sep="\n",
    )

Monitoring & Scaling

# === Production Monitoring ===

@dataclass
class WhisperMetric:
    """Plain-text description of one production metric and how it is acted on.

    All fields are free-form strings; the report loop that follows the
    ``metrics`` list prints them verbatim.
    """

    # Display name of the metric, printed inside square brackets.
    metric: str
    # Desired value or range, printed after "Target:".
    target: str
    # Condition that raises an alert, printed after "Alert:".
    alert: str
    # Scaling or remediation action, printed after "Scaling:".
    scaling: str

# Production metrics with their target, alert condition and scaling response.
metrics = [
    WhisperMetric(
        metric="Inference Latency",
        target="< 1x Realtime (1 min audio < 1 min process)",
        alert="> 2x Realtime → Alert",
        scaling="HPA Scale Up เพิ่ม GPU Pods",
    ),
    WhisperMetric(
        metric="Queue Length",
        target="< 10 pending jobs",
        alert="> 50 → Alert P2 > 200 → Alert P1",
        scaling="HPA Scale ตาม Queue Length",
    ),
    WhisperMetric(
        metric="GPU Utilization",
        target="60-80%",
        alert="> 90% sustained → Scale Up",
        scaling="Scale Up ที่ 80% Scale Down ที่ 40%",
    ),
    WhisperMetric(
        metric="Error Rate",
        target="< 1%",
        alert="> 5% → Alert P1",
        scaling="ไม่ Scale แต่ตรวจ Root Cause",
    ),
    WhisperMetric(
        metric="Word Error Rate (WER)",
        target="< 10% สำหรับ Thai",
        alert="> 15% → Model Quality Alert",
        scaling="เปลี่ยน Model Version หรือ Fine-tune",
    ),
    WhisperMetric(
        metric="Throughput (audio hours/day)",
        target="ตาม SLA",
        alert="< 80% SLA → Alert",
        scaling="Scale GPU Nodes + Pods",
    ),
]

# Print a three-line summary per metric under a section banner.
print("=== Monitoring Metrics ===")
for m in metrics:
    print(
        f"  [{m.metric}] Target: {m.target}",
        f"    Alert: {m.alert}",
        f"    Scaling: {m.scaling}",
        sep="\n",
    )

เคล็ดลับ

  • faster-whisper: ใช้ faster-whisper แทน openai-whisper เร็วกว่า 4x
  • turbo: ใช้ large-v3-turbo ดีสุดสำหรับ Speed/Accuracy
  • GitOps: ทุก Config ใน Git ทำ PR Review ก่อน Deploy
  • GPU: ใช้ float16 ลด VRAM 50% เร็วขึ้น 2x
  • Queue: ใช้ Queue สำหรับ Async ไม่ Block API

Whisper คืออะไร

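Whisper คือโมเดล Speech-to-Text แบบ Transformer จาก OpenAI รองรับ 99+ ภาษารวมถึงภาษาไทย มีหลายขนาด ได้แก่ tiny, base, small, medium, large และ turbo ใช้ได้ทั้งงาน Transcription และ Translation และเมื่อรันด้วย faster-whisper บน GPU จะเร็วกว่า openai-whisper ประมาณ 4x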