Whisper GitOps Workflow
Whisper Speech-to-Text GitOps Argo CD Kubernetes Pipeline Transcription Translation faster-whisper GPU Production MLflow
| Model | Parameters | VRAM | Speed (GPU) | Accuracy |
|---|---|---|---|---|
| tiny | 39M | ~1GB | ~32x realtime | ต่ำ |
| base | 74M | ~1GB | ~16x realtime | ปานกลาง |
| small | 244M | ~2GB | ~6x realtime | ดี |
| medium | 769M | ~5GB | ~2x realtime | ดีมาก |
| large-v3 | 1550M | ~10GB | ~1x realtime | ดีที่สุด |
| large-v3-turbo | 809M | ~6GB | ~3x realtime | ดีมาก (เร็วกว่า large) |
Whisper API & Pipeline
# === Whisper Speech Pipeline ===
# pip install faster-whisper fastapi uvicorn
# from faster_whisper import WhisperModel
# from fastapi import FastAPI, UploadFile
# import tempfile, os
#
# app = FastAPI()
# model = WhisperModel("large-v3-turbo", device="cuda", compute_type="float16")
#
# @app.post("/transcribe")
# async def transcribe(file: UploadFile):
# with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
# tmp.write(await file.read())
# tmp_path = tmp.name
# segments, info = model.transcribe(tmp_path, language="th", beam_size=5)
# result = []
# for seg in segments:
# result.append({"start": seg.start, "end": seg.end, "text": seg.text})
# os.unlink(tmp_path)
# return {"language": info.language, "segments": result}
from dataclasses import dataclass
@dataclass
class PipelineStage:
    """One stage of the speech-to-text processing pipeline.

    Instances are purely descriptive records (see the `stages` list below);
    they carry human-readable metadata that is printed as a summary, not
    executable configuration.
    """
    stage: str         # Stage name, e.g. "Audio Input", "Transcription"
    tool: str          # Tool(s) implementing the stage
    input_format: str  # What the stage consumes
    output: str        # What the stage produces
    config: str        # Free-form notes on the stage's configuration
# Descriptive records for the five pipeline stages, in execution order:
# intake -> audio normalization -> transcription -> cleanup -> storage.
stages = [
    PipelineStage(stage=name, tool=tool, input_format=src, output=dst, config=cfg)
    for name, tool, src, dst, cfg in [
        (
            "Audio Input",
            "FastAPI Upload / S3 Event / Kafka",
            "mp3 wav flac m4a (any format)",
            "Raw Audio File",
            "Max file size 500MB Timeout 300s",
        ),
        (
            "Pre-processing",
            "ffmpeg / pydub",
            "Any Audio Format",
            "16kHz Mono WAV",
            "ffmpeg -i input -ar 16000 -ac 1 output.wav",
        ),
        (
            "Transcription",
            "faster-whisper (CTranslate2)",
            "16kHz WAV",
            "Text + Timestamps + Language",
            "model=large-v3-turbo beam_size=5 language=th",
        ),
        (
            "Post-processing",
            "Custom Python / NLP",
            "Raw Transcript",
            "Formatted Text + Paragraphs",
            "Remove duplicates Fix spacing Add punctuation",
        ),
        (
            "Storage",
            "PostgreSQL / Elasticsearch",
            "Formatted Transcript",
            "Searchable Database Entry",
            "Full-text Search Timestamp Index",
        ),
    ]
]
# Print a human-readable summary of every pipeline stage.
print("=== Pipeline Stages ===")
for entry in stages:
    for line in (
        f" [{entry.stage}] Tool: {entry.tool}",
        f" Input: {entry.input_format} → Output: {entry.output}",
        f" Config: {entry.config}",
    ):
        print(line)
GitOps Deployment
# === GitOps with Argo CD ===
# Git Repository Structure:
# whisper-gitops/
# ├── base/
# │ ├── deployment.yaml
# │ ├── service.yaml
# │ ├── hpa.yaml
# │ └── kustomization.yaml
# ├── overlays/
# │ ├── staging/
# │ │ ├── kustomization.yaml
# │ │ └── patch-replicas.yaml
# │ └── production/
# │ ├── kustomization.yaml
# │ └── patch-replicas.yaml
# └── argocd/
# └── application.yaml
# Argo CD Application
# apiVersion: argoproj.io/v1alpha1
# kind: Application
# metadata:
# name: whisper-api
# namespace: argocd
# spec:
# project: default
# source:
# repoURL: https://github.com/org/whisper-gitops
# targetRevision: main
# path: overlays/production
# destination:
# server: https://kubernetes.default.svc
# namespace: whisper
# syncPolicy:
# automated:
# selfHeal: true
# prune: true
@dataclass
class GitOpsComponent:
    """A deployable artifact managed through the GitOps repository.

    Purely descriptive record (see the `components` list below); used only
    for the printed summary, not for driving any deployment tooling.
    """
    component: str  # Human-readable component name
    git_path: str   # Path of the manifest inside the GitOps repo
    tool: str       # Tool that reconciles/applies it (e.g. Argo CD)
    sync: str       # Free-form note on how/when it syncs
# GitOps-managed artifacts: each record maps a component to its manifest
# path in the repository and the tool that reconciles it.
components = [
    GitOpsComponent(component=name, git_path=path, tool=tool, sync=sync)
    for name, path, tool, sync in [
        (
            "Whisper API Deployment",
            "overlays/production/deployment.yaml",
            "Argo CD + Kustomize",
            "Auto Sync on Git Push",
        ),
        (
            "Model Config",
            "base/configmap.yaml",
            "Argo CD",
            "Model Version Language Beam Size",
        ),
        (
            "HPA (Autoscaling)",
            "base/hpa.yaml",
            "Argo CD",
            "Scale 2-10 Pods ตาม GPU Utilization",
        ),
        (
            "Prometheus Rules",
            "monitoring/prometheus-rules.yaml",
            "Argo CD",
            "Alert Rules สำหรับ Latency Error Rate",
        ),
        (
            "Grafana Dashboard",
            "monitoring/grafana-dashboard.json",
            "Argo CD + Grafana Operator",
            "Dashboard as Code Version Control",
        ),
    ]
]
# Print a human-readable summary of every GitOps-managed component.
print("=== GitOps Components ===")
for item in components:
    summary = (
        f" [{item.component}]",
        f" Path: {item.git_path}",
        f" Tool: {item.tool}",
        f" Sync: {item.sync}",
    )
    print("\n".join(summary))
Monitoring & Scaling
# === Production Monitoring ===
@dataclass
class WhisperMetric:
    """A production monitoring metric for the Whisper service.

    Descriptive record (see the `metrics` list below); the fields hold
    human-readable targets/thresholds for the printed summary only.
    """
    metric: str   # Metric name, e.g. "Inference Latency"
    target: str   # Desired operating range / SLO
    alert: str    # Alerting threshold(s)
    scaling: str  # Free-form note on the scaling response
# Monitoring metrics with their targets, alert thresholds, and the
# scaling (or remediation) response tied to each one.
metrics = [
    WhisperMetric(metric=name, target=goal, alert=threshold, scaling=action)
    for name, goal, threshold, action in [
        (
            "Inference Latency",
            "< 1x Realtime (1 min audio < 1 min process)",
            "> 2x Realtime → Alert",
            "HPA Scale Up เพิ่ม GPU Pods",
        ),
        (
            "Queue Length",
            "< 10 pending jobs",
            "> 50 → Alert P2 > 200 → Alert P1",
            "HPA Scale ตาม Queue Length",
        ),
        (
            "GPU Utilization",
            "60-80%",
            "> 90% sustained → Scale Up",
            "Scale Up ที่ 80% Scale Down ที่ 40%",
        ),
        (
            "Error Rate",
            "< 1%",
            "> 5% → Alert P1",
            "ไม่ Scale แต่ตรวจ Root Cause",
        ),
        (
            "Word Error Rate (WER)",
            "< 10% สำหรับ Thai",
            "> 15% → Model Quality Alert",
            "เปลี่ยน Model Version หรือ Fine-tune",
        ),
        (
            "Throughput (audio hours/day)",
            "ตาม SLA",
            "< 80% SLA → Alert",
            "Scale GPU Nodes + Pods",
        ),
    ]
]
# Print a human-readable summary of every monitoring metric.
print("=== Monitoring Metrics ===")
for metric_row in metrics:
    for line in (
        f" [{metric_row.metric}] Target: {metric_row.target}",
        f" Alert: {metric_row.alert}",
        f" Scaling: {metric_row.scaling}",
    ):
        print(line)
เคล็ดลับ
- faster-whisper: ใช้ faster-whisper แทน openai-whisper เร็วกว่า 4x
- turbo: ใช้ large-v3-turbo ดีสุดสำหรับ Speed/Accuracy
- GitOps: ทุก Config ใน Git ทำ PR Review ก่อน Deploy
- GPU: ใช้ float16 ลด VRAM 50% เร็วขึ้น 2x
- Queue: ใช้ Queue สำหรับ Async ไม่ Block API
Whisper คืออะไร
OpenAI Speech-to-Text 99+ ภาษา ไทย Transformer tiny base small medium large turbo faster-whisper 4x GPU Transcription Translation
GitOps Workflow คืออะไร
Git Single Source of Truth Argo CD Flux CD Kubernetes Auto Sync PR Review Rollback Git Revert Kustomize Helm IaC Version Control
Pipeline สร้างอย่างไร
Audio Input Pre-processing ffmpeg 16kHz Transcription faster-whisper Post-processing NLP Storage PostgreSQL Elasticsearch Queue Celery
Deploy อย่างไร
Kubernetes GPU Node HPA Argo CD Auto Sync Kustomize Overlay MLflow Model Registry Canary Argo Rollouts ConfigMap Secret
สรุป
Whisper Speech GitOps Argo CD Kubernetes faster-whisper Pipeline Transcription GPU HPA Monitoring Production
