Whisper Speech GitOps Workflow — จัดการ
Whisper GitOps Workflow
Whisper Speech-to-Text GitOps Argo CD Kubernetes Pipeline Transcription Translation faster-whisper GPU Production MLflow
| Model | Parameters | VRAM | Speed (GPU) | Accuracy |
|---|---|---|---|---|
| tiny | 39M | ~1GB | ~32x realtime | ต่ำ |
| base | 74M | ~1GB | ~16x realtime | ปานกลาง |
| small | 244M | ~2GB | ~6x realtime | ดี |
| medium | 769M | ~5GB | ~2x realtime | ดีมาก |
| large-v3 | 1550M | ~10GB | ~1x realtime | ดีที่สุด |
| large-v3-turbo | 809M | ~6GB | ~3x realtime | ดีมาก (เร็วกว่า large) |
Whisper API & Pipeline
# === Whisper Speech Pipeline ===
# pip install faster-whisper fastapi uvicorn
# from faster_whisper import WhisperModel
# from fastapi import FastAPI, UploadFile
# import tempfile, os
#
# app = FastAPI()
# model = WhisperModel("large-v3-turbo", device="cuda", compute_type="float16")
#
# @app.post("/transcribe")
# async def transcribe(file: UploadFile):
#     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
#         tmp.write(await file.read())
#         tmp_path = tmp.name
#     segments, info = model.transcribe(tmp_path, language="th", beam_size=5)
#     result = []
#     for seg in segments:
#         result.append({"start": seg.start, "end": seg.end, "text": seg.text})
#     os.unlink(tmp_path)
#     return {"language": info.language, "segments": result}
from dataclasses import dataclass
@dataclass
class PipelineStage:
    """Describe one step of the speech-to-text pipeline as display text.

    Every field is a free-form string; the class adds no behaviour beyond
    what ``@dataclass`` generates (``__init__``, ``__repr__``, ``__eq__``).
    """

    stage: str  # step name, e.g. "Audio Input"
    tool: str  # tooling used for the step
    input_format: str  # what the step consumes
    output: str  # what the step produces
    config: str  # settings or example command for the step
# Pipeline steps in processing order, from upload to searchable storage.
stages = [
    PipelineStage(
        "Audio Input",
        "FastAPI Upload / S3 Event / Kafka",
        "mp3 wav flac m4a (any format)",
        "Raw Audio File",
        "Max file size 500MB Timeout 300s",
    ),
    PipelineStage(
        "Pre-processing",
        "ffmpeg / pydub",
        "Any Audio Format",
        "16kHz Mono WAV",
        "ffmpeg -i input -ar 16000 -ac 1 output.wav",
    ),
    PipelineStage(
        "Transcription",
        "faster-whisper (CTranslate2)",
        "16kHz WAV",
        "Text + Timestamps + Language",
        "model=large-v3-turbo beam_size=5 language=th",
    ),
    PipelineStage(
        "Post-processing",
        "Custom Python / NLP",
        "Raw Transcript",
        "Formatted Text + Paragraphs",
        "Remove duplicates Fix spacing Add punctuation",
    ),
    PipelineStage(
        "Storage",
        "PostgreSQL / Elasticsearch",
        "Formatted Transcript",
        "Searchable Database Entry",
        "Full-text Search Timestamp Index",
    ),
]

# Print a three-line summary for every stage.
print("=== Pipeline Stages ===")
for s in stages:
    print(f" [{s.stage}] Tool: {s.tool}")
    print(f" Input: {s.input_format} → Output: {s.output}")
    print(f" Config: {s.config}")
GitOps Deployment
# === GitOps with Argo CD ===
# Git Repository Structure:
# whisper-gitops/
# ├── base/
# │   ├── deployment.yaml
# │   ├── service.yaml
# │   ├── hpa.yaml
# │   └── kustomization.yaml
# ├── overlays/
# │   ├── staging/
# │   │   ├── kustomization.yaml
# │   │   └── patch-replicas.yaml
# │   └── production/
# │       ├── kustomization.yaml
# │       └── patch-replicas.yaml
# └── argocd/
#     └── application.yaml
# Argo CD Application
# apiVersion: argoproj.io/v1alpha1
# kind: Application
# metadata:
#   name: whisper-api
#   namespace: argocd
# spec:
#   project: default
#   source:
#     repoURL: https://github.com/org/whisper-gitops
#     targetRevision: main
#     path: overlays/production
#   destination:
#     server: https://kubernetes.default.svc
#     namespace: whisper
#   syncPolicy:
#     automated:
#       selfHeal: true
#       prune: true
@dataclass
class GitOpsComponent:
    """Describe one Git-managed deployment artifact as display text.

    Every field is a free-form string; the class adds no behaviour beyond
    what ``@dataclass`` generates.
    """

    component: str  # human-readable name of the artifact
    git_path: str  # path of the manifest inside the GitOps repository
    tool: str  # tooling that applies the manifest
    sync: str  # free-text note shown under the "Sync" label
# Artifacts kept under Git control and how each one is applied.
# NOTE(review): the repository layout sketched in the comments earlier in
# this file lists deployment.yaml under base/ and shows neither a
# configmap.yaml nor a monitoring/ directory — confirm these paths against
# the real repository.
components = [
    GitOpsComponent(
        "Whisper API Deployment",
        "overlays/production/deployment.yaml",
        "Argo CD + Kustomize",
        "Auto Sync on Git Push",
    ),
    GitOpsComponent(
        "Model Config",
        "base/configmap.yaml",
        "Argo CD",
        "Model Version Language Beam Size",
    ),
    GitOpsComponent(
        "HPA (Autoscaling)",
        "base/hpa.yaml",
        "Argo CD",
        "Scale 2-10 Pods ตาม GPU Utilization",
    ),
    GitOpsComponent(
        "Prometheus Rules",
        "monitoring/prometheus-rules.yaml",
        "Argo CD",
        "Alert Rules สำหรับ Latency Error Rate",
    ),
    GitOpsComponent(
        "Grafana Dashboard",
        "monitoring/grafana-dashboard.json",
        "Argo CD + Grafana Operator",
        "Dashboard as Code Version Control",
    ),
]

# Print a four-line summary for every component.
print("=== GitOps Components ===")
for c in components:
    print(f" [{c.component}]")
    print(f" Path: {c.git_path}")
    print(f" Tool: {c.tool}")
    print(f" Sync: {c.sync}")
Monitoring & Scaling
# === Production Monitoring ===
@dataclass
class WhisperMetric:
    """Describe one production monitoring metric as display text.

    Every field is a free-form string; the class adds no behaviour beyond
    what ``@dataclass`` generates.
    """

    metric: str  # metric name
    target: str  # desired value or range
    alert: str  # condition that raises an alert
    scaling: str  # scaling or remediation note
# Metrics to watch in production, with target, alert condition and scaling note.
metrics = [
    WhisperMetric(
        "Inference Latency",
        "< 1x Realtime (1 min audio < 1 min process)",
        "> 2x Realtime → Alert",
        "HPA Scale Up เพิ่ม GPU Pods",
    ),
    WhisperMetric(
        "Queue Length",
        "< 10 pending jobs",
        "> 50 → Alert P2 > 200 → Alert P1",
        "HPA Scale ตาม Queue Length",
    ),
    WhisperMetric(
        "GPU Utilization",
        "60-80%",
        "> 90% sustained → Scale Up",
        "Scale Up ที่ 80% Scale Down ที่ 40%",
    ),
    WhisperMetric(
        "Error Rate",
        "< 1%",
        "> 5% → Alert P1",
        "ไม่ Scale แต่ตรวจ Root Cause",
    ),
    WhisperMetric(
        "Word Error Rate (WER)",
        "< 10% สำหรับ Thai",
        "> 15% → Model Quality Alert",
        "เปลี่ยน Model Version หรือ Fine-tune",
    ),
    WhisperMetric(
        "Throughput (audio hours/day)",
        "ตาม SLA",
        "< 80% SLA → Alert",
        "Scale GPU Nodes + Pods",
    ),
]

# Print a three-line summary for every metric.
print("=== Monitoring Metrics ===")
for m in metrics:
    print(f" [{m.metric}] Target: {m.target}")
    print(f" Alert: {m.alert}")
    print(f" Scaling: {m.scaling}")
เคล็ดลับ
- faster-whisper: ใช้ faster-whisper แทน openai-whisper เร็วกว่า 4x
- turbo: ใช้ large-v3-turbo ดีสุดสำหรับ Speed/Accuracy
- GitOps: ทุก Config ใน Git ทำ PR Review ก่อน Deploy
- GPU: ใช้ float16 ลด VRAM 50% เร็วขึ้น 2x
- Queue: ใช้ Queue สำหรับ Async ไม่ Block API
Whisper คืออะไร
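Whisper คือโมเดล Speech-to-Text แบบ Transformer จาก OpenAI รองรับ 99+ ภาษารวมถึงภาษาไทย มีหลายขนาด ได้แก่ tiny, base, small, medium, large และ turbo ทำได้ทั้ง Transcription และ Translation และเมื่อใช้ faster-whisper บน GPU จะเร็วขึ้นประมาณ 4x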