SiamCafe · Blog
Ollama Local LLM Feature Flag Management
บทความ

Ollama Local LLM Feature Flag Management

เผยแพร่ 28 พฤษภาคม 2569

Ollama Local LLM Feature Flag Management คืออะไร

Ollama Local LLM Feature Flag Management

Ollama เป็น open source tool สำหรับรัน Large Language Models (LLMs) บนเครื่อง local ได้ง่ายๆ รองรับโมเดลยอดนิยมเช่น Llama 3, Mistral, Phi-3, Gemma และ Code Llama Feature Flag Management คือการควบคุมการเปิด-ปิด features ในซอฟต์แวร์โดยไม่ต้อง deploy ใหม่ การรวมสองแนวคิดนี้ช่วยให้สร้างระบบ AI-powered feature flag ที่ใช้ LLM วิเคราะห์ feature performance, สร้าง targeting rules อัตโนมัติ และตัดสินใจ rollout strategies โดยรัน LLM ทั้งหมดบน local เพื่อความปลอดภัยของข้อมูล

Ollama Setup & Basics

# ollama_setup.py — Ollama local LLM setup
import json

class OllamaSetup:
    """Reference card for installing and driving Ollama locally.

    Holds three static tables (install commands, popular models and a
    CLI/API cheat sheet) and prints each of them as a small text report.
    """

    # Install command (or download hint) keyed by platform.
    INSTALL = {
        "macos": "curl -fsSL https://ollama.com/install.sh | sh",
        "linux": "curl -fsSL https://ollama.com/install.sh | sh",
        "windows": "Download from ollama.com/download/windows",
        "docker": "docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama",
    }

    # Model tag -> the name/size/use/speed fields shown by show_models().
    MODELS = {
        "llama3.1": {"name": "Llama 3.1 8B", "size": "4.7GB", "use": "General purpose, reasoning", "speed": "Fast"},
        "mistral": {"name": "Mistral 7B", "size": "4.1GB", "use": "General purpose, instruction following", "speed": "Fast"},
        "codellama": {"name": "Code Llama 7B", "size": "3.8GB", "use": "Code generation, analysis", "speed": "Fast"},
        "phi3": {"name": "Phi-3 Mini 3.8B", "size": "2.3GB", "use": "Lightweight, edge deployment", "speed": "Very Fast"},
        "gemma2": {"name": "Gemma 2 9B", "size": "5.4GB", "use": "Google model, multilingual", "speed": "Medium"},
    }

    # CLI and HTTP API cheat sheet printed by show_commands().
    COMMANDS = """
# Ollama CLI commands
ollama pull llama3.1          # Download model
ollama run llama3.1           # Interactive chat
ollama list                    # List downloaded models
ollama show llama3.1          # Show model info
ollama rm llama3.1            # Remove model
ollama serve                   # Start API server (port 11434)

# API usage
curl http://localhost:11434/api/generate -d '{
  "model": "llama3.1",
  "prompt": "What is a feature flag?",
  "stream": false
}'
"""

    def show_install(self):
        """Print one install command per platform."""
        print("=== Installation ===\n")
        for os_name, cmd in self.INSTALL.items():
            print(f"  [{os_name}] {cmd}")

    def show_models(self):
        """Print name, size, use case and speed for every model."""
        print("\n=== Popular Models ===")
        for model in self.MODELS.values():
            print(f"  [{model['name']}] {model['size']} | {model['use']} | {model['speed']}")

    def show_commands(self, limit=400):
        """Print the command cheat sheet.

        Args:
            limit: Maximum number of characters of ``COMMANDS`` to print.
                The default (400) keeps the historical preview, which is
                shorter than the full text and cuts the curl example
                short. Pass ``None`` to print the complete cheat sheet.
        """
        print("\n=== Commands ===")
        print(self.COMMANDS if limit is None else self.COMMANDS[:limit])

# Demo: print every section of the Ollama reference card, in order.
setup = OllamaSetup()
for _render in (setup.show_install, setup.show_models, setup.show_commands):
    _render()

Feature Flag Fundamentals

# feature_flags.py — Feature flag fundamentals
import json

class FeatureFlags:
    """Feature flag primer: concepts, tooling overview and a sample manager.

    ``CONCEPTS`` and ``TOOLS`` are printed by the ``show_*`` methods.
    ``PYTHON_FLAGS`` holds the source of a small stand-alone flag manager;
    no method of this class prints it.
    """

    # Core vocabulary: what a flag is, the flag types, and targeting rules.
    CONCEPTS = {
        "feature_flag": {
            "name": "Feature Flag (Feature Toggle)",
            "description": "ตัวแปร boolean/config ที่ควบคุมว่า feature จะเปิด/ปิดใน production",
            "benefit": "Deploy code โดยไม่ enable feature ทันที — ค่อยๆ rollout",
        },
        "types": {
            "release": {"name": "Release Flag", "use": "ควบคุม feature rollout (on/off)"},
            "experiment": {"name": "Experiment Flag", "use": "A/B testing (variant A vs B)"},
            "ops": {"name": "Ops Flag", "use": "Kill switch สำหรับ circuit breaker"},
            "permission": {"name": "Permission Flag", "use": "Feature access ตาม user role/plan"},
        },
        "targeting": {
            "name": "Targeting Rules",
            "description": "กฎที่กำหนดว่า user ไหนเห็น feature",
            "examples": ["User ID targeting", "% rollout (canary)", "Country/region", "User attribute (plan, role)"],
        },
    }

    # Tool key -> display name, delivery model and entry price.
    TOOLS = {
        "launchdarkly": {"name": "LaunchDarkly", "type": "SaaS", "price": "$10/seat/month+"},
        "flagsmith": {"name": "Flagsmith", "type": "Open source / SaaS", "price": "Free (self-hosted)"},
        "unleash": {"name": "Unleash", "type": "Open source / SaaS", "price": "Free (self-hosted)"},
        "openfeature": {"name": "OpenFeature", "type": "Standard/SDK", "price": "Free (CNCF project)"},
        "custom": {"name": "Custom (JSON/DB)", "type": "DIY", "price": "Free"},
    }

    # Sample code. Fixes over the earlier version of this sample:
    #   * rollout_percentage is honoured for flags that have no rules
    #   * bucketing uses a stable SHA-256 digest instead of the per-process
    #     salted built-in hash()
    #   * the manager can be built from an in-memory dict, so the usage
    #     example runs without a flags.json file on disk
    #   * "in"/"gte" rules no longer raise TypeError on missing attributes
    PYTHON_FLAGS = """
# simple_flags.py — Simple feature flag system
import hashlib
import json

class FeatureFlagManager:
    def __init__(self, config_file="flags.json", flags=None):
        # Accept an in-memory dict so the manager also works without a file
        if flags is not None:
            self.flags = flags
        else:
            with open(config_file, encoding="utf-8") as f:
                self.flags = json.load(f)

    def is_enabled(self, flag_name, user=None):
        flag = self.flags.get(flag_name)
        if not flag or not flag.get("enabled", False):
            return False

        # Targeting rules: any matching rule enables the flag for this user
        rules = flag.get("rules", [])
        if any(self._evaluate_rule(rule, user) for rule in rules):
            return True

        # Percentage rollout applies to everyone not matched by a rule
        rollout = flag.get("rollout_percentage", 100)
        if rollout < 100:
            if not user:
                return False  # Anonymous users cannot be bucketed
            return self._bucket(flag_name, user.get("id", "")) < rollout

        return not rules  # No rules and full rollout = enabled for all

    @staticmethod
    def _bucket(flag_name, user_id):
        # hash() is salted per process for str, so use a stable digest
        key = f"{flag_name}:{user_id}".encode("utf-8")
        return int(hashlib.sha256(key).hexdigest(), 16) % 100

    def _evaluate_rule(self, rule, user):
        if not user:
            return False
        operator = rule.get("operator")
        value = rule.get("value")
        user_value = user.get(rule.get("attribute"))

        if operator == "eq":
            return user_value == value
        if user_value is None or value is None:
            return False  # Missing data never satisfies "in" / "gte"
        if operator == "in":
            return user_value in value
        if operator == "gte":
            return user_value >= value
        return False

# Same structure as flags.json
flags_config = {
    "new_dashboard": {
        "enabled": True,
        "rollout_percentage": 25,
        "rules": [
            {"attribute": "plan", "operator": "eq", "value": "enterprise"}
        ]
    },
    "ai_suggestions": {
        "enabled": True,
        "rollout_percentage": 10,
    }
}

fm = FeatureFlagManager(flags=flags_config)
user = {"id": "user123", "plan": "enterprise", "country": "TH"}
print(f"new_dashboard: {fm.is_enabled('new_dashboard', user)}")
"""

    def show_concepts(self):
        """Print the feature flag definition followed by the flag types."""
        flag = self.CONCEPTS["feature_flag"]
        print("=== Feature Flag Concepts ===\n")
        print(f"[{flag['name']}]")
        print(f"  {flag['description']}")
        print()
        print("Types:")
        for flag_type in self.CONCEPTS["types"].values():
            print(f"  [{flag_type['name']}] {flag_type['use']}")

    def show_tools(self):
        """Print name, delivery model and price of each flag tool."""
        print("\n=== Tools ===")
        for tool in self.TOOLS.values():
            print(f"  [{tool['name']}] {tool['type']} | {tool['price']}")

# Demo: print the concept overview, then the tool comparison.
ff = FeatureFlags()
for _render in (ff.show_concepts, ff.show_tools):
    _render()

AI-Powered Feature Flags with Ollama

Ollama Local LLM Feature Flag Management
# ai_flags.py — AI-powered feature flag management
import json

class AIFeatureFlags:
    """Shows how a local Ollama model can advise on feature flag rollouts.

    ``OLLAMA_INTEGRATION`` holds the source of a sample advisor client;
    ``show_integration`` prints it and ``use_cases`` lists where an LLM
    can help.
    """

    # Sample code. The HTTP call carries a timeout and checks the HTTP
    # status, so a hung or failing Ollama server surfaces as an exception
    # instead of blocking forever or returning an empty string.
    OLLAMA_INTEGRATION = """
# ollama_flag_advisor.py — LLM-powered feature flag advisor
import requests
import json

class OllamaFlagAdvisor:
    def __init__(self, model="llama3.1", base_url="http://localhost:11434", timeout=120):
        self.model = model
        self.base_url = base_url
        self.timeout = timeout  # Seconds; local generation can be slow

    def query(self, prompt, system="You are a feature flag management expert."):
        resp = requests.post(
            f"{self.base_url}/api/generate",
            json={
                "model": self.model,
                "prompt": prompt,
                "system": system,
                "stream": False,
                "options": {"temperature": 0.3},
            },
            timeout=self.timeout,
        )
        resp.raise_for_status()  # Surface HTTP errors instead of returning ""
        return resp.json().get("response", "")

    def analyze_rollout(self, feature_name, metrics):
        prompt = f'''
        Analyze the following feature flag rollout metrics and recommend next steps:

        Feature: {feature_name}
        Current rollout: {metrics.get("rollout_pct")}%
        Error rate (control): {metrics.get("error_rate_control")}%
        Error rate (treatment): {metrics.get("error_rate_treatment")}%
        Latency (control): {metrics.get("latency_control")}ms
        Latency (treatment): {metrics.get("latency_treatment")}ms
        Conversion (control): {metrics.get("conversion_control")}%
        Conversion (treatment): {metrics.get("conversion_treatment")}%

        Should we: increase rollout, decrease rollout, or kill the feature?
        Provide a brief analysis and recommendation.
        '''
        return self.query(prompt)

    def generate_targeting_rules(self, feature_description, user_segments):
        prompt = f'''
        Generate feature flag targeting rules for:
        Feature: {feature_description}
        Available segments: {json.dumps(user_segments)}

        Output JSON targeting rules.
        '''
        return self.query(prompt)

    def suggest_rollout_strategy(self, feature_name, risk_level):
        prompt = f'''
        Suggest a rollout strategy for feature "{feature_name}"
        with risk level: {risk_level}

        Include: phases, percentage, duration, metrics to watch.
        '''
        return self.query(prompt)

# Usage
advisor = OllamaFlagAdvisor()

# Analyze rollout metrics
metrics = {
    "rollout_pct": 10,
    "error_rate_control": 0.5,
    "error_rate_treatment": 0.8,
    "latency_control": 120,
    "latency_treatment": 145,
    "conversion_control": 3.2,
    "conversion_treatment": 4.1,
}
analysis = advisor.analyze_rollout("new_checkout_flow", metrics)
print(analysis)
"""

    def show_integration(self, limit=700):
        """Print the sample advisor source.

        Args:
            limit: Maximum number of characters of ``OLLAMA_INTEGRATION``
                to print. The default (700) keeps the historical preview
                length; pass ``None`` to print the complete sample.
        """
        print("=== Ollama Feature Flag Advisor ===")
        source = self.OLLAMA_INTEGRATION
        print(source if limit is None else source[:limit])

    def use_cases(self):
        """Print the list of AI-assisted feature flag use cases."""
        print("\n=== AI Feature Flag Use Cases ===")
        cases = [
            {"name": "Rollout Analysis", "description": "LLM วิเคราะห์ metrics แล้วแนะนำ increase/decrease/kill"},
            {"name": "Rule Generation", "description": "LLM สร้าง targeting rules จาก feature description"},
            {"name": "Incident Detection", "description": "LLM ตรวจจับ anomalies หลัง feature enable"},
            {"name": "Strategy Planning", "description": "LLM แนะนำ rollout strategy ตาม risk level"},
            {"name": "Documentation", "description": "LLM สร้าง changelog/docs สำหรับ flag changes"},
        ]
        for c in cases:
            print(f"  [{c['name']}] {c['description']}")

# Demo: print the advisor sample, then the list of use cases.
ai = AIFeatureFlags()
for _render in (ai.show_integration, ai.use_cases):
    _render()

Automated Rollout Pipeline

# pipeline.py — Automated rollout pipeline
import json
import random

class RolloutPipeline:
    """Progressive rollout plan plus a sample LLM-driven automation script.

    ``STAGES`` describes the rollout phases, ``AUTOMATION`` holds the
    source of a sample ``AutoRollout`` driver, and ``simulate`` prints a
    randomised dry run.
    """

    # Stage key -> display name, observation duration and exit criteria.
    STAGES = {
        "canary": {"name": "1. Canary (1%)", "duration": "24h", "criteria": "Error rate < 1%, latency < 200ms"},
        "early_adopter": {"name": "2. Early Adopter (10%)", "duration": "48h", "criteria": "Error rate < 0.5%, positive feedback"},
        "beta": {"name": "3. Beta (25%)", "duration": "1 week", "criteria": "No regressions, metrics stable"},
        "ga_50": {"name": "4. GA 50%", "duration": "1 week", "criteria": "Business metrics positive"},
        "ga_100": {"name": "5. GA 100%", "duration": "Permanent", "criteria": "All metrics green, cleanup flag"},
    }

    # Sample code. Fixes over the earlier version of this sample:
    #   * a "decrease" recommendation now stops the rollout at the current
    #     stage; it used to `continue`, which advanced to the next stage
    #     right after announcing a hold
    #   * the per-stage wait is a constructor parameter (default 24 hours)
    #   * the unused `requests` import is gone, so the sample needs only
    #     the standard library
    AUTOMATION = """
# auto_rollout.py — Automated progressive rollout
import time

class AutoRollout:
    def __init__(self, flag_name, ollama_advisor, flag_manager, wait_seconds=3600 * 24):
        self.flag_name = flag_name
        self.advisor = ollama_advisor
        self.fm = flag_manager
        self.stages = [1, 10, 25, 50, 100]
        self.wait_seconds = wait_seconds  # Observation window per stage

    def get_metrics(self):
        # Fetch metrics from monitoring system
        return {
            "error_rate_control": 0.3,
            "error_rate_treatment": 0.4,
            "latency_control": 100,
            "latency_treatment": 110,
            "conversion_control": 3.0,
            "conversion_treatment": 3.5,
        }

    def run(self):
        for pct in self.stages:
            print(f"Setting rollout to {pct}%...")
            self.fm.set_rollout(self.flag_name, pct)

            # Wait for data
            time.sleep(self.wait_seconds)

            metrics = self.get_metrics()
            metrics["rollout_pct"] = pct

            # Ask LLM for analysis
            analysis = self.advisor.analyze_rollout(self.flag_name, metrics).lower()

            if "kill" in analysis or "rollback" in analysis:
                print(f"ROLLBACK! LLM recommends killing feature at {pct}%")
                self.fm.set_rollout(self.flag_name, 0)
                return False
            if "decrease" in analysis:
                # Stop advancing; the flag stays at the current percentage
                print(f"DECREASE! Holding at {pct}%")
                return False
            print(f"PROCEED to next stage from {pct}%")

        print("Feature fully rolled out!")
        return True
"""

    def show_stages(self):
        """Print each rollout stage with its duration and exit criteria."""
        print("=== Rollout Stages ===\n")
        for stage in self.STAGES.values():
            print(f"[{stage['name']}] Duration: {stage['duration']}")
            print(f"  Criteria: {stage['criteria']}")

    def show_automation(self, limit=500):
        """Print the sample automation source.

        Args:
            limit: Maximum number of characters of ``AUTOMATION`` to
                print. The default (500) keeps the historical preview
                length; pass ``None`` to print the complete sample.
        """
        print("\n=== Auto Rollout Script ===")
        print(self.AUTOMATION if limit is None else self.AUTOMATION[:limit])

    def simulate(self):
        """Print a randomised dry run of the five rollout percentages.

        Each stage draws an error rate in [0.1, 1.0] and a latency in
        80-200 ms; the stage is marked PROCEED only when the error rate is
        below 0.8 and the latency below 180, otherwise HOLD.
        """
        print("\n=== Rollout Simulation ===")
        stages = [1, 10, 25, 50, 100]
        for pct in stages:
            error = random.uniform(0.1, 1.0)
            latency = random.randint(80, 200)
            decision = "PROCEED" if error < 0.8 and latency < 180 else "HOLD"
            print(f"  [{pct:>3}%] Error: {error:.2f}% | Latency: {latency}ms → {decision}")

# Demo: print the stage plan, the automation sample, then a simulated run.
pipeline = RolloutPipeline()
for _render in (pipeline.show_stages, pipeline.show_automation, pipeline.simulate):
    _render()

Privacy & Security

# security.py — Privacy and security with local LLM
import json

class LocalLLMSecurity:
    """Talking points for running the LLM locally, plus hardware sizing."""

    # Benefit key -> headline ("name") and one-line explanation ("detail").
    BENEFITS = {
        "data_privacy": {
            "name": "Data Privacy (ข้อมูลไม่ออกนอกองค์กร)",
            "detail": "Ollama รันบน local — ข้อมูล feature flags, metrics, user data ไม่ส่งไป cloud",
        },
        "compliance": {
            "name": "Compliance (GDPR, PDPA)",
            "detail": "ไม่มี data transfer ไป third party — ง่ายต่อการ comply กับ PDPA/GDPR",
        },
        "cost": {
            "name": "Cost (ไม่มีค่า API)",
            "detail": "ไม่ต้องจ่ายค่า API calls — Ollama ฟรี, รันบน hardware ที่มี",
        },
        "latency": {
            "name": "Low Latency (ไม่ต้องเรียก cloud)",
            "detail": "Local inference เร็วกว่า cloud API — ไม่มี network latency",
        },
        "offline": {
            "name": "Offline Capable",
            "detail": "ทำงานได้แม้ไม่มี internet — เหมาะ air-gapped environments",
        },
    }

    def show_benefits(self):
        """Print every benefit as a headline, an indented detail, a blank line."""
        print("=== Local LLM Security Benefits ===\n")
        for entry in self.BENEFITS.values():
            # One print per entry; the trailing "\n" yields the blank separator.
            print(f"[{entry['name']}]\n  {entry['detail']}\n")

    def hardware_reqs(self):
        """Print RAM, GPU and speed guidance for four model sizes."""
        row_format = "  [{model}] RAM: {ram} | GPU: {gpu} | {speed}"
        table = [
            {"model": "Phi-3 3.8B", "ram": "4GB", "gpu": "Optional", "speed": "Very fast on CPU"},
            {"model": "Llama 3.1 8B", "ram": "8GB", "gpu": "Recommended (6GB VRAM)", "speed": "Fast"},
            {"model": "Mistral 7B", "ram": "8GB", "gpu": "Recommended", "speed": "Fast"},
            {"model": "Llama 3.1 70B", "ram": "48GB+", "gpu": "Required (48GB VRAM)", "speed": "Slow on CPU"},
        ]
        print("=== Hardware Requirements ===")
        for row in table:
            print(row_format.format(**row))

# Demo: print the benefits list, then the hardware sizing table.
sec = LocalLLMSecurity()
for _render in (sec.show_benefits, sec.hardware_reqs):
    _render()

FAQ - คำถามที่พบบ่อย

Q: Ollama กับ OpenAI API อันไหนดี?

A: Ollama: ฟรี, private, offline, latency ต่ำ แต่ model quality อาจต่ำกว่า GPT-4 • OpenAI: model quality สูงสุด แต่มีค่าใช้จ่าย, data ถูกส่งออกนอกองค์กร, ต้องใช้ internet • ใช้ Ollama เมื่อ: data sensitive, budget จำกัด, มี offline requirement • ใช้ OpenAI เมื่อ: ต้องการ quality สูงสุด, budget เพียงพอ • Hybrid: Ollama สำหรับ routine tasks + OpenAI สำหรับ complex analysis

Q: Feature flag ต้องใช้ tool อะไร?

A: เริ่มต้น: JSON config + custom code (ฟรี, ง่าย) • Growing: Flagsmith หรือ Unleash (open source, self-hosted) • Enterprise: LaunchDarkly (SaaS, advanced targeting, analytics) • Standard: OpenFeature SDK (CNCF, vendor-agnostic) • สำคัญ: เลือก tool ที่ team ใช้ได้จริง ไม่ over-engineer

Q: AI ช่วย feature flag management ได้จริงไหม?

A: ช่วยได้ในหลายด้าน: (1) วิเคราะห์ rollout metrics → แนะนำ proceed/rollback (2) สร้าง targeting rules จาก natural language (3) ตรวจจับ anomalies หลัง feature enable (4) สร้าง documentation อัตโนมัติ — แต่ LLM ไม่ควรตัดสินใจ rollback เองทั้งหมด ควรใช้เป็น advisor ให้คนเป็นผู้ตัดสินใจ

Q: Ollama รันบนเครื่องอะไรได้?

A: CPU only: ได้ แต่ช้า (Phi-3 3.8B ใช้ได้ดี, Llama 8B พอใช้) • GPU (NVIDIA): เร็วมาก (RTX 3060+ แนะนำ) • Apple Silicon (M1/M2/M3): เร็วมาก (Metal acceleration) • RAM: minimum 8GB สำหรับ 7-8B models • Docker: รันใน container ได้ (map GPU ด้วย --gpus all)