Ollama Local LLM + IDP
Run local LLMs (LLaMA, Mistral, Gemma) with Ollama inside an Internal Developer Platform (IDP): a private REST API on your own GPUs powering code review, documentation, and ChatOps, with no data leaving your network.
| Model | Size | VRAM | Use Case |
|---|---|---|---|
| LLaMA 3 8B | 4.7GB (Q4) | 6GB+ | General chat, code review |
| Mistral 7B | 4.1GB (Q4) | 6GB+ | Fast inference, coding |
| CodeLlama 13B | 7.4GB (Q4) | 10GB+ | Code generation, review |
| Gemma 2 9B | 5.4GB (Q4) | 8GB+ | General purpose, compact |
| LLaMA 3 70B | 40GB (Q4) | 48GB+ | Complex analysis, expert tasks |
| Qwen 2 72B | 41GB (Q4) | 48GB+ | Multilingual, code |
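The VRAM column can be sanity-checked with a rough rule of thumb; the bytes-per-parameter figures below are approximations for common quantization levels, not Ollama internals:

```python
# Rough VRAM estimate for a quantized model.
# Assumption: ~0.57 bytes/param at Q4, plus ~20% overhead for context/buffers.
BYTES_PER_PARAM = {"q4": 0.57, "q8": 1.07, "fp16": 2.0}

def estimate_vram_gb(params_billion: float, quant: str = "q4",
                     overhead: float = 1.2) -> float:
    """Return an approximate VRAM requirement in GB."""
    return round(params_billion * BYTES_PER_PARAM[quant] * overhead, 1)

print(estimate_vram_gb(8))   # LLaMA 3 8B at Q4
print(estimate_vram_gb(70))  # LLaMA 3 70B at Q4
```

The estimates line up with the table: an 8B Q4 model fits comfortably in 6GB+, while a 70B Q4 model needs roughly 48GB.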
Ollama Setup & API
# === Ollama Installation & API ===
# Install (Linux/Mac)
# curl -fsSL https://ollama.com/install.sh | sh
#
# Install (Windows)
# Download from https://ollama.com/download
#
# Run Model
# ollama run llama3 # Interactive chat
# ollama run codellama # Code generation
# ollama run mistral # Fast inference
# ollama pull llama3 # Download only
# ollama list # List installed models
#
# Docker
# docker run -d --gpus all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
# docker exec -it ollama ollama run llama3
# REST API Examples
# curl http://localhost:11434/api/generate -d '{
# "model": "llama3",
# "prompt": "Review this Python code:\ndef add(a, b): return a+b",
# "stream": false
# }'
#
# curl http://localhost:11434/api/chat -d '{
# "model": "llama3",
# "messages": [
# {"role": "system", "content": "You are a senior code reviewer."},
# {"role": "user", "content": "Review this function..."}
# ],
# "stream": false
# }'
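The same calls can be made from Python with only the standard library; a minimal sketch, assuming a local Ollama server on the default port 11434:

```python
import json
import urllib.request

OLLAMA_HOST = "http://localhost:11434"  # default Ollama port (assumed local install)

def build_payload(model: str, prompt: str, stream: bool = False) -> dict:
    """JSON body for POST /api/generate."""
    return {"model": model, "prompt": prompt, "stream": stream}

def generate(model: str, prompt: str) -> str:
    """Call /api/generate and return the 'response' field (needs a running server)."""
    data = json.dumps(build_payload(model, prompt)).encode()
    req = urllib.request.Request(
        f"{OLLAMA_HOST}/api/generate",
        data=data,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())["response"]
```

With `stream: true` (the default), the server instead returns one JSON object per generated chunk, which is why the curl examples above set `"stream": false`.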
from dataclasses import dataclass

@dataclass
class OllamaEndpoint:
    method: str
    path: str
    description: str
    example_body: str

endpoints = [
    OllamaEndpoint("POST", "/api/generate",
                   "Generate text completion",
                   '{"model":"llama3","prompt":"Explain Docker","stream":false}'),
    OllamaEndpoint("POST", "/api/chat",
                   "Chat conversation (multi-turn)",
                   '{"model":"llama3","messages":[{"role":"user","content":"Hello"}]}'),
    OllamaEndpoint("POST", "/api/embeddings",
                   "Generate text embeddings (RAG)",
                   '{"model":"llama3","prompt":"text to embed"}'),
    OllamaEndpoint("GET", "/api/tags",
                   "List installed models",
                   "No body required"),
    OllamaEndpoint("POST", "/api/pull",
                   "Download model from library",
                   '{"name":"codellama:13b"}'),
    OllamaEndpoint("DELETE", "/api/delete",
                   "Delete installed model",
                   '{"name":"llama3"}'),
]

print("=== Ollama API Endpoints ===")
for e in endpoints:
    print(f"  [{e.method}] {e.path}")
    print(f"    Desc: {e.description}")
    print(f"    Body: {e.example_body}")
IDP Integration
# === IDP + Ollama Integration ===
# Modelfile (Custom Configuration)
# FROM llama3
# SYSTEM """You are a senior developer at our company.
# You review code following our coding standards:
# - PEP 8 for Python
# - ESLint Airbnb for JavaScript
# - Always suggest tests
# - Flag security issues
# """
# PARAMETER temperature 0.3
# PARAMETER num_ctx 4096
#
# ollama create code-reviewer -f Modelfile
# ollama run code-reviewer
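The same effect can be achieved without a custom model by sending the system prompt with each `/api/chat` request; a sketch where the prompt wording and helper name are placeholders, not a fixed API:

```python
# System prompt mirroring the Modelfile above (exact wording is a placeholder).
SYSTEM_PROMPT = (
    "You are a senior developer at our company. "
    "Review code against our standards, suggest tests, and flag security issues."
)

def build_review_messages(code: str) -> list:
    """Build a /api/chat message list that asks for a code review."""
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Review this code:\n{code}"},
    ]

msgs = build_review_messages("def add(a, b): return a+b")
```

The Modelfile route is still preferable when several tools share the same persona, since the prompt and parameters live in one place (`ollama create code-reviewer -f Modelfile`).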
@dataclass
class IDPFeature:
    feature: str
    model: str
    integration: str
    example: str

features = [
    IDPFeature("AI Code Review",
               "codellama:13b / llama3",
               "Git Hook → Ollama API → Comment on PR",
               "PR created → Bot sends diff → Ollama reviews → Comments on PR"),
    IDPFeature("Auto Documentation",
               "llama3 / mistral",
               "CI Pipeline → Extract Code → Ollama → Generate Docs",
               "Push code → CI auto-generates API docs and README"),
    IDPFeature("Incident Analysis",
               "llama3:70b",
               "Alert → Collect Logs → Ollama Analyze → Suggest Fix",
               "PagerDuty alert → Aggregate logs → AI analyzes → Suggests a fix"),
    IDPFeature("ChatOps Assistant",
               "llama3 / mistral",
               "Slack Bot → Ollama API → Reply in Channel",
               "/ask how to deploy → AI answers from internal docs"),
    IDPFeature("Test Generation",
               "codellama:13b",
               "Developer Portal → Paste Code → Generate Tests",
               "Submit a function → Ollama generates unit tests → Copy/paste"),
    IDPFeature("SQL Assistant",
               "codellama / sqlcoder",
               "Developer Portal → Natural Language → SQL Query",
               "Ask in Thai → Ollama translates to SQL → Queries the database"),
]

print("=== IDP + Ollama Features ===")
for f in features:
    print(f"\n  [{f.feature}] Model: {f.model}")
    print(f"    Integration: {f.integration}")
    print(f"    Example: {f.example}")
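The "AI Code Review" flow above can be sketched end to end: collect the diff, post it to `/api/chat`, and return the review text. The helper names, the local server URL, and the "last commit" diff choice are assumptions; only `/api/chat` itself is Ollama's API:

```python
import json
import subprocess
import urllib.request

OLLAMA_CHAT = "http://localhost:11434/api/chat"  # assumed local server

def get_diff(base: str = "HEAD~1") -> str:
    """Collect the diff to review (here: the last commit)."""
    return subprocess.run(["git", "diff", base],
                          capture_output=True, text=True, check=True).stdout

def build_chat_body(diff: str, model: str = "codellama:13b") -> dict:
    """Request body for /api/chat carrying the diff."""
    return {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a senior code reviewer."},
            {"role": "user", "content": f"Review this diff:\n{diff}"},
        ],
        "stream": False,
    }

def review(diff: str) -> str:
    """Send the diff to Ollama and return the review text."""
    req = urllib.request.Request(
        OLLAMA_CHAT,
        data=json.dumps(build_chat_body(diff)).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())["message"]["content"]
```

In a real git hook or CI job, the returned text would then be posted back to the PR via the forge's comment API.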
Production Architecture
# === Production Architecture ===
@dataclass
class Component:
    name: str
    technology: str
    role: str
    scaling: str

components = [
    Component("IDP Portal",
              "Next.js / React + Tailwind",
              "Web UI for developers to access the AI features",
              "Horizontal (replicas), CDN for static assets"),
    Component("API Gateway",
              "Kong / Nginx / Traefik",
              "Auth, rate limiting, routing, load balancing",
              "Horizontal (multiple instances)"),
    Component("Ollama Cluster",
              "Ollama Docker + GPU Node Pool (K8s)",
              "Serve LLM inference (code review, docs, chat)",
              "Horizontal (add GPU nodes) + queue"),
    Component("Request Queue",
              "Redis / RabbitMQ / Kafka",
              "Buffer requests to prevent GPU overload",
              "Cluster mode (Redis Cluster / Kafka partitions)"),
    Component("Response Cache",
              "Redis / Memcached",
              "Cache frequent responses, cutting GPU load 30-50%",
              "Cluster mode + TTL-based eviction"),
    Component("Monitoring",
              "Prometheus + Grafana",
              "GPU utilization, VRAM, latency, throughput, error rate",
              "Prometheus federation for multi-cluster"),
]

print("=== Production Components ===")
for c in components:
    print(f"  [{c.name}] {c.technology}")
    print(f"    Role: {c.role}")
    print(f"    Scaling: {c.scaling}")
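The response cache works because an identical (model, prompt, parameters) tuple always maps to the same key, so a repeat request can be answered without touching a GPU. A minimal sketch of the key scheme (the Redis client is omitted and the hashing scheme is an assumption):

```python
import hashlib
import json

def cache_key(model: str, prompt: str, temperature: float = 0.3) -> str:
    """Deterministic cache key for an inference request."""
    payload = json.dumps(
        {"model": model, "prompt": prompt, "temperature": temperature},
        sort_keys=True,  # stable serialization so equal requests hash equally
    )
    return "ollama:" + hashlib.sha256(payload.encode()).hexdigest()

# Identical requests hit the same key; any change in prompt or params misses.
k1 = cache_key("llama3", "Explain Docker")
k2 = cache_key("llama3", "Explain Docker")
k3 = cache_key("llama3", "Explain K8s")
```

Note that caching only pays off for deterministic settings (low temperature); with high-temperature sampling, serving a cached response changes observable behavior.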
Tips
- Modelfile: create a custom Modelfile that bakes in your organization's system prompt
- Cache: caching responses cuts GPU load by 30-50%; repeated questions skip re-inference
- Queue: route requests through a queue to prevent GPU overload
- CodeLlama: use CodeLlama for code review and test generation
- Privacy: data stays inside the corporate network and never leaves for the cloud
What is Ollama?
Ollama is an open-source local LLM runner for models such as LLaMA, Mistral, Gemma, and CodeLlama. It exposes a REST API, supports GPU acceleration (CUDA, Metal), runs fully offline for privacy, is free, and is customizable via Modelfiles.
What is an Internal Developer Platform?
An IDP is a self-service portal combining a service catalog, CI/CD automation, and observability. Adding Ollama gives it AI features: code review, documentation generation, ChatOps, and test generation.
How does API integration work?
Ollama serves a REST API on localhost:11434 with /api/generate, /api/chat, and /api/embeddings. It integrates via the Python SDK, OpenAI-compatible clients, and LangChain, and runs under Docker or Kubernetes.
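Because Ollama also exposes an OpenAI-compatible endpoint under /v1, existing OpenAI clients can be pointed at it by changing the base URL; a sketch of the request shape (locally no real API key is needed, but clients typically require a placeholder value):

```python
BASE_URL = "http://localhost:11434/v1"  # Ollama's OpenAI-compatible endpoint

def chat_completions_body(model: str, user_msg: str) -> dict:
    """OpenAI-style chat.completions request body that Ollama accepts."""
    return {
        "model": model,
        "messages": [{"role": "user", "content": user_msg}],
    }

# With the official openai package the same call would look roughly like:
#   client = OpenAI(base_url=BASE_URL, api_key="ollama")  # key is a placeholder
#   client.chat.completions.create(**chat_completions_body("llama3", "Hello"))
```

This is what makes LangChain and other OpenAI-first tooling work against a local Ollama server without code changes beyond configuration.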
How do you set it up for production?
Put an API gateway (auth, rate limiting) in front of the IDP portal, run an Ollama cluster on GPU nodes behind a request queue, add a Redis response cache, and monitor with Prometheus and Grafana. Scale horizontally by adding GPU nodes and keep everything inside the private network.
Summary
Ollama brings local LLMs into an Internal Developer Platform through a simple REST API, powering code review, documentation, and ChatOps on your own GPUs with full privacy. Caching and queuing carry it from a laptop experiment to production scale.
