CrewAI Multi-Agent Backup Recovery Strategy คืออะไร
CrewAI เป็น open source framework สำหรับสร้าง multi-agent AI systems ที่ agents หลายตัวทำงานร่วมกันเพื่อบรรลุเป้าหมายที่ซับซ้อน แต่ละ agent มี role, goal และ backstory เฉพาะตัว สามารถใช้ tools ต่างๆ และส่งต่องานกันได้ Backup Recovery Strategy คือแผนสำรองและกู้คืนระบบ multi-agent รวมถึง agent configurations, memory states, task histories และ tool integrations เพื่อให้ระบบกลับมาทำงานได้เร็วที่สุดเมื่อเกิดปัญหา
CrewAI Architecture
# crewai_arch.py — CrewAI multi-agent architecture
import json
class CrewAIArch:
    """Reference card describing the building blocks of a CrewAI system."""

    # Core CrewAI concepts; each entry carries a display name, a short
    # description, and the config fields relevant to that component.
    COMPONENTS = {
        "agent": {
            "name": "Agent",
            "description": "AI agent ที่มี role, goal, backstory เฉพาะ",
            "config": "role, goal, backstory, tools, llm, memory",
        },
        "task": {
            "name": "Task",
            "description": "งานที่กำหนดให้ agent ทำ มี description และ expected output",
            "config": "description, expected_output, agent, context",
        },
        "crew": {
            "name": "Crew",
            "description": "กลุ่ม agents ที่ทำงานร่วมกัน มี process flow",
            "config": "agents, tasks, process (sequential/hierarchical)",
        },
        "tool": {
            "name": "Tool",
            "description": "เครื่องมือที่ agent ใช้ เช่น search, file read, API call",
            "config": "name, description, function",
        },
        "memory": {
            "name": "Memory",
            "description": "Short-term, long-term, entity memory สำหรับ context",
            "config": "memory=True ใน Crew config",
        },
    }

    # ASCII diagram of how a request flows through a crew.
    ARCHITECTURE = """
[User Request]
↓
[Crew Manager]
↓
┌─────────────────────────────┐
│ Agent 1 (Researcher) │ → [Search Tool, Web Scraper]
│ Agent 2 (Analyst) │ → [Calculator, Database Tool]
│ Agent 3 (Writer) │ → [File Writer, Template Tool]
└─────────────────────────────┘
↓
[Task Pipeline: Sequential / Hierarchical]
↓
[Final Output]
"""

    def show_components(self):
        """Print a short card for every CrewAI component."""
        print("=== CrewAI Components ===\n")
        for component in self.COMPONENTS.values():
            for line in (
                f"[{component['name']}]",
                f" {component['description']}",
                f" Config: {component['config']}",
            ):
                print(line)
            print()

    def show_architecture(self):
        """Print the ASCII architecture diagram."""
        print("=== Architecture ===")
        print(self.ARCHITECTURE)


arch = CrewAIArch()
arch.show_components()
arch.show_architecture()
CrewAI Implementation
# crewai_impl.py — CrewAI implementation example
import json
class CrewAIImplementation:
    """Holds a worked CrewAI example script and can print a preview of it."""

    # Number of characters of CODE that show_code() displays.
    _PREVIEW_CHARS = 800

    # Complete example: tools, three role-based agents, a sequential task
    # pipeline, and the crew kickoff call.
    CODE = """
# crew_setup.py — Multi-agent crew for data analysis
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool, FileReadTool
# Define Tools
search_tool = SerperDevTool()
file_tool = FileReadTool()
# Define Agents
researcher = Agent(
role="Senior Data Researcher",
goal="Find comprehensive data and insights on the given topic",
backstory="Expert researcher with 15 years experience in data analysis",
tools=[search_tool],
verbose=True,
memory=True,
llm="gpt-4o-mini",
)
analyst = Agent(
role="Data Analyst",
goal="Analyze data and create actionable insights",
backstory="Statistical expert specializing in trend analysis",
tools=[file_tool],
verbose=True,
memory=True,
)
writer = Agent(
role="Technical Writer",
goal="Create clear, comprehensive reports from analysis",
backstory="Award-winning technical writer with data viz skills",
verbose=True,
memory=True,
)
# Define Tasks
research_task = Task(
description="Research the latest trends in {topic}. "
"Find at least 5 credible sources with statistics.",
expected_output="Detailed research notes with sources and key statistics",
agent=researcher,
)
analysis_task = Task(
description="Analyze the research data and identify top 3 trends. "
"Provide statistical backing for each trend.",
expected_output="Analysis report with trends, statistics, and recommendations",
agent=analyst,
context=[research_task],
)
report_task = Task(
description="Write a comprehensive report based on the analysis. "
"Include executive summary, findings, and recommendations.",
expected_output="Final report in markdown format, 1000+ words",
agent=writer,
context=[analysis_task],
)
# Create Crew
crew = Crew(
agents=[researcher, analyst, writer],
tasks=[research_task, analysis_task, report_task],
process=Process.sequential,
memory=True,
verbose=True,
)
# Run
result = crew.kickoff(inputs={"topic": "AI in Healthcare 2025"})
print(result)
"""

    def show_code(self):
        """Print a header followed by the first 800 chars of the sample."""
        preview = self.CODE[: self._PREVIEW_CHARS]
        print("=== CrewAI Implementation ===")
        print(preview)


impl = CrewAIImplementation()
impl.show_code()
Backup Strategy
# backup.py — Multi-agent backup strategy
import json
import random
from datetime import datetime
class BackupStrategy:
    """Catalog of CrewAI artifacts worth backing up, plus a sample script."""

    # Each entry records what the artifact is, how it is stored, where it
    # lives, how often to back it up, and how critical losing it would be.
    BACKUP_ITEMS = {
        "agent_configs": {
            "what": "Agent configurations (role, goal, backstory, tools)",
            "format": "YAML/JSON files",
            "location": "configs/agents/",
            "frequency": "ทุกครั้งที่ config เปลี่ยน",
            "critical": "High — ถ้าหาย ต้อง recreate agents จากศูนย์",
        },
        "task_definitions": {
            "what": "Task definitions (description, expected output, dependencies)",
            "format": "YAML/JSON files",
            "location": "configs/tasks/",
            "frequency": "ทุกครั้งที่ task เปลี่ยน",
            "critical": "High — กำหนด workflow ทั้งหมด",
        },
        "memory_state": {
            "what": "Agent memory (short-term, long-term, entity)",
            "format": "SQLite/Vector DB",
            "location": "data/memory/",
            "frequency": "ทุก 1 ชั่วโมง หรือหลังทุก crew run",
            "critical": "Medium — สูญเสียได้แต่ agent จะลืม context",
        },
        "tool_configs": {
            "what": "Tool configurations (API keys, endpoints, credentials)",
            "format": "Encrypted YAML (SOPS)",
            "location": "configs/tools/",
            "frequency": "เมื่อ credentials เปลี่ยน",
            "critical": "High — agents ใช้ tools ไม่ได้ถ้า config หาย",
        },
        "task_history": {
            "what": "Task execution history (inputs, outputs, logs)",
            "format": "JSON logs / Database",
            "location": "data/history/",
            "frequency": "ทุก crew run",
            "critical": "Low — สำหรับ audit และ debugging",
        },
    }

    # Sample automation script; show_script() prints a truncated preview.
    BACKUP_SCRIPT = """
# backup_crew.py — Automated CrewAI backup
import shutil
import json
from datetime import datetime
from pathlib import Path
class CrewBackup:
def __init__(self, crew_dir=".", backup_dir="/backup/crewai"):
self.crew_dir = Path(crew_dir)
self.backup_dir = Path(backup_dir)
self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
def backup_configs(self):
src = self.crew_dir / "configs"
dest = self.backup_dir / f"configs-{self.timestamp}"
if src.exists():
shutil.copytree(src, dest)
print(f"Configs backed up: {dest}")
def backup_memory(self):
src = self.crew_dir / "data" / "memory"
dest = self.backup_dir / f"memory-{self.timestamp}"
if src.exists():
shutil.copytree(src, dest)
print(f"Memory backed up: {dest}")
def backup_history(self):
src = self.crew_dir / "data" / "history"
dest = self.backup_dir / f"history-{self.timestamp}"
if src.exists():
shutil.copytree(src, dest)
print(f"History backed up: {dest}")
def full_backup(self):
self.backup_configs()
self.backup_memory()
self.backup_history()
print(f"Full backup complete: {self.timestamp}")
backup = CrewBackup()
backup.full_backup()
"""

    def show_items(self):
        """Print a summary card for every backup item."""
        print("=== Backup Items ===\n")
        for entry in self.BACKUP_ITEMS.values():
            header = entry["what"][:50]
            print(f"[{header}]")
            print(f" Format: {entry['format']} | Freq: {entry['frequency']}")
            print(f" Critical: {entry['critical']}")
            print()

    def show_script(self):
        """Print the first 500 characters of the sample backup script."""
        print("=== Backup Script ===")
        print(self.BACKUP_SCRIPT[:500])


backup = BackupStrategy()
backup.show_items()
backup.show_script()
Recovery Procedures
# recovery.py — CrewAI recovery procedures
import json
class RecoveryProcedures:
    """Playbooks for recovering a CrewAI deployment from common failures."""

    # Each scenario lists a severity and an ordered recovery checklist.
    SCENARIOS = {
        "agent_failure": {
            "scenario": "Agent หยุดทำงานกลางคัน",
            "severity": "Medium",
            "steps": [
                "1. ตรวจ error logs: crew.log",
                "2. ตรวจ LLM API status (OpenAI, local LLM)",
                "3. Restart agent ด้วย last checkpoint",
                "4. ถ้า memory corrupt: restore จาก backup",
                "5. Re-run task ที่ fail ด้วย context จาก completed tasks",
            ],
        },
        "config_lost": {
            "scenario": "Agent/Task configurations สูญหาย",
            "severity": "High",
            "steps": [
                "1. Restore configs จาก Git history",
                "2. ถ้าไม่มี Git: restore จาก backup",
                "3. Verify agent roles, goals, tools ถูกต้อง",
                "4. Test run ด้วย sample input",
                "5. Compare output กับ historical results",
            ],
        },
        "memory_corruption": {
            "scenario": "Agent memory database เสียหาย",
            "severity": "Medium",
            "steps": [
                "1. Stop all crew operations",
                "2. Restore memory DB จาก latest backup",
                "3. ถ้าไม่มี backup: reset memory (agent จะลืม context)",
                "4. Re-run recent tasks เพื่อ rebuild memory",
                "5. Verify agent responses ถูกต้อง",
            ],
        },
        "llm_outage": {
            "scenario": "LLM API ล่ม (OpenAI, Anthropic)",
            "severity": "Critical",
            "steps": [
                "1. Switch to fallback LLM (local Ollama, alternative API)",
                "2. Update agent LLM config",
                "3. Re-run pending tasks",
                "4. Monitor fallback performance",
                "5. Switch back เมื่อ primary LLM กลับมา",
            ],
        },
    }

    # Sample LLM failover configuration; show_fallback() prints a preview.
    FALLBACK_CONFIG = """
# fallback_config.py — LLM fallback configuration
from crewai import LLM
# Primary LLM
primary_llm = LLM(model="gpt-4o-mini", temperature=0.7)
# Fallback LLMs (priority order)
fallbacks = [
LLM(model="ollama/mistral", base_url="http://localhost:11434"),
LLM(model="anthropic/claude-3-haiku"),
LLM(model="groq/llama-3.1-8b-instant"),
]
def get_llm():
try:
primary_llm.call("test")
return primary_llm
except Exception:
for fb in fallbacks:
try:
fb.call("test")
print(f"Using fallback: {fb.model}")
return fb
except:
continue
raise Exception("All LLMs unavailable!")
"""

    def show_scenarios(self):
        """Print each scenario with its severity and first three steps."""
        print("=== Recovery Scenarios ===\n")
        for playbook in self.SCENARIOS.values():
            print(f"[{playbook['scenario']}] Severity: {playbook['severity']}")
            for step in playbook["steps"][:3]:
                print(f" {step}")
            print()

    def show_fallback(self):
        """Print the first 500 characters of the fallback config sample."""
        print("=== LLM Fallback Config ===")
        print(self.FALLBACK_CONFIG[:500])


recovery = RecoveryProcedures()
recovery.show_scenarios()
recovery.show_fallback()
Monitoring & Health Checks
# monitoring.py — CrewAI monitoring
import json
import random
class CrewMonitoring:
    """Simulated health/metrics dashboard for a CrewAI deployment.

    Every number is a random stand-in generated with ``random`` — replace
    the fake probes with real checks (API pings, DB stats) for production.
    """

    def health_check(self):
        """Print one status line per infrastructure component."""
        print("=== Crew Health Check ===\n")
        checks = [
            {"component": "LLM API (OpenAI)", "status": "OK", "latency": f"{random.randint(200, 800)}ms"},
            {"component": "Agent Memory DB", "status": "OK", "size": f"{random.randint(50, 500)} MB"},
            {"component": "Tool: Search API", "status": "OK", "quota": f"{random.randint(500, 2000)} remaining"},
            {"component": "Tool: File System", "status": "OK", "disk": f"{random.randint(20, 80)}% used"},
            # Weighted choice: the queue occasionally reports WARN.
            {"component": "Task Queue", "status": random.choice(["OK", "OK", "WARN"]), "pending": random.randint(0, 5)},
        ]
        for c in checks:
            print(f" [{c['status']:>4}] {c['component']}")

    def crew_metrics(self):
        """Print aggregate crew performance metrics for the day."""
        print("\n=== Crew Performance Metrics ===")
        metrics = {
            "Total runs today": random.randint(5, 50),
            "Success rate": f"{random.uniform(90, 99.5):.1f}%",
            "Avg run time": f"{random.randint(30, 300)} seconds",
            "Total tokens used": f"{random.randint(10, 200)}K",
            # BUG FIX: the original value was an empty f-string, so the
            # cost metric always printed blank; show a dollar amount.
            "Cost today": f"${random.uniform(0.5, 25.0):.2f}",
            "Memory entries": random.randint(100, 5000),
        }
        for name, value in metrics.items():
            print(f" {name}: {value}")

    def agent_status(self):
        """Print per-agent task counts and success rates."""
        print("\n=== Agent Status ===")
        agents = [
            {"name": "Researcher", "tasks": random.randint(5, 20), "success": f"{random.randint(90, 100)}%"},
            {"name": "Analyst", "tasks": random.randint(5, 15), "success": f"{random.randint(85, 100)}%"},
            {"name": "Writer", "tasks": random.randint(5, 15), "success": f"{random.randint(90, 100)}%"},
        ]
        for a in agents:
            print(f" [{a['name']}] Tasks: {a['tasks']} | Success: {a['success']}")


mon = CrewMonitoring()
mon.health_check()
mon.crew_metrics()
mon.agent_status()
FAQ - คำถามที่พบบ่อย
Q: CrewAI กับ AutoGen กับ LangGraph อันไหนดี?
A: CrewAI: ง่ายที่สุด, role-based agents, ดีสำหรับ structured workflows
AutoGen: Microsoft, ดีสำหรับ conversational agents, code execution
LangGraph: LangChain-based, graph workflows, flexible ที่สุด
ใช้ CrewAI: เมื่อมี roles ชัดเจน, sequential/hierarchical tasks
ใช้ AutoGen: เมื่อ agents ต้อง discuss/debate กัน
ใช้ LangGraph: เมื่อ workflow ซับซ้อน, conditional branching
Q: ต้อง backup memory บ่อยแค่ไหน?
A: Production: ทุก crew run + ทุก 1 ชั่วโมง
Development: ทุกวัน
ขึ้นอยู่กับความสำคัญของ memory context:
ถ้า memory reset ได้ง่าย: backup น้อยลงได้
ถ้า memory สะสม knowledge สำคัญ: backup บ่อย
Q: CrewAI ใช้กับ local LLM ได้ไหม?
A: ได้
ใช้ Ollama: llm="ollama/mistral" หรือ "ollama/llama3"
ใช้ vLLM: custom OpenAI-compatible endpoint
ข้อจำกัด: local LLM อาจช้ากว่าและคุณภาพต่ำกว่า cloud LLMs
แนะนำ: ใช้ cloud LLM สำหรับ production, local สำหรับ development
Q: Multi-agent ดีกว่า single agent อย่างไร?
A: ดีกว่าเมื่อ: งานซับซ้อนต้องหลาย expertise, ต้อง checks and balances ระหว่าง agents ไม่จำเป็นเมื่อ: งานง่าย, single prompt เพียงพอ ข้อเสีย: ช้ากว่า (หลาย LLM calls), แพงกว่า (token cost), debug ยากกว่า ใช้เมื่อ value > cost
