Pinecone Vector Database
Pinecone Vector Database Automation Embedding Semantic Search RAG LLM Index Upsert Query Metadata Namespace Serverless Python SDK
| Feature | Pinecone | Weaviate | ChromaDB | Qdrant |
|---|---|---|---|---|
| Type | Managed Cloud | Self-host/Cloud | Self-host | Self-host/Cloud |
| Scale | Auto (Serverless) | Manual/Auto | Small-Medium | Manual/Auto |
| Price | Free Tier + Pay | Free (Self-host) | Free (Open Source) | Free (Open Source) |
| Metadata | Yes (Filter) | Yes (Filter) | Yes (Filter) | Yes (Filter) |
| Ease | ง่ายมาก | ปานกลาง | ง่าย | ปานกลาง |
| Production | พร้อมใช้ | พร้อมใช้ | Dev/Small | พร้อมใช้ |
Python Automation Script
# === Pinecone Automation Script ===
# pip install pinecone openai   # NOTE: the old "pinecone-client" package is deprecated; the SDK is now published as "pinecone"
# from pinecone import Pinecone, ServerlessSpec
# import openai
# import os
#
# pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
# openai.api_key = os.environ['OPENAI_API_KEY']
#
# # Create Index
# pc.create_index(
# name="knowledge-base",
# dimension=1536, # OpenAI embedding dimension
# metric="cosine",
# spec=ServerlessSpec(cloud="aws", region="us-east-1")
# )
#
# index = pc.Index("knowledge-base")
#
# # Generate Embedding
# def get_embedding(text, model="text-embedding-3-small"):
# response = openai.embeddings.create(input=text, model=model)
# return response.data[0].embedding
#
# # Upsert Vectors (Batch)
# def upsert_documents(documents, namespace="default"):
# batch_size = 100
# for i in range(0, len(documents), batch_size):
# batch = documents[i:i+batch_size]
# vectors = []
# for doc in batch:
# embedding = get_embedding(doc['text'])
# vectors.append({
# 'id': doc['id'],
# 'values': embedding,
# 'metadata': {
# 'text': doc['text'],
# 'source': doc['source'],
# 'page': doc.get('page', 0)
# }
# })
# index.upsert(vectors=vectors, namespace=namespace)
#
# # Query (Semantic Search)
# def search(query, top_k=5, namespace="default", filter=None):
# embedding = get_embedding(query)
# results = index.query(
# vector=embedding, top_k=top_k,
# namespace=namespace, filter=filter,
# include_metadata=True
# )
# return results.matches
from dataclasses import dataclass
@dataclass
class PineconeOperation:
    """Reference card for one Pinecone SDK operation."""
    operation: str  # short operation name, e.g. "Upsert"
    method: str     # SDK call signature as a display string
    params: str     # summary of the important parameters
    use_case: str   # when to use it (Thai)


# Catalogue of the core Pinecone operations, built with explicit keyword
# arguments so each field is self-describing.
operations = [
    PineconeOperation(
        operation="Create Index",
        method="pc.create_index(name, dimension, metric, spec)",
        params="name: str, dimension: int, metric: cosine/euclidean/dotproduct",
        use_case="สร้าง Index ใหม่สำหรับเก็บ Vector",
    ),
    PineconeOperation(
        operation="Upsert",
        method="index.upsert(vectors, namespace)",
        params="vectors: [{id, values, metadata}], batch_size: 100",
        use_case="เพิ่ม/อัพเดท Vector พร้อม Metadata",
    ),
    PineconeOperation(
        operation="Query",
        method="index.query(vector, top_k, filter, include_metadata)",
        params="vector: list, top_k: int, filter: dict",
        use_case="ค้นหา Vector ที่คล้ายกัน Semantic Search",
    ),
    PineconeOperation(
        operation="Delete",
        method="index.delete(ids, namespace, filter)",
        params="ids: list หรือ filter: dict หรือ delete_all: True",
        use_case="ลบ Vector ตาม ID หรือ Metadata",
    ),
    PineconeOperation(
        operation="Describe Stats",
        method="index.describe_index_stats()",
        params="-",
        use_case="ดูจำนวน Vector ต่อ Namespace Dimension",
    ),
    PineconeOperation(
        operation="List Indexes",
        method="pc.list_indexes()",
        params="-",
        use_case="ดูรายการ Index ทั้งหมด",
    ),
]
# Print a human-readable summary of every catalogued operation.
print("=== Pinecone Operations ===")
for entry in operations:
    for line in (
        f"  [{entry.operation}]",
        f"    Method: {entry.method}",
        f"    Params: {entry.params}",
        f"    Use: {entry.use_case}",
    ):
        print(line)
RAG Pipeline
# === RAG (Retrieval Augmented Generation) Pipeline ===
# def rag_query(question, namespace="default"):
# # Step 1: Embed the question
# q_embedding = get_embedding(question)
#
# # Step 2: Search Pinecone for relevant chunks
# results = index.query(
# vector=q_embedding, top_k=5,
# namespace=namespace, include_metadata=True
# )
#
# # Step 3: Build context from results
# context = "\n\n".join([
# f"Source: {m.metadata['source']}\n{m.metadata['text']}"
# for m in results.matches
# ])
#
# # Step 4: Send to LLM with context
# response = openai.chat.completions.create(
# model="gpt-4o-mini",
# messages=[
# {"role": "system", "content": f"Answer based on context:\n{context}"},
# {"role": "user", "content": question}
# ]
# )
# return response.choices[0].message.content
@dataclass
class RAGStep:
    """One stage of a Retrieval-Augmented Generation pipeline."""
    step: int    # 1-based position in the pipeline
    name: str    # stage name
    tool: str    # tooling typically used at this stage
    detail: str  # what the stage does (Thai)
    tip: str     # practical advice (Thai)


# The six canonical RAG stages, in execution order, built with keyword
# arguments so each field is self-describing.
rag_steps = [
    RAGStep(
        step=1,
        name="Document Ingestion",
        tool="Python + LangChain / LlamaIndex",
        detail="อ่าน PDF Word HTML แบ่งเป็น Chunk 500-1000 tokens",
        tip="Overlap 100-200 tokens ระหว่าง Chunk ไม่ให้ Context ขาด",
    ),
    RAGStep(
        step=2,
        name="Embedding Generation",
        tool="OpenAI / Cohere / Sentence-transformers",
        detail="แปลง Chunk เป็น Vector Embedding",
        tip="ใช้ text-embedding-3-small ราคาถูก คุณภาพดี",
    ),
    RAGStep(
        step=3,
        name="Vector Storage",
        tool="Pinecone (Upsert)",
        detail="เก็บ Vector + Metadata เข้า Pinecone",
        tip="ใช้ Namespace แยกตาม Document Type หรือ Tenant",
    ),
    RAGStep(
        step=4,
        name="Query Embedding",
        tool="Same Embedding Model",
        detail="แปลงคำถาม User เป็น Vector",
        tip="ใช้ Model เดียวกับตอน Ingest",
    ),
    RAGStep(
        step=5,
        name="Similarity Search",
        tool="Pinecone (Query)",
        detail="ค้นหา Top-k Chunks ที่คล้ายกับคำถาม",
        tip="top_k=5-10 ใช้ Metadata Filter เพิ่มความแม่นยำ",
    ),
    RAGStep(
        step=6,
        name="LLM Generation",
        tool="GPT-4o / Llama 3 / Claude",
        detail="ส่ง Context + Question ให้ LLM สร้างคำตอบ",
        tip="System Prompt บอกให้ตอบจาก Context เท่านั้น",
    ),
]
# Print the pipeline stage-by-stage, one blank-line-separated section each.
print("=== RAG Pipeline ===")
for stage in rag_steps:
    for line in (
        f"\n  Step {stage.step}: {stage.name}",
        f"    Tool: {stage.tool}",
        f"    Detail: {stage.detail}",
        f"    Tip: {stage.tip}",
    ):
        print(line)
Monitoring & Cost
# === Pinecone Monitoring & Cost Optimization ===
@dataclass
class CostTier:
    """One Pinecone pricing tier and what it is suited for."""
    tier: str      # plan name
    vectors: str   # vector-capacity summary
    queries: str   # read/query allowance summary
    price: str     # price summary (Thai month wording)
    best_for: str  # recommended workload (Thai)


# Pricing tiers ordered cheapest-first, built with keyword arguments so
# each field is self-describing.
tiers = [
    CostTier(
        tier="Free (Starter)",
        vectors="100K vectors, 1 index",
        queries="Unlimited reads",
        price="$0/เดือน",
        best_for="ทดลอง Prototype POC",
    ),
    CostTier(
        tier="Serverless",
        vectors="Unlimited",
        queries="Pay per query ($0.08/1M read units)",
        price="$0.08-0.33/1M units",
        best_for="Variable workload ใช้น้อยจ่ายน้อย",
    ),
    CostTier(
        tier="Standard (Pod)",
        vectors="1M+ vectors per pod",
        queries="Unlimited",
        price="$70+/เดือน per pod",
        best_for="Consistent workload High throughput",
    ),
]
@dataclass
class MonitorMetric:
    """One operational metric to watch, with its alert rule."""
    metric: str     # metric name
    how: str        # how to measure it
    threshold: str  # alert threshold
    action: str     # remediation when the threshold trips (Thai)


# Operational checklist for a production Pinecone deployment, built with
# keyword arguments so each field is self-describing.
monitoring = [
    MonitorMetric(
        metric="Vector Count",
        how="index.describe_index_stats()",
        threshold="ใกล้ Limit ของ Plan",
        action="Upgrade Plan หรือลบ Vector เก่า",
    ),
    MonitorMetric(
        metric="Query Latency",
        how="Measure response time",
        threshold="> 200ms p99",
        action="เพิ่ม Replicas หรือลด top_k",
    ),
    MonitorMetric(
        metric="Index Fullness",
        how="describe_index_stats().index_fullness",
        threshold="> 80%",
        action="Scale up Pod หรือสร้าง Index ใหม่",
    ),
    MonitorMetric(
        metric="Error Rate",
        how="Monitor API responses",
        threshold="> 1%",
        action="Check API Key Quota Network",
    ),
    MonitorMetric(
        metric="Embedding Cost",
        how="Track API calls to OpenAI",
        threshold="Budget threshold",
        action="Cache Embeddings ลด Duplicate Calls",
    ),
]
# Summarise pricing tiers, then the monitoring checklist.
print("=== Cost Tiers ===")
for plan in tiers:
    for line in (
        f"  [{plan.tier}] {plan.price}",
        f"    Vectors: {plan.vectors} | Queries: {plan.queries}",
        f"    Best: {plan.best_for}",
    ):
        print(line)
print("\n=== Monitoring ===")
for check in monitoring:
    for line in (
        f"  [{check.metric}] Threshold: {check.threshold}",
        f"    How: {check.how} | Action: {check.action}",
    ):
        print(line)
เคล็ดลับ
- Chunk: แบ่ง Chunk 500-1000 tokens + Overlap 100-200 tokens
- Namespace: ใช้ Namespace แยกข้อมูลตาม Tenant หรือ Type
- Metadata: เก็บ Metadata ที่จำเป็น source page date ใช้ Filter
- Batch: Upsert เป็น Batch 100 vectors ต่อครั้ง
- Cache: Cache Embedding ที่ใช้บ่อย ลดค่า API Call
Pinecone คืออะไร
Pinecone เป็น Managed Vector Database สำหรับเก็บ Embedding ใช้ทำ Semantic Search, RAG และ Recommendation มีแบบ Serverless รองรับ Metadata และ Namespace เรียกใช้ผ่าน REST API หรือ Python SDK และ Scale อัตโนมัติ
Vector Embedding คืออะไร
Vector Embedding คือการแปลงข้อมูลให้เป็น Vector ตัวเลข แล้ววัดความคล้ายด้วย Cosine Similarity สร้างได้จากโมเดลของ OpenAI, Cohere, Sentence-transformers หรือ BGE โดยทั่วไปมี Dimension 384-1536 ข้อมูลที่ความหมายคล้ายกันจะมี Vector อยู่ใกล้กัน
Automation Script ทำอะไรได้
Create Index Upsert Query Delete Describe Stats Batch Processing Namespace Metadata Filter RAG Pipeline Monitor Backup Collection
RAG Pipeline ทำอย่างไร
Ingest Document Chunk Embed Upsert Pinecone Query Embedding Similarity Search Top-k Context LLM Generate Answer Source Citation
สรุป
Pinecone Vector Database Automation Embedding RAG Semantic Search Upsert Query Metadata Namespace Serverless Python LLM Production
