LLM Fine-tuning ด้วย LoRA และ Remote Work Setup คืออะไร
LLM Fine-tuning คือการปรับแต่ง Large Language Model ที่ pre-trained แล้วให้เชี่ยวชาญในงานเฉพาะทาง LoRA (Low-Rank Adaptation) เป็นเทคนิค parameter-efficient fine-tuning ที่ลดจำนวน parameters ที่ต้อง train ลงอย่างมาก ทำให้ fine-tune โมเดลขนาดใหญ่ได้บน GPU ที่มี VRAM จำกัด Remote Work Setup คือการจัดสภาพแวดล้อมให้ทีม ML engineers ทำงาน fine-tuning จากระยะไกลได้อย่างมีประสิทธิภาพ ด้วย cloud GPUs, experiment tracking, model registry และ collaboration tools
LoRA Fundamentals
# lora_basics.py — LoRA fundamentals
import json
class LoRABasics:
    """Teaching aid comparing fine-tuning methods and showing the LoRA math.

    Pure demo/reference class: all state is constant class-level data and
    the ``show_*`` methods only print it to stdout.
    """

    # Comparison table of fine-tuning approaches.
    # Descriptions/costs are user-facing Thai text and are printed verbatim.
    CONCEPTS = {
        "full_finetuning": {
            "name": "Full Fine-tuning",
            "description": "Train ทุก parameters ของโมเดล",
            "params": "7B-70B+ parameters",
            "gpu_req": "4-8x A100 80GB (สำหรับ 7B model)",
            "cost": "สูงมาก",
        },
        "lora": {
            "name": "LoRA (Low-Rank Adaptation)",
            "description": "เพิ่ม low-rank matrices เข้า attention layers แล้ว train เฉพาะส่วนที่เพิ่ม",
            "params": "0.1-1% ของ total parameters",
            "gpu_req": "1x RTX 4090 24GB (สำหรับ 7B model)",
            "cost": "ต่ำมาก",
        },
        "qlora": {
            "name": "QLoRA (Quantized LoRA)",
            "description": "รวม 4-bit quantization + LoRA — ลด VRAM ลงอีก 50%+",
            "params": "0.1-1% + 4-bit base model",
            "gpu_req": "1x RTX 3090 24GB (สำหรับ 7B model)",
            "cost": "ต่ำที่สุด",
        },
    }

    # Worked example of the low-rank decomposition parameter count
    # (131,072 / 16,777,216 ≈ 0.8%, matching the text below).
    LORA_MATH = """
LoRA Math:
Original weight matrix W: d × k (e.g., 4096 × 4096 = 16M params)
LoRA decomposition:
W' = W + BA
where B: d × r, A: r × k (r = rank, typically 8-64)
Example (r=16):
B: 4096 × 16 = 65,536 params
A: 16 × 4096 = 65,536 params
Total LoRA: 131,072 params (0.8% of original 16M)
→ Train เฉพาะ B และ A (freeze W)
→ ลด trainable params 99%+
"""

    def show_concepts(self):
        """Print name, description, GPU requirement and cost for each method."""
        print("=== Fine-tuning Methods ===\n")
        # Dict keys are internal identifiers only — iterate the values directly.
        for concept in self.CONCEPTS.values():
            print(f"[{concept['name']}]")
            print(f" {concept['description']}")
            print(f" GPU: {concept['gpu_req']} | Cost: {concept['cost']}")
            print()

    def show_math(self):
        """Print the LoRA parameter-count walkthrough."""
        print("=== LoRA Math ===")
        print(self.LORA_MATH)
# Demo driver: render the comparison table, then the math walkthrough.
lora = LoRABasics()
for _render in (lora.show_concepts, lora.show_math):
    _render()
Fine-tuning Pipeline
# finetune.py — LoRA fine-tuning pipeline
import json
class FineTunePipeline:
    """Demo class presenting a QLoRA training script and hyperparameter tips.

    TRAINING_CODE is reference content printed (truncated) to stdout; it is
    never executed here.
    """

    # NOTE(review): the embedded script passes tokenizer=/max_seq_length=/
    # dataset_text_field= directly to SFTTrainer, which matches older trl
    # releases; newer trl moves these onto SFTConfig — confirm target version.
    TRAINING_CODE = """
# train_lora.py — LoRA fine-tuning with Hugging Face
import torch
from transformers import (
AutoModelForCausalLM, AutoTokenizer,
TrainingArguments, BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import load_dataset
# 1. Load base model with 4-bit quantization (QLoRA)
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
bnb_4bit_use_double_quant=True,
)
model_name = "meta-llama/Llama-3.1-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
model_name,
quantization_config=bnb_config,
device_map="auto",
torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
# 2. Configure LoRA
lora_config = LoraConfig(
r=16, # LoRA rank
lora_alpha=32, # Scaling factor
target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj"],
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM",
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# Output: trainable params: 83,886,080 || all params: 8,030,261,248 || 1.04%
# 3. Load dataset
dataset = load_dataset("json", data_files="train_data.jsonl", split="train")
# 4. Training arguments
training_args = TrainingArguments(
output_dir="./output",
num_train_epochs=3,
per_device_train_batch_size=4,
gradient_accumulation_steps=4,
learning_rate=2e-4,
warmup_steps=100,
logging_steps=10,
save_steps=200,
bf16=True,
optim="paged_adamw_8bit",
report_to="wandb",
)
# 5. Train
trainer = SFTTrainer(
model=model,
args=training_args,
train_dataset=dataset,
tokenizer=tokenizer,
max_seq_length=2048,
dataset_text_field="text",
)
trainer.train()
# 6. Save LoRA adapter
model.save_pretrained("./lora-adapter")
tokenizer.save_pretrained("./lora-adapter")
"""

    def show_code(self):
        """Print the first 700 characters of the reference training script."""
        print("=== LoRA Training Code ===")
        # Truncated on purpose — the full script is long; this is a preview.
        print(self.TRAINING_CODE[:700])

    def hyperparameters(self):
        """Print recommended starting hyperparameters for LoRA fine-tuning."""
        # Plain literal — the original used an f-string with no placeholders.
        print("\n=== Recommended Hyperparameters ===")
        params = {
            "LoRA rank (r)": "8-64 (16 เป็น default ดี)",
            "LoRA alpha": "2× rank (r=16 → alpha=32)",
            "Learning rate": "1e-4 to 3e-4",
            "Batch size": "4-8 (ใช้ gradient accumulation ถ้า VRAM ไม่พอ)",
            "Epochs": "1-5 (monitor overfitting)",
            "Max seq length": "512-4096 (ตาม task)",
            "Warmup": "5-10% of total steps",
        }
        for p, v in params.items():
            print(f" {p}: {v}")
# Demo driver: show the training code preview, then the hyperparameter tips.
ft = FineTunePipeline()
for _step in (ft.show_code, ft.hyperparameters):
    _step()
Remote GPU Setup
# remote_gpu.py — Remote GPU setup for fine-tuning
import json
class RemoteGPUSetup:
    """Demo class listing GPU cloud providers and a remote setup script.

    PROVIDERS is a static comparison table; SETUP_SCRIPT is reference shell
    content printed (truncated) to stdout, never executed here.
    """

    # Provider comparison table (prices as listed at time of writing —
    # verify before relying on them).
    PROVIDERS = {
        "runpod": {
            "name": "RunPod",
            "gpus": ["A100 80GB", "H100", "RTX 4090"],
            "price": "$0.44-2.49/hr (on-demand)",
            "features": "Serverless, templates, persistent storage",
        },
        "lambda": {
            "name": "Lambda Cloud",
            "gpus": ["A100 80GB", "H100"],
            "price": "$1.10-2.49/hr",
            "features": "Simple UI, SSH access, persistent storage",
        },
        "vast_ai": {
            "name": "Vast.ai",
            "gpus": ["A100", "RTX 4090", "RTX 3090"],
            "price": "$0.20-1.50/hr (marketplace)",
            "features": "Cheapest, marketplace model, variable availability",
        },
        "colab_pro": {
            "name": "Google Colab Pro+",
            "gpus": ["A100 40GB", "T4", "V100"],
            "price": "$49.99/month",
            "features": "Notebooks, easy sharing, Google Drive integration",
        },
        "aws_sagemaker": {
            "name": "AWS SageMaker",
            "gpus": ["A100", "A10G", "T4"],
            "price": "$1.20-32.77/hr",
            "features": "Enterprise, managed training, MLOps integration",
        },
    }

    # FIX: the shebang must be the first line of the script for the kernel
    # to honor it — the original had a comment line before "#!/bin/bash".
    SETUP_SCRIPT = """
#!/bin/bash
# setup_remote.sh — Remote GPU environment setup
# Update and install basics
apt-get update && apt-get install -y git curl htop nvtop tmux
# Install Python packages
pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
pip install transformers datasets accelerate peft trl bitsandbytes
pip install wandb tensorboard evaluate scikit-learn
pip install flash-attn --no-build-isolation
# Login to services
huggingface-cli login --token $HF_TOKEN
wandb login $WANDB_API_KEY
# Clone training repo
git clone https://github.com/company/llm-finetune.git
cd llm-finetune
# Download base model (cache)
python -c "from transformers import AutoModelForCausalLM; AutoModelForCausalLM.from_pretrained('meta-llama/Llama-3.1-8B-Instruct')"
echo "Setup complete!"
"""

    def show_providers(self):
        """Print name, first two GPUs and price for each provider."""
        print("=== GPU Cloud Providers ===\n")
        # Dict keys are internal identifiers only — iterate the values directly.
        for provider in self.PROVIDERS.values():
            print(f"[{provider['name']}]")
            print(f" GPUs: {', '.join(provider['gpus'][:2])}")
            print(f" Price: {provider['price']}")
            print()

    def show_setup(self):
        """Print the first 400 characters of the setup script."""
        print("=== Setup Script ===")
        print(self.SETUP_SCRIPT[:400])
# Demo driver: list the providers, then print the setup script preview.
gpu = RemoteGPUSetup()
for _section in (gpu.show_providers, gpu.show_setup):
    _section()
Experiment Tracking & Collaboration
# experiment.py — Experiment tracking for remote teams
import json
import random
class ExperimentTracking:
    """Demo class for experiment-tracking tools and a mock team dashboard.

    TOOLS and WANDB_INTEGRATION are static reference content; the dashboard
    prints randomly generated (non-deterministic) placeholder metrics.
    """

    # Tool comparison table (prices as listed at time of writing).
    TOOLS = {
        "wandb": {
            "name": "Weights & Biases (W&B)",
            "features": ["Experiment tracking", "Model registry", "Team dashboards", "Artifact versioning"],
            "price": "Free (personal), $50/user/month (team)",
        },
        "mlflow": {
            "name": "MLflow",
            "features": ["Experiment tracking", "Model registry", "Self-hosted", "Open source"],
            "price": "Free (self-hosted), Databricks managed",
        },
        "huggingface_hub": {
            "name": "Hugging Face Hub",
            "features": ["Model hosting", "Dataset hosting", "Spaces (demos)", "Collaboration"],
            "price": "Free (public), $9/month (private)",
        },
    }

    # Reference snippet printed (truncated) by show_wandb; never executed.
    WANDB_INTEGRATION = """
# wandb_tracking.py — W&B experiment tracking
import wandb
# Initialize
wandb.init(
project="llm-finetune",
name="lora-r16-llama3-thai",
config={
"model": "meta-llama/Llama-3.1-8B-Instruct",
"lora_r": 16,
"lora_alpha": 32,
"learning_rate": 2e-4,
"epochs": 3,
"batch_size": 4,
"dataset": "thai-instruction-50k",
"gpu": "A100-80GB",
},
tags=["lora", "thai", "llama3"],
)
# Log metrics during training (automatic with TrainingArguments report_to="wandb")
# Manual logging:
wandb.log({"eval/loss": 0.85, "eval/accuracy": 0.92, "step": 1000})
# Log model artifact
artifact = wandb.Artifact("lora-adapter", type="model")
artifact.add_dir("./lora-adapter")
wandb.log_artifact(artifact)
wandb.finish()
"""

    def show_tools(self):
        """Print name, first three features and price for each tool."""
        print("=== Experiment Tracking Tools ===\n")
        # Dict keys are internal identifiers only — iterate the values directly.
        for tool in self.TOOLS.values():
            print(f"[{tool['name']}]")
            print(f" Features: {', '.join(tool['features'][:3])}")
            print(f" Price: {tool['price']}")
            print()

    def show_wandb(self):
        """Print the first 500 characters of the W&B integration snippet."""
        print("=== W&B Integration ===")
        print(self.WANDB_INTEGRATION[:500])

    def experiment_dashboard(self):
        """Print a mock experiment-comparison table with random metrics.

        Metrics are unseeded ``random.uniform`` draws — placeholder values
        for the demo, different on every run by design.
        """
        # Plain literal — the original used an f-string with no placeholders.
        print("\n=== Experiment Dashboard ===")
        experiments = [
            {"name": "lora-r8-llama3", "loss": f"{random.uniform(0.5, 1.2):.3f}", "acc": f"{random.uniform(0.80, 0.95):.2f}", "gpu_hrs": f"{random.uniform(1, 8):.1f}h"},
            {"name": "lora-r16-llama3", "loss": f"{random.uniform(0.4, 1.0):.3f}", "acc": f"{random.uniform(0.85, 0.96):.2f}", "gpu_hrs": f"{random.uniform(2, 12):.1f}h"},
            {"name": "lora-r32-llama3", "loss": f"{random.uniform(0.3, 0.9):.3f}", "acc": f"{random.uniform(0.87, 0.97):.2f}", "gpu_hrs": f"{random.uniform(4, 16):.1f}h"},
            {"name": "qlora-r16-mistral", "loss": f"{random.uniform(0.4, 1.1):.3f}", "acc": f"{random.uniform(0.82, 0.94):.2f}", "gpu_hrs": f"{random.uniform(1, 6):.1f}h"},
        ]
        print(f" {'Name':<25} {'Loss':>8} {'Acc':>6} {'GPU Hours':>10}")
        for e in experiments:
            print(f" {e['name']:<25} {e['loss']:>8} {e['acc']:>6} {e['gpu_hrs']:>10}")
# Demo driver: tools table, W&B snippet preview, then the mock dashboard.
exp = ExperimentTracking()
for _part in (exp.show_tools, exp.show_wandb, exp.experiment_dashboard):
    _part()
Dataset Preparation
# dataset.py — Dataset preparation for fine-tuning
import json
class DatasetPrep:
FORMATS = {
"instruction": {
"name": "Instruction Format (Alpaca-style)",
"example": '{"instruction": "แปลภาษาอังกฤษเป็นไทย", "input": "Hello world", "output": "สวัสดีชาวโลก"}',
},
"chat": {
"name": "Chat Format (ChatML)",
"example": '{"messages": [{"role": "system", "content": "คุณเป็น AI ผู้ช่วย"}, {"role": "user", "content": "Python คืออะไร"}, {"role": "assistant", "content": "Python เป็นภาษาโปรแกรม..."}]}',
},
"completion": {
"name": "Completion Format",
"example": '{"text": "[INST] คำถาม [/INST] คำตอบ"}',
},
}
PREP_SCRIPT = """
# prepare_dataset.py — Dataset preparation
import json
from datasets import Dataset
def prepare_chat_dataset(input_file, output_file):
data = []
with open(input_file) as f:
for line in f:
item = json.loads(line)
# Format as chat template
messages = item.get("messages", [])
if not messages:
messages = [
{"role": "system", "content": "คุณเป็น AI ผู้ช่วยภาษาไทย ตอบคำถามอย่างถูกต้องและเป็นประโยชน์"},
{"role": "user", "content": item.get("instruction", "") + " " + item.get("input", "")},
{"role": "assistant", "content": item.get("output", "")},
]
# Apply chat template
text = ""
for msg in messages:
if msg["role"] == "system":
text += f"<|system|>\\n{msg['content']}\\n"
elif msg["role"] == "user":
text += f"
