SiamCafe.net Blog
Technology

LLM Fine-tuning LoRA Remote Work Setup

llm fine tuning lora remote work setup
LLM Fine-tuning LoRA Remote Work Setup | SiamCafe Blog
2025-11-10· อ. บอม — SiamCafe.net· 1,460 คำ

LLM Fine-tuning LoRA Remote Work Setup คืออะไร

LLM Fine-tuning คือการปรับแต่ง Large Language Model ที่ pre-trained แล้วให้เชี่ยวชาญในงานเฉพาะทาง LoRA (Low-Rank Adaptation) เป็นเทคนิค parameter-efficient fine-tuning ที่ลดจำนวน parameters ที่ต้อง train ลงอย่างมาก ทำให้ fine-tune โมเดลขนาดใหญ่ได้บน GPU ที่มี VRAM จำกัด Remote Work Setup คือการจัดสภาพแวดล้อมให้ทีม ML engineers ทำงาน fine-tuning จากระยะไกลได้อย่างมีประสิทธิภาพ ด้วย cloud GPUs, experiment tracking, model registry และ collaboration tools

LoRA Fundamentals

# lora_basics.py — LoRA fundamentals
import json

class LoRABasics:
    """Compare fine-tuning strategies and illustrate the LoRA parameter math."""

    # Reference table: full fine-tuning vs. LoRA vs. QLoRA trade-offs.
    # Values are display strings (Thai/English mixed) printed by show_concepts().
    CONCEPTS = {
        "full_finetuning": {
            "name": "Full Fine-tuning",
            "description": "Train ทุก parameters ของโมเดล",
            "params": "7B-70B+ parameters",
            "gpu_req": "4-8x A100 80GB (สำหรับ 7B model)",
            "cost": "สูงมาก",
        },
        "lora": {
            "name": "LoRA (Low-Rank Adaptation)",
            "description": "เพิ่ม low-rank matrices เข้า attention layers แล้ว train เฉพาะส่วนที่เพิ่ม",
            "params": "0.1-1% ของ total parameters",
            "gpu_req": "1x RTX 4090 24GB (สำหรับ 7B model)",
            "cost": "ต่ำมาก",
        },
        "qlora": {
            "name": "QLoRA (Quantized LoRA)",
            "description": "รวม 4-bit quantization + LoRA — ลด VRAM ลงอีก 50%+",
            "params": "0.1-1% + 4-bit base model",
            "gpu_req": "1x RTX 3090 24GB (สำหรับ 7B model)",
            "cost": "ต่ำที่สุด",
        },
    }

    # Worked low-rank decomposition example; printed verbatim by show_math(),
    # so the literal's interior spacing is deliberately left untouched.
    LORA_MATH = """
 LoRA Math:
 
 Original weight matrix W: d × k (e.g., 4096 × 4096 = 16M params)
 
 LoRA decomposition:
 W' = W + BA
 where B: d × r, A: r × k (r = rank, typically 8-64)
 
 Example (r=16):
 B: 4096 × 16 = 65,536 params
 A: 16 × 4096 = 65,536 params
 Total LoRA: 131,072 params (0.8% of original 16M)
 
 → Train เฉพาะ B และ A (freeze W)
 → ลด trainable params 99%+
 """

    def show_concepts(self):
        """Print a one-screen comparison of the three fine-tuning methods."""
        lines = ["=== Fine-tuning Methods ===", ""]
        for info in self.CONCEPTS.values():
            lines += [
                f"[{info['name']}]",
                f" {info['description']}",
                f" GPU: {info['gpu_req']} | Cost: {info['cost']}",
                "",
            ]
        print("\n".join(lines))

    def show_math(self):
        """Print the rank-decomposition arithmetic behind LoRA."""
        print("=== LoRA Math ===", self.LORA_MATH, sep="\n")

# Demo: print the method comparison, then the LoRA math walkthrough.
lora = LoRABasics()
for _show in (lora.show_concepts, lora.show_math):
    _show()

Fine-tuning Pipeline

# finetune.py — LoRA fine-tuning pipeline
import json

class FineTunePipeline:
    """Show a reference QLoRA training script and recommended hyperparameters.

    TRAINING_CODE is a verbatim Hugging Face PEFT/TRL example that the
    methods only print (truncated); nothing here executes the training.
    """

    # Complete train_lora.py example (displayed for readers, not executed).
    TRAINING_CODE = """
# train_lora.py — LoRA fine-tuning with Hugging Face
import torch
from transformers import (
 AutoModelForCausalLM, AutoTokenizer,
 TrainingArguments, BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import load_dataset

# 1. Load base model with 4-bit quantization (QLoRA)
bnb_config = BitsAndBytesConfig(
 load_in_4bit=True,
 bnb_4bit_quant_type="nf4",
 bnb_4bit_compute_dtype=torch.bfloat16,
 bnb_4bit_use_double_quant=True,
)

model_name = "meta-llama/Llama-3.1-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
 model_name,
 quantization_config=bnb_config,
 device_map="auto",
 torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# 2. Configure LoRA
lora_config = LoraConfig(
 r=16, # LoRA rank
 lora_alpha=32, # Scaling factor
 target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
 "gate_proj", "up_proj", "down_proj"],
 lora_dropout=0.05,
 bias="none",
 task_type="CAUSAL_LM",
)

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# Output: trainable params: 83,886,080 || all params: 8,030,261,248 || 1.04%

# 3. Load dataset
dataset = load_dataset("json", data_files="train_data.jsonl", split="train")

# 4. Training arguments
training_args = TrainingArguments(
 output_dir="./output",
 num_train_epochs=3,
 per_device_train_batch_size=4,
 gradient_accumulation_steps=4,
 learning_rate=2e-4,
 warmup_steps=100,
 logging_steps=10,
 save_steps=200,
 bf16=True,
 optim="paged_adamw_8bit",
 report_to="wandb",
)

# 5. Train
trainer = SFTTrainer(
 model=model,
 args=training_args,
 train_dataset=dataset,
 tokenizer=tokenizer,
 max_seq_length=2048,
 dataset_text_field="text",
)
trainer.train()

# 6. Save LoRA adapter
model.save_pretrained("./lora-adapter")
tokenizer.save_pretrained("./lora-adapter")
"""

    def show_code(self):
        """Print the first 700 characters of the training script example."""
        print("=== LoRA Training Code ===")
        print(self.TRAINING_CODE[:700])

    def hyperparameters(self):
        """Print recommended LoRA hyperparameter ranges, one per line."""
        # Plain string: the original used an f-string with no placeholders
        # (ruff F541) — same output, just the correct construct.
        print("\n=== Recommended Hyperparameters ===")
        params = {
            "LoRA rank (r)": "8-64 (16 เป็น default ดี)",
            "LoRA alpha": "2× rank (r=16 → alpha=32)",
            "Learning rate": "1e-4 to 3e-4",
            "Batch size": "4-8 (ใช้ gradient accumulation ถ้า VRAM ไม่พอ)",
            "Epochs": "1-5 (monitor overfitting)",
            "Max seq length": "512-4096 (ตาม task)",
            "Warmup": "5-10% of total steps",
        }
        for p, v in params.items():
            print(f" {p}: {v}")

# Demo: dump the training-script excerpt, then the tuning cheat sheet.
ft = FineTunePipeline()
for _show in (ft.show_code, ft.hyperparameters):
    _show()

Remote GPU Setup

# remote_gpu.py — Remote GPU setup for fine-tuning
import json

class RemoteGPUSetup:
    """Catalog GPU cloud providers and a bootstrap script for remote training."""

    # Provider comparison: available GPUs, hourly price, and notable features.
    PROVIDERS = {
        "runpod": {
            "name": "RunPod",
            "gpus": ["A100 80GB", "H100", "RTX 4090"],
            "price": "$0.44-2.49/hr (on-demand)",
            "features": "Serverless, templates, persistent storage",
        },
        "lambda": {
            "name": "Lambda Cloud",
            "gpus": ["A100 80GB", "H100"],
            "price": "$1.10-2.49/hr",
            "features": "Simple UI, SSH access, persistent storage",
        },
        "vast_ai": {
            "name": "Vast.ai",
            "gpus": ["A100", "RTX 4090", "RTX 3090"],
            "price": "$0.20-1.50/hr (marketplace)",
            "features": "Cheapest, marketplace model, variable availability",
        },
        "colab_pro": {
            "name": "Google Colab Pro+",
            "gpus": ["A100 40GB", "T4", "V100"],
            "price": "$49.99/month",
            "features": "Notebooks, easy sharing, Google Drive integration",
        },
        "aws_sagemaker": {
            "name": "AWS SageMaker",
            "gpus": ["A100", "A10G", "T4"],
            "price": "$1.20-32.77/hr",
            "features": "Enterprise, managed training, MLOps integration",
        },
    }

    # Environment bootstrap script (printed for readers to copy).
    # Fixed: the shebang must be the very first line of a shell script —
    # the original placed a comment line above `#!/bin/bash`, which would
    # make the kernel ignore the interpreter line.
    SETUP_SCRIPT = """
#!/bin/bash
# setup_remote.sh — Remote GPU environment setup

# Update and install basics
apt-get update && apt-get install -y git curl htop nvtop tmux

# Install Python packages
pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
pip install transformers datasets accelerate peft trl bitsandbytes
pip install wandb tensorboard evaluate scikit-learn
pip install flash-attn --no-build-isolation

# Login to services
huggingface-cli login --token $HF_TOKEN
wandb login $WANDB_API_KEY

# Clone training repo
git clone https://github.com/company/llm-finetune.git
cd llm-finetune

# Download base model (cache)
python -c "from transformers import AutoModelForCausalLM; AutoModelForCausalLM.from_pretrained('meta-llama/Llama-3.1-8B-Instruct')"

echo "Setup complete!"
"""

    def show_providers(self):
        """Print each provider's name, top two GPUs, and price range."""
        out = ["=== GPU Cloud Providers ===", ""]
        for entry in self.PROVIDERS.values():
            out += [
                f"[{entry['name']}]",
                f" GPUs: {', '.join(entry['gpus'][:2])}",
                f" Price: {entry['price']}",
                "",
            ]
        print("\n".join(out))

    def show_setup(self):
        """Print the first 400 characters of the bootstrap script."""
        print("=== Setup Script ===")
        print(self.SETUP_SCRIPT[:400])

# Demo: list the providers, then print the bootstrap-script excerpt.
gpu = RemoteGPUSetup()
for _show in (gpu.show_providers, gpu.show_setup):
    _show()

Experiment Tracking & Collaboration

# experiment.py — Experiment tracking for remote teams
import json
import random

class ExperimentTracking:
    """Survey experiment-tracking tools and render a mock experiment dashboard.

    experiment_dashboard() fills the table with random demo values via the
    module-level `random` import — it is illustrative output, not real runs.
    """

    # Tracking-stack comparison: top features and pricing per tool.
    TOOLS = {
        "wandb": {
            "name": "Weights & Biases (W&B)",
            "features": ["Experiment tracking", "Model registry", "Team dashboards", "Artifact versioning"],
            "price": "Free (personal), $50/user/month (team)",
        },
        "mlflow": {
            "name": "MLflow",
            "features": ["Experiment tracking", "Model registry", "Self-hosted", "Open source"],
            "price": "Free (self-hosted), Databricks managed",
        },
        "huggingface_hub": {
            "name": "Hugging Face Hub",
            "features": ["Model hosting", "Dataset hosting", "Spaces (demos)", "Collaboration"],
            "price": "Free (public), $9/month (private)",
        },
    }

    # Verbatim W&B integration example (printed, not executed).
    WANDB_INTEGRATION = """
# wandb_tracking.py — W&B experiment tracking
import wandb

# Initialize
wandb.init(
 project="llm-finetune",
 name="lora-r16-llama3-thai",
 config={
 "model": "meta-llama/Llama-3.1-8B-Instruct",
 "lora_r": 16,
 "lora_alpha": 32,
 "learning_rate": 2e-4,
 "epochs": 3,
 "batch_size": 4,
 "dataset": "thai-instruction-50k",
 "gpu": "A100-80GB",
 },
 tags=["lora", "thai", "llama3"],
)

# Log metrics during training (automatic with TrainingArguments report_to="wandb")
# Manual logging:
wandb.log({"eval/loss": 0.85, "eval/accuracy": 0.92, "step": 1000})

# Log model artifact
artifact = wandb.Artifact("lora-adapter", type="model")
artifact.add_dir("./lora-adapter")
wandb.log_artifact(artifact)

wandb.finish()
"""

    def show_tools(self):
        """Print each tracking tool with its first three features and price."""
        print("=== Experiment Tracking Tools ===\n")
        for tool in self.TOOLS.values():
            print(f"[{tool['name']}]")
            print(f" Features: {', '.join(tool['features'][:3])}")
            print(f" Price: {tool['price']}")
            print()

    def show_wandb(self):
        """Print the first 500 characters of the W&B example."""
        print("=== W&B Integration ===")
        print(self.WANDB_INTEGRATION[:500])

    def experiment_dashboard(self):
        """Print a fixed-width table of mock experiment runs (random demo metrics)."""
        # Plain string: the original used an f-string with no placeholders
        # (ruff F541) — same output, correct construct.
        print("\n=== Experiment Dashboard ===")
        experiments = [
            {"name": "lora-r8-llama3", "loss": f"{random.uniform(0.5, 1.2):.3f}", "acc": f"{random.uniform(0.80, 0.95):.2f}", "gpu_hrs": f"{random.uniform(1, 8):.1f}h"},
            {"name": "lora-r16-llama3", "loss": f"{random.uniform(0.4, 1.0):.3f}", "acc": f"{random.uniform(0.85, 0.96):.2f}", "gpu_hrs": f"{random.uniform(2, 12):.1f}h"},
            {"name": "lora-r32-llama3", "loss": f"{random.uniform(0.3, 0.9):.3f}", "acc": f"{random.uniform(0.87, 0.97):.2f}", "gpu_hrs": f"{random.uniform(4, 16):.1f}h"},
            {"name": "qlora-r16-mistral", "loss": f"{random.uniform(0.4, 1.1):.3f}", "acc": f"{random.uniform(0.82, 0.94):.2f}", "gpu_hrs": f"{random.uniform(1, 6):.1f}h"},
        ]
        print(f" {'Name':<25} {'Loss':>8} {'Acc':>6} {'GPU Hours':>10}")
        for e in experiments:
            print(f" {e['name']:<25} {e['loss']:>8} {e['acc']:>6} {e['gpu_hrs']:>10}")

# Demo: tools overview, W&B snippet, then the mock dashboard.
exp = ExperimentTracking()
for _show in (exp.show_tools, exp.show_wandb, exp.experiment_dashboard):
    _show()

Dataset Preparation

# dataset.py — Dataset preparation for fine-tuning
import json

class DatasetPrep:
 FORMATS = {
 "instruction": {
 "name": "Instruction Format (Alpaca-style)",
 "example": '{"instruction": "แปลภาษาอังกฤษเป็นไทย", "input": "Hello world", "output": "สวัสดีชาวโลก"}',
 },
 "chat": {
 "name": "Chat Format (ChatML)",
 "example": '{"messages": [{"role": "system", "content": "คุณเป็น AI ผู้ช่วย"}, {"role": "user", "content": "Python คืออะไร"}, {"role": "assistant", "content": "Python เป็นภาษาโปรแกรม..."}]}',
 },
 "completion": {
 "name": "Completion Format",
 "example": '{"text": "[INST] คำถาม [/INST] คำตอบ"}',
 },
 }

 PREP_SCRIPT = """
# prepare_dataset.py — Dataset preparation
import json
from datasets import Dataset

def prepare_chat_dataset(input_file, output_file):
 data = []
 with open(input_file) as f:
 for line in f:
 item = json.loads(line)
 
 # Format as chat template
 messages = item.get("messages", [])
 if not messages:
 messages = [
 {"role": "system", "content": "คุณเป็น AI ผู้ช่วยภาษาไทย ตอบคำถามอย่างถูกต้องและเป็นประโยชน์"},
 {"role": "user", "content": item.get("instruction", "") + " " + item.get("input", "")},
 {"role": "assistant", "content": item.get("output", "")},
 ]
 
 # Apply chat template
 text = ""
 for msg in messages:
 if msg["role"] == "system":
 text += f"<|system|>\\n{msg['content']}\\n"
 elif msg["role"] == "user":
 text += f"

📖 บทความที่เกี่ยวข้อง

LLM Fine-tuning LoRA API Integration เชื่อมต่อระบบ — อ่านบทความ →
LLM Fine-tuning LoRA Real-time Processing — อ่านบทความ →
LLM Fine-tuning LoRA Domain Driven Design DDD — อ่านบทความ →
LLM Fine-tuning LoRA GitOps Workflow — อ่านบทความ →
LLM Fine-tuning LoRA Metric Collection — อ่านบทความ →

📚 ดูบทความทั้งหมด →