SiamCafe.net Blog
Technology

LLM Fine-tuning LoRA Remote Work Setup

llm fine tuning lora remote work setup
LLM Fine-tuning LoRA Remote Work Setup | SiamCafe Blog
2025-11-10 · อ. บอม — SiamCafe.net · 1,460 คำ

LLM Fine-tuning LoRA Remote Work Setup คืออะไร

LLM Fine-tuning คือการปรับแต่ง Large Language Model ที่ pre-trained แล้วให้เชี่ยวชาญในงานเฉพาะทาง LoRA (Low-Rank Adaptation) เป็นเทคนิค parameter-efficient fine-tuning ที่ลดจำนวน parameters ที่ต้อง train ลงอย่างมาก ทำให้ fine-tune โมเดลขนาดใหญ่ได้บน GPU ที่มี VRAM จำกัด Remote Work Setup คือการจัดสภาพแวดล้อมให้ทีม ML engineers ทำงาน fine-tuning จากระยะไกลได้อย่างมีประสิทธิภาพ ด้วย cloud GPUs, experiment tracking, model registry และ collaboration tools

LoRA Fundamentals

# lora_basics.py — LoRA fundamentals
import json

class LoRABasics:
    """Reference material comparing full fine-tuning, LoRA, and QLoRA.

    Holds a static comparison table (``CONCEPTS``) and a worked example of
    the LoRA low-rank decomposition arithmetic (``LORA_MATH``); the
    ``show_*`` methods print them for the article.
    """

    # Comparison table of fine-tuning approaches. Values are display
    # strings (mixed Thai/English) printed verbatim by show_concepts().
    CONCEPTS = {
        "full_finetuning": {
            "name": "Full Fine-tuning",
            "description": "Train ทุก parameters ของโมเดล",
            "params": "7B-70B+ parameters",
            "gpu_req": "4-8x A100 80GB (สำหรับ 7B model)",
            "cost": "สูงมาก",
        },
        "lora": {
            "name": "LoRA (Low-Rank Adaptation)",
            "description": "เพิ่ม low-rank matrices เข้า attention layers แล้ว train เฉพาะส่วนที่เพิ่ม",
            "params": "0.1-1% ของ total parameters",
            "gpu_req": "1x RTX 4090 24GB (สำหรับ 7B model)",
            "cost": "ต่ำมาก",
        },
        "qlora": {
            "name": "QLoRA (Quantized LoRA)",
            "description": "รวม 4-bit quantization + LoRA — ลด VRAM ลงอีก 50%+",
            "params": "0.1-1% + 4-bit base model",
            "gpu_req": "1x RTX 3090 24GB (สำหรับ 7B model)",
            "cost": "ต่ำที่สุด",
        },
    }

    # Parameter-count walkthrough for a rank-16 LoRA decomposition of a
    # 4096x4096 weight matrix; printed verbatim by show_math().
    LORA_MATH = """
    LoRA Math:
    
    Original weight matrix W: d × k (e.g., 4096 × 4096 = 16M params)
    
    LoRA decomposition:
    W' = W + BA
    where B: d × r, A: r × k (r = rank, typically 8-64)
    
    Example (r=16):
    B: 4096 × 16 = 65,536 params
    A: 16 × 4096 = 65,536 params
    Total LoRA: 131,072 params (0.8% of original 16M)
    
    → Train เฉพาะ B และ A (freeze W)
    → ลด trainable params 99%+
    """

    def show_concepts(self) -> None:
        """Print name, description, GPU requirement and cost of each method."""
        print("=== Fine-tuning Methods ===\n")
        # Only the values are used, so iterate .values() instead of .items()
        # (the original looped over .items() and discarded the key).
        for concept in self.CONCEPTS.values():
            print(f"[{concept['name']}]")
            print(f"  {concept['description']}")
            print(f"  GPU: {concept['gpu_req']} | Cost: {concept['cost']}")
            print()

    def show_math(self) -> None:
        """Print the LoRA parameter-count walkthrough."""
        print("=== LoRA Math ===")
        print(self.LORA_MATH)

# Demo driver: render the comparison table, then the LoRA math walkthrough.
lora = LoRABasics()
lora.show_concepts()
lora.show_math()

Fine-tuning Pipeline

# finetune.py — LoRA fine-tuning pipeline
import json

class FineTunePipeline:
    """Article snippet: a complete QLoRA training script plus hyperparameter tips.

    ``TRAINING_CODE`` is the full Hugging Face example held as a string so
    the article can display it; ``show_code`` prints a truncated preview and
    ``hyperparameters`` prints recommended value ranges.
    """

    # Full QLoRA training script (Hugging Face transformers/peft/trl).
    # Display content only — kept verbatim, never executed here.
    TRAINING_CODE = """
# train_lora.py — LoRA fine-tuning with Hugging Face
import torch
from transformers import (
    AutoModelForCausalLM, AutoTokenizer,
    TrainingArguments, BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import load_dataset

# 1. Load base model with 4-bit quantization (QLoRA)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model_name = "meta-llama/Llama-3.1-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# 2. Configure LoRA
lora_config = LoraConfig(
    r=16,                    # LoRA rank
    lora_alpha=32,           # Scaling factor
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# Output: trainable params: 83,886,080 || all params: 8,030,261,248 || 1.04%

# 3. Load dataset
dataset = load_dataset("json", data_files="train_data.jsonl", split="train")

# 4. Training arguments
training_args = TrainingArguments(
    output_dir="./output",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_steps=100,
    logging_steps=10,
    save_steps=200,
    bf16=True,
    optim="paged_adamw_8bit",
    report_to="wandb",
)

# 5. Train
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    max_seq_length=2048,
    dataset_text_field="text",
)
trainer.train()

# 6. Save LoRA adapter
model.save_pretrained("./lora-adapter")
tokenizer.save_pretrained("./lora-adapter")
"""

    def show_code(self) -> None:
        """Print the first 700 characters of the training script."""
        print("=== LoRA Training Code ===")
        print(self.TRAINING_CODE[:700])

    def hyperparameters(self) -> None:
        """Print recommended LoRA hyperparameter ranges for this setup."""
        # Plain string literal — the original used an f-string with no
        # placeholders (ruff F541); output is unchanged.
        print("\n=== Recommended Hyperparameters ===")
        params = {
            "LoRA rank (r)": "8-64 (16 เป็น default ดี)",
            "LoRA alpha": "2× rank (r=16 → alpha=32)",
            "Learning rate": "1e-4 to 3e-4",
            "Batch size": "4-8 (ใช้ gradient accumulation ถ้า VRAM ไม่พอ)",
            "Epochs": "1-5 (monitor overfitting)",
            "Max seq length": "512-4096 (ตาม task)",
            "Warmup": "5-10% of total steps",
        }
        for p, v in params.items():
            print(f"  {p}: {v}")

# Demo driver: print the (truncated) training script and hyperparameter tips.
ft = FineTunePipeline()
ft.show_code()
ft.hyperparameters()

Remote GPU Setup

# remote_gpu.py — Remote GPU setup for fine-tuning
import json

class RemoteGPUSetup:
    """Article snippet: GPU cloud provider comparison plus a bootstrap script.

    ``PROVIDERS`` is a static comparison table; ``SETUP_SCRIPT`` is the shell
    script used to prepare a fresh remote GPU instance.
    """

    # Provider comparison; values are display strings printed verbatim.
    PROVIDERS = {
        "runpod": {
            "name": "RunPod",
            "gpus": ["A100 80GB", "H100", "RTX 4090"],
            "price": "$0.44-2.49/hr (on-demand)",
            "features": "Serverless, templates, persistent storage",
        },
        "lambda": {
            "name": "Lambda Cloud",
            "gpus": ["A100 80GB", "H100"],
            "price": "$1.10-2.49/hr",
            "features": "Simple UI, SSH access, persistent storage",
        },
        "vast_ai": {
            "name": "Vast.ai",
            "gpus": ["A100", "RTX 4090", "RTX 3090"],
            "price": "$0.20-1.50/hr (marketplace)",
            "features": "Cheapest, marketplace model, variable availability",
        },
        "colab_pro": {
            "name": "Google Colab Pro+",
            "gpus": ["A100 40GB", "T4", "V100"],
            "price": "$49.99/month",
            "features": "Notebooks, easy sharing, Google Drive integration",
        },
        "aws_sagemaker": {
            "name": "AWS SageMaker",
            "gpus": ["A100", "A10G", "T4"],
            "price": "$1.20-32.77/hr",
            "features": "Enterprise, managed training, MLOps integration",
        },
    }

    # Environment bootstrap script. The shebang must be the very first line
    # of a shell script to select the interpreter, so it now precedes the
    # filename comment (the original had the two lines swapped, leaving the
    # shebang inert).
    SETUP_SCRIPT = """
#!/bin/bash
# setup_remote.sh — Remote GPU environment setup

# Update and install basics
apt-get update && apt-get install -y git curl htop nvtop tmux

# Install Python packages
pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
pip install transformers datasets accelerate peft trl bitsandbytes
pip install wandb tensorboard evaluate scikit-learn
pip install flash-attn --no-build-isolation

# Login to services
huggingface-cli login --token $HF_TOKEN
wandb login $WANDB_API_KEY

# Clone training repo
git clone https://github.com/company/llm-finetune.git
cd llm-finetune

# Download base model (cache)
python -c "from transformers import AutoModelForCausalLM; AutoModelForCausalLM.from_pretrained('meta-llama/Llama-3.1-8B-Instruct')"

echo "Setup complete!"
"""

    def show_providers(self) -> None:
        """Print each provider's name, first two GPU types, and price."""
        print("=== GPU Cloud Providers ===\n")
        # Keys are unused, so iterate .values() rather than .items().
        for provider in self.PROVIDERS.values():
            print(f"[{provider['name']}]")
            print(f"  GPUs: {', '.join(provider['gpus'][:2])}")
            print(f"  Price: {provider['price']}")
            print()

    def show_setup(self) -> None:
        """Print the first 400 characters of the setup script."""
        print("=== Setup Script ===")
        print(self.SETUP_SCRIPT[:400])
# Demo driver: list GPU cloud providers, then preview the setup script.
gpu = RemoteGPUSetup()
gpu.show_providers()
gpu.show_setup()

Experiment Tracking & Collaboration

# experiment.py — Experiment tracking for remote teams
import json
import random

class ExperimentTracking:
    """Article snippet: tracking-tool comparison, W&B example, mock dashboard.

    ``TOOLS`` is a static comparison table, ``WANDB_INTEGRATION`` is an
    example script held as a string, and ``experiment_dashboard`` prints a
    table of randomized demo rows (not real results).
    """

    # Tool comparison; values are display strings printed verbatim.
    TOOLS = {
        "wandb": {
            "name": "Weights & Biases (W&B)",
            "features": ["Experiment tracking", "Model registry", "Team dashboards", "Artifact versioning"],
            "price": "Free (personal), $50/user/month (team)",
        },
        "mlflow": {
            "name": "MLflow",
            "features": ["Experiment tracking", "Model registry", "Self-hosted", "Open source"],
            "price": "Free (self-hosted), Databricks managed",
        },
        "huggingface_hub": {
            "name": "Hugging Face Hub",
            "features": ["Model hosting", "Dataset hosting", "Spaces (demos)", "Collaboration"],
            "price": "Free (public), $9/month (private)",
        },
    }

    # W&B integration example; display content only, never executed here.
    WANDB_INTEGRATION = """
# wandb_tracking.py — W&B experiment tracking
import wandb

# Initialize
wandb.init(
    project="llm-finetune",
    name="lora-r16-llama3-thai",
    config={
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "lora_r": 16,
        "lora_alpha": 32,
        "learning_rate": 2e-4,
        "epochs": 3,
        "batch_size": 4,
        "dataset": "thai-instruction-50k",
        "gpu": "A100-80GB",
    },
    tags=["lora", "thai", "llama3"],
)

# Log metrics during training (automatic with TrainingArguments report_to="wandb")
# Manual logging:
wandb.log({"eval/loss": 0.85, "eval/accuracy": 0.92, "step": 1000})

# Log model artifact
artifact = wandb.Artifact("lora-adapter", type="model")
artifact.add_dir("./lora-adapter")
wandb.log_artifact(artifact)

wandb.finish()
"""

    def show_tools(self) -> None:
        """Print each tool's name, first three features, and price."""
        print("=== Experiment Tracking Tools ===\n")
        # Keys are unused, so iterate .values() rather than .items().
        for tool in self.TOOLS.values():
            print(f"[{tool['name']}]")
            print(f"  Features: {', '.join(tool['features'][:3])}")
            print(f"  Price: {tool['price']}")
            print()

    def show_wandb(self) -> None:
        """Print the first 500 characters of the W&B example."""
        print("=== W&B Integration ===")
        print(self.WANDB_INTEGRATION[:500])

    @staticmethod
    def _fake_run(name, loss, acc, hours):
        """Build one mock experiment row; loss/acc/hours are (lo, hi) bounds for random draws."""
        return {
            "name": name,
            "loss": f"{random.uniform(*loss):.3f}",
            "acc": f"{random.uniform(*acc):.2f}",
            "gpu_hrs": f"{random.uniform(*hours):.1f}h",
        }

    def experiment_dashboard(self) -> None:
        """Print a table of mock experiment results (randomized demo data)."""
        # Plain string literal — the original used an f-string with no
        # placeholders (ruff F541); output is unchanged.
        print("\n=== Experiment Dashboard ===")
        # The four copy-pasted row literals are collapsed into _fake_run;
        # random.uniform is still called in the same loss/acc/hours order.
        experiments = [
            self._fake_run("lora-r8-llama3", (0.5, 1.2), (0.80, 0.95), (1, 8)),
            self._fake_run("lora-r16-llama3", (0.4, 1.0), (0.85, 0.96), (2, 12)),
            self._fake_run("lora-r32-llama3", (0.3, 0.9), (0.87, 0.97), (4, 16)),
            self._fake_run("qlora-r16-mistral", (0.4, 1.1), (0.82, 0.94), (1, 6)),
        ]
        print(f"  {'Name':<25} {'Loss':>8} {'Acc':>6} {'GPU Hours':>10}")
        for e in experiments:
            print(f"  {e['name']:<25} {e['loss']:>8} {e['acc']:>6} {e['gpu_hrs']:>10}")

# Demo driver: tool comparison, W&B snippet preview, and a mock dashboard.
exp = ExperimentTracking()
exp.show_tools()
exp.show_wandb()
exp.experiment_dashboard()

Dataset Preparation

# dataset.py — Dataset preparation for fine-tuning
import json

class DatasetPrep:
    FORMATS = {
        "instruction": {
            "name": "Instruction Format (Alpaca-style)",
            "example": '{"instruction": "แปลภาษาอังกฤษเป็นไทย", "input": "Hello world", "output": "สวัสดีชาวโลก"}',
        },
        "chat": {
            "name": "Chat Format (ChatML)",
            "example": '{"messages": [{"role": "system", "content": "คุณเป็น AI ผู้ช่วย"}, {"role": "user", "content": "Python คืออะไร"}, {"role": "assistant", "content": "Python เป็นภาษาโปรแกรม..."}]}',
        },
        "completion": {
            "name": "Completion Format",
            "example": '{"text": "[INST] คำถาม [/INST] คำตอบ"}',
        },
    }

    PREP_SCRIPT = """
# prepare_dataset.py — Dataset preparation
import json
from datasets import Dataset

def prepare_chat_dataset(input_file, output_file):
    data = []
    with open(input_file) as f:
        for line in f:
            item = json.loads(line)
            
            # Format as chat template
            messages = item.get("messages", [])
            if not messages:
                messages = [
                    {"role": "system", "content": "คุณเป็น AI ผู้ช่วยภาษาไทย ตอบคำถามอย่างถูกต้องและเป็นประโยชน์"},
                    {"role": "user", "content": item.get("instruction", "") + " " + item.get("input", "")},
                    {"role": "assistant", "content": item.get("output", "")},
                ]
            
            # Apply chat template
            text = ""
            for msg in messages:
                if msg["role"] == "system":
                    text += f"<|system|>\\n{msg['content']}\\n"
                elif msg["role"] == "user":
                    text += f"

📖 บทความที่เกี่ยวข้อง

LLM Fine-tuning LoRA API Integration เชื่อมต่อระบบ — อ่านบทความ → | LLM Fine-tuning LoRA Real-time Processing — อ่านบทความ → | LLM Fine-tuning LoRA Domain Driven Design DDD — อ่านบทความ → | LLM Fine-tuning LoRA GitOps Workflow — อ่านบทความ → | LLM Fine-tuning LoRA Metric Collection — อ่านบทความ →

📚 ดูบทความทั้งหมด →