LLM Fine-tuning LoRA Troubleshooting: แนวทางแก้ปัญหา

LLM Fine-tuning ด้วย LoRA

LoRA (Low-Rank Adaptation) เป็นเทคนิค Fine-tuning LLM ที่ใช้ Memory น้อย โดยเพิ่ม Low-rank Matrices เข้าไปใน Attention Layers แทนการ Update Weight ทั้งหมด จึง Train ได้เร็วและได้ผลลัพธ์ดี

เนื้อหาเกี่ยวข้อง — อ่านต่อ: money hero day trade

บทความนี้รวบรวม Troubleshooting สำหรับปัญหาที่พบบ่อย ได้แก่ Out of Memory, Loss ไม่ลด, Overfitting และ Inference ช้า พร้อมวิธีแก้ไขที่ใช้ได้จริง

เนื้อหาเกี่ยวข้อง — ทำความเข้าใจ Python SQLAlchemy Audit Trail Logging

ปัญหา	สาเหตุ	วิธีแก้
OOM Error	VRAM ไม่พอ	QLoRA 4-bit, ลด batch_size
Loss ไม่ลด	LR ไม่เหมาะ	ปรับ LR, ตรวจ Data Format
Overfitting	Data น้อย	เพิ่ม Data, ลด Epochs
Inference ช้า	Model ใหญ่	Merge LoRA, Quantize

LoRA Fine-tuning Setup

# === LoRA Fine-tuning with Hugging Face ===
# pip install transformers peft trl datasets bitsandbytes accelerate

from dataclasses import dataclass
from typing import Optional

@dataclass
class LoRAConfig:
    """Hyperparameters for one LoRA / QLoRA fine-tuning run.

    The fields are grouped into adapter settings, training settings and
    memory optimisations. ``target_modules`` may be left as ``None``; in that
    case ``__post_init__`` replaces it with a fresh list of the attention and
    MLP projection names, so instances never share one default list.
    """
    model_name: str = "meta-llama/Llama-2-7b-hf"
    r: int = 16                    # LoRA rank
    lora_alpha: int = 32           # LoRA alpha (scaling)
    lora_dropout: float = 0.05     # Dropout
    # Target modules; None means "use the default list built in __post_init__".
    target_modules: Optional[list] = None
    bias: str = "none"             # Bias training
    task_type: str = "CAUSAL_LM"   # Task type

    # Training
    num_epochs: int = 3
    batch_size: int = 4
    gradient_accumulation: int = 4
    learning_rate: float = 2e-4
    max_seq_length: int = 1024
    warmup_ratio: float = 0.03
    weight_decay: float = 0.001

    # Optimization
    use_4bit: bool = True          # QLoRA
    use_flash_attn: bool = True    # Flash Attention
    gradient_checkpointing: bool = True

    def __post_init__(self):
        """Fill in the default ``target_modules`` when none were supplied."""
        # A mutable list cannot be a dataclass default, so it is built here,
        # once per instance.
        if self.target_modules is None:
            self.target_modules = [
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ]

# Fine-tuning Script
# import torch
# from transformers import (
#     AutoModelForCausalLM, AutoTokenizer,
#     BitsAndBytesConfig, TrainingArguments,
# )
# from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
# from trl import SFTTrainer
# from datasets import load_dataset
#
# # 1. Quantization Config (QLoRA)
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
#     bnb_4bit_use_double_quant=True,
# )
#
# # 2. Load Model
# model = AutoModelForCausalLM.from_pretrained(
#     config.model_name,
#     quantization_config=bnb_config,
#     device_map="auto",
#     attn_implementation="flash_attention_2",
#     torch_dtype=torch.bfloat16,
# )
# model = prepare_model_for_kbit_training(model)
#
# # 3. LoRA Config
# peft_config = LoraConfig(
#     r=config.r,
#     lora_alpha=config.lora_alpha,
#     lora_dropout=config.lora_dropout,
#     target_modules=config.target_modules,
#     bias=config.bias,
#     task_type=config.task_type,
# )
#
# # 4. Training Arguments
# training_args = TrainingArguments(
#     output_dir="./output",
#     num_train_epochs=config.num_epochs,
#     per_device_train_batch_size=config.batch_size,
#     gradient_accumulation_steps=config.gradient_accumulation,
#     learning_rate=config.learning_rate,
#     warmup_ratio=config.warmup_ratio,
#     weight_decay=config.weight_decay,
#     fp16=False, bf16=True,
#     gradient_checkpointing=True,
#     logging_steps=10,
#     save_strategy="epoch",
#     optim="paged_adamw_8bit",
#     lr_scheduler_type="cosine",
# )
#
# # 5. Train
# trainer = SFTTrainer(
#     model=model,
#     args=training_args,
#     train_dataset=dataset,
#     peft_config=peft_config,
#     max_seq_length=config.max_seq_length,
#     tokenizer=tokenizer,
#     dataset_text_field="text",
# )
# trainer.train()

# Build the default configuration and print a one-screen summary of it.
config = LoRAConfig()
print(
    "LoRA Fine-tuning Config:",
    f"  Model: {config.model_name}",
    f"  LoRA r={config.r}, alpha={config.lora_alpha}",
    f"  QLoRA 4-bit: {config.use_4bit}",
    f"  Batch: {config.batch_size} x {config.gradient_accumulation} accum",
    f"  LR: {config.learning_rate}",
    f"  Epochs: {config.num_epochs}",
    f"  Max Seq: {config.max_seq_length}",
    f"  Targets: {config.target_modules}",
    sep="\n",
)

Troubleshooting Guide

# troubleshooting.py — LLM Fine-tuning Troubleshooting
from dataclasses import dataclass, field
from typing import List, Dict

@dataclass
class Problem:
    """A single troubleshooting entry: a named problem and its remedies."""
    name: str                          # Title shown for the problem
    symptoms: List[str]                # Symptom descriptions
    causes: List[str]                  # Cause descriptions
    solutions: List[str]               # Solution descriptions
    code_fix: str = field(default="")  # Example snippet; empty when there is none

class LoRATroubleshooter:
    """Catalogue of common LoRA fine-tuning problems with lookup and printing.

    The catalogue is a list of ``Problem`` entries built once in
    ``_init_problems``. ``diagnose`` searches it by keyword and
    ``full_guide`` prints it.
    """

    def __init__(self):
        self.problems: List[Problem] = []
        self._init_problems()

    def _init_problems(self):
        """Populate ``self.problems`` with the built-in catalogue."""
        self.problems = [
            Problem(
                "Out of Memory (OOM)",
                [
                    "CUDA out of memory",
                    "RuntimeError: CUDA error",
                    "Killed (OOM Killer)",
                ],
                [
                    "Batch size ใหญ่เกินไป",
                    "Sequence length ยาวเกินไป",
                    "Model ใหญ่เกินสำหรับ GPU",
                    "ไม่ได้ใช้ Quantization",
                ],
                [
                    "ลด per_device_train_batch_size เป็น 1",
                    "ใช้ gradient_accumulation_steps=8",
                    "ใช้ QLoRA 4-bit (load_in_4bit=True)",
                    "ลด max_seq_length เป็น 512",
                    "ลด LoRA r เป็น 8",
                    "ใช้ gradient_checkpointing=True",
                    "ใช้ Flash Attention 2",
                    "ใช้ optim='paged_adamw_8bit'",
                ],
                "bnb_config = BitsAndBytesConfig(load_in_4bit=True, "
                "bnb_4bit_quant_type='nf4')",
            ),
            Problem(
                "Loss ไม่ลด (Loss Not Decreasing)",
                [
                    "Training loss คงที่",
                    "Loss กระโดดไปมา",
                    "Loss เพิ่มขึ้นแทนที่จะลด",
                ],
                [
                    "Learning rate สูงหรือต่ำเกินไป",
                    "Data format ไม่ถูกต้อง",
                    "Target modules ไม่ครบ",
                    "LoRA rank ต่ำเกินไป",
                    "Dataset มีปัญหา",
                ],
                [
                    "ใช้ LR=2e-4 สำหรับ LoRA (Default ที่ดี)",
                    "ตรวจ Chat Template ว่าถูกต้อง",
                    "เพิ่ม target_modules ให้ครบ",
                    "เพิ่ม LoRA r เป็น 32 หรือ 64",
                    "ใช้ lr_scheduler_type='cosine'",
                    "ตรวจ Dataset คุณภาพ ลบ Noise",
                    "ลอง warmup_ratio=0.03",
                ],
                "# ตรวจ data format\n"
                "# tokenizer.apply_chat_template(messages)",
            ),
            Problem(
                "Overfitting",
                [
                    "Train loss ลดแต่ Val loss เพิ่ม",
                    "Model ท่องจำ Training Data",
                    "ผลลัพธ์ดีกับ Train แย่กับ Data ใหม่",
                ],
                [
                    "Dataset น้อยเกินไป",
                    "Train นานเกินไป (Epochs มาก)",
                    "LoRA rank สูงเกินไป",
                    "ไม่มี Regularization",
                ],
                [
                    "เพิ่ม Training Data",
                    "ลด num_train_epochs เป็น 1-3",
                    "ลด LoRA r เป็น 8-16",
                    "เพิ่ม lora_dropout=0.1",
                    "เพิ่ม weight_decay=0.01",
                    "ใช้ Early Stopping",
                    "เพิ่ม Data Augmentation",
                ],
                "peft_config = LoraConfig(r=8, lora_dropout=0.1)",
            ),
            Problem(
                "Inference ช้า / Model ใหญ่",
                [
                    "Inference ช้ากว่า Base Model",
                    "Model ขนาดใหญ่",
                    "Latency สูง",
                ],
                [
                    "LoRA Adapter ยังไม่ Merge",
                    "ไม่ได้ Quantize สำหรับ Inference",
                    "ไม่ได้ใช้ KV Cache",
                ],
                [
                    "Merge LoRA เข้า Base Model",
                    "Quantize เป็น GGUF/AWQ/GPTQ",
                    "ใช้ vLLM หรือ TGI สำหรับ Serving",
                    "เปิด KV Cache",
                    "ใช้ Flash Attention",
                    "Batch Requests",
                ],
                "# Merge LoRA\n"
                "merged = model.merge_and_unload()\n"
                "merged.save_pretrained('merged_model')",
            ),
        ]

    def diagnose(self, symptom_keyword: str) -> "List[Problem]":
        """Return the problems whose name or symptoms mention the keyword.

        Matching is a case-insensitive substring test against the problem
        name and each of its symptom strings. Surrounding whitespace in the
        keyword is ignored.

        Args:
            symptom_keyword: Text to look for, e.g. ``"oom"`` or ``"loss"``.

        Returns:
            Matching problems in catalogue order, each at most once. A blank
            keyword matches nothing and yields an empty list.
        """
        needle = symptom_keyword.strip().lower()
        # An empty needle is a substring of every string; treat it as
        # "no query" instead of returning the whole catalogue.
        if not needle:
            return []
        return [
            problem
            for problem in self.problems
            if needle in problem.name.lower()
            or any(needle in symptom.lower() for symptom in problem.symptoms)
        ]

    def full_guide(self, max_solutions: int = 4):
        """Print the whole troubleshooting guide to stdout.

        For every problem this prints its name, all symptoms, the first
        ``max_solutions`` solutions and, when present, the code fix.

        Args:
            max_solutions: How many solutions to list per problem.
        """
        print(f"\n{'='*60}")
        print("LLM Fine-tuning LoRA Troubleshooting Guide")
        print(f"{'='*60}")

        for problem in self.problems:
            print(f"\n  [{problem.name}]")
            print("  Symptoms:")
            for s in problem.symptoms:
                print(f"    - {s}")
            print("  Solutions:")
            for s in problem.solutions[:max_solutions]:
                print(f"    ✓ {s}")
            if problem.code_fix:
                print("  Code Fix:")
                # code_fix may span several lines; indent each one.
                for line in problem.code_fix.split('\n'):
                    print(f"    {line}")

# Build the troubleshooter and print the complete guide to stdout.
troubleshooter = LoRATroubleshooter()
troubleshooter.full_guide()

Data Preparation

# data_preparation.py — Dataset Preparation สำหรับ LoRA
import json
from typing import List, Dict

class DatasetPreparator:
    """Collect instruction/response pairs as prompt-formatted training text.

    Each example is stored in ``self.data`` as ``{"text": <formatted prompt>}``.
    The prompt layout is chosen by ``template``: ``"chatml"`` and ``"llama2"``
    have their own layouts, and any other value uses the Alpaca layout.
    """

    # Chat Template Formats
    TEMPLATES = {
        "chatml": "<|im_start|>{role}\n{content}<|im_end|>",
        "llama2": "[INST] {instruction} [/INST] {response}",
        "alpaca": "### Instruction:\n{instruction}\n\n### Response:\n{response}",
    }

    def __init__(self, template="chatml"):
        self.template = template
        self.data: List[Dict] = []

    def add_example(self, instruction: str, response: str,
                    system: str = "You are a helpful assistant."):
        """Format one instruction/response pair and append it to the dataset.

        Args:
            instruction: The user prompt.
            response: The expected assistant answer.
            system: System prompt; used by the chatml and llama2 layouts and
                ignored by the Alpaca layout.
        """
        if self.template == "chatml":
            text = (
                f"<|im_start|>system\n{system}<|im_end|>\n"
                f"<|im_start|>user\n{instruction}<|im_end|>\n"
                f"<|im_start|>assistant\n{response}<|im_end|>"
            )
        elif self.template == "llama2":
            # The Llama 2 chat format wraps the system prompt in
            # <<SYS>> ... <</SYS>> tags inside the first [INST] block.
            text = (
                f"[INST] <<SYS>>\n{system}\n<</SYS>>\n\n"
                f"{instruction} [/INST] {response}"
            )
        else:  # alpaca
            text = f"### Instruction:\n{instruction}\n\n### Response:\n{response}"

        self.data.append({"text": text})

    def validate(self):
        """Check the dataset, print a short report and return the warnings.

        Warns when there are fewer than 100 examples, when examples are
        shorter than 50 characters after stripping, and when exact duplicate
        texts exist. An empty dataset is reported with an average length of 0
        instead of raising.

        Returns:
            The list of warning strings (empty when nothing was found).
        """
        issues = []
        total = len(self.data)
        texts = [d["text"] for d in self.data]

        if total < 100:
            issues.append(f"WARNING: Dataset เล็กมาก ({total} examples)")

        # Count examples that are too short to be useful.
        short = sum(1 for t in texts if len(t.strip()) < 50)
        if short > 0:
            issues.append(f"WARNING: {short} examples สั้นเกินไป")

        # Count exact duplicates.
        dupes = total - len(set(texts))
        if dupes > 0:
            issues.append(f"WARNING: {dupes} duplicate examples")

        # Guard the average against an empty dataset (division by zero).
        avg_len = sum(len(t) for t in texts) / total if total else 0.0

        print("\n  Dataset Validation:")
        print(f"    Total examples: {total}")
        print(f"    Template: {self.template}")
        print(f"    Avg length: {avg_len:.0f} chars")

        if issues:
            for issue in issues:
                print(f"    {issue}")
        else:
            print("    Status: OK")

        return issues

    def save(self, filepath):
        """Write the dataset to ``filepath`` as UTF-8 JSON and print a summary."""
        with open(filepath, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            json.dump(self.data, f, ensure_ascii=False, indent=2)
        print(f"  Saved: {filepath} ({len(self.data)} examples)")

    def recommended_config(self):
        """Print and return suggested hyperparameters for the dataset size.

        Smaller datasets get more epochs, a smaller batch size and a lower
        LoRA rank; the thresholds are 500 and 1000 examples.

        Returns:
            The suggested settings as a dict.
        """
        n = len(self.data)
        config = {
            "num_epochs": 3 if n > 1000 else 5 if n > 500 else 10,
            "batch_size": 4 if n > 1000 else 2,
            "lora_r": 16 if n > 1000 else 8,
            "learning_rate": "2e-4",
            "max_seq_length": 1024,
        }
        print(f"\n  Recommended Config for {n} examples:")
        for k, v in config.items():
            print(f"    {k}: {v}")
        return config

# Example: build a tiny ChatML dataset, then validate it and print the
# suggested hyperparameters.
prep = DatasetPreparator("chatml")

# (instruction, response) pairs.
examples = [
    (
        "Python list comprehension คืออะไร",
        "List comprehension เป็นวิธีสร้าง list แบบสั้น เช่น [x**2 for x in range(10)]",
    ),
    (
        "Docker กับ VM ต่างกันอย่างไร",
        "Docker ใช้ Container แชร์ OS Kernel เบากว่า VM ที่ต้อง Full OS แยก",
    ),
    (
        "REST API คืออะไร",
        "REST API เป็น Architecture Style สำหรับ Web Services ใช้ HTTP Methods GET POST PUT DELETE",
    ),
]

for inst, resp in examples:
    prep.add_example(instruction=inst, response=resp)

prep.validate()
prep.recommended_config()

เคล็ดลับ

QLoRA: ใช้ QLoRA 4-bit สำหรับ GPU VRAM จำกัด ผลลัพธ์ใกล้เคียง LoRA
LoRA Rank: เริ่มจาก r=16 ถ้า Loss ไม่ลดเพิ่มเป็น 32 หรือ 64
Learning Rate: ใช้ 2e-4 เป็นค่าเริ่มต้นที่ดีสำหรับ LoRA
Data Quality: คุณภาพ Data สำคัญกว่าปริมาณ ทำความสะอาดก่อน Train
Chat Template: ใช้ Chat Template ที่ตรงกับ Base Model
Merge LoRA: Merge LoRA เข้า Base Model ก่อน Deploy ลด Latency

LoRA คืออะไร

LoRA ย่อมาจาก Low-Rank Adaptation เป็นเทคนิค Fine-tuning LLM ที่ใช้ Memory น้อย โดยเพิ่ม Low-rank Matrices เข้าไปใน Attention Layers แทนการ Update Weight ทั้งหมด จึงใช้ GPU VRAM น้อย Train ได้เร็ว และให้ผลลัพธ์ใกล้เคียง Full Fine-tuning

แนะนำเพิ่มเติม — เรียนเทรดกับ iCafeForex

เนื้อหาเกี่ยวข้อง — ดูเพิ่มเติมเรื่อง LangChain Agent Shift Left Security