微调工具链与自动化
High Contrast
Dark Mode
Light Mode
Sepia
Forest
1 min read · 243 words

微调工具链与自动化

手动微调一次容易,持续迭代需要自动化流水线。本章构建从数据到部署的端到端管道。

微调 MLOps 全景

graph TB
    A[数据管理] --> B[训练管道]
    B --> C[评估管道]
    C --> D[模型仓库]
    D --> E[部署管道]
    E --> F[监控反馈]
    F --> A
    style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
    style F fill:#e8f5e9,stroke:#388e3c,stroke-width:2px

工具链对比

| 工具 | 定位 | 支持方法 | 易用性 | 生产就绪 |
| --- | --- | --- | --- | --- |
| Axolotl | 全功能微调 | SFT/DPO/RLHF | ★★★★☆ | ★★★★★ |
| Unsloth | 高速微调 | SFT/DPO | ★★★★★ | ★★★★☆ |
| LLaMA-Factory | 中文友好 | SFT/DPO/PPO | ★★★★★ | ★★★★☆ |
| TRL | HuggingFace 官方 | SFT/DPO/PPO | ★★★☆☆ | ★★★★☆ |
| OpenRLHF | RLHF 专精 | PPO/DPO | ★★★☆☆ | ★★★☆☆ |
| MLX | Apple Silicon | SFT/LoRA | ★★★★☆ | ★★★☆☆ |

自动化训练配置

"""
微调自动化管道
"""
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
import time
class PipelineStage(Enum):
    """Stages of the fine-tuning pipeline, listed in execution order.

    Each member's value is the snake_case string used as the stage key in
    the pipeline summary.
    """

    DATA_VALIDATION = "data_validation"
    PREPROCESSING = "preprocessing"
    TRAINING = "training"
    EVALUATION = "evaluation"
    MODEL_REGISTRY = "model_registry"
    DEPLOYMENT = "deployment"
@dataclass
class PipelineConfig:
    """Configuration for one fine-tuning pipeline run.

    Only ``name`` is required; every other field has a default. The fields
    are grouped below into data, training hyper-parameters, evaluation
    gate thresholds, and deployment behaviour.
    """

    name: str
    base_model: str = "meta-llama/Llama-3.1-8B-Instruct"
    method: str = "lora"  # one of: lora / qlora / full

    # Data
    train_data: str = ""
    eval_data: str = ""

    # Training
    lora_r: int = 16
    learning_rate: float = 2e-4
    num_epochs: int = 3
    batch_size: int = 4

    # Evaluation gate thresholds
    min_accuracy: float = 0.85
    max_harmful_rate: float = 0.01
    max_capability_drop: float = 0.02  # general-capability drop must stay under 2%

    # Automatic deployment: when True the pipeline appends a deployment stage
    auto_deploy: bool = False
@dataclass
class StageResult:
    """Outcome of a single pipeline stage.

    Attributes are filled in by the pipeline while the stage runs:
    ``status`` moves from its initial value to ``"passed"`` or ``"failed"``,
    ``metrics`` holds whatever the stage handler returned, and ``error``
    holds the failure message when the handler raised.
    """

    stage: "PipelineStage"
    status: str = "pending"  # pending / running / passed / failed
    duration_s: float = 0.0
    # default_factory gives every result its own dict instead of a shared one
    metrics: dict[str, Any] = field(default_factory=dict)
    error: str = ""
class FineTunePipeline:
    """Automated fine-tuning pipeline that runs its stages in a fixed order.

    The stage handlers in this class (`_validate_data`, `_preprocess`,
    `_train`, `_evaluate`, `_register`, `_deploy`) are stubs that return
    fixed metrics; the class demonstrates the orchestration, gating and
    reporting around them.
    """

    def __init__(self, config: "PipelineConfig"):
        """Store the configuration and start with no recorded stage results."""
        self.config = config
        self._results: dict[PipelineStage, StageResult] = {}

    def run(self) -> dict:
        """Execute the whole pipeline and return its summary.

        Stages run in order and execution stops at the first failed stage.
        The deployment stage is only included when ``config.auto_deploy``
        is true.

        Returns:
            The dictionary produced by :meth:`get_summary`.
        """
        # Start from a clean slate so a re-run never reports stale stages.
        self._results = {}

        stages = [
            (PipelineStage.DATA_VALIDATION, self._validate_data),
            (PipelineStage.PREPROCESSING, self._preprocess),
            (PipelineStage.TRAINING, self._train),
            (PipelineStage.EVALUATION, self._evaluate),
            (PipelineStage.MODEL_REGISTRY, self._register),
        ]
        if self.config.auto_deploy:
            stages.append((PipelineStage.DEPLOYMENT, self._deploy))

        for stage, handler in stages:
            if self._run_stage(stage, handler).status == "failed":
                break
        return self.get_summary()

    def _run_stage(self, stage: "PipelineStage", handler) -> "StageResult":
        """Run one stage handler and record its outcome.

        Args:
            stage: The stage being executed; used as the key in the results.
            handler: Zero-argument callable returning the stage's metrics dict.

        Returns:
            The recorded result. Any exception raised by ``handler`` is
            captured as a ``"failed"`` status with the message in ``error``
            instead of propagating.
        """
        result = StageResult(stage=stage, status="running")
        # perf_counter is monotonic, so the duration cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        try:
            metrics = handler()
        except Exception as e:
            result.status = "failed"
            # Fall back to the exception type so the error is never blank.
            result.error = str(e) or type(e).__name__
        else:
            result.metrics = metrics or {}
            result.status = "passed"
        finally:
            result.duration_s = time.perf_counter() - start
        self._results[stage] = result
        return result

    def _validate_data(self) -> dict:
        """Validate the data (stub returning fixed metrics)."""
        return {"samples": 1000, "valid_ratio": 0.95, "duplicates": 12}

    def _preprocess(self) -> dict:
        """Preprocess the data (stub returning fixed metrics)."""
        return {"train_samples": 950, "eval_samples": 100, "max_length": 2048}

    def _train(self) -> dict:
        """Train the model (stub returning fixed metrics)."""
        return {
            "final_loss": 0.82,
            "training_time_min": 45,
            "gpu_used": "A100-80G",
        }

    def _evaluate(self) -> dict:
        """Evaluate the model and enforce the configured quality gates.

        Returns:
            The evaluation metrics (fixed stub values).

        Raises:
            ValueError: If accuracy is below ``config.min_accuracy``, the
                harmful rate exceeds ``config.max_harmful_rate``, or the
                capability drop exceeds ``config.max_capability_drop``.
        """
        metrics = {
            "accuracy": 0.91,
            "harmful_rate": 0.003,
            "capability_drop": 0.01,
            "win_rate": 0.62,
        }
        # Gate checks: any single violation blocks the pipeline.
        if metrics["accuracy"] < self.config.min_accuracy:
            raise ValueError(f"准确率 {metrics['accuracy']} < 门槛 {self.config.min_accuracy}")
        if metrics["harmful_rate"] > self.config.max_harmful_rate:
            raise ValueError(f"有害率 {metrics['harmful_rate']} > 门槛 {self.config.max_harmful_rate}")
        if metrics["capability_drop"] > self.config.max_capability_drop:
            raise ValueError(
                f"通用能力下降 {metrics['capability_drop']} > 门槛 {self.config.max_capability_drop}"
            )
        return metrics

    def _register(self) -> dict:
        """Register the model (stub returning a fixed registry record)."""
        return {"model_id": f"{self.config.name}-v1", "registry": "model-hub"}

    def _deploy(self) -> dict:
        """Deploy the model (stub returning a fixed endpoint record)."""
        return {"endpoint": f"https://api.example.com/{self.config.name}", "status": "live"}

    def get_summary(self) -> dict:
        """Summarise the stages recorded so far.

        Returns:
            A dict with the pipeline name, a per-stage mapping (keyed by the
            stage value) of status, rounded duration, metrics and error
            message, and an ``overall`` verdict: ``"pending"`` when no stage
            has run, ``"passed"`` when every recorded stage passed, and
            ``"failed"`` otherwise.
        """
        if not self._results:
            # all() of an empty iterable is True; never report that as a pass.
            overall = "pending"
        elif all(r.status == "passed" for r in self._results.values()):
            overall = "passed"
        else:
            overall = "failed"

        return {
            "pipeline": self.config.name,
            "stages": {
                stage.value: {
                    "status": result.status,
                    "duration_s": round(result.duration_s, 1),
                    "metrics": result.metrics,
                    "error": result.error,
                }
                for stage, result in self._results.items()
            },
            "overall": overall,
        }

CI/CD 集成

graph LR
    A[Git Push<br/>数据/配置变更] --> B[CI 触发<br/>数据验证]
    B --> C[训练<br/>GPU Runner]
    C --> D[评估<br/>自动门控]
    D --> E{通过?}
    E -->|是| F[注册模型<br/>Model Registry]
    E -->|否| G[通知 + 回滚]
    F --> H[Canary 部署<br/>5% 流量]
    style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
    style H fill:#e8f5e9,stroke:#388e3c,stroke-width:2px

本章小结

| 要点 | 说明 |
| --- | --- |
| 推荐工具 | Axolotl(全功能)、Unsloth(快速)、LLaMA-Factory(中文) |
| 自动化管道 | 数据→训练→评估→注册→部署,全程门控 |
| 评估门控 | 准确率、有害率、通用能力退化,任一不达标即阻断 |
| CI/CD | Git push 触发,Canary 部署,支持自动回滚 |

延伸阅读:LLM 评估与测试指南 · DevOps 实战指南