微调工具链与自动化
手动微调一次容易,持续迭代需要自动化流水线。本章构建从数据到部署的端到端管道。
微调 MLOps 全景
graph TB
A[数据管理] --> B[训练管道]
B --> C[评估管道]
C --> D[模型仓库]
D --> E[部署管道]
E --> F[监控反馈]
F --> A
style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
style F fill:#e8f5e9,stroke:#388e3c,stroke-width:2px
工具链对比
| 工具 | 定位 | 支持方法 | 易用性 | 生产就绪 |
|---|---|---|---|---|
| Axolotl | 全功能微调 | SFT/DPO/RLHF | ★★★★☆ | ★★★★★ |
| Unsloth | 高速微调 | SFT/DPO | ★★★★★ | ★★★★☆ |
| LLaMA-Factory | 中文友好 | SFT/DPO/PPO | ★★★★★ | ★★★★☆ |
| TRL | HuggingFace 官方 | SFT/DPO/PPO | ★★★☆☆ | ★★★★☆ |
| OpenRLHF | RLHF 专精 | PPO/DPO | ★★★☆☆ | ★★★☆☆ |
| MLX | Apple Silicon | SFT/LoRA | ★★★★☆ | ★★★☆☆ |
自动化训练配置
"""
微调自动化管道
"""
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
import time
class PipelineStage(Enum):
    """Named stages of the fine-tuning pipeline, listed in execution order."""
    DATA_VALIDATION = "data_validation"
    PREPROCESSING = "preprocessing"
    TRAINING = "training"
    EVALUATION = "evaluation"
    MODEL_REGISTRY = "model_registry"
    DEPLOYMENT = "deployment"
@dataclass
class PipelineConfig:
    """Configuration for one fine-tuning pipeline run."""
    name: str
    base_model: str = "meta-llama/Llama-3.1-8B-Instruct"
    method: str = "lora"  # lora / qlora / full
    # Data
    train_data: str = ""
    eval_data: str = ""
    # Training
    lora_r: int = 16
    learning_rate: float = 2e-4
    num_epochs: int = 3
    batch_size: int = 4
    # Evaluation gates
    min_accuracy: float = 0.85
    max_harmful_rate: float = 0.01
    max_capability_drop: float = 0.02  # general-capability regression must stay < 2%
    # Automatic deployment
    auto_deploy: bool = False
@dataclass
class StageResult:
    """Outcome of a single pipeline stage."""
    stage: PipelineStage
    status: str = "pending"  # pending / running / passed / failed
    duration_s: float = 0.0
    metrics: dict[str, Any] = field(default_factory=dict)
    error: str = ""
class FineTunePipeline:
    """Automated fine-tuning pipeline.

    Runs the configured stages in order (data validation, preprocessing,
    training, evaluation, model registry, and — when ``auto_deploy`` is
    enabled — deployment), records a ``StageResult`` per stage, and stops
    at the first failure.
    """

    def __init__(self, config: PipelineConfig):
        self.config = config
        # Results keyed by stage; only stages that actually ran appear here.
        self._results: dict[PipelineStage, StageResult] = {}

    def run(self) -> dict:
        """Execute the full pipeline and return the summary dict."""
        stages = [
            (PipelineStage.DATA_VALIDATION, self._validate_data),
            (PipelineStage.PREPROCESSING, self._preprocess),
            (PipelineStage.TRAINING, self._train),
            (PipelineStage.EVALUATION, self._evaluate),
            (PipelineStage.MODEL_REGISTRY, self._register),
        ]
        if self.config.auto_deploy:
            stages.append((PipelineStage.DEPLOYMENT, self._deploy))
        for stage, handler in stages:
            result = self._run_stage(stage, handler)
            if result.status == "failed":
                # Fail fast: later stages must not run on a failed artifact.
                break
        return self.get_summary()

    def _run_stage(self, stage: PipelineStage, handler) -> StageResult:
        """Run a single stage, capturing metrics, duration and any error."""
        result = StageResult(stage=stage, status="running")
        start = time.time()
        try:
            result.metrics = handler()
        except Exception as e:
            result.status = "failed"
            result.error = str(e)
        else:
            result.status = "passed"
        finally:
            result.duration_s = time.time() - start
        self._results[stage] = result
        return result

    def _validate_data(self) -> dict:
        """Validate the raw dataset (placeholder returning fixed metrics)."""
        return {"samples": 1000, "valid_ratio": 0.95, "duplicates": 12}

    def _preprocess(self) -> dict:
        """Preprocess and split the dataset (placeholder returning fixed metrics)."""
        return {"train_samples": 950, "eval_samples": 100, "max_length": 2048}

    def _train(self) -> dict:
        """Train the model (placeholder returning fixed metrics)."""
        return {
            "final_loss": 0.82,
            "training_time_min": 45,
            "gpu_used": "A100-80G",
        }

    def _evaluate(self) -> dict:
        """Evaluate the model and enforce the quality gates.

        Raises:
            ValueError: if any gate (accuracy, harmful rate, or
                general-capability regression) is violated.
        """
        metrics = {
            "accuracy": 0.91,
            "harmful_rate": 0.003,
            "capability_drop": 0.01,
            "win_rate": 0.62,
        }
        # Gate checks — any violation aborts the pipeline here.
        if metrics["accuracy"] < self.config.min_accuracy:
            raise ValueError(f"准确率 {metrics['accuracy']} < 门槛 {self.config.min_accuracy}")
        if metrics["harmful_rate"] > self.config.max_harmful_rate:
            raise ValueError(f"有害率 {metrics['harmful_rate']} > 门槛 {self.config.max_harmful_rate}")
        # Fix: config declares max_capability_drop but it was never enforced;
        # gate general-capability regression as well.
        if metrics["capability_drop"] > self.config.max_capability_drop:
            raise ValueError(
                f"能力退化 {metrics['capability_drop']} > 门槛 {self.config.max_capability_drop}"
            )
        return metrics

    def _register(self) -> dict:
        """Register the model in the model hub (placeholder)."""
        return {"model_id": f"{self.config.name}-v1", "registry": "model-hub"}

    def _deploy(self) -> dict:
        """Deploy the registered model (placeholder)."""
        return {"endpoint": f"https://api.example.com/{self.config.name}", "status": "live"}

    def get_summary(self) -> dict:
        """Summarize all executed stages plus an overall pass/fail verdict."""
        # Fix: an empty result set previously reported "passed" because
        # all() over an empty iterable is vacuously True.
        all_passed = bool(self._results) and all(
            r.status == "passed" for r in self._results.values()
        )
        return {
            "pipeline": self.config.name,
            "stages": {
                stage.value: {
                    "status": result.status,
                    "duration_s": round(result.duration_s, 1),
                    "metrics": result.metrics,
                }
                for stage, result in self._results.items()
            },
            "overall": "passed" if all_passed else "failed",
        }
CI/CD 集成
graph LR
A[Git Push
数据/配置变更] --> B[CI 触发
数据验证]
B --> C[训练
GPU Runner]
C --> D[评估
自动门控]
D --> E{通过?}
E -->|是| F[注册模型
Model Registry]
E -->|否| G[通知 + 回滚]
F --> H[Canary 部署
5% 流量]
style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
style H fill:#e8f5e9,stroke:#388e3c,stroke-width:2px
本章小结
| 要点 | 说明 |
|---|---|
| 推荐工具 | Axolotl(全功能)、Unsloth(快速)、LLaMA-Factory(中文) |
| 自动化管道 | 数据→训练→评估→注册→部署,全程门控 |
| 评估门控 | 准确率、有害率、通用能力退化,任一不达标即阻断 |
| CI/CD | Git push 触发,Canary 部署,支持自动回滚 |
延伸阅读:LLM 评估与测试指南 · DevOps 实战指南