自一致性与多路径推理
单次推理容易出错。自一致性(Self-Consistency)通过多次采样+投票机制,显著提升推理准确率——用冗余换可靠性。
多路径推理架构
graph TB
    A[同一问题] --> B1["路径 1<br/>CoT 推理"]
    A --> B2["路径 2<br/>CoT 推理"]
    A --> B3["路径 3<br/>CoT 推理"]
    A --> B4["路径 N<br/>CoT 推理"]
    B1 --> C1[答案 A]
    B2 --> C2[答案 B]
    B3 --> C3[答案 A]
    B4 --> C4[答案 A]
    C1 --> D[投票聚合]
    C2 --> D
    C3 --> D
    C4 --> D
    D --> E["最终答案: A<br/>置信度 75%"]
    style A fill:#ede7f6,stroke:#5e35b1,stroke-width:2px
    style D fill:#fff9c4,stroke:#f9a825,stroke-width:2px
    style E fill:#c8e6c9,stroke:#43a047,stroke-width:2px
自一致性投票系统
from dataclasses import dataclass, field
from collections import Counter
@dataclass
class ReasoningPath:
    """One sampled reasoning path and the answer it arrived at.

    Attributes:
        chain_of_thought: The free-text reasoning that led to the answer.
        answer: The final answer produced by this path.
        confidence: Score attached to this path; defaults to ``0.0`` when
            the caller does not supply one.
    """

    # Required fields come first so positional construction keeps working:
    # ReasoningPath(chain_of_thought, answer[, confidence]).
    chain_of_thought: str
    answer: str
    # Optional score for this path.
    confidence: float = 0.0
@dataclass
class SelfConsistency:
    """Aggregate several sampled reasoning paths into one final answer.

    Paths are collected with ``add_path`` and reduced with either ``vote``
    (plain majority) or ``weighted_vote`` (confidence-weighted). Answers are
    compared after ``str.strip()``, so surrounding whitespace does not split
    votes.

    Attributes:
        num_paths: Intended number of paths to sample. Stored only; this
            class never reads or enforces it.
        temperature: Intended sampling temperature. Stored only.
        paths: The reasoning paths collected so far.
    """

    num_paths: int = 5
    temperature: float = 0.7
    paths: list[ReasoningPath] = field(default_factory=list)

    def add_path(self, path: ReasoningPath) -> None:
        """Append one reasoning path to the pool of candidates."""
        self.paths.append(path)

    def vote(self) -> dict:
        """Pick the final answer by simple majority.

        Returns:
            A dict with the keys ``answer`` (most frequent stripped answer,
            or ``None`` when no paths exist), ``confidence`` (winner's share
            of all paths), ``vote_distribution`` (answer -> vote count) and
            ``total_paths``. The same keys are present for an empty pool so
            callers can index the result unconditionally.
        """
        total = len(self.paths)
        if not total:
            return {
                "answer": None,
                "confidence": 0.0,
                "vote_distribution": {},
                "total_paths": 0,
            }

        counter = Counter(p.answer.strip() for p in self.paths)
        # On a tie most_common keeps the answer that was seen first.
        winner, count = counter.most_common(1)[0]
        return {
            "answer": winner,
            "confidence": count / total,
            "vote_distribution": dict(counter),
            "total_paths": total,
        }

    def weighted_vote(self) -> dict:
        """Pick the final answer by confidence-weighted voting.

        Each path contributes its ``confidence`` to its (stripped) answer.
        Equal scores are broken by raw vote count, then by first appearance,
        so paths that all carry the default ``0.0`` confidence still yield
        the most frequent answer instead of whichever came first.

        Returns:
            A dict with the keys ``answer`` (``None`` when no paths exist),
            ``confidence`` (winner's share of the total weight, ``0.0`` when
            the total weight is zero) and ``weighted_scores``
            (answer -> summed confidence). The same keys are present for an
            empty pool.
        """
        if not self.paths:
            return {"answer": None, "confidence": 0.0, "weighted_scores": {}}

        scores: dict[str, float] = {}
        counts: Counter = Counter()
        for path in self.paths:
            answer = path.answer.strip()
            scores[answer] = scores.get(answer, 0.0) + path.confidence
            counts[answer] += 1

        # max() returns the first maximal key, so full ties stay deterministic.
        winner = max(scores, key=lambda a: (scores[a], counts[a]))
        total_weight = sum(scores.values())
        return {
            "answer": winner,
            "confidence": scores[winner] / total_weight if total_weight else 0.0,
            "weighted_scores": scores,
        }
# Simulation: multi-path reasoning on one arithmetic question.
# Each entry is (chain of thought, answer, confidence).
sc = SelfConsistency(num_paths=5)
for thought, answer, confidence in (
    ("先算3×4=12,再加5=17", "17", 0.9),
    ("3×4=12, 12+5=17", "17", 0.85),
    ("3+4=7, 7×5=35", "35", 0.3),  # incorrect path
    ("4×3=12, 加5得17", "17", 0.8),
    ("3×(4+5)=27", "27", 0.4),  # incorrect path
):
    sc.add_path(ReasoningPath(thought, answer, confidence))

# Majority vote over the five paths, then report the winner and the tally.
result = sc.vote()
print(f"答案: {result['answer']} (置信度: {result['confidence']:.0%})")
print(f"分布: {result['vote_distribution']}")
进阶多路径策略
from dataclasses import dataclass, field
from enum import Enum
class Strategy(Enum):
    """Closed set of multi-path aggregation strategies.

    Each member's value is the short string identifier for that strategy.
    """

    # Take the most frequent answer.
    MAJORITY_VOTE = "majority_vote"
    # Weight each answer by the confidence of the paths producing it.
    WEIGHTED_VOTE = "weighted_vote"
    # Keep the single best output among N candidates.
    BEST_OF_N = "best_of_n"
    # Universal self-consistency, abbreviated "usc".
    UNIVERSAL_SELF_CONSISTENCY = "usc"
@dataclass
class MultiPathConfig:
    """Configuration for a multi-path reasoning run.

    Attributes:
        strategy: Aggregation strategy to apply.
        num_paths: Number of reasoning paths to sample.
        temperatures: Sampling temperatures; a fresh five-element list is
            created per instance by default.
    """

    strategy: Strategy = Strategy.MAJORITY_VOTE
    num_paths: int = 5
    temperatures: list[float] = field(
        default_factory=lambda: [0.5, 0.7, 0.9, 0.7, 0.5]
    )

    # Suggested minimum number of paths for each strategy. The name carries
    # no annotation, so it is a class attribute shared by all instances
    # rather than a dataclass field.
    STRATEGY_MIN_PATHS = {
        Strategy.MAJORITY_VOTE: 3,
        Strategy.WEIGHTED_VOTE: 5,
        Strategy.BEST_OF_N: 3,
        Strategy.UNIVERSAL_SELF_CONSISTENCY: 7,
    }

    @property
    def min_paths(self) -> int:
        """Return the suggested minimum for ``strategy`` (3 if unlisted)."""
        table = self.STRATEGY_MIN_PATHS
        if self.strategy in table:
            return table[self.strategy]
        return 3

    @property
    def is_valid(self) -> bool:
        """Return whether ``num_paths`` reaches the strategy's minimum."""
        required = self.min_paths
        return required <= self.num_paths
# Strategy selection: one suggested configuration per task type.
configs = {
    "简单分类": MultiPathConfig(strategy=Strategy.MAJORITY_VOTE, num_paths=3),
    "数学计算": MultiPathConfig(strategy=Strategy.WEIGHTED_VOTE, num_paths=5),
    "代码生成": MultiPathConfig(strategy=Strategy.BEST_OF_N, num_paths=3),
    "开放问答": MultiPathConfig(
        strategy=Strategy.UNIVERSAL_SELF_CONSISTENCY, num_paths=7
    ),
}

# Report each task's strategy, path count, and whether the count is valid.
for task, cfg in configs.items():
    print(f"{task}: {cfg.strategy.value}, {cfg.num_paths}路径, 有效={cfg.is_valid}")
多路径策略对比
| 策略 | 原理 | 推荐路径数 | 成本 | 准确率提升 | 适用场景 |
|---|---|---|---|---|---|
| 多数投票 | 最高频答案 | 3-5 | 低 | 5-15% | 分类、选择题 |
| 加权投票 | 按置信度加权 | 5-7 | 中 | 10-20% | 推理、计算 |
| Best-of-N | 选最优输出 | 3-5 | 低 | 因评判标准而异 | 代码、文本生成 |
| USC | 由模型自身从多条候选回答中选出最一致的一条 | 7-15 | 高 | 15-30% | 开放问答、复杂推理 |
| Verifier | 额外模型验证 | 3+1 | 中高 | 20-35% | 数学、逻辑 |
成本与效果权衡
graph LR
    A["1次调用<br/>成本×1"] -->|准确率基线| B["60-80%"]
    C["3次+投票<br/>成本×3"] -->|自一致性| D["75-90%"]
    E["5次+加权<br/>成本×5"] -->|高置信| F["85-95%"]
    G["验证器<br/>成本×4-6"] -->|最可靠| H["90-98%"]
    style A fill:#ffcdd2,stroke:#c62828,stroke-width:2px
    style C fill:#fff9c4,stroke:#f9a825,stroke-width:2px
    style E fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
    style G fill:#c8e6c9,stroke:#43a047,stroke-width:2px
本章小结
- 自一致性用冗余换准确——3-5 条路径即可显著提升
- 多数投票最简单——适合有明确正确答案的任务
- 加权投票更精细——考虑每条路径的置信度
- 温度多样性很关键——不同 temperature 产生不同推理路径
- 成本和准确率要平衡——3 次调用是最佳性价比起点
下一章:角色扮演与系统提示