2 min read · 307 words

人机协作与交互设计

纯自动化 Agent 在许多场景下不够可靠。Human-in-the-Loop（人在回路中）是让 Agent 安全、可控地运行的关键模式。

人机协作模式

graph LR
    A[Agent 生成方案] --> B{置信度评估}
    B -->|高置信度| C[自动执行]
    B -->|中置信度| D[人工确认]
    B -->|低置信度| E[人工接管]
    D --> D1[用户审核]
    D1 -->|批准| C
    D1 -->|修改| F[Agent 修正]
    D1 -->|拒绝| G[终止任务]
    F --> B
    E --> E1[人工操作]
    E1 --> H[Agent 学习]
    style B fill:#fff3e0,stroke:#f57c00,stroke-width:3px
    style D fill:#e3f2fd,stroke:#1976d2,stroke-width:2px

置信度路由系统

"""
基于置信度的人机协作路由
"""
from dataclasses import dataclass, field
from enum import Enum
class ActionLevel(Enum):
    """How much human involvement a proposed agent action requires."""

    AUTO = "auto"        # execute automatically
    CONFIRM = "confirm"  # needs human confirmation before executing
    MANUAL = "manual"    # handed over to a human
@dataclass
class AgentProposal:
    """An action plan proposed by the agent."""

    action: str              # name of the action to perform
    params: dict             # arguments for the action
    confidence: float        # 0.0 ~ 1.0
    reasoning: str           # the agent's explanation for the proposal
    risk_level: str = "low"  # low / medium / high
@dataclass
class HumanFeedback:
"""人工反馈"""
approved: bool
modification: str | None = None
comment: str = ""
@dataclass
class RouterConfig:
    """Thresholds and action lists that drive confidence-based routing."""

    auto_threshold: float = 0.85    # at or above this confidence: execute automatically
    confirm_threshold: float = 0.5  # at or above this confidence: ask for confirmation
    # Action names that are always treated as high risk. Built by a factory
    # so that each config instance gets its own list.
    high_risk_actions: list[str] = field(
        default_factory=lambda: ["delete", "send_email", "payment", "deploy"]
    )
class HumanInTheLoopRouter:
    """Route agent proposals to auto-execution, human confirmation, or manual takeover.

    Routing is driven by the proposal's confidence and by whether the
    proposal counts as high risk. High-risk proposals are never executed
    automatically.
    """

    # Confidence a high-risk proposal must exceed to be offered for
    # confirmation; at or below it the proposal is handed to a human.
    HIGH_RISK_CONFIRM_THRESHOLD = 0.7

    def __init__(self, config: RouterConfig | None = None):
        """Create a router.

        Args:
            config: Routing thresholds; a default ``RouterConfig`` is used
                when omitted.
        """
        self.config = config if config is not None else RouterConfig()
        self._feedback_handler = None

    def set_feedback_handler(self, handler):
        """Install the callable used to obtain human feedback.

        Args:
            handler: Called with the ``AgentProposal``; its return value is
                used as the ``HumanFeedback`` for that proposal.
        """
        self._feedback_handler = handler

    def _is_high_risk(self, proposal: AgentProposal) -> bool:
        """Return True if the action is listed as high risk or flagged ``"high"``."""
        return (
            proposal.action in self.config.high_risk_actions
            or proposal.risk_level == "high"
        )

    def route(self, proposal: AgentProposal) -> ActionLevel:
        """Decide how much human involvement a proposal needs.

        Args:
            proposal: The agent's proposed action.

        Returns:
            ``CONFIRM`` or ``MANUAL`` for high-risk proposals; otherwise
            ``AUTO``, ``CONFIRM`` or ``MANUAL`` according to the configured
            confidence thresholds.
        """
        # High-risk proposals always involve a human, whatever the confidence.
        if self._is_high_risk(proposal):
            if proposal.confidence > self.HIGH_RISK_CONFIRM_THRESHOLD:
                return ActionLevel.CONFIRM
            return ActionLevel.MANUAL

        if proposal.confidence >= self.config.auto_threshold:
            return ActionLevel.AUTO
        if proposal.confidence >= self.config.confirm_threshold:
            return ActionLevel.CONFIRM
        return ActionLevel.MANUAL

    def execute(self, proposal: AgentProposal) -> dict:
        """Run the routing workflow for a proposal and report the outcome.

        Args:
            proposal: The agent's proposed action. When a reviewer approves
                with a modification, it is stored in
                ``proposal.params["modification"]`` (the proposal is mutated).

        Returns:
            A dict whose ``"status"`` is ``"executed"``,
            ``"executed_after_confirm"``, ``"rejected"`` or
            ``"escalated_to_human"``. Rejections carry the reviewer comment
            under ``"reason"``; every other outcome carries the action name
            under ``"proposal"``.
        """
        level = self.route(proposal)

        if level == ActionLevel.AUTO:
            return {"status": "executed", "proposal": proposal.action}

        if level == ActionLevel.CONFIRM:
            feedback = self._request_feedback(proposal)
            if not feedback.approved:
                return {"status": "rejected", "reason": feedback.comment}
            if feedback.modification:
                proposal.params["modification"] = feedback.modification
            return {"status": "executed_after_confirm", "proposal": proposal.action}

        # MANUAL
        return {"status": "escalated_to_human", "proposal": proposal.action}

    def _request_feedback(self, proposal: AgentProposal) -> HumanFeedback:
        """Obtain human feedback for a proposal.

        Delegates to the installed feedback handler when there is one.
        Otherwise prints the proposal to the console and returns an
        approval.
        """
        if self._feedback_handler:
            return self._feedback_handler(proposal)

        # Default: console output, then approve.
        # NOTE(review): this path approves without any human input, so a
        # router with no handler lets CONFIRM-level (including high-risk)
        # proposals through. Install a handler before production use.
        print("\n⚠️ Agent 请求确认:")
        print(f"  动作: {proposal.action}")
        print(f"  参数: {proposal.params}")
        print(f"  置信度: {proposal.confidence:.0%}")
        print(f"  推理: {proposal.reasoning}")
        return HumanFeedback(approved=True, comment="自动批准（开发模式）")

渐进式自治

Agent 不需要一开始就完全自动化。渐进式自治让系统逐步获得信任。

"""
渐进式自治控制器
"""
from dataclasses import dataclass, field
from datetime import datetime
@dataclass
class AutonomyMetrics:
    """Counters describing how the agent has performed."""

    total_executions: int = 0  # every recorded execution
    auto_successes: int = 0    # auto-executed runs that succeeded
    auto_failures: int = 0     # auto-executed runs that failed
    human_overrides: int = 0   # runs a human overrode

    @property
    def success_rate(self) -> float:
        """Share of auto-executed runs that succeeded; 0.0 when there are none."""
        total = self.auto_successes + self.auto_failures
        return self.auto_successes / total if total > 0 else 0.0

    @property
    def override_rate(self) -> float:
        """Share of all executions that a human overrode; 0.0 when there are none."""
        if self.total_executions == 0:
            return 0.0
        return self.human_overrides / self.total_executions
@dataclass
class AutonomyLevel:
    """One rung of the autonomy ladder."""

    level: int             # 1-5
    name: str              # display name of the level
    auto_threshold: float  # confidence threshold associated with this level
    description: str       # human-readable summary of the level
# Autonomy ladder, ordered by level number. ProgressiveAutonomy._promote
# uses the current level number as the index of the next entry, so the
# entry at index i must carry level i + 1.
AUTONOMY_LEVELS = [
    AutonomyLevel(
        level=1,
        name="观察者",
        auto_threshold=1.0,
        description="所有操作需人工确认",
    ),
    AutonomyLevel(
        level=2,
        name="建议者",
        auto_threshold=0.95,
        description="Agent 建议，人工决策",
    ),
    AutonomyLevel(
        level=3,
        name="执行者",
        auto_threshold=0.85,
        description="常规操作自动执行，关键操作需确认",
    ),
    AutonomyLevel(
        level=4,
        name="委托者",
        auto_threshold=0.70,
        description="大部分操作自动化，仅异常上报",
    ),
    AutonomyLevel(
        level=5,
        name="自治者",
        auto_threshold=0.50,
        description="完全自治，定期审计",
    ),
]
class ProgressiveAutonomy:
    """Controller that raises the agent's autonomy level as it earns trust.

    Starts at the first entry of ``AUTONOMY_LEVELS`` and moves up one entry
    at a time once enough executions have been recorded with a high enough
    success rate and a low enough human-override rate.
    """

    PROMOTION_THRESHOLD = 50      # minimum recorded executions before promotion
    SUCCESS_RATE_REQUIRED = 0.95  # success rate required for promotion
    OVERRIDE_RATE_MAX = 0.05      # maximum override rate allowed for promotion

    def __init__(self):
        self.current_level = AUTONOMY_LEVELS[0]
        self.metrics = AutonomyMetrics()
        # (timestamp, level number) for each promotion, oldest first.
        self.level_history: list[tuple[datetime, int]] = []

    def record_execution(self, auto_executed: bool, success: bool, human_override: bool):
        """Record the outcome of one execution.

        Args:
            auto_executed: Whether the run was executed automatically.
            success: Whether the run succeeded; only counted for
                auto-executed runs.
            human_override: Whether a human overrode the agent.
        """
        self.metrics.total_executions += 1
        if auto_executed:
            if success:
                self.metrics.auto_successes += 1
            else:
                self.metrics.auto_failures += 1
        if human_override:
            self.metrics.human_overrides += 1

    def evaluate_promotion(self) -> bool:
        """Promote to the next autonomy level if the current metrics qualify.

        Returns:
            True if a promotion happened, False otherwise.
        """
        # Already at the top of the ladder.
        if self.current_level.level >= AUTONOMY_LEVELS[-1].level:
            return False
        if self.metrics.total_executions < self.PROMOTION_THRESHOLD:
            return False

        # NOTE(review): success_rate is 0.0 until at least one auto-executed
        # run is recorded, so promotion cannot happen if only non-automatic
        # runs are recorded. Confirm this is intended.
        qualifies = (
            self.metrics.success_rate >= self.SUCCESS_RATE_REQUIRED
            and self.metrics.override_rate <= self.OVERRIDE_RATE_MAX
        )
        if not qualifies:
            return False

        self._promote()
        return True

    def _promote(self):
        """Move up one autonomy level, log it in the history, and reset the metrics."""
        # Levels start at 1, so the current level number is the list index
        # of the next level.
        next_idx = self.current_level.level
        if next_idx >= len(AUTONOMY_LEVELS):
            return

        self.current_level = AUTONOMY_LEVELS[next_idx]
        self.level_history.append((datetime.now(), self.current_level.level))
        self.metrics = AutonomyMetrics()  # start the new level with fresh metrics
        print(f"🎉 自治等级升级: L{self.current_level.level} ({self.current_level.name})")

交互模式对比

模式	描述	适用场景	用户负担
全自动	无需人工介入	低风险重复任务	最低
确认式	Agent 提方案，人批准	中等风险操作	低
协作式	人与 Agent 交替推进	创意/复杂任务	中
监督式	人实时监控 Agent 行为	高风险/初始部署	高
教学式	Agent 观察人操作学习	新领域冷启动	最高

本章小结

主题	要点
置信度路由	高/中/低 → 自动/确认/人工
高风险保护	关键操作（删除、支付）强制人工确认
渐进自治	5 级自治等级，基于表现自动升级
设计原则	安全 > 效率，可控 > 全自动

下一章：Agent 可观测性