2 min read · 342 words

成本控制与监控体系

Agent 在生产环境的最大风险之一是成本失控——一个无限循环的 Agent 一夜之间可以产生数千美元的费用。本章介绍如何构建完善的成本控制和监控体系。

成本来源分析

graph TD
    A[Agent 成本构成] --> B[LLM API 费用]
    A --> C[工具调用费用]
    A --> D[基础设施费用]
    B --> B1[输入 Token × 单价]
    B --> B2[输出 Token × 单价]
    B --> B3[模型选择影响 10-100x]
    C --> C1[搜索 API]
    C --> C2[数据库查询]
    C --> C3[第三方服务]
    D --> D1[服务器/容器]
    D --> D2[向量数据库]
    D --> D3[消息队列]
    style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
    style B fill:#ffcdd2,stroke:#c62828,stroke-width:2px
    style C fill:#fff3e0,stroke:#f57c00,stroke-width:2px

实时成本追踪器

"""
Agent 成本实时追踪系统
"""
from dataclasses import dataclass, field
from datetime import datetime, date
import threading
# Price table for mainstream models, in USD per one million tokens.
# Each entry maps a model name to its separate input and output rates.
MODEL_PRICING = {
    "gpt-4o": {
        "input": 2.50,
        "output": 10.00,
    },
    "gpt-4o-mini": {
        "input": 0.15,
        "output": 0.60,
    },
    "claude-sonnet-4": {
        "input": 3.00,
        "output": 15.00,
    },
    "claude-haiku-3": {
        "input": 0.25,
        "output": 1.25,
    },
    "gemini-2.0-flash": {
        "input": 0.10,
        "output": 0.40,
    },
}
@dataclass
class CostRecord:
    """Token usage of a single LLM call, plus its derived dollar cost."""

    agent_id: str
    task_id: str
    model: str
    input_tokens: int
    output_tokens: int
    # Filled in with datetime.now() at construction time when not supplied.
    timestamp: datetime = field(default_factory=datetime.now)

    @property
    def cost_usd(self) -> float:
        """Return the cost of this call in USD.

        Rates come from ``MODEL_PRICING`` (USD per million tokens). A model
        that is missing from the table is billed at the fallback rates of
        5.0 (input) and 20.0 (output).
        """
        rates = MODEL_PRICING.get(self.model, {"input": 5.0, "output": 20.0})
        input_cost = self.input_tokens / 1_000_000 * rates["input"]
        output_cost = self.output_tokens / 1_000_000 * rates["output"]
        return input_cost + output_cost
class CostTracker:
    """Thread-safe, in-memory ledger of LLM call costs with budget checks.

    Every recorded call is appended to a list guarded by a lock. After each
    ``record`` the tracker compares today's spend with the daily budget and
    the task's spend with the per-task budget.
    """

    def __init__(
        self,
        daily_budget_usd: float = 10.0,
        task_budget_usd: float = 0.50,
    ):
        """Create an empty tracker.

        Args:
            daily_budget_usd: Spending cap per calendar day, in USD.
            task_budget_usd: Spending cap per ``task_id``, in USD.
        """
        self.daily_budget = daily_budget_usd
        self.task_budget = task_budget_usd
        self._records: list[CostRecord] = []
        # threading.Lock is NOT reentrant: no method may call another
        # lock-taking method while it is still holding the lock.
        self._lock = threading.Lock()

    def record(self, record: "CostRecord") -> None:
        """Store one LLM call, then enforce the budgets.

        The record is kept even when a budget turns out to be exceeded.

        Raises:
            BudgetExceededError: If today's spend or the task's spend is
                above its budget once this record is included.
        """
        with self._lock:
            self._records.append(record)
        # Run the check only after the lock is released: the cost getters
        # it uses acquire the same non-reentrant lock themselves.
        self._check_budget(record)

    def _check_budget(self, latest: "CostRecord") -> None:
        """Warn at 80% of the daily budget; raise when a budget is exceeded.

        Raises:
            BudgetExceededError: When the daily total exceeds the daily
                budget, or the total for ``latest.task_id`` exceeds the
                per-task budget.
        """
        daily_cost = self.get_daily_cost()
        task_cost = self.get_task_cost(latest.task_id)
        if daily_cost > self.daily_budget * 0.8:
            print(f"⚠️  每日预算警告: ${daily_cost:.3f} / ${self.daily_budget}")
        if daily_cost > self.daily_budget:
            raise BudgetExceededError(
                f"每日预算超限！已用 ${daily_cost:.3f}，预算 ${self.daily_budget}"
            )
        if task_cost > self.task_budget:
            raise BudgetExceededError(
                f"单任务超限！已用 ${task_cost:.4f}，预算 ${self.task_budget}"
            )

    def get_daily_cost(self, target_date: "date | None" = None) -> float:
        """Return the total cost of calls whose timestamp falls on a date.

        Args:
            target_date: Day to total up; ``None`` means ``date.today()``.
        """
        target = date.today() if target_date is None else target_date
        with self._lock:
            return sum(
                r.cost_usd for r in self._records
                if r.timestamp.date() == target
            )

    def get_task_cost(self, task_id: str) -> float:
        """Return the total cost of all calls recorded for ``task_id``."""
        with self._lock:
            return sum(
                r.cost_usd for r in self._records if r.task_id == task_id
            )

    def get_report(self) -> dict:
        """Build a cost summary from one consistent snapshot of the ledger.

        Returns:
            A dict with the keys ``total_cost_usd``, ``today_cost_usd``,
            ``daily_budget_usd``, ``budget_used_pct``, ``by_model`` and
            ``total_calls``. When the daily budget is not positive, no
            ratio can be computed, so ``budget_used_pct`` is reported as
            0.0 if nothing was spent today and 100.0 otherwise.
        """
        # Copy under the lock and compute outside it, so every figure in
        # the report describes the same set of records and the lock is
        # never re-acquired while held.
        with self._lock:
            snapshot = list(self._records)

        today = date.today()
        total = sum(r.cost_usd for r in snapshot)
        today_cost = sum(
            r.cost_usd for r in snapshot if r.timestamp.date() == today
        )
        by_model = {}
        for r in snapshot:
            by_model[r.model] = by_model.get(r.model, 0) + r.cost_usd

        if self.daily_budget > 0:
            used_pct = round(today_cost / self.daily_budget * 100, 1)
        else:
            # Avoid ZeroDivisionError (and a meaningless negative ratio).
            used_pct = 100.0 if today_cost > 0 else 0.0

        return {
            "total_cost_usd": round(total, 4),
            "today_cost_usd": round(today_cost, 4),
            "daily_budget_usd": self.daily_budget,
            "budget_used_pct": used_pct,
            "by_model": {k: round(v, 4) for k, v in by_model.items()},
            "total_calls": len(snapshot),
        }
class BudgetExceededError(Exception):
    """Raised when recorded spending goes over a daily or per-task budget."""
# Process-wide tracker shared by all agents.
tracker = CostTracker(task_budget_usd=0.50, daily_budget_usd=10.0)

# Log the token usage right after each LLM call an agent makes.
tracker.record(
    CostRecord(
        model="gpt-4o",
        agent_id="agent-001",
        task_id="task-abc",
        output_tokens=350,
        input_tokens=1200,
    )
)
print(tracker.get_report())

关键监控指标

"""
Agent 生产监控指标定义
"""
# Catalogue of production monitoring metrics: category -> {metric key -> description}.
MONITORING_METRICS = {
    # Business outcomes
    "业务指标": {
        "task_success_rate": "任务成功率（目标 > 85%）",
        "avg_quality_score": "平均输出质量分（LLM Judge 评分）",
        "user_satisfaction": "用户满意度（反馈收集）",
    },
    # Latency and workload
    "性能指标": {
        "p50_latency_ms": "中位数延迟（目标 < 5000ms）",
        "p99_latency_ms": "99 分位延迟（目标 < 30000ms）",
        "avg_iterations": "平均迭代次数（目标 < 8）",
        "tool_call_count": "工具调用次数分布",
    },
    # Spending
    "成本指标": {
        "daily_cost_usd": "每日总成本（设置告警阈值）",
        "cost_per_task": "单任务平均成本",
        "token_efficiency": "有效 Token 比率（去除重复/填充）",
        "model_usage_split": "各模型使用占比",
    },
    # Reliability
    "稳定性指标": {
        "error_rate": "错误率（目标 < 1%）",
        "timeout_rate": "超时率（目标 < 2%）",
        "budget_exceeded_count": "预算超限次数",
        "retry_rate": "重试率（高重试说明质量差）",
    },
}
# Example Prometheus metric definitions, kept as a plain reference string.
# The "#" lines below are part of the string's content, not Python comments,
# and nothing in the visible code parses or registers these metrics.
PROMETHEUS_METRICS = """
# 任务计数器
agent_tasks_total{agent_id, status, category}
# 延迟直方图
agent_task_duration_seconds{agent_id, category}
# 成本计数器
agent_cost_usd_total{agent_id, model}
# Token 使用量
agent_tokens_total{agent_id, model, type}  # type: input/output
# 迭代次数直方图
agent_iterations{agent_id, category}
"""
# Print the metric catalogue: a header line, then each category followed by
# its "key: description" entries.
print("=== 核心监控指标 ===")
for category, metrics in MONITORING_METRICS.items():
    print(f"\n{category}:")
    for key in metrics:
        desc = metrics[key]
        print(f"  {key}: {desc}")

成本优化策略对比

策略	实施难度	节省幅度	适用场景
切换到 mini 模型	低	10-90%	简单任务
实现响应缓存	中	20-50%	高重复请求
压缩上下文历史	中	20-40%	长对话 Agent
并行工具调用	中	减少延迟	多步任务
Batch API 处理	低	50%	离线任务
智能模型路由	高	40-60%	混合复杂度

本章实践清单

[ ] 为所有 LLM 调用添加成本追踪，记录 token 消耗
[ ] 设置每日预算硬上限，超限自动停止并告警
[ ] 设置单任务预算上限，防止无限循环
[ ] 接入 Grafana/DataDog 等监控平台，可视化关键指标
[ ] 建立成本异常告警（单日花费达到预算的 80% 时发送通知）
[ ] 每周生成成本分析报告，找出高成本任务类别
[ ] 对重复率高的工具调用实现缓存

下一章：Agent 评估与持续改进——建立反馈闭环。