部署与企业实践
High Contrast
Dark Mode
Light Mode
Sepia
Forest
1 min read · 265 words

部署与企业实践

将 Agent 从实验环境推向生产,需要解决可靠性、可观测性和成本控制等问题。

生产级 Agent 架构

```mermaid
graph TB
    subgraph 接入层
        GW[API Gateway]
        AUTH[认证授权]
    end
    subgraph Agent 服务
        ROUTER[任务路由]
        AGENT[Agent Runtime]
        TOOL[工具执行器]
    end
    subgraph 基础设施
        QUEUE[任务队列]
        CACHE[缓存层]
        VDB[向量数据库]
    end
    subgraph 可观测性
        LOG[日志]
        TRACE[链路追踪]
        METRIC[指标]
        ALERT[告警]
    end
    GW --> AUTH
    AUTH --> ROUTER
    ROUTER --> QUEUE
    QUEUE --> AGENT
    AGENT --> TOOL
    AGENT --> CACHE
    AGENT --> VDB
    AGENT --> LOG
    AGENT --> TRACE
    AGENT --> METRIC
    METRIC --> ALERT
    style GW fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
    style AGENT fill:#c8e6c9,stroke:#388e3c,stroke-width:2px
```

FastAPI Agent 服务

"""
生产级 Agent API 服务
"""
import time
import uuid
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel
class TaskRequest(BaseModel):
    """Request body for submitting a task to the agent service."""

    # Free-form description of the work to perform (required).
    task: str
    # Name of the agent flavour to use; falls back to "general".
    agent_type: str = "general"
    # Upper bound on agent iterations; defaults to 10.
    max_iterations: int = 10
    # Time budget for the task, in seconds per the field name; defaults to 60.
    timeout_seconds: int = 60
class TaskResponse(BaseModel):
    """Response body describing the outcome of a processed task."""

    # Short identifier assigned to the task.
    task_id: str
    # Text produced by the agent.
    result: str
    # Number of iterations reported for the run.
    iterations: int
    # Processing time in milliseconds.
    latency_ms: float
    # Cost attributed to the task, in US dollars per the field name.
    cost_usd: float
    # Final state label, e.g. "completed".
    status: str
class AgentService:
    """Run agent tasks and maintain running aggregate metrics about them."""

    def __init__(self):
        # Registry for agent implementations; nothing in this class fills it yet.
        self.agents = {}
        # Aggregate counters exposed to callers (e.g. a health endpoint).
        self.metrics = {
            "total_tasks": 0,
            "successful_tasks": 0,
            "failed_tasks": 0,
            "total_cost": 0.0,
            "avg_latency_ms": 0.0,
        }

    async def process_task(self, request: TaskRequest) -> TaskResponse:
        """Run one task and return its result together with timing and cost.

        Args:
            request: The task description, agent type, iteration limit and
                time budget.

        Returns:
            A ``TaskResponse`` with status ``"completed"``.

        Raises:
            HTTPException: 504 when the agent exceeds
                ``request.timeout_seconds``; 500 for any other failure. An
                ``HTTPException`` raised by the agent itself is propagated
                unchanged.
        """
        # Function-scope import so this block does not depend on a new
        # file-level import.
        import asyncio

        task_id = str(uuid.uuid4())[:8]
        # perf_counter is monotonic, so the latency cannot go negative when
        # the wall clock is adjusted mid-request.
        started = time.perf_counter()
        self.metrics["total_tasks"] += 1

        # Placeholder accounting until a real agent reports these figures.
        iterations = 1
        cost_usd = 0.01

        try:
            # Enforce the caller-supplied time budget; wait_for cancels the
            # agent coroutine when the budget is exhausted.
            result = await asyncio.wait_for(
                self._run_agent(
                    request.task,
                    request.agent_type,
                    request.max_iterations,
                ),
                timeout=request.timeout_seconds,
            )
            latency = (time.perf_counter() - started) * 1000
            response = TaskResponse(
                task_id=task_id,
                result=result,
                iterations=iterations,
                latency_ms=round(latency, 2),
                cost_usd=cost_usd,
                status="completed",
            )
        except asyncio.TimeoutError as e:
            self.metrics["failed_tasks"] += 1
            raise HTTPException(
                status_code=504,
                detail=f"任务执行超时 ({request.timeout_seconds}s)",
            ) from e
        except HTTPException:
            # Keep the status code chosen deeper in the stack instead of
            # flattening it into a generic 500.
            self.metrics["failed_tasks"] += 1
            raise
        except Exception as e:
            self.metrics["failed_tasks"] += 1
            raise HTTPException(status_code=500, detail=str(e)) from e

        # Success counters are only touched once the response object exists,
        # so a task is never counted as both successful and failed.
        self._record_success(latency, cost_usd)
        return response

    def _record_success(self, latency_ms: float, cost_usd: float) -> None:
        """Fold one successful task into the aggregate metrics."""
        metrics = self.metrics
        metrics["successful_tasks"] += 1
        metrics["total_cost"] += cost_usd
        # Incremental mean over successful tasks; avoids storing every sample.
        completed = metrics["successful_tasks"]
        metrics["avg_latency_ms"] += (
            latency_ms - metrics["avg_latency_ms"]
        ) / completed

    async def _run_agent(
        self, task: str, agent_type: str, max_iter: int
    ) -> str:
        """Run the agent for ``task`` and return its textual result.

        This is a stub: a real project would pick an agent implementation
        based on ``agent_type`` and call it here. ``max_iter`` is currently
        unused.
        """
        return f"Agent ({agent_type}) 完成任务: {task[:50]}..."
# Module-level service instance shared by the request handlers in this module.
agent_service = AgentService()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Print a message before the app starts serving and another after it stops.

    Everything before ``yield`` runs on entry to the context; everything
    after it runs on a clean exit.
    """
    startup_message = "Agent 服务启动"
    shutdown_message = "Agent 服务关闭"
    print(startup_message)
    yield
    print(shutdown_message)
# Application object; `lifespan` supplies the startup/shutdown behaviour.
app = FastAPI(title="Agent API", version="1.0", lifespan=lifespan)
# Bearer-token security scheme, injected into handlers through Depends().
security = HTTPBearer()
@app.post("/api/task", response_model=TaskResponse)
async def create_task(
    request: TaskRequest,
    credentials: HTTPAuthorizationCredentials = Depends(security),
):
    """Submit a task to the shared agent service and return its response.

    NOTE(review): ``credentials`` is resolved through the ``security``
    dependency, but its value is never inspected in this handler, so the
    token itself is not validated here.
    """
    response = await agent_service.process_task(request)
    return response
@app.get("/api/health")
async def health():
    """Return a fixed "healthy" status together with the service metrics."""
    payload = {"status": "healthy"}
    payload["metrics"] = agent_service.metrics
    return payload

链路追踪

"""
Agent 链路追踪系统
"""
import time
import json
from dataclasses import dataclass, field
@dataclass
class Span:
    """A timed unit of work inside a trace."""

    # Human-readable label for the step.
    name: str
    # Wall-clock timestamp (time.time()) captured when the span is created.
    start_time: float = field(default_factory=time.time)
    # Wall-clock timestamp set by end(); 0 means the span is still open.
    end_time: float = 0
    # Arbitrary key/value annotations attached to the span.
    metadata: dict = field(default_factory=dict)
    # Nested spans recorded under this one.
    children: list = field(default_factory=list)

    @property
    def duration_ms(self) -> float:
        """Return the span's duration in milliseconds.

        For a span that has not been ended yet, the elapsed time so far is
        returned. Subtracting from the 0 sentinel would otherwise yield a
        huge negative number.
        """
        finish = self.end_time if self.end_time else time.time()
        return (finish - self.start_time) * 1000

    def end(self):
        """Mark the span as finished at the current time."""
        self.end_time = time.time()
class AgentTracer:
    """Collect a tree of spans for one agent task and render or export it."""

    def __init__(self, task_id: str):
        self.task_id = task_id
        # Root span covering the whole task; starts timing immediately.
        self.root = Span(name="agent_task")
        # Default parent for new spans.
        self.current_span = self.root

    def start_span(
        self, name: str, metadata: dict = None, parent: Span = None
    ) -> Span:
        """Start a new span and attach it to the trace.

        Args:
            name: Label for the span.
            metadata: Optional annotations; an empty dict is used when
                omitted or falsy.
            parent: Span to nest the new span under. Defaults to
                ``self.current_span``, so existing callers keep getting
                spans attached at the top level.

        Returns:
            The newly created span; the caller is responsible for calling
            ``end()`` on it.
        """
        span = Span(name=name, metadata=metadata or {})
        owner = self.current_span if parent is None else parent
        owner.children.append(span)
        return span

    def print_trace(self) -> None:
        """Print the whole trace tree followed by the total duration.

        Ends the root span first, so the total covers everything up to
        this call.
        """
        self.root.end()
        print(f"\n=== Trace: {self.task_id} ===")
        self._print_span(self.root, indent=0)
        print(f"总耗时: {self.root.duration_ms:.0f}ms")

    def _print_span(self, span: Span, indent: int) -> None:
        """Print ``span`` and, recursively, its children one level deeper."""
        prefix = "  " * indent
        # Spans that were never ended are shown with a placeholder.
        duration = f"{span.duration_ms:.0f}ms" if span.end_time else "..."
        print(f"{prefix}├── [{duration}] {span.name}")
        if span.metadata:
            for k, v in span.metadata.items():
                print(f"{prefix}│   {k}: {v}")
        for child in span.children:
            self._print_span(child, indent + 1)

    def to_dict(self) -> dict:
        """Export the trace as nested dictionaries.

        Spans that have not been ended report ``duration_ms`` as ``None``.
        """

        def span_to_dict(span):
            return {
                "name": span.name,
                "duration_ms": span.duration_ms if span.end_time else None,
                "metadata": span.metadata,
                "children": [span_to_dict(c) for c in span.children],
            }

        return {"task_id": self.task_id, "trace": span_to_dict(self.root)}
# Usage example
tracer = AgentTracer("task-001")
# Trace each step: start a span, do the work, then end the span.
span1 = tracer.start_span("llm_call", {"model": "gpt-4o", "tokens": 500})
time.sleep(0.1)  # simulate an LLM call
span1.end()
span2 = tracer.start_span("tool_call", {"tool": "web_search", "query": "Python best practices"})
time.sleep(0.05)  # simulate a search
span2.end()
span3 = tracer.start_span("llm_call", {"model": "gpt-4o", "tokens": 300})
time.sleep(0.08)  # simulate an LLM call
span3.end()
tracer.print_trace()

企业应用场景

```mermaid
graph TB
    A[Agent 企业应用] --> B[内部工具]
    A --> C[客户服务]
    A --> D[数据分析]
    A --> E[开发辅助]
    B --> B1[IT 自动化运维]
    B --> B2[文档自动生成]
    B --> B3[流程自动化]
    C --> C1[智能客服]
    C --> C2[工单处理]
    C --> C3[FAQ 自动回复]
    D --> D1[报表生成]
    D --> D2[数据洞察]
    D --> D3[异常检测]
    E --> E1[代码审查]
    E --> E2[Bug 修复]
    E --> E3[API 集成]
    style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
```

最佳实践总结

| 领域 | 实践 | 原因 |
| --- | --- | --- |
| 架构 | 无状态设计 | 便于水平扩展 |
| 安全 | 最小权限 | 降低风险 |
| 可靠性 | 超时 + 重试 | 防止卡死 |
| 成本 | 预算上限 + 降级 | 控制开支 |
| 监控 | 全链路追踪 | 快速排障 |
| 测试 | 评估数据集 | 持续衡量质量 |

本书总结

```mermaid
graph TB
    A[AI Agent 知识体系] --> B[基础]
    A --> C[单 Agent]
    A --> D[多 Agent]
    A --> E[高级主题]
    A --> F[企业实践]
    B --> B1[ReAct 架构]
    B --> B2[工具调用]
    C --> C1[框架选择]
    C --> C2[工具库设计]
    C --> C3[Memory 系统]
    D --> D1[主从/流水线/辩论]
    D --> D2[任务分解协作]
    E --> E1[Reflexion]
    E --> E2[安全可控]
    F --> F1[生产部署]
    F --> F2[监控追踪]
    style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
```

恭喜你完成了 AI Agent 实战指南的学习!

本章小结

感谢阅读!希望本书对你构建 AI Agent 有所帮助。