1 min read · 116 words

Docker部署

将LLM治理平台部署到生产环境。

Docker化架构

graph TB
    subgraph "Docker Compose"
        API[FastAPI应用]
        Dashboard[Streamlit仪表盘]
        Prometheus[Prometheus监控]
        Grafana[Grafana可视化]
        Redis[Redis缓存]
        API --> Prometheus
        Prometheus --> Grafana
        API --> Redis
    end

Dockerfile

创建 Dockerfile：

# Base image
FROM python:3.12-slim

# Set the working directory
WORKDIR /app

# Install system dependencies. Recommended packages are skipped and the apt
# lists are removed in the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY . .

# Create the required directories
RUN mkdir -p logs data

# Expose ports
EXPOSE 8000 9090

# Health check. httpx.get() only raises on connection failures, so
# raise_for_status() is needed to make a 4xx/5xx answer from /health exit
# non-zero and mark the container unhealthy.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()"

# Start command
CMD ["python", "-m", "uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"]

docker-compose.yml

创建 docker-compose.yml：

version: '3.8'

services:
  # FastAPI application
  api:
    build: .
    container_name: llm-governance-api
    ports:
      - "8000:8000"
      - "9090:9090"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - HOST=0.0.0.0
      - PORT=8000
      - DEBUG=False
      - LOG_LEVEL=INFO
    volumes:
      - ./logs:/app/logs
      - ./data:/app/data
    env_file:
      - .env
    # Start the cache before the API that connects to it
    depends_on:
      - redis
    restart: unless-stopped
    healthcheck:
      # python:3.12-slim does not ship curl and the Dockerfile only installs
      # gcc, so a curl-based probe can never succeed. Probe with the Python
      # interpreter instead; urlopen raises on connection errors and on
      # 4xx/5xx responses, which makes the command exit non-zero.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Streamlit dashboard
  dashboard:
    build: .
    container_name: llm-governance-dashboard
    ports:
      - "8501:8501"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
    volumes:
      - ./logs:/app/logs
      - ./data:/app/data
    command: streamlit run dashboard/app.py --server.port=8501 --server.address=0.0.0.0
    restart: unless-stopped
    healthcheck:
      # The image-level HEALTHCHECK probes port 8000, which this container
      # does not serve, so it would always be reported unhealthy. Probe
      # Streamlit's own health endpoint on 8501 instead.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8501/_stcore/health', timeout=5)"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Prometheus monitoring
  prometheus:
    image: prom/prometheus:latest
    container_name: llm-governance-prometheus
    ports:
      # Host port 9091 because 9090 is already published by the api service
      - "9091:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
    restart: unless-stopped

  # Grafana visualization
  grafana:
    image: grafana/grafana:latest
    container_name: llm-governance-grafana
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      # Set GRAFANA_ADMIN_PASSWORD in the environment or .env for any real
      # deployment; the fallback only keeps the tutorial default working.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123}
    volumes:
      - grafana-data:/var/lib/grafana
    depends_on:
      - prometheus
    restart: unless-stopped

  # Redis cache
  redis:
    image: redis:7-alpine
    container_name: llm-governance-redis
    ports:
      - "6379:6379"
    volumes:
      - redis-data:/data
    restart: unless-stopped

# Named volumes that persist service data across container restarts
volumes:
  prometheus-data:
  grafana-data:
  redis-data:

Prometheus配置

创建 prometheus.yml：

# Scrape and rule-evaluation intervals shared by all jobs
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # Metrics endpoint of the api service, addressed by its compose service name
  - job_name: "llm-governance-api"
    metrics_path: "/metrics"
    static_configs:
      - targets:
          - "api:9090"

  # Prometheus scraping itself
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "localhost:9090"

.dockerignore

创建 .dockerignore：

# Git
.git
.gitignore

# Python
__pycache__
*.pyc
*.pyo
*.pyd
.Python
*.so
*.egg
*.egg-info
dist
build
venv

# IDE
.vscode
.idea
*.swp
*.swo

# Logs
logs/
*.log

# Data
data/chroma_db/

# Environment
# .env holds secrets such as OPENAI_API_KEY. It is injected at runtime through
# env_file in docker-compose.yml, so it must not be copied into the image by
# "COPY . ." in the Dockerfile.
.env
.env.local
.env.production

# Tests
tests/
.pytest_cache/
.coverage

构建和部署

# Build the images
docker-compose build
# Start all services in detached mode
docker-compose up -d
# Follow the logs of the api service
docker-compose logs -f api
# Stop the services
docker-compose down
# Stop the services and delete their volumes
docker-compose down -v

健康检查

创建 health_check.sh：

#!/bin/bash
# Health check for the LLM governance platform: probes the API, Prometheus and
# Grafana over HTTP and exits non-zero if any of them is not healthy.

# Probe a single HTTP endpoint and print the result.
#   $1 - service name used in the messages
#   $2 - URL to request
# Returns 0 when the endpoint answers HTTP 200, 1 otherwise.
check_http() {
    local name="$1"
    local url="$2"
    local response

    echo "检查${name}服务..."
    # --max-time keeps a hung service from blocking the whole script. When no
    # HTTP response is received curl prints 000 as the status code.
    response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$url")
    # Quoted string comparison: an empty value (e.g. curl missing) is reported
    # as a failure instead of breaking the test expression.
    if [ "$response" = "200" ]; then
        echo "✅ ${name}服务正常"
        return 0
    else
        echo "❌ ${name}服务异常 (HTTP $response)"
        return 1
    fi
}

# API health check
check_api() {
    check_http "API" "http://localhost:8000/health"
}

# Prometheus health check (published on host port 9091)
check_prometheus() {
    check_http "Prometheus" "http://localhost:9091/-/healthy"
}

# Grafana health check
check_grafana() {
    check_http "Grafana" "http://localhost:3000/api/health"
}

# Run every check and exit 0 only if all of them passed.
main() {
    local all_ok=true

    echo "================================"
    echo "LLM治理平台健康检查"
    echo "================================"
    # Each check runs even if an earlier one failed, so the report is complete.
    check_api || all_ok=false
    check_prometheus || all_ok=false
    check_grafana || all_ok=false
    echo "================================"
    if [ "$all_ok" = true ]; then
        echo "✅ 所有服务正常"
        exit 0
    else
        echo "❌ 部分服务异常"
        exit 1
    fi
}

main

性能测试

创建 load_test.py：

import asyncio
import httpx
import time
from statistics import mean, median
async def send_request(client: httpx.AsyncClient, prompt: str, index: int) -> dict:
    """Send one chat request and time it.

    Args:
        client: Async HTTP client used to POST the request.
        prompt: Prompt text placed in the request body.
        index: Request number, used to build the ``user_id`` field.

    Returns:
        A dict with ``success`` and ``duration`` (seconds). A non-200 response
        additionally carries ``status``; an exception carries ``error`` and
        reports ``duration`` as 0.
    """
    try:
        # perf_counter is monotonic, so the measured interval cannot be skewed
        # by wall-clock adjustments the way time.time() can.
        start = time.perf_counter()
        response = await client.post(
            "http://localhost:8000/api/chat",
            json={"prompt": prompt, "user_id": f"user-{index}"},
        )
        duration = time.perf_counter() - start
    except Exception as e:
        # Deliberately broad: a single failed request must be recorded as a
        # failure rather than abort the whole load test.
        return {"success": False, "duration": 0, "error": str(e)}

    if response.status_code == 200:
        return {"success": True, "duration": duration}
    return {"success": False, "duration": duration, "status": response.status_code}
async def run_load_test(
    num_requests: int = 100,
    concurrency: int = 10
):
    """Run a load test against the chat endpoint and print a summary.

    Args:
        num_requests: Total number of requests to send.
        concurrency: Maximum number of requests in flight at once.

    Raises:
        ValueError: If ``concurrency`` is smaller than 1.
    """
    # Semaphore(0) would block every request forever, so reject it up front.
    if concurrency < 1:
        raise ValueError("concurrency must be at least 1")

    print(f"开始负载测试: {num_requests}个请求, 并发{concurrency}")
    async with httpx.AsyncClient(timeout=60.0) as client:
        # perf_counter is monotonic and therefore safe for measuring intervals.
        start_time = time.perf_counter()
        # The semaphore caps the number of simultaneously running requests.
        semaphore = asyncio.Semaphore(concurrency)

        async def limited_request(index):
            async with semaphore:
                prompt = f"测试提示词 {index}"
                return await send_request(client, prompt, index)

        tasks = [limited_request(i) for i in range(num_requests)]
        results = await asyncio.gather(*tasks)
        total_time = time.perf_counter() - start_time

    # Statistics
    successful = [r for r in results if r["success"]]
    failed = [r for r in results if not r["success"]]
    durations = [r["duration"] for r in successful if r["duration"] > 0]

    # Guard both ratios so an empty run prints zeros instead of raising
    # ZeroDivisionError.
    success_rate = len(successful) / num_requests * 100 if num_requests > 0 else 0.0
    qps = num_requests / total_time if total_time > 0 else 0.0

    print(f"\n{'='*50}")
    print("负载测试结果")
    print(f"{'='*50}")
    print(f"总请求数: {num_requests}")
    print(f"成功请求: {len(successful)}")
    print(f"失败请求: {len(failed)}")
    print(f"成功率: {success_rate:.2f}%")
    print(f"总耗时: {total_time:.2f}秒")
    print(f"QPS: {qps:.2f}")
    # Latency figures are only meaningful when at least one request succeeded.
    if durations:
        print(f"\n响应时间统计:")
        print(f"  平均: {mean(durations)*1000:.2f}ms")
        print(f"  中位数: {median(durations)*1000:.2f}ms")
        print(f"  最小: {min(durations)*1000:.2f}ms")
        print(f"  最大: {max(durations)*1000:.2f}ms")
    print(f"{'='*50}")
# When executed as a script, run the load test with 100 requests and a
# concurrency limit of 10.
if __name__ == "__main__":
    asyncio.run(run_load_test(num_requests=100, concurrency=10))

学习要点

- ✅ 创建Docker镜像
- ✅ 使用Docker Compose编排服务
- ✅ 配置Prometheus监控
- ✅ 实现健康检查
- ✅ 编写负载测试脚本

下一步: 常见面试问题 🎓