# Content Moderation System Architecture

Content moderation is the first line of defense in LLM governance, responsible for detecting and filtering inappropriate content.
## Moderation System Architecture

```mermaid
graph TB
    A[Input content] --> B[Preprocessing]
    B --> C[Keyword filter]
    B --> D[Regex matching]
    B --> E[API moderation]
    C --> F{Keyword hit?}
    D --> G{Regex hit?}
    E --> H[OpenAI Moderation]
    F -->|Yes| I[Block / warn]
    G -->|Yes| I
    H --> J{Violation?}
    J -->|Yes| I
    F -->|No| K[Continue]
    G -->|No| K
    J -->|No| K
    K --> L[Output content]
    L --> M[Aggregate results]
    M --> N[Return audit result]
    style A fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
    style I fill:#ffcdd2,stroke:#c62828,stroke-width:2px
    style N fill:#c8e6c9,stroke:#43a047,stroke-width:2px
    style B fill:#fff9c4,stroke:#f9a825,stroke-width:2px
    style C fill:#ffccbc,stroke:#d84315,stroke-width:2px
    style D fill:#ffe0b2,stroke:#e64a19,stroke-width:2px
    style E fill:#b3e5fc,stroke:#0277bd,stroke-width:2px
```
## Moderation Layer Hierarchy

```mermaid
graph LR
    A[Layer 1:<br/>Keyword filter] --> B[Layer 2:<br/>Regex patterns]
    B --> C[Layer 3:<br/>API moderation]
    C --> D[Layer 4:<br/>AI detection]
    A --> A1[Fast blocking<br/>⚡ <1ms]
    B --> B1[Complex patterns<br/>⚡ <5ms]
    C --> C1[Smart judgment<br/>🐌 ~100ms]
    D --> D1[Deep analysis<br/>🐌 ~500ms]
    style A fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
    style B fill:#fff9c4,stroke:#f9a825,stroke-width:2px
    style C fill:#c8e6c9,stroke:#43a047,stroke-width:2px
    style D fill:#d1c4e9,stroke:#7b1fa2,stroke-width:2px
    style A1 fill:#b3e5fc,stroke:#01579b,stroke-width:2px
    style B1 fill:#fff59d,stroke:#f57f17,stroke-width:2px
    style C1 fill:#a5d6a7,stroke:#2e7d32,stroke-width:2px
    style D1 fill:#ce93d8,stroke:#6a1b9a,stroke-width:2px
```
## Moderation Strategy Matrix

| Moderation type | Technique | Latency | Accuracy | Cost |
|---|---|---|---|---|
| Keyword filter | Exact/fuzzy matching | ⚡ Very fast | ⭐⭐ Low | 💰 Free |
| Regular expressions | Pattern matching | ⚡ Fast | ⭐⭐⭐ Medium | 💰 Free |
| Moderation API | OpenAI service | 🐢 Moderate | ⭐⭐⭐⭐ High | 💳 Paid |
| AI classifier | Custom model | 🐌 Slow | ⭐⭐⭐⭐⭐ Very high | 💎 Expensive |
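The two free layers in the matrix can be sketched in a few lines. The blocklist and patterns below are illustrative placeholders, not a production rule set:

```python
import re

# Illustrative blocklist; a real deployment would load rules from config.
BLOCKED_KEYWORDS = ["violence", "weapon"]

def keyword_check(text: str) -> list:
    """Layer 1: case-insensitive exact keyword match; returns the hits."""
    lowered = text.lower()
    return [kw for kw in BLOCKED_KEYWORDS if kw in lowered]

def regex_check(text: str, patterns: list) -> bool:
    """Layer 2: True if any regex pattern matches the text."""
    return any(re.search(p, text, re.IGNORECASE) for p in patterns)
```

Layers 3 and 4 only run when these cheap checks pass, which keeps the common path in the sub-millisecond range shown above.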
## Moderation Content Categories

```mermaid
graph TB
    A[Content moderation] --> B[Violence]
    A --> C[Hate speech]
    A --> D[Sexual content]
    A --> E[Harassment and abuse]
    A --> F[Illegal activity]
    A --> G[Privacy leaks]
    A --> H[Misinformation]
    A --> I[Spam]
    B --> B1[Violent descriptions]
    B --> B2[Terrorism]
    B --> B3[Weapon making]
    C --> C1[Racial discrimination]
    C --> C2[Religious attacks]
    C --> C3[Gender discrimination]
    D --> D1[Explicit descriptions]
    D --> D2[Adult content]
    E --> E1[Insults and cursing]
    E --> E2[Personal attacks]
    F --> F1[Unlawful acts]
    F --> F2[Dangerous goods]
    F --> F3[Illegal drugs]
    style A fill:#ede7f6,stroke:#5e35b1,stroke-width:3px
    style B fill:#ffcdd2,stroke:#c62828,stroke-width:2px
    style C fill:#ffccbc,stroke:#d84315,stroke-width:2px
    style D fill:#f8bbd0,stroke:#c2185b,stroke-width:2px
    style E fill:#fff9c4,stroke:#f9a825,stroke-width:2px
    style F fill:#ffe0b2,stroke:#e64a19,stroke-width:2px
    style G fill:#b3e5fc,stroke:#0277bd,stroke-width:2px
    style H fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
    style I fill:#d1c4e9,stroke:#512da8,stroke-width:2px
```
## Auditor Interface Design

```python
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional

from core.models import AuditRequest, AuditResult, AuditLevel, RiskLevel


class ContentAuditor(ABC):
    """Base class for content auditors."""

    def __init__(self, name: str, priority: int = 0):
        self.name = name
        self.priority = priority

    @abstractmethod
    async def audit(self, request: AuditRequest) -> AuditResult:
        """Run the audit."""
        ...

    def get_score(self) -> int:
        """Return the priority score (lower runs first)."""
        return self.priority
```
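Because `core.models` is project-specific, here is a self-contained sketch of a concrete auditor against minimal stand-in models. The dataclasses and the `LengthAuditor` below are assumptions for illustration, not the real module:

```python
import asyncio
from dataclasses import dataclass, field
from enum import Enum

# Minimal stand-ins for the core.models types used above (assumed shapes).
class AuditLevel(Enum):
    PASS = "pass"
    BLOCK = "block"

class RiskLevel(Enum):
    LOW = 1
    HIGH = 3

@dataclass
class AuditRequest:
    request_id: str
    content: str

@dataclass
class AuditResult:
    request_id: str
    level: AuditLevel
    risk_level: RiskLevel
    is_blocked: bool
    reasons: list = field(default_factory=list)

class LengthAuditor:
    """Toy auditor: blocks content longer than a limit."""
    def __init__(self, limit: int = 100):
        self.name = "length"
        self.limit = limit

    async def audit(self, request: AuditRequest) -> AuditResult:
        if len(request.content) > self.limit:
            return AuditResult(request.request_id, AuditLevel.BLOCK,
                               RiskLevel.HIGH, True, ["content too long"])
        return AuditResult(request.request_id, AuditLevel.PASS,
                           RiskLevel.LOW, False, [])

blocked = asyncio.run(LengthAuditor(limit=10).audit(AuditRequest("r1", "x" * 20)))
ok = asyncio.run(LengthAuditor(limit=10).audit(AuditRequest("r2", "short")))
```

Any auditor that exposes this `async audit()` shape can be dropped into the chained execution described next.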
## Chained Multi-Auditor Execution

```mermaid
sequenceDiagram
    participant C as Governance service
    participant A1 as Auditor 1
    participant A2 as Auditor 2
    participant A3 as Auditor 3
    C->>A1: audit()
    A1-->>C: PASS
    C->>A2: audit()
    A2-->>C: BLOCK (keyword)
    C->>C: Abort remaining auditors
    Note over C: Return the block verdict
```
## Audit Decision Flow

```python
class AuditDecision:
    """Audit decision engine."""

    def __init__(self):
        self.auditors: List[ContentAuditor] = []
        self.block_on_first_fail = True  # block on the first failure

    async def evaluate(self, request: AuditRequest) -> AuditResult:
        """
        Evaluate an audit request.

        Strategy 1: block_on_first_fail = True
        - Block immediately when any auditor returns BLOCK
        - Used to intercept high-risk content quickly

        Strategy 2: block_on_first_fail = False
        - Run all auditors
        - Aggregate every result
        - Block if any auditor blocked, with all reasons collected
        """
        results = []
        blocked_reasons = []
        # Sort auditors by priority (lower score runs first)
        sorted_auditors = sorted(self.auditors, key=lambda x: x.get_score())
        for auditor in sorted_auditors:
            result = await auditor.audit(request)
            results.append(result)
            if result.is_blocked:
                blocked_reasons.extend(result.reasons)
                if self.block_on_first_fail:
                    # Block and return immediately
                    return self._merge_blocked_results(results, blocked_reasons)
        if blocked_reasons:
            # Aggregate mode: some auditor blocked along the way
            return self._merge_blocked_results(results, blocked_reasons)
        # All auditors passed
        return self._create_pass_result(results)

    def _merge_blocked_results(
        self,
        results: List[AuditResult],
        reasons: List[str]
    ) -> AuditResult:
        """Merge results into a single block verdict."""
        return AuditResult(
            request_id=results[0].request_id,
            level=AuditLevel.BLOCK,
            risk_level=max(r.risk_level for r in results),
            is_blocked=True,
            reasons=reasons,
            details={
                "auditor_results": [r.dict() for r in results]
            }
        )

    def _create_pass_result(
        self,
        results: List[AuditResult]
    ) -> AuditResult:
        """Build a pass verdict."""
        return AuditResult(
            request_id=results[0].request_id,
            level=AuditLevel.PASS,
            risk_level=RiskLevel.LOW,
            is_blocked=False,
            reasons=["all auditors passed"],
            details={
                "auditor_results": [r.dict() for r in results]
            }
        )
```
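The two strategies can be contrasted with a stripped-down evaluator that uses plain booleans instead of `AuditResult` objects (illustrative only; `passes`/`blocks` are stand-in auditors):

```python
import asyncio

async def evaluate(checks, block_on_first_fail=True):
    """checks: async callables returning True when the content is blocked."""
    verdicts = []
    for check in checks:
        blocked = await check()
        verdicts.append(blocked)
        if blocked and block_on_first_fail:
            break  # fail fast: skip the remaining auditors
    return any(verdicts), verdicts

async def passes():
    return False

async def blocks():
    return True

# Fail-fast: the third check never runs, so only two verdicts are recorded.
fast_blocked, fast_verdicts = asyncio.run(evaluate([passes, blocks, passes]))

# Aggregate: every check runs and all three verdicts are collected.
agg_blocked, agg_verdicts = asyncio.run(
    evaluate([passes, blocks, passes], block_on_first_fail=False)
)
```

Both modes reach the same verdict here; they differ in how many auditors were paid for and how much evidence ends up in the result.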
## Moderation Performance Optimization

### 1. Caching strategy

```python
from hashlib import md5


class CachedAuditor(ContentAuditor):
    """Auditor with an in-memory result cache.

    Note: functools.lru_cache is a poor fit here because it cannot wrap
    async methods and would key the cache on `self` as well, so a plain
    dict keyed by a content hash is used instead.
    """

    def __init__(self, name: str, cache_size: int = 1000):
        super().__init__(name)
        self.cache_size = cache_size
        self._cache: Dict[str, AuditResult] = {}

    def _get_cache_key(self, content: str) -> str:
        """Build a fixed-size cache key from the content."""
        return md5(content.encode()).hexdigest()

    async def audit(self, request: AuditRequest) -> AuditResult:
        """Serve repeated content from the cache."""
        key = self._get_cache_key(request.content)
        if key in self._cache:
            return self._cache[key]
        result = await self._do_audit(request)  # actual audit logic (subclass)
        if len(self._cache) < self.cache_size:
            self._cache[key] = result
        return result
```
### 2. Parallel async auditing

```python
import asyncio


async def parallel_audit(
    auditors: List[ContentAuditor],
    request: AuditRequest
) -> List[AuditResult]:
    """Run all auditors concurrently and collect their results in order."""
    tasks = [auditor.audit(request) for auditor in auditors]
    results = await asyncio.gather(*tasks)
    return results
```
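A quick way to confirm that `asyncio.gather` actually overlaps the auditors is to time two simulated slow checks (the delays are illustrative):

```python
import asyncio
import time

async def slow_check(delay: float, verdict: bool) -> bool:
    # Stand-in for an I/O-bound auditor such as a moderation API call.
    await asyncio.sleep(delay)
    return verdict

async def main():
    start = time.perf_counter()
    results = await asyncio.gather(slow_check(0.05, False), slow_check(0.05, True))
    return results, time.perf_counter() - start

results, elapsed = asyncio.run(main())
# Both checks sleep concurrently, so elapsed stays near 0.05s rather than the
# 0.10s a sequential loop would take; gather also preserves argument order.
```

Parallelism pays off for the API and model layers; the keyword and regex layers are usually too cheap to be worth scheduling.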
### 3. Short-circuit optimization

```python
class ShortCircuitAuditor(ContentAuditor):
    """Short-circuit auditor (fail fast)."""

    async def audit(self, request: AuditRequest) -> AuditResult:
        # Length check first: the cheapest possible rejection
        if len(request.content) > 10000:
            return self._block_result(request, "content too long")
        # Fast keyword scan before anything expensive
        if self._quick_keyword_check(request.content):
            return self._block_result(request, "contains sensitive words")
        # Full audit only when the cheap checks pass
        # (_block_result, _quick_keyword_check and _full_audit are
        # provided by the concrete subclass)
        return await self._full_audit(request)
```
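The same ordering can be shown as a runnable function sketch; the `SENSITIVE` tuple and the `asyncio.sleep(0)` stand-in for the full audit are illustrative:

```python
import asyncio

SENSITIVE = ("bomb", "attack")  # illustrative quick-check list

async def short_circuit_audit(content: str) -> str:
    # Cheap checks first; the expensive path only runs if they pass.
    if len(content) > 10_000:
        return "blocked: too long"
    if any(word in content.lower() for word in SENSITIVE):
        return "blocked: sensitive keyword"
    await asyncio.sleep(0)  # stand-in for the full (slow) audit
    return "pass"

verdict = asyncio.run(short_circuit_audit("how to build a bomb"))
```

Ordering checks from cheapest to most expensive means the slow path is only paid for content that already looks clean.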
## Audit Logging and Tracing

```python
from datetime import datetime

from loguru import logger


class AuditLogger:
    """Audit log recorder."""

    def log_audit(
        self,
        request: AuditRequest,
        result: AuditResult,
        auditor_name: str
    ):
        """Write one audit log entry."""
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "request_id": request.request_id,
            "auditor": auditor_name,
            "content_length": len(request.content),
            "result": result.level.value,
            "blocked": result.is_blocked,
            "risk_level": result.risk_level.value,
            "reasons": result.reasons
        }
        if result.is_blocked:
            logger.warning(f"🚫 audit blocked: {log_entry}")
        else:
            logger.debug(f"✅ audit passed: {log_entry}")
```
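For environments without loguru, the same structured entry can be produced with the standard library `logging` module. A minimal sketch, with field names mirroring the `log_entry` above:

```python
import json
import logging
from datetime import datetime, timezone

logging.basicConfig(level=logging.DEBUG, format="%(levelname)s %(message)s")
logger = logging.getLogger("audit")

def log_audit(request_id: str, blocked: bool, reasons: list) -> dict:
    """Emit one structured audit log line and return the entry."""
    entry = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "request_id": request_id,
        "blocked": blocked,
        "reasons": reasons,
    }
    # Blocked content is logged at WARNING so it stands out; passes at DEBUG.
    level = logging.WARNING if blocked else logging.DEBUG
    logger.log(level, json.dumps(entry, ensure_ascii=False))
    return entry

entry = log_audit("req-1", True, ["keyword hit"])
```

Emitting JSON per line keeps the log machine-parseable for later tracing and audits.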
## Testing

```python
import pytest

from core.models import AuditRequest


@pytest.mark.asyncio
async def test_keyword_filter():
    """Keyword filter test."""
    from core.keyword_auditor import KeywordAuditor

    auditor = KeywordAuditor()
    # Normal content should pass
    normal_request = AuditRequest(
        request_id="test-001",
        content="This is perfectly normal content"
    )
    result = await auditor.audit(normal_request)
    assert not result.is_blocked
    # Content hitting a blocked keyword should be rejected
    blocked_request = AuditRequest(
        request_id="test-002",
        content="This content contains violent wording"
    )
    result = await auditor.audit(blocked_request)
    assert result.is_blocked
    # assumes the auditor reports the matched keyword in its reasons
    assert "violen" in str(result.reasons).lower()
```
## Key Takeaways

- ✅ Understand the multi-layer content moderation architecture
- ✅ Master the chained auditor execution pattern
- ✅ Know the audit decision strategies
- ✅ Learn the performance optimizations (caching, parallelism, short-circuiting)
- ✅ Implement audit logging and tracing

**Next step:** implement the keyword filter 🏷️