Self-RAG 与自适应检索
High Contrast
Dark Mode
Light Mode
Sepia
Forest
2 min read · 432 words

Self-RAG 与自适应检索

传统 RAG 对每个查询都执行检索,但并非所有问题都需要外部知识。Self-RAG 让模型自主决定何时检索、检索什么、以及如何验证生成结果。

Self-RAG 核心思想

graph TB A[用户查询] --> B{需要检索?} B -->|是| C[执行检索] B -->|否| D[直接生成] C --> E[检索相关文档] E --> F{文档相关?} F -->|相关| G[基于文档生成] F -->|不相关| H[丢弃,自主回答] G --> I{回答忠实于文档?} I -->|是| J[输出回答] I -->|否| K[重新生成] D --> J style B fill:#fff3e0,stroke:#f57c00,stroke-width:2px style F fill:#fff3e0,stroke:#f57c00,stroke-width:2px style I fill:#fff3e0,stroke:#f57c00,stroke-width:2px style J fill:#c8e6c9,stroke:#388e3c,stroke-width:3px

Self-RAG 反思令牌

Self-RAG 引入四种特殊的反思令牌(Reflection Token)来控制生成过程:

令牌 含义 取值 作用
Retrieve 是否需要检索 yes / no / continue 控制检索触发
IsRel 文档是否相关 relevant / irrelevant 过滤噪声文档
IsSup 回答是否有文档支持 fully / partially / no 检查忠实度
IsUse 回答是否有用 5 / 4 / 3 / 2 / 1 评估回答质量
"""
Self-RAG 实现框架
"""
from dataclasses import dataclass
from enum import Enum
class RetrieveDecision(Enum):
    """Reflection token: should retrieval be triggered for this query?"""
    YES = "yes"
    NO = "no"
    CONTINUE = "continue"
class RelevanceJudgment(Enum):
    """Reflection token: is a retrieved document relevant to the query?"""
    RELEVANT = "relevant"
    IRRELEVANT = "irrelevant"
class SupportLevel(Enum):
    """Reflection token: how well the documents support the generated answer."""
    FULLY = "fully_supported"
    PARTIALLY = "partially_supported"
    NO = "no_support"
@dataclass
class SelfRAGResult:
    """Outcome of one Self-RAG generation pass."""
    answer: str                                 # final answer text
    retrieved: bool                             # whether retrieval was triggered
    relevance: RelevanceJudgment | None = None  # critic's relevance verdict, if any
    support: SupportLevel | None = None         # faithfulness-check result, if any
    usefulness: int = 0                         # 1-5 usefulness rating (default 0)
    sources: list[dict] | None = None           # documents backing the answer
class SelfRAGPipeline:
    """Self-RAG pipeline: retrieval on demand with self-critique.

    Flow: (1) a critic decides whether retrieval is needed, (2) retrieved
    documents are filtered for relevance, (3) an answer is generated from
    the surviving documents, (4) the answer is checked for faithfulness and
    regenerated once with a stricter prompt if unsupported, (5) usefulness
    is rated on a 1-5 scale.
    """

    def __init__(self, llm_client, retriever, critic_llm=None):
        self.llm = llm_client
        self.retriever = retriever
        # A dedicated critic model is optional; fall back to the generator.
        self.critic = critic_llm or llm_client

    def generate(self, query: str) -> SelfRAGResult:
        """Run the full Self-RAG flow for ``query`` and return the result."""
        # Step 1: does this query need external knowledge at all?
        if self._decide_retrieval(query) == RetrieveDecision.NO:
            answer = self.llm.generate(f"请直接回答:{query}")
            return SelfRAGResult(answer=answer, retrieved=False, usefulness=3)
        # Step 2: retrieve candidate documents.
        docs = self.retriever.search(query, top_k=5)
        # Step 3: keep only documents the critic judges relevant.
        relevant_docs = [
            doc
            for doc in docs
            if self._judge_relevance(query, doc["content"])
            == RelevanceJudgment.RELEVANT
        ]
        if not relevant_docs:
            # Nothing usable retrieved: fall back to parametric knowledge.
            answer = self.llm.generate(f"请根据你的知识回答:{query}")
            return SelfRAGResult(
                answer=answer, retrieved=True,
                relevance=RelevanceJudgment.IRRELEVANT,
            )
        # Step 4: generate an answer grounded in the top relevant documents.
        context = "\n".join(d["content"] for d in relevant_docs[:3])
        answer = self.llm.generate(
            f"根据以下资料回答问题。\n资料:{context}\n问题:{query}\n回答:"
        )
        # Step 5: verify faithfulness; retry once with a stricter prompt.
        support = self._check_support(answer, context)
        if support == SupportLevel.NO:
            answer = self.llm.generate(
                f"严格根据以下资料回答,不要编造。\n资料:{context}\n问题:{query}\n回答:"
            )
            support = self._check_support(answer, context)
        return SelfRAGResult(
            answer=answer,
            retrieved=True,
            relevance=RelevanceJudgment.RELEVANT,
            support=support,
            usefulness=self._rate_usefulness(query, answer),
            sources=relevant_docs,
        )

    def _decide_retrieval(self, query: str) -> RetrieveDecision:
        """Ask the critic whether ``query`` needs external retrieval."""
        prompt = f"""判断以下问题是否需要查阅外部资料才能准确回答。
- 如果是常识或简单定义,回答 no
- 如果需要最新信息或专业细节,回答 yes
问题:{query}
判断(yes/no):"""
        response = self.critic.generate(prompt).strip().lower()
        if "yes" in response:
            return RetrieveDecision.YES
        # Anything that is not a clear "yes" skips retrieval.
        return RetrieveDecision.NO

    def _judge_relevance(self, query: str, doc_content: str) -> RelevanceJudgment:
        """Ask the critic whether ``doc_content`` is relevant to ``query``."""
        prompt = f"""判断以下文档是否与问题相关。
问题:{query}
文档:{doc_content[:500]}
判断(relevant/irrelevant):"""
        response = self.critic.generate(prompt).strip().lower()
        # BUG FIX: "relevant" is a substring of "irrelevant", so the original
        # substring test classified every "irrelevant" verdict as relevant,
        # defeating the filter. Check for the negative verdict first.
        if "irrelevant" in response:
            return RelevanceJudgment.IRRELEVANT
        if "relevant" in response:
            return RelevanceJudgment.RELEVANT
        return RelevanceJudgment.IRRELEVANT

    def _check_support(self, answer: str, context: str) -> SupportLevel:
        """Grade how well ``answer`` is supported by ``context``."""
        prompt = f"""判断回答是否完全基于提供的资料。
资料:{context[:500]}
回答:{answer}
判断(fully_supported/partially_supported/no_support):"""
        response = self.critic.generate(prompt).strip().lower()
        # "fully" and "partially" do not overlap, so substring tests are safe.
        if "fully" in response:
            return SupportLevel.FULLY
        if "partially" in response:
            return SupportLevel.PARTIALLY
        return SupportLevel.NO

    def _rate_usefulness(self, query: str, answer: str) -> int:
        """Rate the answer 1-5 via the critic; default to 3 on parse failure."""
        prompt = f"""给以下回答打分(1-5分)。
问题:{query}
回答:{answer}
分数:"""
        response = self.critic.generate(prompt).strip()
        # Scan for the first digit instead of assuming it is the first
        # character, so replies like "评分:4" still parse; same result as
        # before whenever the reply starts with the digit.
        digit = next((ch for ch in response if ch.isdigit()), None)
        if digit is None:
            return 3
        # Clamp into the 1-5 rating range.
        return max(1, min(5, int(digit)))

自适应检索策略

除了 Self-RAG,还有多种自适应检索策略:

graph TB A[自适应检索] --> B[Self-RAG] A --> C[CRAG 纠正性RAG] A --> D[Active RAG] A --> E[Adaptive RAG] B --> B1[模型自己判断是否需要检索] C --> C1[检索后评估、纠正不相关结果] D --> D1[多轮迭代、渐进式检索] E --> E1[路由选择检索策略] style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px

CRAG:纠正性 RAG

"""
Corrective RAG (CRAG) 实现
"""
from dataclasses import dataclass
@dataclass
class CRAGConfig:
    """Thresholds steering the CRAG correction strategy."""
    # Retrieval confidence at or above this value is trusted outright.
    confidence_threshold: float = 0.7
    # Confidence inside this (low, high) band triggers the blended strategy;
    # at or below the low bound, retrieval is discarded for a web fallback.
    ambiguous_range: tuple[float, float] = (0.3, 0.7)
class CorrectiveRAG:
    """Corrective RAG: retrieve, grade the retrieval, correct when weak."""

    def __init__(self, retriever, llm_client, config: CRAGConfig | None = None):
        self.retriever = retriever
        self.llm = llm_client
        self.config = config or CRAGConfig()

    def generate(self, query: str) -> dict:
        """Answer ``query``, correcting for poor retrieval quality."""
        # Initial retrieval followed by a confidence grade.
        docs = self.retriever.search(query, top_k=5)
        confidence = self._evaluate_retrieval(query, docs)
        print(f"检索置信度: {confidence:.2f}")
        # Pick a correction strategy from the confidence band.
        if confidence >= self.config.confidence_threshold:
            # Trusted retrieval: use the documents unchanged.
            action, context_docs = "correct", docs
        elif confidence <= self.config.ambiguous_range[0]:
            # Untrusted retrieval: discard it and rely on web search.
            action, context_docs = "incorrect", self._web_search_fallback(query)
        else:
            # In between: blend refined local documents with web results.
            action = "ambiguous"
            web_docs = self._web_search_fallback(query)
            context_docs = self._refine_documents(query, docs) + web_docs
        context = "\n".join(d.get("content", "")[:300] for d in context_docs[:5])
        answer = self.llm.generate(
            f"根据以下资料回答:\n{context}\n\n问题:{query}\n回答:"
        )
        return {"answer": answer, "action": action, "confidence": confidence}

    def _evaluate_retrieval(self, query: str, docs: list[dict]) -> float:
        """Average the scores of the top-3 documents (0.0 when empty)."""
        if not docs:
            return 0.0
        top_scores = [d.get("score", 0.5) for d in docs[:3]]
        return sum(top_scores) / len(top_scores)

    def _web_search_fallback(self, query: str) -> list[dict]:
        """Web-search stand-in used when local retrieval is not trusted."""
        print(f"  触发 Web 搜索兜底: {query}")
        return [{"content": f"Web search results for: {query}", "source": "web"}]

    def _refine_documents(self, query: str, docs: list[dict]) -> list[dict]:
        """Extract the query-relevant portion of each document via the LLM."""
        kept = []
        for doc in docs:
            prompt = (
                f"从以下文档中提取与问题相关的部分。\n问题:{query}"
                f"\n文档:{doc.get('content', '')[:500]}\n相关内容:"
            )
            excerpt = self.llm.generate(prompt)
            # Drop extractions too short to be useful.
            if len(excerpt.strip()) > 20:
                kept.append({"content": excerpt, "source": doc.get("source", "")})
        return kept

策略对比

策略 检索时机 质量控制 额外开销 特点
Naive RAG 每次都检索 简单但可能引入噪声
Self-RAG 模型自主决定 4种反思令牌 端到端训练
CRAG 每次检索 + 评估 置信度分层 Web 搜索兜底
Active RAG 迭代式检索 每轮评估是否继续 适合复杂推理
Adaptive RAG 路由选择 分类器判断 灵活路由

本章小结

主题 要点
Self-RAG 4种反思令牌控制检索-生成全流程
检索决策 不是所有查询都需要检索
忠实度验证 生成后检查是否有文档支持
CRAG 检索后评估置信度,低则 Web 兜底
实战建议 先基线 RAG,识别瓶颈后再引入自适应

下一章:多模态 RAG