医疗影像与文档AI
High Contrast
Dark Mode
Light Mode
Sepia
Forest
2 min read · 446 words

医疗影像与文档AI

医疗行业是多模态 AI 最具潜力也最需要谨慎的应用领域——X 光影像识别、病历 OCR、临床文档结构化,每一步都涉及患者安全和监管合规。

医疗 AI 数据流与合规门控

graph LR
    A[原始医疗数据<br/>影像/报告/病历] --> B{数据脱敏网关<br/>PHI 检测}
    B -->|通过| C[合规数据湖<br/>HIPAA/PDPA 存储]
    B -->|失败| LOG[合规审计日志<br/>告警通知]
    C --> D[影像处理<br/>X-Ray / CT / MRI]
    C --> E[文档处理<br/>OCR + NER]
    D --> F[AI 辅助诊断<br/>辅助+不代替医生]
    E --> G[结构化病历<br/>FHIR 格式输出]
    F --> H{人工审核关<br/>执照医生确认}
    G --> H
    H --> I[HIS / EMR 系统]
    style B fill:#ffcdd2,stroke:#c62828,stroke-width:2px
    style H fill:#fff9c4,stroke:#f9a825,stroke-width:2px
    style C fill:#e8f5e9,stroke:#388e3c,stroke-width:2px

医疗影像分析工作流

from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
import hashlib
import datetime
class ImagingModality(Enum):
    """Imaging modalities known to the workflow.

    Each member's value is the modality's string label.
    """

    XRAY = "X-Ray"
    CT = "CT"
    MRI = "MRI"
    ULTRASOUND = "Ultrasound"
    PATHOLOGY = "Pathology-Slide"
class FindingSeverity(Enum):
    """Severity grades for a finding, declared from normal up to critical.

    Members are plain ``Enum`` values, so they compare by identity/equality
    only; they are not orderable with ``<`` / ``>``.
    """

    NORMAL = "normal"
    MILD = "mild"
    MODERATE = "moderate"
    SEVERE = "severe"
    CRITICAL = "critical"
@dataclass
class MedicalImage:
    """Medical image record (intended to hold already de-identified data)."""

    # Identifier assigned after de-identification (non-PHI).
    anonymized_id: str
    modality: ImagingModality
    body_part: str
    # Date only, formatted yyyy-mm-dd; the time of day is left out to lower
    # the re-identification risk.
    acquisition_date: str
    image_path: str
    pixel_spacing_mm: Optional[float] = None
    # De-identified study UID.
    study_uid: str = ""

    @classmethod
    def from_dicom(cls, dicom_path: str, anonymizer=None) -> "MedicalImage":
        """Build a ``MedicalImage`` for the DICOM file at ``dicom_path``.

        This is a demo stub: the file is not opened and ``anonymizer`` is
        not used. A real implementation is expected to use pydicom together
        with an anonymizer library.

        Args:
            dicom_path: Path of the DICOM file; also stored as ``image_path``.
            anonymizer: Currently ignored.

        Returns:
            A record whose ``anonymized_id`` is the first 16 hex characters
            of the SHA-256 digest of ``dicom_path``, with the modality fixed
            to X-Ray, the body part fixed to ``"chest"`` and the acquisition
            date set to today's date.
        """
        # NOTE(review): the ID is derived from the path text, and the raw path
        # is kept in image_path - confirm paths never embed patient details.
        path_digest = hashlib.sha256(dicom_path.encode()).hexdigest()
        today = datetime.date.today()
        return cls(
            anonymized_id=path_digest[:16],
            modality=ImagingModality.XRAY,
            body_part="chest",
            acquisition_date=today.isoformat(),
            image_path=dicom_path,
        )
@dataclass
class DiagnosticFinding:
    """A single finding reported by the AI model for one image."""

    # Kind of finding, e.g. "肺结节" (lung nodule).
    finding_type: str
    severity: FindingSeverity
    # Model confidence, documented as lying in the range 0-1 (not validated here).
    confidence: float
    # Free-text description of where the finding is.
    location: str
    # Optional region box, e.g. {"x": 120, "y": 340, "w": 60, "h": 80}.
    bounding_region: Optional[dict] = None
    # Suggested follow-up action.
    recommendation: str = ""
    # Human review is required unless explicitly switched off.
    requires_human_review: bool = True
class MedicalImageAnalyzer:
    """AI-assisted image analyzer (decision support, not autonomous diagnosis).

    Compliance principles this class is meant to follow:
      1. Every image must pass PHI de-identification first (not checked here).
      2. Findings below the confidence threshold must be reviewed by a human.
      3. Output avoids definitive diagnostic wording ("suspected", not
         "confirmed"); this depends on the model's wording and is not
         enforced by this class.
      4. A complete audit trail: one entry per analysis, including failed ones.
    """

    # Findings whose confidence is not at least this value are forced to review.
    CONFIDENCE_REVIEW_THRESHOLD = 0.80
    MODEL_VERSION = "medvision-v2.1"

    def __init__(self, model_client=None, audit_logger=None):
        """Store the collaborators and reset the analysis counter.

        Args:
            model_client: Vision-model client; kept on ``self.model`` (the
                demo stub in ``_call_vision_model`` does not use it).
            audit_logger: Optional object exposing ``log(entry: dict)``; when
                falsy, no audit entry is emitted.
        """
        self.model = model_client
        self.audit = audit_logger
        self.analysis_count = 0

    def analyze(self, image: MedicalImage) -> list[DiagnosticFinding]:
        """Run the model on ``image`` and apply the compliance post-processing.

        Args:
            image: The image to analyze; its ``anonymized_id`` and
                ``modality`` are written to the audit entry.

        Returns:
            The model's findings (the same objects, updated in place). A
            finding whose confidence is not at least
            ``CONFIDENCE_REVIEW_THRESHOLD`` is flagged for human review and
            has its recommendation replaced; a SEVERE or CRITICAL finding is
            flagged and has its recommendation prefixed with "[紧急] ".

        Raises:
            Exception: Whatever ``_call_vision_model`` raises is re-raised
                after an audit entry carrying an ``"error"`` key is logged.
        """
        self.analysis_count += 1

        # Aware "now" replaces the deprecated utcnow(); tzinfo is dropped so
        # the rendered string keeps the original "<iso-timestamp>Z" shape.
        started_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        audit_entry = {
            "timestamp": started_at.isoformat() + "Z",
            "anonymized_id": image.anonymized_id,
            "modality": image.modality.value,
            "model": self.MODEL_VERSION,
        }

        try:
            raw_findings = self._call_vision_model(image)
        except Exception as exc:
            # A failed analysis must still leave an audit trail. Only the
            # exception class name is recorded, since the message text could
            # echo input data.
            audit_entry["error"] = type(exc).__name__
            if self.audit:
                self.audit.log(audit_entry)
            raise

        processed = []
        for finding in raw_findings:
            # Written as "not >=" rather than "<" so that a NaN confidence
            # (for which every comparison is False) fails closed and is
            # still routed to human review.
            if not finding.confidence >= self.CONFIDENCE_REVIEW_THRESHOLD:
                finding.requires_human_review = True
                finding.recommendation = "置信度不足,请放射科医生确认"
            # Severe or critical findings always go to human review.
            if finding.severity in (FindingSeverity.SEVERE, FindingSeverity.CRITICAL):
                finding.requires_human_review = True
                finding.recommendation = f"[紧急] {finding.recommendation}"
            processed.append(finding)

        audit_entry["findings_count"] = len(processed)
        audit_entry["human_review_required"] = any(
            finding.requires_human_review for finding in processed
        )
        if self.audit:
            self.audit.log(audit_entry)
        return processed

    def _call_vision_model(self, image: MedicalImage) -> list[DiagnosticFinding]:
        """Return the model's findings for ``image`` (demo stub).

        The stub ignores ``image`` and ``self.model`` and always returns one
        fixed mild finding with confidence 0.73. A real implementation would
        call a medical AI platform API here.
        """
        return [
            DiagnosticFinding(
                finding_type="疑似肺结节",
                severity=FindingSeverity.MILD,
                confidence=0.73,
                location="右肺上叶",
                recommendation="建议 6 个月后复查 CT",
            )
        ]
class ClinicalDocumentProcessor:
    """Convert a clinical document image into a structured dictionary.

    The OCR and NER steps are demo stubs that return fixed values; the
    engines passed to the constructor are stored but not called.
    """

    # Chinese section heading -> key used under "sections" in the output.
    FHIR_MAPPING = {
        "主诉": "chief_complaint",
        "现病史": "history_present_illness",
        "体格检查": "physical_exam",
        "诊断": "diagnosis",
        "用药信息": "medications",
    }

    def __init__(self, ocr_engine=None, ner_model=None):
        """Keep references to the OCR engine and the NER model."""
        self.ocr = ocr_engine
        self.ner = ner_model

    def process(self, document_image_path: str) -> dict:
        """Run the pipeline: OCR, then entity extraction, then mapping.

        Args:
            document_image_path: Path of the document image handed to OCR.

        Returns:
            A dict with ``resourceType``, ``sections`` (one entry per
            ``FHIR_MAPPING`` value, ``""`` when the heading was not
            extracted), ``medications`` and ``diagnoses`` (``[]`` when
            absent from the extracted entities).
        """
        recognized_text = self._run_ocr(document_image_path)
        extracted = self._extract_entities(recognized_text)

        # Every mapped heading gets a slot, even when nothing was extracted.
        sections = {
            target_key: extracted.get(source_heading, "")
            for source_heading, target_key in self.FHIR_MAPPING.items()
        }
        return {
            "resourceType": "ClinicalDocument",
            "sections": sections,
            "medications": extracted.get("medications", []),
            "diagnoses": extracted.get("diagnoses", []),
        }

    def _run_ocr(self, path: str) -> str:
        """Return the OCR text for ``path`` (stub: fixed placeholder text)."""
        return "(OCR 演示文本 — 实际接入专业医疗 OCR)"

    def _extract_entities(self, text: str) -> dict:
        """Return medical entities found in ``text`` (stub: fixed sample)."""
        sample_entities = {
            "主诉": "咳嗽伴胸痛 3 天",
            "medications": ["阿莫西林胶囊 500mg tid × 5d"],
            "diagnoses": ["急性支气管炎(ICD-10: J20.9)"],
        }
        return sample_entities

医疗 AI 合规对照表

| 地区 | 主要合规框架 | 核心要求 | AI 医疗器械资质 |
| --- | --- | --- | --- |
| 中国大陆 | NMPA《AI 医疗器械注册技术审查指导原则》 | 临床试验数据、可解释性报告 | 三类医疗器械注册 |
| 美国 | FDA 510(k) / De Novo | 性能测试、偏差报告 | SaMD 分类 |
| 欧盟 | MDR + AI Act | CE 认证、风险分级 | Class IIa/IIb |
| 马来西亚 | MDA《医疗器械法令》 | 本地临床验证数据 | Class C/D 注册 |
| 新加坡 | HSA《SaMD 指南》 | 真实世界证据(RWE) | 需上市前审批 |

关键原则:医疗 AI 的输出必须经过有执照医师的人工审核,系统应明确标注其为"辅助诊断工具"而非"独立诊断系统"。

本章小结

下一章:多模态AI产品设计