图像生成工程实战
High Contrast
Dark Mode
Light Mode
Sepia
Forest
1 min read · 292 words

图像生成工程实战

理解图像生成模型的原理是一回事，在生产环境稳定运行是另一回事。本章聚焦工程侧：如何封装 API、管理提示词、控制质量，以及构建批量生成 Pipeline。

图像生成技术栈

graph TB
    A[图像生成需求] --> B{规模?}
    B -->|小规模/原型| C["API 方案<br/>DALL-E 3 / Stability AI"]
    B -->|中大规模| D["自托管方案<br/>Stable Diffusion"]
    B -->|企业级| E["混合方案<br/>API + 本地加速"]
    C --> F[低成本快速上线]
    D --> G["高控制度<br/>数据不出境"]
    E --> H[平衡成本与灵活性]
    style C fill:#c8e6c9,stroke:#43a047,stroke-width:2px
    style D fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
    style E fill:#fff9c4,stroke:#f9a825,stroke-width:2px

生产级图像生成 Pipeline

import base64
import hashlib
import json
import time
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Optional
class ImageModel(Enum):
    """Image-generation models a request can target.

    Each member's value is the string identifier of the model.
    """

    DALLE3 = "dall-e-3"
    STABLE_DIFFUSION_XL = "sdxl"
    FLUX_PRO = "flux-pro"
    MIDJOURNEY = "midjourney"
class ImageResolution(Enum):
    """Output sizes a request can ask for.

    Each value is a size string such as ``"1024x1024"``; judging by the
    landscape/portrait member names the format is presumably
    ``"<width>x<height>"``.
    """

    SQUARE_1024 = "1024x1024"
    LANDSCAPE_1792 = "1792x1024"
    PORTRAIT_1024 = "1024x1792"
@dataclass
class ImageGenerationRequest:
    """Parameters describing one image-generation request.

    Attributes:
        prompt: Text describing the desired image.
        negative_prompt: Text describing what the image should not contain.
        model: Model that should produce the image.
        resolution: Requested output size.
        n_images: Number of images requested.
        quality: Quality tier, ``"standard"`` or ``"hd"``.
        style: Rendering style, ``"vivid"`` or ``"natural"``.
        seed: Optional seed; ``None`` when not set.
    """

    prompt: str
    negative_prompt: str = ""
    model: ImageModel = ImageModel.DALLE3
    resolution: ImageResolution = ImageResolution.SQUARE_1024
    n_images: int = 1
    quality: str = "standard"  # "standard" / "hd"
    style: str = "vivid"  # "vivid" / "natural"
    seed: Optional[int] = None

    @property
    def cache_key(self) -> str:
        """Return a deterministic 16-hex-character cache key for this request.

        Every field takes part in the key, so two requests share a key only
        when all of their parameters are equal. The fields are serialized as
        a JSON array rather than concatenated: plain concatenation cannot
        tell ``("ab", "c")`` from ``("a", "bc")`` and would let unrelated
        requests collide.
        """
        payload = json.dumps(
            [
                self.prompt,
                self.negative_prompt,
                self.model.value,
                self.resolution.value,
                self.n_images,
                self.quality,
                self.style,
                self.seed,
            ]
        )
        # MD5 serves only as a short fingerprint here, not as a security
        # primitive; the flag keeps it usable on FIPS-restricted builds.
        digest = hashlib.md5(payload.encode("utf-8"), usedforsecurity=False)
        return digest.hexdigest()[:16]
@dataclass
class ImageGenerationResult:
    """Outcome of one image-generation request.

    Attributes:
        request_id: Identifier assigned to the request.
        prompt: Prompt that was used for generation.
        model: Model that produced the images.
        image_urls: URLs of the generated images.
        revised_prompt: Prompt after any rewriting by the model.
        generation_time_ms: Generation time in milliseconds.
        cost_usd: Cost of the request in US dollars.
        cached: Whether the result came from the cache; defaults to ``False``.
    """

    request_id: str
    prompt: str
    model: ImageModel
    image_urls: list[str]
    revised_prompt: str  # DALL-E 3 rewrites prompts automatically
    generation_time_ms: int
    cost_usd: float
    cached: bool = False
class ImageGenerationPipeline:
    """Image-generation pipeline with an on-disk result cache and cost stats.

    Each result is stored as ``<cache_key>.json`` inside ``cache_dir``. The
    provider call in ``generate`` is only sketched, so the returned image
    URLs are placeholders.
    """

    # Price in USD per generated image, keyed by model and then quality tier.
    COST_PER_IMAGE = {
        ImageModel.DALLE3: {"standard": 0.040, "hd": 0.080},
        ImageModel.STABLE_DIFFUSION_XL: {"standard": 0.005, "hd": 0.010},
        ImageModel.FLUX_PRO: {"standard": 0.055, "hd": 0.055},
    }
    # Per-image price used when a model/quality pair is missing from
    # COST_PER_IMAGE (ImageModel.MIDJOURNEY has no entry, for example).
    FALLBACK_COST_PER_IMAGE = 0.04
    # Lower-case substrings that cause a prompt to be rejected.
    BLOCKED_TERMS = ("nude", "naked", "explicit", "violence", "gore", "underage")
    # Prompts are cut to this many characters (DALL-E 3 allows at most 4000).
    MAX_PROMPT_CHARS = 4000

    def __init__(self, cache_dir: Path | None = None):
        """Create the pipeline and make sure the cache directory exists.

        Args:
            cache_dir: Directory for cached results; ``./image_cache`` when
                omitted or falsy.
        """
        self.cache_dir = cache_dir or Path("./image_cache")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # Kept for backward compatibility; nothing in this class reads it.
        self._cache: dict[str, ImageGenerationResult] = {}
        self._total_cost = 0.0
        self._request_count = 0

    def _try_cache(self, request: ImageGenerationRequest) -> ImageGenerationResult | None:
        """Return the cached result for ``request``, or ``None`` on a miss.

        A missing, unreadable, malformed or schema-incompatible cache file
        counts as a miss, so a damaged entry never breaks generation; the
        entry is simply regenerated and overwritten.
        """
        cache_path = self.cache_dir / f"{request.cache_key}.json"
        try:
            with open(cache_path, encoding="utf-8") as f:
                data = json.load(f)
            # The file stores the enum's value; restore the enum member so a
            # cached result has the same field types as a fresh one.
            data["model"] = ImageModel(data["model"])
            result = ImageGenerationResult(**data)
        except (OSError, ValueError, KeyError, TypeError):
            return None
        result.cached = True
        return result

    def _save_cache(self, request: ImageGenerationRequest, result: ImageGenerationResult) -> None:
        """Write ``result`` to the cache file that belongs to ``request``.

        The ``cached`` flag is not persisted; ``_try_cache`` sets it on load.
        """
        cache_path = self.cache_dir / f"{request.cache_key}.json"
        payload = {
            "request_id": result.request_id,
            "prompt": result.prompt,
            "model": result.model.value,
            "image_urls": result.image_urls,
            "revised_prompt": result.revised_prompt,
            "generation_time_ms": result.generation_time_ms,
            "cost_usd": result.cost_usd,
        }
        with open(cache_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False)

    def sanitize_prompt(self, prompt: str) -> str:
        """Validate and normalize a prompt before it is sent to a model.

        Args:
            prompt: Raw prompt text.

        Returns:
            The prompt cut to ``MAX_PROMPT_CHARS`` characters and stripped of
            surrounding whitespace.

        Raises:
            ValueError: If the prompt contains one of ``BLOCKED_TERMS``
                (case-insensitive).
        """
        lowered = prompt.lower()
        # NOTE(review): this is plain substring matching, so a harmless word
        # such as "explicitly" is rejected too.
        for term in self.BLOCKED_TERMS:
            if term in lowered:
                raise ValueError(f"提示词包含不允许的内容: '{term}'")
        return prompt[: self.MAX_PROMPT_CHARS].strip()

    def generate(self, request: ImageGenerationRequest) -> ImageGenerationResult:
        """Produce a result for ``request``, using the disk cache when possible.

        A cache hit is returned as-is and is not added to the statistics.
        Otherwise the prompt is sanitized, the (simulated) generation runs,
        the cost is recorded and the result is written to the cache.

        Raises:
            ValueError: If ``request.n_images`` is below 1 or the prompt is
                rejected by ``sanitize_prompt``.
        """
        if request.n_images < 1:
            raise ValueError(f"n_images must be >= 1, got {request.n_images}")

        cached = self._try_cache(request)
        if cached is not None:
            print(f"[Cache HIT] {request.cache_key}")
            return cached

        clean_prompt = self.sanitize_prompt(request.prompt)

        # A monotonic clock keeps the measured duration immune to wall-clock
        # adjustments.
        started = time.monotonic()
        # The real provider call belongs here, for example:
        #   response = openai.images.generate(
        #       model=request.model.value,
        #       prompt=clean_prompt,
        #       size=request.resolution.value,
        #       quality=request.quality,
        #       n=request.n_images,
        #   )
        # Until it is wired in, the result below is simulated.
        elapsed_ms = int((time.monotonic() - started) * 1000)

        per_image = self.COST_PER_IMAGE.get(request.model, {}).get(
            request.quality, self.FALLBACK_COST_PER_IMAGE
        )
        key = request.cache_key
        result = ImageGenerationResult(
            request_id=f"req_{key}",
            prompt=clean_prompt,
            model=request.model,
            # One URL per requested image, matching what is billed below.
            image_urls=[
                f"https://cdn.example.com/images/{key}_{index}.png"
                for index in range(request.n_images)
            ],
            revised_prompt=clean_prompt,
            generation_time_ms=elapsed_ms,
            cost_usd=per_image * request.n_images,
        )

        self._total_cost += result.cost_usd
        self._request_count += 1
        self._save_cache(request, result)
        return result

    @property
    def stats(self) -> dict:
        """Return request count, total cost and average cost per request.

        Only non-cached generations are counted. The average is 0 when no
        request has been generated yet.
        """
        return {
            "total_requests": self._request_count,
            "total_cost_usd": round(self._total_cost, 4),
            "avg_cost_usd": round(self._total_cost / max(self._request_count, 1), 4),
        }
# Usage example: build a pipeline with the default cache directory, submit a
# single HD request and print the outcome together with the running stats.
pipeline = ImageGenerationPipeline()
req = ImageGenerationRequest(
    prompt="A minimalist product photo of a wireless headphone on white background, professional studio lighting, 8K",
    quality="hd",
    resolution=ImageResolution.SQUARE_1024,
    model=ImageModel.DALLE3,
)
result = pipeline.generate(req)
print(
    f"生成完成: {result.image_urls[0]}",
    f"耗时: {result.generation_time_ms}ms, 费用: ${result.cost_usd:.3f}",
    f"统计: {pipeline.stats}",
    sep="\n",
)

提示词工程对照表

| 目的 | 推荐关键词 | 示例 |
| --- | --- | --- |
| 产品摄影 | studio lighting, white background, 8K, professional | 白底产品图 |
| 艺术插画 | digital art, concept art, detailed, trending on ArtStation | 游戏风格插画 |
| 写实照片 | photorealistic, DSLR, natural lighting, RAW photo | 真实感人物 |
| UI 设计 | flat design, minimal, clean, UI mockup, Figma style | 应用截图 |
| 负面提示词 | blurry, deformed, extra limbs, watermark, text | 通用负面提示 |

本章小结

下一章:目标检测与图像分类