电商多模态应用
电商是多模态 AI 落地最密集的行业——商品识别、以图搜货、智能客服、虚拟试穿,每一个功能都直接转化为 GMV 增长和客户体验提升。
电商多模态应用地图
```mermaid
graph TB
    A[电商多模态应用] --> B[商品侧<br/>Product]
    A --> C[搜索侧<br/>Discovery]
    A --> D[客服侧<br/>Support]
    A --> E[营销侧<br/>Marketing]
    B --> B1[商品图自动标注<br/>质检识别<br/>SKU 匹配]
    C --> C1[以图搜货<br/>多模态推荐<br/>视觉相似商品]
    D --> D1[图片识别客服<br/>视频拆包检测<br/>退货损坏判定]
    E --> E1[AI 商品图生成<br/>模特试穿合成<br/>直播内容审核]
    style A fill:#ede7f6,stroke:#5e35b1,stroke-width:2px
    style C fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
    style E fill:#c8e6c9,stroke:#43a047,stroke-width:2px
```
以图搜货 Pipeline
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
import json
@dataclass
class ProductListing:
    """A product listing that can be indexed and returned by image search."""

    sku_id: str
    title: str
    category: str
    price: float
    image_url: str
    # Image embedding vector; empty until one is assigned or computed.
    embedding: list[float] = field(default_factory=list)
    # Structured attributes of the product (e.g. filled in by auto-tagging).
    attributes: dict = field(default_factory=dict)

    def to_search_result(self, similarity: float) -> dict:
        """Return the listing as a search-result dict.

        Args:
            similarity: Similarity score between the query and this listing.

        Returns:
            A dict with ``sku_id``, ``title``, ``price``, ``image_url``,
            ``category`` and ``similarity_score`` (rounded to 4 decimals).
            The embedding and attributes are deliberately not included.
        """
        return {
            "sku_id": self.sku_id,
            "title": self.title,
            "price": self.price,
            "image_url": self.image_url,
            "similarity_score": round(similarity, 4),
            "category": self.category,
        }
class ImageSearchEngine:
    """Search-by-image engine for product listings.

    Workflow:
        1. Product images are embedded and added to the index.
        2. A query image is embedded and used for vector retrieval.
        3. Visually similar products are returned as search-result dicts.

    ``embedder`` and ``vector_store`` are both optional. Without an embedder
    the engine runs in demo mode and returns simulated scores; with an
    embedder but no vector store it ranks the in-memory index directly.
    """

    def __init__(self, embedder=None, vector_store=None):
        """Store the optional collaborators and start with an empty index.

        Args:
            embedder: Object exposing ``embed_image(...)`` whose result has an
                ``embedding`` attribute, or ``None`` for demo mode.
            vector_store: Object exposing ``add(record)`` and
                ``search(query_record, top_k=...)`` (yielding
                ``(record, score)`` pairs), or ``None``.
        """
        self.embedder = embedder
        self.vector_store = vector_store
        self.index: list[ProductListing] = []

    def index_product(self, product: ProductListing) -> None:
        """Add a product to the in-memory index and, if present, the vector store.

        The product's embedding is computed from its image only when an
        embedder is configured and the product does not already carry one.
        """
        if self.embedder is not None and not product.embedding:
            record = self.embedder.embed_image(product.image_url)
            product.embedding = record.embedding
        self.index.append(product)

        # ``is not None`` rather than truthiness: a store that defines
        # ``__len__`` is falsy while empty and would never receive records.
        if self.vector_store is not None:
            # Imported lazily so the engine works without this package when
            # no vector store is configured.
            from multimodal_ai_guide.embedding import EmbeddingRecord

            self.vector_store.add(EmbeddingRecord(
                id=product.sku_id,
                modality="image",
                source=product.image_url,
                embedding=product.embedding,
                metadata={"sku_id": product.sku_id, "title": product.title},
            ))

    def search_by_image(
        self,
        query_image_url: str,
        top_k: int = 10,
        category_filter: Optional[str] = None,
    ) -> list[dict]:
        """Return up to ``top_k`` products visually similar to the query image.

        Args:
            query_image_url: URL of the image uploaded by the user.
            top_k: Maximum number of results; values <= 0 yield ``[]``.
            category_filter: If given (and non-empty), only products whose
                ``category`` equals it are returned. Applied in every mode,
                including demo mode.

        Returns:
            A list of dicts produced by ``ProductListing.to_search_result``.
        """
        if top_k <= 0:
            return []

        def matches(product) -> bool:
            return not category_filter or product.category == category_filter

        if self.embedder is None:
            # Demo mode: no real retrieval, scores are simulated and simply
            # decrease with index order.
            candidates = [p for p in self.index if matches(p)]
            return [
                p.to_search_result(0.95 - i * 0.03)
                for i, p in enumerate(candidates[:top_k])
            ]

        # NOTE(review): the meaning of the second positional argument
        # ("query") is defined by the embedder - confirm against its API.
        query_record = self.embedder.embed_image(query_image_url, "query")

        if self.vector_store is not None:
            # Over-fetch so that category filtering still leaves enough hits.
            raw_results = self.vector_store.search(query_record, top_k=top_k * 2)
            by_sku = {p.sku_id: p for p in self.index}
            scored = [(by_sku.get(record.id), score) for record, score in raw_results]
        else:
            # No vector store configured: rank the in-memory index instead of
            # failing on ``None.search``.
            scored = self._rank_in_memory(query_record.embedding)

        results = [
            product.to_search_result(score)
            for product, score in scored
            # Store hits whose id is not in the local index are dropped.
            if product is not None and matches(product)
        ]
        return results[:top_k]

    def _rank_in_memory(self, query_embedding) -> list[tuple]:
        """Rank indexed products by cosine similarity to ``query_embedding``.

        Products without an embedding are skipped. Returns ``(product, score)``
        pairs sorted by descending score; a zero-norm vector scores ``0.0``.
        """
        import math

        query_norm = math.sqrt(sum(x * x for x in query_embedding))
        scored = []
        for product in self.index:
            vector = product.embedding
            if len(vector) == 0:
                continue
            norm = query_norm * math.sqrt(sum(y * y for y in vector))
            # zip() stops at the shorter vector if the dimensions differ.
            dot = sum(x * y for x, y in zip(query_embedding, vector))
            scored.append((product, dot / norm if norm else 0.0))
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored
class ProductImageTagger:
    """Automatic attribute tagging for product images."""

    # Attributes to extract per category; categories not listed here yield
    # no attributes at all.
    STANDARD_ATTRIBUTES = {
        "fashion": ["颜色", "材质", "版型", "领型", "袖长", "适合场合"],
        "electronics": ["品牌", "型号", "存储容量", "颜色", "接口类型"],
        "furniture": ["材质", "颜色", "风格", "尺寸范围", "适合空间"],
    }

    def __init__(self, vision_llm=None):
        """Keep a reference to the optional vision-capable LLM client."""
        self.llm = vision_llm

    def _build_prompt(self, attributes_to_extract: list[str], existing_title: str) -> str:
        """Build the extraction prompt asking for the attributes as JSON."""
        return f"""分析这张商品图片,提取以下属性:{', '.join(attributes_to_extract)}
已知商品标题:{existing_title}
请以 JSON 格式返回,例如:{{"颜色": "米白色", "材质": "纯棉", ...}}
如果无法确定某属性,值设为 "未知"。"""

    def extract_attributes(
        self,
        image_url: str,
        category: str,
        existing_title: str = "",
    ) -> dict[str, str]:
        """Extract structured attributes for one product image.

        A real implementation would call a vision-capable LLM; that call is
        not wired up yet, so every attribute currently maps to a placeholder.

        Args:
            image_url: URL of the product image (not used by the placeholder).
            category: Key into ``STANDARD_ATTRIBUTES``.
            existing_title: Known product title, included in the prompt.

        Returns:
            A dict mapping each attribute of the category to a placeholder
            value; empty when the category is unknown.
        """
        attributes_to_extract = self.STANDARD_ATTRIBUTES.get(category, [])

        if self.llm is not None:
            # TODO: send this prompt together with the image to the vision
            # model and parse its JSON reply. Until then the placeholder
            # result below is returned in all cases.
            prompt = self._build_prompt(attributes_to_extract, existing_title)  # noqa: F841

        # Demo result.
        return {attr: "自动识别中" for attr in attributes_to_extract}

    def batch_tag(
        self,
        products: list[ProductListing],
        category: Optional[str] = None,
    ) -> list[ProductListing]:
        """Tag a batch of products in place and return the same list.

        Args:
            products: Listings to tag; their ``attributes`` dicts are updated.
            category: Category applied to every product. When omitted, each
                product's own ``category`` is used, so mixed batches are
                tagged with the right attribute set.

        Progress is printed after every 10 processed products.
        """
        total = len(products)
        for done, product in enumerate(products, start=1):
            attrs = self.extract_attributes(
                product.image_url, category or product.category, product.title
            )
            product.attributes.update(attrs)
            if done % 10 == 0:
                print(f"[标注] 已处理 {done}/{total}")
        return products
# E-commerce usage example.
search_engine = ImageSearchEngine()

# Build a small demo catalogue.
demo_products = [
    ProductListing("SKU001", "复古花纹棉麻连衣裙", "fashion", 299.0, "https://cdn.example.com/p001.jpg"),
    ProductListing("SKU002", "休闲宽松亚麻长裙", "fashion", 259.0, "https://cdn.example.com/p002.jpg"),
    ProductListing("SKU003", "碎花纯棉短袖衫", "fashion", 189.0, "https://cdn.example.com/p003.jpg"),
]
for p in demo_products:
    p.embedding = [0.1] * 512  # placeholder vector standing in for a real embedding
    search_engine.index_product(p)

# No embedder is configured, so this query goes through the engine's demo mode.
results = search_engine.search_by_image("https://cdn.example.com/user_upload.jpg", top_k=5)
print(f"以图搜货结果: {len(results)} 件相似商品")
for r in results:
    print(f"  [{r['sku_id']}] {r['title']} — ¥{r['price']} (相似度: {r['similarity_score']})")
电商多模态应用 ROI 评估
| 应用场景 | 实施成本 | 预期收益 | 建设周期 |
|---|---|---|---|
| 以图搜货 | 中 | GMV +2–5% | 2–4周 |
| 商品自动标注 | 低 | 人工成本 -70% | 1–2周 |
| 智能客服识图 | 中 | 客服效率 +40% | 3–6周 |
| AI 商品图生成 | 高 | 拍摄成本 -50% | 6–12周 |
| 虚拟试穿 | 很高 | 退货率 -15% | 3–6月 |
本章小结
- 以图搜货是电商 ROI 最高的多模态应用——技术成熟,快速变现
- 商品自动标注先从高频类目开始——时装类属性最多,收益最大
- 退货识别需要法律层面的合规处理——AI 决策不能完全代替人工
- 虚拟试穿需要高质量人体三维数据——不要低估工程复杂度
- 先做 MVP,再迭代——以图搜货上线 1–2 周就能验证价值
下一章:医疗影像与文档AI