Python SEO 自动化脚本
手动做 SEO 审计一个网站要几天——用 Python 自动化,几分钟出报告。
SEO 自动化体系
graph TD
DATA[数据获取] --> ANALYZE[分析处理]
ANALYZE --> REPORT[报告生成]
REPORT --> ACTION[行动清单]
DATA --> GSC[GSC API]
DATA --> CRAWL[爬虫抓取]
DATA --> SITEMAP[Sitemap解析]
ANALYZE --> TECH[技术问题]
ANALYZE --> CONTENT[内容质量]
ANALYZE --> RANKING[排名变化]
REPORT --> CSV[CSV导出]
REPORT --> ALERT[告警通知]
style DATA fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
style ANALYZE fill:#c8e6c9,stroke:#388e3c,stroke-width:2px
SEO 自动化脚本套件
"""
SEO 自动化脚本套件(演示版,使用模拟数据)
"""
import re
from collections import Counter
from dataclasses import dataclass, field
from urllib.parse import urlparse
# ──────────────────────────────────────────
# 1. Sitemap 分析器
# ──────────────────────────────────────────
@dataclass
class SitemapStats:
    """Aggregated statistics for one sitemap, produced by SitemapAnalyzer.analyze()."""

    # Total number of URL entries examined.
    total_urls: int
    # First path segment -> count; "/" is bucketed as "homepage".
    url_types: dict[str, int]
    # Mean of the numeric priority values (0 when there are no URLs).
    avg_priority: float
    # changefreq value -> count ("unknown" when the field is absent).
    changefreq_dist: dict[str, int]
    # Human-readable problem descriptions (the analyzer caps this at 5).
    issues: list[str] = field(default_factory=list)
class SitemapAnalyzer:
    """Analyze the structure of an XML sitemap given as pre-parsed URL records."""

    @staticmethod
    def analyze(urls: list[dict]) -> SitemapStats:
        """Summarize a list of sitemap URL entries.

        Args:
            urls: entries shaped like
                ``{"url": "...", "priority": 0.8, "changefreq": "weekly", "lastmod": "..."}``.
                ``priority`` may be a number or a numeric string; ``changefreq``
                and ``lastmod`` are optional.

        Returns:
            SitemapStats with the URL-type distribution, changefreq
            distribution, mean priority, and up to 5 detected issues.
        """
        type_map: Counter[str] = Counter()
        freq_map: Counter[str] = Counter()
        priorities: list[float] = []
        issues: list[str] = []
        for item in urls:
            # Classify by first path segment; the bare root path counts as homepage.
            path = urlparse(item["url"]).path
            first_segment = path.strip("/").split("/")[0]
            type_map[first_segment or "homepage"] += 1
            # changefreq distribution ("unknown" when the tag is missing).
            freq_map[item.get("changefreq", "unknown")] += 1
            # Priorities may arrive as strings (e.g. "0.8") — normalize to float.
            priorities.append(float(item.get("priority", 0.5)))
            # A missing lastmod hampers crawl scheduling — flag it.
            if not item.get("lastmod"):
                issues.append(f"缺少 lastmod: {item['url'][:50]}")
        return SitemapStats(
            total_urls=len(urls),
            url_types=dict(type_map),
            avg_priority=sum(priorities) / len(priorities) if priorities else 0.0,
            changefreq_dist=dict(freq_map),
            issues=issues[:5],  # cap the report at 5 issues
        )
# ──────────────────────────────────────────
# 2. 标题/Meta 批量审计器
# ──────────────────────────────────────────
@dataclass
class PageSEOIssues:
    """Audit result for a single page, produced by MetaAuditor.audit()."""

    # Page URL (relative path or absolute).
    url: str
    # Contents of the <title> tag ("" when missing).
    title: str
    # Contents of the meta description ("" when missing).
    meta_desc: str
    # Number of H1 elements found on the page.
    h1_count: int
    # Detected problems, each prefixed with a ❌/⚠️ severity marker.
    issues: list[str] = field(default_factory=list)
class MetaAuditor:
    """Batch-audit page titles and meta descriptions."""

    @staticmethod
    def audit(pages: list[dict]) -> list[PageSEOIssues]:
        """Check each page's title, meta description, and H1 usage.

        Duplicate titles/descriptions are detected across the whole batch.
        Pages with the most problems come first in the returned list.
        """
        findings: list[PageSEOIssues] = []
        title_owner: dict[str, str] = {}  # title text -> first URL that used it
        meta_owner: dict[str, str] = {}   # meta text  -> first URL that used it
        for entry in pages:
            problems: list[str] = []
            page_url = entry["url"]
            page_title = entry.get("title", "")
            description = entry.get("meta_description", "")
            h1s = entry.get("h1_count", 0)

            # --- title: presence, length, duplication ---
            title_len = len(page_title)
            if not page_title:
                problems.append("❌ 缺少 Title 标签")
            elif title_len < 30:
                problems.append(f"⚠️ 标题过短 ({title_len} 字符,建议 50–60)")
            elif title_len > 65:
                problems.append(f"⚠️ 标题过长 ({title_len} 字符,可能被截断)")
            if page_title in title_owner:
                problems.append(f"❌ 重复标题 (与 {title_owner[page_title]})")
            elif page_title:
                title_owner[page_title] = page_url

            # --- meta description: presence, length, duplication ---
            meta_len = len(description)
            if not description:
                problems.append("⚠️ 缺少 Meta Description")
            elif meta_len < 70:
                problems.append(f"⚠️ Meta 过短 ({meta_len} 字符,建议 120–155)")
            elif meta_len > 160:
                problems.append(f"⚠️ Meta 过长 ({meta_len} 字符,可能被截断)")
            if description and description in meta_owner:
                problems.append(f"❌ 重复 Meta (与 {meta_owner[description]})")
            elif description:
                meta_owner[description] = page_url

            # --- H1: exactly one expected ---
            if h1s == 0:
                problems.append("❌ 缺少 H1 标签")
            elif h1s > 1:
                problems.append(f"⚠️ 多个 H1 ({h1s} 个,建议仅 1 个)")

            findings.append(PageSEOIssues(page_url, page_title, description, h1s, problems))
        # Worst offenders first.
        findings.sort(key=lambda rec: len(rec.issues), reverse=True)
        return findings
# ──────────────────────────────────────────
# 3. 排名变化追踪器
# ──────────────────────────────────────────
@dataclass
class RankingChange:
    """One keyword's movement between two ranking snapshots."""

    keyword: str
    current_pos: float
    prev_pos: float
    url: str

    @property
    def delta(self) -> float:
        """Position change; positive means the keyword moved up."""
        return self.prev_pos - self.current_pos

    @property
    def status(self) -> str:
        """Emoji label summarizing the size and direction of the move."""
        movement = self.delta
        if movement >= 5:
            return "🚀 大幅上升"
        if movement >= 1:
            return "📈 上升"
        if movement <= -5:
            return "🔴 大幅下降"
        if movement <= -1:
            return "📉 下降"
        return "➡️ 稳定"
class RankingTracker:
    """Summarize ranking movement across a keyword set."""

    @staticmethod
    def analyze(changes: list[RankingChange]) -> dict:
        """Bucket keyword movements into rising/falling/stable and pick highlights."""
        rising: list[RankingChange] = []
        falling: list[RankingChange] = []
        stable: list[RankingChange] = []
        # Single pass: a ±3-position swing separates real movement from noise.
        for change in changes:
            if change.delta >= 3:
                rising.append(change)
            elif change.delta <= -3:
                falling.append(change)
            else:
                stable.append(change)
        worst_first = sorted(falling, key=lambda c: c.delta)
        best_first = sorted(rising, key=lambda c: c.delta, reverse=True)
        return {
            "总关键词数": len(changes),
            "上升 (≥3位)": len(rising),
            "下降 (≥3位)": len(falling),
            "稳定": len(stable),
            "需关注": worst_first[:5],
            "值得庆祝": best_first[:3],
        }
# ──────────────────────────────────────────
# Demo (mock data)
# ──────────────────────────────────────────
# -- Sitemap analysis --
sitemap_urls = [
    {"url": "https://example.com/", "priority": "1.0", "changefreq": "daily", "lastmod": "2026-03-01"},
    {"url": "https://example.com/blog/seo-guide", "priority": "0.8", "changefreq": "weekly", "lastmod": ""},
    {"url": "https://example.com/blog/keyword-research", "priority": "0.8", "changefreq": "weekly", "lastmod": "2026-02-15"},
    {"url": "https://example.com/products/widget", "priority": "0.6", "changefreq": "monthly", "lastmod": "2026-01-10"},
]
sitemap_stats = SitemapAnalyzer.analyze(sitemap_urls)
print("=== Sitemap 分析 ===")
print(f" 总 URL: {sitemap_stats.total_urls} 平均优先级: {sitemap_stats.avg_priority:.2f}")
print(f" URL 类型分布: {sitemap_stats.url_types}")
print(f" 问题: {sitemap_stats.issues}")

# -- Batch title/meta audit --
pages = [
    {"url": "/", "title": "首页 — MyBrand", "meta_description": "我们提供最好的SEO服务,联系我们了解详情。", "h1_count": 1},
    {"url": "/blog/seo", "title": "SEO", "meta_description": "", "h1_count": 2},
    {"url": "/blog/content", "title": "内容营销完整指南:从策略到执行(2026版)提升自然流量的核心方法全解析", "meta_description": "内容营销是提升自然流量的关键策略,本文详细介绍从关键词研究到内容分发的完整流程。", "h1_count": 1},
    {"url": "/blog/links", "title": "SEO", "meta_description": "我们提供最好的SEO服务,联系我们了解详情。", "h1_count": 0},
]
print("\n=== Meta 批量审计 ===")
for page_result in MetaAuditor.audit(pages):
    if page_result.issues:
        print(f"\n {page_result.url}:")
        for problem in page_result.issues:
            print(f" {problem}")

# -- Ranking movement --
changes = [
    RankingChange("SEO工具推荐", 4.2, 8.5, "/blog/seo-tools"),
    RankingChange("关键词研究方法", 2.1, 1.8, "/blog/keyword"),
    RankingChange("外链建设技巧", 15.3, 9.8, "/blog/backlinks"),
    RankingChange("技术SEO审计", 6.7, 3.2, "/blog/tech-seo"),
]
ranking_report = RankingTracker.analyze(changes)
print(f"\n=== 排名变化报告 ===")
print(f" 上升: {ranking_report['上升 (≥3位)']} 下降: {ranking_report['下降 (≥3位)']} 稳定: {ranking_report['稳定']}")
for mover in ranking_report["需关注"]:
    print(f" {mover.status} {mover.keyword}: P{mover.prev_pos:.0f} → P{mover.current_pos:.0f}")
SEO 自动化工具栈
| 工具 | 用途 | Python 库 |
|---|---|---|
| Screaming Frog API | 全站爬取 | requests |
| Google Search Console API | 排名/点击数据 | google-api-python-client |
| Ahrefs API | 外链/关键词 | requests |
| BeautifulSoup | 页面内容解析 | beautifulsoup4 |
| pandas | 数据分析/报告 | pandas |
| schedule | 定时执行脚本 | schedule |
行动清单
- [ ] 用 Python + GSC API 搭建周报自动生成器:输出本周排名变化 Top10 上升/下降
- [ ] 编写 Meta 标题批量审计脚本,连接 Screaming Frog 导出 CSV 自动识别重复/过长标题
- [ ] 配置 schedule 每周日自动运行 Sitemap 新增 URL 统计,识别最近7天新发布内容
- [ ] 对排名 4–10 且曝光量 > 1000 的关键词,用脚本自动标记为"摘要优化候选"
- [ ] 搭建 Slack/邮件告警:当品牌词排名超出 Top3 或出现新的负面 SERP 内容时推送
- [ ] 每月导出 GSC 数据与历史对比,用 pandas 生成折线图汇报给管理层
下一节:03-SEO工作流与团队协作 — 规模化 SEO 需要流程和协作,而不仅仅是技术。