网络请求与 API 调用
Python 的 HTTP 客户端从同步到异步——requests 快速上手,httpx 异步高性能,构建健壮的 API 调用层。
HTTP 客户端生态
graph TD
HTTP[Python HTTP 客户端] --> SYNC[同步]
HTTP --> ASYNC[异步]
SYNC --> REQ[requests]
SYNC --> URLLIB[urllib3]
ASYNC --> HTTPX[httpx]
ASYNC --> AIOHTTP[aiohttp]
REQ --> S1[最流行]
HTTPX --> A1[同步+异步]
HTTPX --> A2[HTTP/2]
style HTTP fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
style HTTPX fill:#c8e6c9,stroke:#388e3c,stroke-width:2px
requests 基础
"""
requests:最流行的 HTTP 库
"""
import requests
from dataclasses import dataclass
# === GET request ===
# Fetch a public GitHub profile and print two fields from the JSON body.
response = requests.get(
    url="https://api.github.com/users/python",
    timeout=10,  # always pass a timeout so the call cannot hang
    headers={"Accept": "application/json"},
)
if response.ok:
    data = response.json()
    print(f"用户: {data['login']}, 仓库数: {data['public_repos']}")

# === POST request ===
# `json=` serializes the dict as the JSON request body.
response = requests.post(
    url="https://httpbin.org/post",
    timeout=10,
    json={"name": "Alice", "age": 30},
)
print(response.status_code)  # 200
# === Reusing connections with a Session ===
@dataclass
class APIClient:
    """Small JSON API client built on a shared ``requests.Session``.

    The session is created once per instance and carries a bearer-token
    ``Authorization`` header plus a JSON ``Content-Type`` header on every
    request. Instances can be used as context managers so the session is
    closed automatically::

        with APIClient("https://api.example.com", "my-token") as client:
            users = client.get("/users")

    Attributes:
        base_url: Prefix prepended verbatim to every ``path``.
        token: Bearer token sent in the ``Authorization`` header.
    """

    base_url: str
    token: str

    def __post_init__(self):
        # One session for the lifetime of the client; headers set here are
        # sent with every request made through it.
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json",
        })

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def get(self, path: str, **kwargs) -> dict:
        """Send a GET to ``base_url + path`` and return the decoded JSON body.

        Extra keyword arguments are forwarded to ``Session.get``. A
        ``timeout`` of 10 seconds is applied unless the caller passes one.

        Raises whatever ``Response.raise_for_status`` raises for an error
        status.
        """
        # setdefault instead of a literal keyword: passing timeout=10 next to
        # **kwargs would raise TypeError when the caller supplies a timeout.
        kwargs.setdefault("timeout", 10)
        resp = self.session.get(f"{self.base_url}{path}", **kwargs)
        resp.raise_for_status()
        return resp.json()

    def post(self, path: str, data: dict, **kwargs) -> dict:
        """Send ``data`` as a JSON body via POST and return the decoded JSON reply.

        Extra keyword arguments are forwarded to ``Session.post``. A
        ``timeout`` of 10 seconds is applied unless the caller passes one.

        Raises whatever ``Response.raise_for_status`` raises for an error
        status.
        """
        kwargs.setdefault("timeout", 10)
        resp = self.session.post(f"{self.base_url}{path}", json=data, **kwargs)
        resp.raise_for_status()
        return resp.json()

    def close(self):
        """Close the underlying session."""
        self.session.close()
# 使用
# client = APIClient("https://api.example.com", "my-token")
# users = client.get("/users")
# client.close()
httpx 异步请求
"""
httpx:同步 + 异步 + HTTP/2
"""
import httpx
import asyncio
# === Synchronous usage (nearly the same API as requests) ===
# The context manager closes the client's connections on exit.
with httpx.Client(timeout=10) as client:
    resp = client.get(url="https://httpbin.org/get")
    print(resp.json())
# === Asynchronous usage ===
async def fetch_multiple():
    """Request several endpoints concurrently and print each URL with its status code."""
    urls = [
        "https://httpbin.org/get",
        "https://httpbin.org/ip",
        "https://httpbin.org/headers",
    ]
    async with httpx.AsyncClient(timeout=10) as client:
        # gather() awaits all requests together and returns the results in
        # the same order as the input URLs.
        responses = await asyncio.gather(*(client.get(url) for url in urls))
        for resp in responses:
            print(f"{resp.url} → {resp.status_code}")


asyncio.run(fetch_multiple())
# === File download ===
async def download_file(url: str, path: str):
    """Stream the body of ``url`` into the file at ``path`` in 8 KiB chunks.

    The response is consumed incrementally, so the whole body is never held
    in memory at once.

    Args:
        url: Address to download with a GET request.
        path: Destination file; opened in binary write mode (truncates).

    Raises:
        httpx.HTTPStatusError: If the server answers with a non-success
            status. Raised before the destination file is opened.
    """
    async with httpx.AsyncClient() as client:
        async with client.stream("GET", url) as resp:
            # Check the status before touching the filesystem; otherwise an
            # error page would be saved and reported as a finished download.
            resp.raise_for_status()
            with open(path, "wb") as f:
                async for chunk in resp.aiter_bytes(chunk_size=8192):
                    f.write(chunk)
    print(f"下载完成: {path}")
重试与错误处理
"""
健壮的 API 调用:重试 + 指数退避
"""
import time
import httpx
from dataclasses import dataclass, field
@dataclass
class RetryConfig:
    """Retry policy: how many times to retry, how long to wait, and on which statuses.

    Attributes:
        max_retries: Number of retries after the first attempt; 0 disables
            retrying. Must not be negative.
        backoff_factor: Base delay in seconds; it is multiplied by
            ``2 ** attempt`` before each retry. Must not be negative.
        retry_status_codes: HTTP status codes that count as retryable.

    Raises:
        ValueError: If ``max_retries`` or ``backoff_factor`` is negative.
    """

    max_retries: int = 3
    backoff_factor: float = 1.0
    retry_status_codes: tuple = (429, 500, 502, 503, 504)

    def __post_init__(self):
        # A negative retry count would make range(max_retries + 1) empty, so
        # no request would be attempted at all.
        if self.max_retries < 0:
            raise ValueError(f"max_retries 不能为负数: {self.max_retries}")
        # A negative factor yields a negative delay, which time.sleep rejects
        # only once a retry is already under way.
        if self.backoff_factor < 0:
            raise ValueError(f"backoff_factor 不能为负数: {self.backoff_factor}")
@dataclass
class RobustClient:
    """HTTP client that retries failed requests with exponential backoff.

    A request is retried when it raises ``httpx.TransportError`` or when the
    response status is listed in ``retry.retry_status_codes``.

    Attributes:
        base_url: Prefix prepended verbatim to every ``path``.
        retry: Retry policy; defaults to ``RetryConfig()``.
    """

    base_url: str
    retry: RetryConfig = field(default_factory=RetryConfig)

    def request(self, method: str, path: str, **kwargs) -> httpx.Response:
        """Send a request, retrying on transport errors and retryable statuses.

        Args:
            method: HTTP method name, e.g. ``"GET"``.
            path: Appended to ``base_url`` to form the URL.
            **kwargs: Forwarded to ``httpx.Client.request``.

        Returns:
            The first response whose status is not in
            ``retry.retry_status_codes``.

        Raises:
            httpx.HTTPError: When every attempt failed. If the last failure
                was a transport error, it is attached as ``__cause__``.
        """
        # NOTE(review): every method, including POST, goes through the same
        # retry loop — confirm callers tolerate a repeated submission.
        url = f"{self.base_url}{path}"
        last_error = None
        last_exc = None
        # One client for all attempts so retries reuse its connection pool
        # instead of opening a fresh one each time.
        with httpx.Client(timeout=10) as client:
            for attempt in range(self.retry.max_retries + 1):
                try:
                    resp = client.request(method, url, **kwargs)
                except httpx.TransportError as e:
                    last_error = str(e)
                    last_exc = e
                else:
                    if resp.status_code not in self.retry.retry_status_codes:
                        return resp
                    last_error = f"HTTP {resp.status_code}"
                    last_exc = None
                # Back off before the next attempt; no wait after the last one.
                if attempt < self.retry.max_retries:
                    wait = self.retry.backoff_factor * (2 ** attempt)
                    print(f"重试 {attempt + 1}/{self.retry.max_retries},等待 {wait}s...")
                    time.sleep(wait)
        raise httpx.HTTPError(
            f"请求失败,已重试 {self.retry.max_retries} 次: {last_error}"
        ) from last_exc

    def get(self, path: str, **kwargs) -> httpx.Response:
        """Send a GET request through :meth:`request`."""
        return self.request("GET", path, **kwargs)

    def post(self, path: str, **kwargs) -> httpx.Response:
        """Send a POST request through :meth:`request`."""
        return self.request("POST", path, **kwargs)
# API response parsing
def parse_api_response(resp: httpx.Response) -> dict:
    """Validate a response and return its decoded JSON payload.

    Calls ``resp.raise_for_status()`` first, then decodes the body. If the
    payload is a dict containing an ``"error"`` key, a ``ValueError`` carrying
    that value is raised; any other payload is returned unchanged.
    """
    resp.raise_for_status()
    payload = resp.json()
    reports_error = isinstance(payload, dict) and "error" in payload
    if not reports_error:
        return payload
    raise ValueError(f"API 错误: {payload['error']}")
客户端选择指南
| 特性 | requests | httpx | aiohttp |
|---|---|---|---|
| 同步 | 是 | 是 | 否 |
| 异步 | 否 | 是 | 是 |
| HTTP/2 | 否 | 是(需安装 httpx[http2] 并设置 http2=True) | 否 |
| 流式传输 | 是 | 是 | 是 |
| 连接池 | Session | Client | Session |
| 推荐场景 | 脚本/同步 | 全能首选 | 纯异步服务 |
本章小结
| 知识点 | 要点 |
|---|---|
| requests | Session 复用、timeout 必设 |
| httpx | 同步+异步+HTTP/2 |
| 重试 | 指数退避、限制次数 |
| 错误处理 | raise_for_status + 状态码判断 |
| 流式下载 | aiter_bytes 分块写入 |
下一章:数据分析——NumPy 与 Pandas 实战。