推导式与生成器
用一行代码替代五行——推导式是 Python 最 Pythonic 的特性之一。
推导式全景
graph LR
COMP[推导式] --> LIST_C[列表推导式]
COMP --> DICT_C[字典推导式]
COMP --> SET_C[集合推导式]
COMP --> GEN[生成器表达式]
LIST_C --> |结果是 list| L1["[x for x in ...]"]
DICT_C --> |结果是 dict| D1["{k:v for ...}"]
SET_C --> |结果是 set| S1["{x for x in ...}"]
GEN --> |结果是迭代器| G1["(x for x in ...)"]
style COMP fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
style GEN fill:#c8e6c9,stroke:#388e3c,stroke-width:2px
列表推导式
"""
列表推导式 (List Comprehension)
"""
# === Basic syntax ===
# [expression for variable in iterable if condition]

# Traditional approach: grow the list with an explicit loop.
squares = []
for x in range(10):
    squares.append(x ** 2)

# Comprehension approach: the same list in a single expression.
squares = [x ** 2 for x in range(10)]
print(squares)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# === Filtering with a condition ===
# A trailing `if` drops the elements that fail the test.
evens = [x for x in range(20) if x % 2 == 0]
print(evens)  # [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

# === Conditional expression ===
# An if/else in the expression position keeps every element and maps it.
labels = ["偶数" if x % 2 == 0 else "奇数" for x in range(5)]
print(labels)  # ['偶数', '奇数', '偶数', '奇数', '偶数']

# === Nested loops ===
# The `for` clauses read left to right, outer loop first.
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [x for row in matrix for x in row]
print(flat)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]
# === Practical scenarios ===
# Filtering files
from pathlib import Path
# Example (kept commented out, as in the original snippet):
# py_files = [f for f in Path(".").glob("**/*") if f.suffix == ".py"]

# String clean-up: trim surrounding whitespace and lower-case each word.
words = [" Hello ", " World ", " Python "]
cleaned = [w.strip().lower() for w in words]
print(cleaned)  # ['hello', 'world', 'python']

# Data conversion: keep only the strings that look like plain numbers.
raw_data = ["1", "2", "abc", "3", "4.5"]
# Remove at most ONE "." before the digit check. Removing every dot would let
# a string such as "1.2.3" pass the filter and then make float() raise
# ValueError.
numbers = [float(x) for x in raw_data if x.replace(".", "", 1).isdigit()]
print(numbers)  # [1.0, 2.0, 3.0, 4.5]
字典推导式和集合推导式
"""
字典推导式与集合推导式
"""
# === Dict comprehensions ===
# {key_expression: value_expression for variable in iterable}

# Build a mapping: each name points to its length.
names = ["alice", "bob", "charlie"]
name_lengths = {person: len(person) for person in names}
print(name_lengths)  # {'alice': 5, 'bob': 3, 'charlie': 7}

# Swap keys and values.
original = dict(a=1, b=2, c=3)
inverted = {number: letter for letter, number in original.items()}
print(inverted)  # {1: 'a', 2: 'b', 3: 'c'}

# Filter a dict: keep only the entries whose score is at least 60.
scores = {"张三": 85, "李四": 62, "王五": 91, "赵六": 45}
passed = {
    student: points
    for student, points in scores.items()
    if points >= 60
}
print(passed)  # {'张三': 85, '李四': 62, '王五': 91}
# Grouped counting: tally how often each word occurs.
words = "hello world hello python hello world"
word_count = {}
for word in words.split():
    # get() supplies 0 the first time a word is seen.
    word_count[word] = word_count.get(word, 0) + 1
print(word_count)

# Counter performs the same tally more concisely.
from collections import Counter
word_count = Counter(words.split())
print(word_count.most_common(2))  # [('hello', 3), ('world', 2)]
# === Set comprehensions ===
# {expression for variable in iterable}

# Collect the distinct vowels that occur in a sentence.
sentence = "the quick brown fox jumps over the lazy dog"
vowels = {ch for ch in sentence if ch in "aeiou"}
print(vowels)  # {'a', 'e', 'i', 'o', 'u'}

# First letters of the names, with duplicates removed by the set.
names = ["Alice", "Bob", "Anna", "Charlie", "Amy"]
initials = {person[0] for person in names}
print(initials)  # {'A', 'B', 'C'}
生成器
"""
生成器:惰性求值,节省内存
"""
# === Generator expression vs list comprehension ===

# List comprehension: every result is computed and stored up front.
list_comp = [n * n for n in range(1_000_000)]  # holds one million items

# Generator expression: each value is computed only when it is requested.
gen_expr = (n * n for n in range(1_000_000))  # stores no results itself
print(next(gen_expr))  # 0
print(next(gen_expr))  # 1
# === 生成器函数 ===
def fibonacci(limit: int):
    """Yield the Fibonacci numbers that are strictly less than ``limit``.

    The sequence starts at 0. Nothing is yielded when ``limit`` is 0 or
    negative.

    Args:
        limit: Exclusive upper bound for the yielded values.

    Yields:
        The Fibonacci numbers 0, 1, 1, 2, 3, ... below ``limit``.
    """
    a, b = 0, 1
    while a < limit:
        yield a
        # Advance the pair: b becomes the current value, a + b the next one.
        a, b = b, a + b


# Usage
for num in fibonacci(100):
    print(num, end=" ")
# 0 1 1 2 3 5 8 13 21 34 55 89
print()
# === yield 的工作原理 ===
def countdown(n: int):
    """Yield ``n``, ``n - 1``, ..., 1, printing a message before and after.

    The start message is printed on the first ``next()`` call, not when the
    generator object is created. The final message is printed when the
    generator is resumed after its last value, just before it finishes.

    Args:
        n: Number to count down from. Nothing is yielded when ``n <= 0``.

    Yields:
        The remaining count, from ``n`` down to 1.
    """
    print(f"开始倒计时 {n}")
    while n > 0:
        yield n  # pause here and hand n to the caller
        n -= 1  # the following next() call resumes from this line
    print("发射!")


cd = countdown(3)
print(next(cd))  # prints "开始倒计时 3", then 3
print(next(cd))  # 2
print(next(cd))  # 1
# next(cd)  # would print "发射!" and then raise StopIteration
# === 实用生成器 ===
# 读取大文件
def read_large_file(filepath: str, chunk_size: int = 8192):
    """Read a UTF-8 text file lazily, one chunk at a time.

    The file is opened in text mode, so ``chunk_size`` counts characters,
    not bytes. The file is opened on the first ``next()`` call, and the
    ``with`` block closes it once the generator finishes.

    Args:
        filepath: Path of the file to read.
        chunk_size: Maximum number of characters per yielded chunk.

    Yields:
        Consecutive non-empty pieces of the file content, in order.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                # read() returns an empty string at end of file.
                break
            yield chunk
# 无限序列
def infinite_counter(start: int = 0):
    """Yield ``start``, ``start + 1``, ``start + 2``, ... without end.

    The generator never finishes on its own; the consumer decides when to
    stop pulling values.

    Args:
        start: First value to yield.

    Yields:
        Consecutive integers beginning at ``start``.
    """
    n = start
    while True:
        yield n
        n += 1
# 管道模式
def pipeline_example():
    """Chain generator expressions into a lazy pipeline and print the result.

    Each stage pulls values from the previous one on demand. Only as many
    source numbers are produced as the final ``islice`` needs.
    """
    from itertools import islice

    # Data source
    numbers = (x for x in range(100))
    # Stage 1: keep the even numbers
    evens = (x for x in numbers if x % 2 == 0)
    # Stage 2: square them
    squared = (x ** 2 for x in evens)
    # Stage 3: take the first 5 results
    result = list(islice(squared, 5))
    print(result)  # [0, 4, 16, 36, 64]


pipeline_example()
itertools 常用工具
"""
itertools:迭代器工具箱
"""
from itertools import (
chain, combinations, permutations,
product, groupby, accumulate, islice,
)
# === chain: concatenate several iterables ===
a = [1, 2, 3]
b = [4, 5, 6]
print([*chain(a, b)])  # [1, 2, 3, 4, 5, 6]

# === combinations: selections where order does not matter ===
print([*combinations("ABCD", 2)])
# [('A','B'), ('A','C'), ('A','D'), ('B','C'), ('B','D'), ('C','D')]

# === permutations: selections where order matters ===
print([*permutations("ABC", 2)])
# [('A','B'), ('A','C'), ('B','A'), ('B','C'), ('C','A'), ('C','B')]

# === product: Cartesian product ===
sizes = ["S", "M", "L"]
colors_list = ["红", "蓝"]
combos = [*product(sizes, colors_list)]
print(combos)  # [('S','红'), ('S','蓝'), ('M','红'), ...]
# === groupby: group consecutive items that share a key ===
data = [
    ("水果", "苹果"), ("水果", "香蕉"),
    ("蔬菜", "胡萝卜"), ("蔬菜", "西兰花"),
    ("水果", "橙子"),
]
# groupby only merges ADJACENT items with equal keys, so sort by the same
# key first.
data.sort(key=lambda x: x[0])
for key, group in groupby(data, key=lambda x: x[0]):
    # `group` is a one-shot iterator, so consume it inside the loop body.
    items = [item[1] for item in group]
    print(f"{key}: {items}")
# === accumulate: running results ===
nums = list(range(1, 6))
print([*accumulate(nums)])  # [1, 3, 6, 10, 15] running sum
print([*accumulate(nums, max)])  # [1, 2, 3, 4, 5] running maximum
本章小结
| 特性 | 语法 | 结果类型 | 内存 |
|---|---|---|---|
| 列表推导式 | [x for x in ...] | list | 全部加载 |
| 字典推导式 | {k:v for ...} | dict | 全部加载 |
| 集合推导式 | {x for x in ...} | set | 全部加载 |
| 生成器表达式 | (x for x in ...) | generator | 按需生成 |
| 生成器函数 | yield | generator | 按需生成 |
下一章:面向对象编程——类、继承、魔法方法。