文件与 IO
文件读写是编程基本功——JSON、CSV、YAML、pathlib 一网打尽。
文件操作全景
graph LR
IO[文件与 IO] --> TEXT[文本文件]
IO --> STRUCT[结构化数据]
IO --> PATH[路径操作]
TEXT --> READ[读取]
TEXT --> WRITE[写入]
STRUCT --> JSON_F[JSON]
STRUCT --> CSV_F[CSV]
STRUCT --> YAML_F[YAML]
PATH --> PATHLIB[pathlib]
PATH --> GLOB[文件搜索]
style IO fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
style STRUCT fill:#c8e6c9,stroke:#388e3c,stroke-width:2px
文本文件读写
"""
Basic text-file operations: writing, reading, appending, and the
pathlib one-liner equivalents.
"""
from pathlib import Path

# === Writing ===
# The with-statement guarantees the file handle is closed, even on error.
with open("demo.txt", "w", encoding="utf-8") as fh:
    # Two single writes followed by one batch write — same bytes on disk.
    fh.write("第一行\n")
    fh.write("第二行\n")
    fh.writelines(["第三行\n", "第四行\n"])

# === Reading ===
# Slurp the whole file into one string.
with open("demo.txt", "r", encoding="utf-8") as fh:
    content = fh.read()
print(content)

# Iterating the handle yields one line at a time — memory stays flat
# no matter how large the file is.
with open("demo.txt", "r", encoding="utf-8") as fh:
    for line in fh:
        print(line.strip())

# Materialize every line into a list.
with open("demo.txt", "r", encoding="utf-8") as fh:
    lines = fh.readlines()
print(f"共 {len(lines)} 行")

# === Appending ===
# Mode "a" positions writes at end-of-file instead of truncating.
with open("demo.txt", "a", encoding="utf-8") as fh:
    fh.write("追加内容\n")

# === pathlib shortcuts (the modern approach) ===
path = Path("demo.txt")
path.write_text("Hello, pathlib!", encoding="utf-8")  # replaces the file's content
text = path.read_text(encoding="utf-8")
print(text)

# Tidy up; missing_ok suppresses FileNotFoundError if it is already gone.
path.unlink(missing_ok=True)
JSON 处理
"""
Working with JSON: in-memory (de)serialization, .json file I/O,
and the JSON Lines (one document per line) format.
"""
import json

# === Round-tripping a nested structure ===
data = {
    "name": "张三",
    "age": 25,
    "skills": ["Python", "SQL", "Git"],
    "address": {"city": "北京", "district": "海淀"},
}

# dumps -> str; ensure_ascii=False keeps CJK characters human-readable.
json_str = json.dumps(data, ensure_ascii=False, indent=2)
print(json_str)

# loads -> Python objects again.
parsed = json.loads(json_str)
print(parsed["name"])  # 张三

# === Reading/writing .json files ===
with open("data.json", "w", encoding="utf-8") as fp:
    json.dump(data, fp, ensure_ascii=False, indent=2)

with open("data.json", "r", encoding="utf-8") as fp:
    loaded = json.load(fp)

# === JSON Lines: one independent JSON document per line ===
records = [
    {"id": 1, "text": "你好"},
    {"id": 2, "text": "世界"},
    {"id": 3, "text": "Python"},
]

# Emit each record on its own line.
with open("data.jsonl", "w", encoding="utf-8") as fp:
    fp.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in records)

# Parse every non-blank line back into a dict.
loaded_records = []
with open("data.jsonl", "r", encoding="utf-8") as fp:
    for raw in fp:
        if raw.strip():
            loaded_records.append(json.loads(raw))
print(f"读取 {len(loaded_records)} 条记录")

# Remove the scratch files.
Path("data.json").unlink(missing_ok=True)
Path("data.jsonl").unlink(missing_ok=True)
CSV 处理
"""
CSV round-trips with the csv module: positional rows and dict rows.
"""
import csv

# === Positional writing ===
headers = ["姓名", "年龄", "城市"]
rows = [
    ["张三", 25, "北京"],
    ["李四", 30, "上海"],
    ["王五", 28, "广州"],
]

# utf-8-sig prepends a BOM so Excel auto-detects the encoding;
# newline="" hands line-ending control over to the csv module.
with open("people.csv", "w", encoding="utf-8-sig", newline="") as fp:
    w = csv.writer(fp)
    w.writerow(headers)
    w.writerows(rows)

# === Dict-based writing ===
dict_rows = [
    {"姓名": "张三", "年龄": 25, "城市": "北京"},
    {"姓名": "李四", "年龄": 30, "城市": "上海"},
]
with open("people_dict.csv", "w", encoding="utf-8-sig", newline="") as fp:
    w = csv.DictWriter(fp, fieldnames=["姓名", "年龄", "城市"])
    w.writeheader()
    w.writerows(dict_rows)

# === Positional reading ===
with open("people.csv", "r", encoding="utf-8-sig") as fp:
    reader = csv.reader(fp)
    header = next(reader)  # consume the header row before the data rows
    for row in reader:
        print(f"{row[0]}, {row[1]}岁, {row[2]}")

# === Dict-based reading (keys come from the header row) ===
with open("people.csv", "r", encoding="utf-8-sig") as fp:
    reader = csv.DictReader(fp)
    for row in reader:
        print(f"{row['姓名']}: {row['城市']}")

# Drop the scratch files.
Path("people.csv").unlink(missing_ok=True)
Path("people_dict.csv").unlink(missing_ok=True)
pathlib 路径操作
"""
pathlib: object-oriented path manipulation and filesystem operations.
"""
import shutil  # hoisted: imports belong at the top, not mid-script
from pathlib import Path

# === Decomposing a path ===
p = Path("src/models/user.py")
print(p.name)    # user.py
print(p.stem)    # user
print(p.suffix)  # .py
print(p.parent)  # src/models
print(p.parts)   # ('src', 'models', 'user.py')

# === Joining with the / operator ===
base = Path("/home/user")
config = base / "config" / "settings.yml"
print(config)  # /home/user/config/settings.yml

# === Filesystem operations ===
work_dir = Path("temp_demo")
work_dir.mkdir(exist_ok=True)            # create directory (no error if present)
(work_dir / "sub").mkdir(exist_ok=True)  # create subdirectory

# Create a few sample files to search and inspect.
(work_dir / "test.txt").write_text("hello", encoding="utf-8")
(work_dir / "data.json").write_text("{}", encoding="utf-8")
(work_dir / "sub" / "nested.py").write_text("# code", encoding="utf-8")

# === Searching ===
# rglob recurses into subdirectories; glob searches one level only.
for py_file in work_dir.rglob("*.py"):
    print(f"找到: {py_file}")
print(list(work_dir.glob("*.txt")))   # current directory only
print(list(work_dir.rglob("*.*")))    # recursive search

# === Inspecting files ===
test_file = work_dir / "test.txt"
print(test_file.exists())        # True
print(test_file.is_file())       # True
print(work_dir.is_dir())         # True
print(test_file.stat().st_size)  # size in bytes

# === Everyday patterns ===
# Ensure the parent directory exists before writing an output file.
output = Path("outputs/results/data.csv")
output.parent.mkdir(parents=True, exist_ok=True)

# Swap a file's extension.
source = Path("report.md")
html_path = source.with_suffix(".html")
print(html_path)  # report.html

# Clean up. ignore_errors=True replaces the race-prone
# `Path("outputs").exists() and shutil.rmtree(...)` idiom, which also
# abused a boolean expression as a statement.
shutil.rmtree(work_dir, ignore_errors=True)
shutil.rmtree("outputs", ignore_errors=True)
本章小结
| 格式 | 读取 | 写入 | 适用 |
|---|---|---|---|
| 文本 | open() / Path.read_text() | open("w") / Path.write_text() | 日志、配置 |
| JSON | json.load() | json.dump() | API 数据、配置 |
| JSONL | 逐行 json.loads() | 逐行 json.dumps() | 大数据集 |
| CSV | csv.reader() / DictReader | csv.writer() | 表格数据 |
| pathlib | Path.read_text() | Path.write_text() | 现代文件操作 |
下一章:数据分析——NumPy 和 Pandas 实战。