文件与 IO
文件读写是编程基本功——JSON、CSV、YAML、pathlib 一网打尽。
文件操作全景
graph LR
IO[文件与 IO] --> TEXT[文本文件]
IO --> STRUCT[结构化数据]
IO --> PATH[路径操作]
TEXT --> READ[读取]
TEXT --> WRITE[写入]
STRUCT --> JSON_F[JSON]
STRUCT --> CSV_F[CSV]
STRUCT --> YAML_F[YAML]
PATH --> PATHLIB[pathlib]
PATH --> GLOB[文件搜索]
style IO fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
style STRUCT fill:#c8e6c9,stroke:#388e3c,stroke-width:2px
文本文件读写
"""
Basic text-file operations: writing, reading, appending, and the
pathlib one-liner equivalents.
"""
from pathlib import Path

# === Writing ===
# The with-statement guarantees the file handle is closed, even on error.
with open("demo.txt", "w", encoding="utf-8") as fh:
    # Two single writes followed by one batch write — same bytes on disk.
    fh.write("第一行\n")
    fh.write("第二行\n")
    fh.writelines(["第三行\n", "第四行\n"])

# === Reading ===
# Slurp the whole file into one string.
with open("demo.txt", "r", encoding="utf-8") as fh:
    content = fh.read()
print(content)

# Iterating the handle yields one line at a time — memory stays flat
# no matter how large the file is.
with open("demo.txt", "r", encoding="utf-8") as fh:
    for line in fh:
        print(line.strip())

# Materialize every line into a list.
with open("demo.txt", "r", encoding="utf-8") as fh:
    lines = fh.readlines()
print(f"共 {len(lines)} 行")

# === Appending ===
# Mode "a" positions writes at end-of-file instead of truncating.
with open("demo.txt", "a", encoding="utf-8") as fh:
    fh.write("追加内容\n")

# === pathlib shortcuts (the modern approach) ===
path = Path("demo.txt")
path.write_text("Hello, pathlib!", encoding="utf-8")  # replaces the file's content
text = path.read_text(encoding="utf-8")
print(text)

# Tidy up; missing_ok suppresses FileNotFoundError if it is already gone.
path.unlink(missing_ok=True)
JSON 处理
"""
Working with JSON: in-memory (de)serialization, .json file I/O,
and the JSON Lines (one document per line) format.
"""
import json

# === Round-tripping a nested structure ===
data = {
    "name": "张三",
    "age": 25,
    "skills": ["Python", "SQL", "Git"],
    "address": {"city": "北京", "district": "海淀"},
}

# dumps -> str; ensure_ascii=False keeps CJK characters human-readable.
json_str = json.dumps(data, ensure_ascii=False, indent=2)
print(json_str)

# loads -> Python objects again.
parsed = json.loads(json_str)
print(parsed["name"])  # 张三

# === Reading/writing .json files ===
with open("data.json", "w", encoding="utf-8") as fp:
    json.dump(data, fp, ensure_ascii=False, indent=2)

with open("data.json", "r", encoding="utf-8") as fp:
    loaded = json.load(fp)

# === JSON Lines: one independent JSON document per line ===
records = [
    {"id": 1, "text": "你好"},
    {"id": 2, "text": "世界"},
    {"id": 3, "text": "Python"},
]

# Emit each record on its own line.
with open("data.jsonl", "w", encoding="utf-8") as fp:
    fp.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in records)

# Parse every non-blank line back into a dict.
loaded_records = []
with open("data.jsonl", "r", encoding="utf-8") as fp:
    for raw in fp:
        if raw.strip():
            loaded_records.append(json.loads(raw))
print(f"读取 {len(loaded_records)} 条记录")

# Remove the scratch files.
Path("data.json").unlink(missing_ok=True)
Path("data.jsonl").unlink(missing_ok=True)
CSV 处理
"""
CSV round-trips with the csv module: positional rows and dict rows.
"""
import csv

# === Positional writing ===
headers = ["姓名", "年龄", "城市"]
rows = [
    ["张三", 25, "北京"],
    ["李四", 30, "上海"],
    ["王五", 28, "广州"],
]

# utf-8-sig prepends a BOM so Excel auto-detects the encoding;
# newline="" hands line-ending control over to the csv module.
with open("people.csv", "w", encoding="utf-8-sig", newline="") as fp:
    w = csv.writer(fp)
    w.writerow(headers)
    w.writerows(rows)

# === Dict-based writing ===
dict_rows = [
    {"姓名": "张三", "年龄": 25, "城市": "北京"},
    {"姓名": "李四", "年龄": 30, "城市": "上海"},
]
with open("people_dict.csv", "w", encoding="utf-8-sig", newline="") as fp:
    w = csv.DictWriter(fp, fieldnames=["姓名", "年龄", "城市"])
    w.writeheader()
    w.writerows(dict_rows)

# === Positional reading ===
with open("people.csv", "r", encoding="utf-8-sig") as fp:
    reader = csv.reader(fp)
    header = next(reader)  # consume the header row before the data rows
    for row in reader:
        print(f"{row[0]}, {row[1]}岁, {row[2]}")

# === Dict-based reading (keys come from the header row) ===
with open("people.csv", "r", encoding="utf-8-sig") as fp:
    reader = csv.DictReader(fp)
    for row in reader:
        print(f"{row['姓名']}: {row['城市']}")

# Drop the scratch files.
Path("people.csv").unlink(missing_ok=True)
Path("people_dict.csv").unlink(missing_ok=True)
pathlib 路径操作
"""
pathlib: object-oriented path manipulation and filesystem operations.
"""
import shutil  # hoisted: imports belong at the top, not mid-script
from pathlib import Path

# === Decomposing a path ===
p = Path("src/models/user.py")
print(p.name)    # user.py
print(p.stem)    # user
print(p.suffix)  # .py
print(p.parent)  # src/models
print(p.parts)   # ('src', 'models', 'user.py')

# === Joining with the / operator ===
base = Path("/home/user")
config = base / "config" / "settings.yml"
print(config)  # /home/user/config/settings.yml

# === Filesystem operations ===
work_dir = Path("temp_demo")
work_dir.mkdir(exist_ok=True)            # create directory (no error if present)
(work_dir / "sub").mkdir(exist_ok=True)  # create subdirectory

# Create a few sample files to search and inspect.
(work_dir / "test.txt").write_text("hello", encoding="utf-8")
(work_dir / "data.json").write_text("{}", encoding="utf-8")
(work_dir / "sub" / "nested.py").write_text("# code", encoding="utf-8")

# === Searching ===
# rglob recurses into subdirectories; glob searches one level only.
for py_file in work_dir.rglob("*.py"):
    print(f"找到: {py_file}")
print(list(work_dir.glob("*.txt")))   # current directory only
print(list(work_dir.rglob("*.*")))    # recursive search

# === Inspecting files ===
test_file = work_dir / "test.txt"
print(test_file.exists())        # True
print(test_file.is_file())       # True
print(work_dir.is_dir())         # True
print(test_file.stat().st_size)  # size in bytes

# === Everyday patterns ===
# Ensure the parent directory exists before writing an output file.
output = Path("outputs/results/data.csv")
output.parent.mkdir(parents=True, exist_ok=True)

# Swap a file's extension.
source = Path("report.md")
html_path = source.with_suffix(".html")
print(html_path)  # report.html

# Clean up. ignore_errors=True replaces the race-prone
# `Path("outputs").exists() and shutil.rmtree(...)` idiom, which also
# abused a boolean expression as a statement.
shutil.rmtree(work_dir, ignore_errors=True)
shutil.rmtree("outputs", ignore_errors=True)
本章小结
| 格式 | 读取 | 写入 | 适用 |
|---|---|---|---|
| 文本 | open() / Path.read_text() | open("w") / Path.write_text() | 日志、配置 |
| JSON | json.load() | json.dump() | API 数据、配置 |
| JSONL | 逐行 json.loads() | 逐行 json.dumps() | 大数据集 |
| CSV | csv.reader() / DictReader | csv.writer() | 表格数据 |
| pathlib | Path.read_text() | Path.write_text() | 现代文件操作 |
下一章:数据分析——NumPy 和 Pandas 实战。