pathlib and File Workflows
What You'll Learn
Everything you need to work with files and directories in Python using pathlib — the modern replacement for os.path.
Why pathlib?
pathlib.Path objects are cleaner and more readable than os.path string manipulation:
# ❌ Old way with os.path — everything is string manipulation
import os
path = os.path.join("/home", "alice", "data", "report.csv")
filename = os.path.basename(path)  # "report.csv"
directory = os.path.dirname(path)  # "/home/alice/data"
exists = os.path.exists(path)

# ✅ Modern way with pathlib — one object with readable attributes/methods
from pathlib import Path
path = Path("/home/alice/data/report.csv")
filename = path.name     # "report.csv"
directory = path.parent  # Path("/home/alice/data")
exists = path.exists()   # True or False
Creating Path Objects
from pathlib import Path

# Absolute path
p = Path("/home/alice/documents/report.csv")

# Relative path (interpreted against the current working directory when used)
p = Path("data/output/result.txt")

# Current directory
cwd = Path.cwd()

# Home directory
home = Path.home()  # e.g. /home/alice

# Build paths with the / operator (works with str or Path operands)
config = Path.home() / ".config" / "myapp" / "settings.json"
Inspecting Paths
from pathlib import Path

p = Path("/home/alice/data/report.csv")

# Decompose the path — these are attributes, no filesystem access needed
print(p.name)     # report.csv
print(p.stem)     # report (filename without extension)
print(p.suffix)   # .csv
print(p.suffixes) # ['.csv'] — all extensions, e.g. ['.tar', '.gz']
print(p.parent)   # /home/alice/data
print(p.parts)    # ('/', 'home', 'alice', 'data', 'report.csv')

# Check what exists on disk (these DO touch the filesystem)
print(p.exists())  # True/False
print(p.is_file()) # True/False
print(p.is_dir())  # True/False

# File size (stat() raises FileNotFoundError if the path is missing)
print(p.stat().st_size)  # size in bytes
Reading Files
from pathlib import Path

path = Path("data/report.txt")

# Read entire file as string
content = path.read_text(encoding="utf-8")

# Read entire file as bytes
raw = path.read_bytes()

# Read line by line (memory efficient for large files)
with open(path, encoding="utf-8") as f:
    for line in f:
        print(line.strip())

# Read all lines into a list (lines keep their trailing "\n")
with open(path, encoding="utf-8") as f:
    lines = f.readlines()

# Shorter version (splitlines() strips the newlines)
lines = path.read_text(encoding="utf-8").splitlines()
Writing Files
from pathlib import Path

path = Path("output/result.txt")

# Create parent directory if it doesn't exist — write_text does NOT do this
path.parent.mkdir(parents=True, exist_ok=True)

# Write string (overwrites if exists)
path.write_text("Hello, World!\n", encoding="utf-8")

# Write bytes
path.write_bytes(b"\x00\x01\x02")

# Append to a file (pathlib has no append helper; use open with mode "a")
with open(path, "a", encoding="utf-8") as f:
    f.write("New line\n")

# Write multiple lines (join adds separators between lines, so add final "\n")
lines = ["Line 1", "Line 2", "Line 3"]
path.write_text("\n".join(lines) + "\n", encoding="utf-8")
Safe Write Pattern (Atomic)
Write to a temp file first, then rename — prevents half-written files on crash:
from pathlib import Path
def safe_write(path: Path, content: str) -> None:
    """Write *content* to *path* atomically — readers never see a partial file.

    The text is first written to a sibling temp file, then moved over the
    target in a single filesystem operation.

    Args:
        path: Destination file. Its parent directory must already exist.
        content: Text to write (encoded as UTF-8).

    Raises:
        OSError: If the temp file cannot be written or moved into place.
    """
    # Append ".tmp" instead of with_suffix(".tmp"): with_suffix REPLACES the
    # extension, so "a.csv" and "a.json" would collide on the same "a.tmp".
    tmp = path.with_name(path.name + ".tmp")
    try:
        tmp.write_text(content, encoding="utf-8")
        # replace() overwrites atomically on POSIX and — unlike rename() —
        # also succeeds on Windows when the target already exists.
        tmp.replace(path)
    except Exception:
        tmp.unlink(missing_ok=True)  # clean up temp file, then re-raise
        raise
Working with Directories
from pathlib import Path

d = Path("data/output")

# Create directory (and parents)
d.mkdir(parents=True, exist_ok=True)

# List contents (files and subdirectories, in arbitrary order)
for item in d.iterdir():
    print(item)

# List only files
for f in d.iterdir():
    if f.is_file():
        print(f.name)

# Find files by pattern
csv_files = list(d.glob("*.csv"))    # current directory only
all_csv = list(d.rglob("*.csv"))     # recursive (all subdirs)
log_files = list(d.glob("**/*.log")) # also recursive — "**" matches any depth

# Delete empty directory (raises OSError if it is not empty)
d.rmdir()

# Delete directory and all contents (careful!)
import shutil
shutil.rmtree(d)
Moving, Copying, Deleting Files
from pathlib import Path
import shutil

src = Path("data/report.csv")
dst = Path("archive/report.csv")

# Ensure destination directory exists
dst.parent.mkdir(parents=True, exist_ok=True)

# Move (rename) — these are alternatives: use ONE, not both in sequence
src.rename(dst)        # same filesystem only
shutil.move(src, dst)  # works across filesystems

# Copy
shutil.copy2(src, dst)  # copy with metadata (mtime, permissions)

# Delete a file
src.unlink()                 # raises FileNotFoundError if missing
src.unlink(missing_ok=True)  # no error if missing (Python 3.8+)

# Delete directory tree
shutil.rmtree("old_data/")
Practical Pattern: Process All Files in a Directory
from pathlib import Path
import json
def process_json_files(input_dir: Path, output_dir: Path) -> dict:
    """Re-serialize every ``*.json`` file from *input_dir* into *output_dir*.

    Each file is parsed and rewritten pretty-printed (indent=2) under the
    same filename. Files that fail to read or parse are reported on stdout
    and counted instead of aborting the batch.

    Args:
        input_dir: Directory scanned (non-recursively) for ``*.json`` files.
        output_dir: Destination directory; created if missing.

    Returns:
        ``{"processed": <count>, "errors": <count>}``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    processed = errors = 0
    # sorted() makes the processing order deterministic across platforms
    for source in sorted(input_dir.glob("*.json")):
        try:
            payload = json.loads(source.read_text(encoding="utf-8"))
            # ... transform data ...
            destination = output_dir / source.name
            destination.write_text(
                json.dumps(payload, indent=2), encoding="utf-8"
            )
        except (json.JSONDecodeError, OSError) as exc:
            print(f"Error: {source.name}: {exc}")
            errors += 1
        else:
            processed += 1
    return {"processed": processed, "errors": errors}
# Run the batch: read raw/*.json, write pretty-printed copies into processed/
summary = process_json_files(Path("raw/"), Path("processed/"))
print(f"Done: {summary}")
Common Mistakes
| Mistake | Fix |
|---|---|
| Forgetting `encoding="utf-8"` | Always specify encoding in open() |
| Not creating parent dirs | Use path.parent.mkdir(parents=True, exist_ok=True) |
| Using os.path for new code | Use pathlib.Path instead |
| Reading huge files all at once | Iterate line by line |
| Not handling FileNotFoundError | Wrap in try/except or check .exists() |
Quick Reference
from pathlib import Path

# Create
p = Path("/some/path/file.txt")
p = Path.cwd() / "subdir" / "file.txt"

# Inspect
p.name       # file.txt
p.stem       # file
p.suffix     # .txt
p.parent     # /some/path
p.exists()   # True/False — touches the filesystem
p.is_file()  # True/False
p.is_dir()   # True/False

# Read
p.read_text(encoding="utf-8")
p.read_bytes()

# Write
p.write_text("content", encoding="utf-8")
p.write_bytes(b"bytes")

# Directories
p.mkdir(parents=True, exist_ok=True)
list(p.glob("*.csv"))   # non-recursive
list(p.rglob("*.log"))  # recursive — rglob already implies "**/", no need to repeat it

# Move/Copy/Delete
import shutil
shutil.copy2(src, dst)
shutil.move(src, dst)
shutil.rmtree(directory)
p.unlink(missing_ok=True)