Skip to main content

pathlib and File Workflows

What You'll Learn

Everything you need to work with files and directories in Python using pathlib — the modern replacement for os.path.

Why pathlib?

pathlib.Path objects are cleaner and more readable than os.path string manipulation:

# ❌ Old way with os.path
import os
path = os.path.join("/home", "alice", "data", "report.csv")
filename = os.path.basename(path)
directory = os.path.dirname(path)
exists = os.path.exists(path)

# ✅ Modern way with pathlib
from pathlib import Path
path = Path("/home/alice/data/report.csv")
filename = path.name # "report.csv"
directory = path.parent # Path("/home/alice/data")
exists = path.exists() # True or False

Creating Path Objects

from pathlib import Path

# Absolute path
p = Path("/home/alice/documents/report.csv")

# Relative path
p = Path("data/output/result.txt")

# Current directory
cwd = Path.cwd()

# Home directory
home = Path.home() # /home/alice

# Build paths with / operator
config = Path.home() / ".config" / "myapp" / "settings.json"

Inspecting Paths

from pathlib import Path

p = Path("/home/alice/data/report.csv")

print(p.name) # report.csv
print(p.stem) # report (filename without extension)
print(p.suffix) # .csv
print(p.suffixes) # ['.csv']
print(p.parent) # /home/alice/data
print(p.parts) # ('/', 'home', 'alice', 'data', 'report.csv')

# Check what exists
print(p.exists()) # True/False
print(p.is_file()) # True/False
print(p.is_dir()) # True/False

# File size
print(p.stat().st_size) # size in bytes

Reading Files

from pathlib import Path

path = Path("data/report.txt")

# Read entire file as string
content = path.read_text(encoding="utf-8")

# Read entire file as bytes
raw = path.read_bytes()

# Read line by line (memory efficient for large files)
with open(path, encoding="utf-8") as f:
for line in f:
print(line.strip())

# Read all lines into a list
with open(path, encoding="utf-8") as f:
lines = f.readlines()

# Shorter version
lines = path.read_text(encoding="utf-8").splitlines()

Writing Files

from pathlib import Path

path = Path("output/result.txt")

# Create parent directory if it doesn't exist
path.parent.mkdir(parents=True, exist_ok=True)

# Write string (overwrites if exists)
path.write_text("Hello, World!\n", encoding="utf-8")

# Write bytes
path.write_bytes(b"\x00\x01\x02")

# Append to a file
with open(path, "a", encoding="utf-8") as f:
f.write("New line\n")

# Write multiple lines
lines = ["Line 1", "Line 2", "Line 3"]
path.write_text("\n".join(lines) + "\n", encoding="utf-8")

Safe Write Pattern (Atomic)

Write to a temp file first, then rename — prevents half-written files on crash:

from pathlib import Path

def safe_write(path: Path, content: str) -> None:
    """Write *content* to *path* atomically — no partial writes on failure.

    The content goes to a sibling temp file first, which is then swapped
    into place in a single filesystem operation, so readers never observe
    a half-written target file.

    Args:
        path: Destination file; its parent directory must already exist.
        content: Full text to write (UTF-8 encoded).

    Raises:
        OSError: If writing or the final swap fails; the temp file is
            cleaned up before the exception propagates.
    """
    # Append ".tmp" to the whole name rather than using with_suffix(".tmp"):
    # with_suffix would REPLACE the real extension, so "a.txt" and "a.csv"
    # would collide on the same "a.tmp" (and clobber any real *.tmp file).
    tmp = path.with_name(path.name + ".tmp")
    try:
        tmp.write_text(content, encoding="utf-8")
        # Path.replace (not rename) atomically overwrites an existing
        # destination on both POSIX and Windows; rename raises
        # FileExistsError on Windows when the target already exists.
        tmp.replace(path)
    except Exception:
        tmp.unlink(missing_ok=True)  # clean up the orphaned temp file
        raise

Working with Directories

from pathlib import Path

d = Path("data/output")

# Create directory (and parents)
d.mkdir(parents=True, exist_ok=True)

# List contents
for item in d.iterdir():
print(item)

# List only files
for f in d.iterdir():
if f.is_file():
print(f.name)

# Find files by pattern
csv_files = list(d.glob("*.csv")) # current directory only
all_csv = list(d.rglob("*.csv")) # recursive (all subdirs)
log_files = list(d.glob("**/*.log")) # also recursive

# Delete empty directory
d.rmdir()

# Delete directory and all contents (careful!)
import shutil
shutil.rmtree(d)

Moving, Copying, Deleting Files

from pathlib import Path
import shutil

src = Path("data/report.csv")
dst = Path("archive/report.csv")

# Ensure destination directory exists
dst.parent.mkdir(parents=True, exist_ok=True)

# Move (rename) — pick ONE of these, not both in sequence
src.rename(dst) # fast, but only within the same filesystem
shutil.move(src, dst) # works across filesystems

# Copy
shutil.copy2(src, dst) # copy with metadata

# Delete a file
src.unlink() # raises FileNotFoundError if missing
src.unlink(missing_ok=True) # no error if missing (Python 3.8+)

# Delete directory tree
shutil.rmtree("old_data/")

Practical Pattern: Process All Files in a Directory

from pathlib import Path
import json

def process_json_files(input_dir: Path, output_dir: Path) -> dict:
    """Re-serialize every ``*.json`` file from input_dir into output_dir.

    Files are handled in sorted (deterministic) order. A file that fails
    to parse, read, or write is reported on stdout and skipped; the rest
    are still processed.

    Returns:
        Summary dict with "processed" and "errors" counts.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    processed = 0
    errors = 0

    for src in sorted(input_dir.glob("*.json")):
        try:
            data = json.loads(src.read_text(encoding="utf-8"))
            # ... transform data ...
            (output_dir / src.name).write_text(
                json.dumps(data, indent=2), encoding="utf-8"
            )
        except (json.JSONDecodeError, OSError) as e:
            print(f"Error: {src.name}: {e}")
            errors += 1
        else:
            processed += 1

    return {"processed": processed, "errors": errors}

summary = process_json_files(Path("raw/"), Path("processed/"))
print(f"Done: {summary}")

Common Mistakes

Mistake → Fix
Forgetting encoding="utf-8" → Always specify encoding in open()
Not creating parent dirs → Use path.parent.mkdir(parents=True, exist_ok=True)
Using os.path for new code → Use pathlib.Path instead
Reading huge files all at once → Iterate line by line
Not handling FileNotFoundError → Wrap in try/except or check .exists()

Quick Reference

from pathlib import Path

# Create
p = Path("/some/path/file.txt")
p = Path.cwd() / "subdir" / "file.txt"

# Inspect
p.name # file.txt
p.stem # file
p.suffix # .txt
p.parent # /some/path
p.exists() p.is_file() p.is_dir()

# Read
p.read_text(encoding="utf-8")
p.read_bytes()

# Write
p.write_text("content", encoding="utf-8")
p.write_bytes(b"bytes")

# Directories
p.mkdir(parents=True, exist_ok=True)
list(p.glob("*.csv"))
list(p.rglob("*.log")) # rglob is already recursive — no "**/" prefix needed

# Move/Copy/Delete
import shutil
shutil.copy2(src, dst)
shutil.move(src, dst)
shutil.rmtree(directory)
p.unlink(missing_ok=True)

What's Next

Lesson 2: JSON, YAML, ENV, and Config