pathlib and File Workflows
What You'll Learn
Everything you need to work with files and directories in Python using pathlib — the modern replacement for os.path.
Why pathlib?
pathlib.Path objects are cleaner and more readable than os.path string manipulation:
# ❌ Old way with os.path — everything is string manipulation
import os
path = os.path.join("/home", "alice", "data", "report.csv")
filename = os.path.basename(path)  # "report.csv"
directory = os.path.dirname(path)  # "/home/alice/data"
exists = os.path.exists(path)

# ✅ Modern way with pathlib — one object with readable attributes/methods
from pathlib import Path
path = Path("/home/alice/data/report.csv")
filename = path.name     # "report.csv"
directory = path.parent  # Path("/home/alice/data")
exists = path.exists()   # True or False
Creating Path Objects
from pathlib import Path

# Absolute path
p = Path("/home/alice/documents/report.csv")

# Relative path (interpreted against the current working directory when used)
p = Path("data/output/result.txt")

# Current directory
cwd = Path.cwd()

# Home directory
home = Path.home()  # e.g. /home/alice

# Build paths with the / operator (works with str or Path operands)
config = Path.home() / ".config" / "myapp" / "settings.json"
Inspecting Paths
from pathlib import Path

p = Path("/home/alice/data/report.csv")

# Decompose the path — these are attributes, no filesystem access needed
print(p.name)     # report.csv
print(p.stem)     # report (filename without extension)
print(p.suffix)   # .csv
print(p.suffixes) # ['.csv'] — all extensions, e.g. ['.tar', '.gz']
print(p.parent)   # /home/alice/data
print(p.parts)    # ('/', 'home', 'alice', 'data', 'report.csv')

# Check what exists on disk (these DO touch the filesystem)
print(p.exists())  # True/False
print(p.is_file()) # True/False
print(p.is_dir())  # True/False

# File size (stat() raises FileNotFoundError if the path is missing)
print(p.stat().st_size)  # size in bytes
Reading Files
from pathlib import Path

path = Path("data/report.txt")

# Read entire file as string
content = path.read_text(encoding="utf-8")

# Read entire file as bytes
raw = path.read_bytes()

# Read line by line (memory efficient for large files)
with open(path, encoding="utf-8") as f:
    for line in f:
        print(line.strip())

# Read all lines into a list (lines keep their trailing "\n")
with open(path, encoding="utf-8") as f:
    lines = f.readlines()

# Shorter version (splitlines() strips the newlines)
lines = path.read_text(encoding="utf-8").splitlines()
Writing Files
from pathlib import Path

path = Path("output/result.txt")

# Create parent directory if it doesn't exist — write_text does NOT do this
path.parent.mkdir(parents=True, exist_ok=True)

# Write string (overwrites if exists)
path.write_text("Hello, World!\n", encoding="utf-8")

# Write bytes
path.write_bytes(b"\x00\x01\x02")

# Append to a file (pathlib has no append helper; use open with mode "a")
with open(path, "a", encoding="utf-8") as f:
    f.write("New line\n")

# Write multiple lines (join adds separators between lines, so add final "\n")
lines = ["Line 1", "Line 2", "Line 3"]
path.write_text("\n".join(lines) + "\n", encoding="utf-8")
Safe Write Pattern (Atomic)
Write to a temp file first, then rename — prevents half-written files on crash:
from pathlib import Path
def safe_write(path: Path, content: str) -> None:
    """Write *content* to *path* atomically — readers never see a partial file.

    The text is first written to a sibling temp file, then moved over the
    target in a single filesystem operation.

    Args:
        path: Destination file. Its parent directory must already exist.
        content: Text to write (encoded as UTF-8).

    Raises:
        OSError: If the temp file cannot be written or moved into place.
    """
    # Append ".tmp" instead of with_suffix(".tmp"): with_suffix REPLACES the
    # extension, so "a.csv" and "a.json" would collide on the same "a.tmp".
    tmp = path.with_name(path.name + ".tmp")
    try:
        tmp.write_text(content, encoding="utf-8")
        # replace() overwrites atomically on POSIX and — unlike rename() —
        # also succeeds on Windows when the target already exists.
        tmp.replace(path)
    except Exception:
        tmp.unlink(missing_ok=True)  # clean up temp file, then re-raise
        raise
Working with Directories
from pathlib import Path

d = Path("data/output")

# Create directory (and parents)
d.mkdir(parents=True, exist_ok=True)

# List contents (files and subdirectories, in arbitrary order)
for item in d.iterdir():
    print(item)

# List only files
for f in d.iterdir():
    if f.is_file():
        print(f.name)

# Find files by pattern
csv_files = list(d.glob("*.csv"))    # current directory only
all_csv = list(d.rglob("*.csv"))     # recursive (all subdirs)
log_files = list(d.glob("**/*.log")) # also recursive — "**" matches any depth

# Delete empty directory (raises OSError if it is not empty)
d.rmdir()

# Delete directory and all contents (careful!)
import shutil
shutil.rmtree(d)
Moving, Copying, Deleting Files
from pathlib import Path
import shutil

src = Path("data/report.csv")
dst = Path("archive/report.csv")

# Ensure destination directory exists
dst.parent.mkdir(parents=True, exist_ok=True)

# Move (rename) — these are alternatives: use ONE, not both in sequence
src.rename(dst)        # same filesystem only
shutil.move(src, dst)  # works across filesystems

# Copy
shutil.copy2(src, dst)  # copy with metadata (mtime, permissions)

# Delete a file
src.unlink()                 # raises FileNotFoundError if missing
src.unlink(missing_ok=True)  # no error if missing (Python 3.8+)

# Delete directory tree
shutil.rmtree("old_data/")
Practical Pattern: Process All Files in a Directory
from pathlib import Path
import json
def process_json_files(input_dir: Path, output_dir: Path) -> dict:
    """Re-serialize every ``*.json`` file from *input_dir* into *output_dir*.

    Each file is parsed and rewritten pretty-printed (indent=2) under the
    same filename. Files that fail to read or parse are reported on stdout
    and counted instead of aborting the batch.

    Args:
        input_dir: Directory scanned (non-recursively) for ``*.json`` files.
        output_dir: Destination directory; created if missing.

    Returns:
        ``{"processed": <count>, "errors": <count>}``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    processed = errors = 0
    # sorted() makes the processing order deterministic across platforms
    for source in sorted(input_dir.glob("*.json")):
        try:
            payload = json.loads(source.read_text(encoding="utf-8"))
            # ... transform data ...
            destination = output_dir / source.name
            destination.write_text(
                json.dumps(payload, indent=2), encoding="utf-8"
            )
        except (json.JSONDecodeError, OSError) as exc:
            print(f"Error: {source.name}: {exc}")
            errors += 1
        else:
            processed += 1
    return {"processed": processed, "errors": errors}
# Run the batch: read raw/*.json, write pretty-printed copies into processed/
summary = process_json_files(Path("raw/"), Path("processed/"))
print(f"Done: {summary}")
Common Mistakes
| Mistake | Fix |
|---|---|
| Forgetting `encoding="utf-8"` | Always specify encoding in open() |
| Not creating parent dirs | Use path.parent.mkdir(parents=True, exist_ok=True) |
| Using os.path for new code | Use pathlib.Path instead |
| Reading huge files all at once | Iterate line by line |
| Not handling FileNotFoundError | Wrap in try/except or check .exists() |
Quick Reference
from pathlib import Path

# Create
p = Path("/some/path/file.txt")
p = Path.cwd() / "subdir" / "file.txt"

# Inspect
p.name       # file.txt
p.stem       # file
p.suffix     # .txt
p.parent     # /some/path
p.exists()   # True/False — touches the filesystem
p.is_file()  # True/False
p.is_dir()   # True/False

# Read
p.read_text(encoding="utf-8")
p.read_bytes()

# Write
p.write_text("content", encoding="utf-8")
p.write_bytes(b"bytes")

# Directories
p.mkdir(parents=True, exist_ok=True)
list(p.glob("*.csv"))   # non-recursive
list(p.rglob("*.log"))  # recursive — rglob already implies "**/", no need to repeat it

# Move/Copy/Delete
import shutil
shutil.copy2(src, dst)
shutil.move(src, dst)
shutil.rmtree(directory)
p.unlink(missing_ok=True)