Skip to main content

CSV and Tabular Files

What You'll Learn

How to read and write CSV files reliably, handle edge cases like quotes and encoding, and process tabular data efficiently.

Reading CSV Files

Python's built-in csv module handles quoting, escaping, and delimiters correctly:

import csv

# Read into a list of dicts (most useful).
# newline="" lets the csv module manage line endings itself.
with open("data.csv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    rows = list(reader)

# rows is a list of dicts
# [{"name": "Alice", "age": "30", "score": "95"}, ...]

print(rows[0]["name"])  # Alice

Why newline=""? It lets the csv module control line endings itself: when reading, it preserves newlines embedded inside quoted fields; when writing, it prevents extra blank lines on Windows.

Reading as Lists (Without Headers)

# Read raw rows as lists; pull the header off the iterator first.
with open("data.csv", newline="", encoding="utf-8") as handle:
    raw_rows = csv.reader(handle)
    header = next(raw_rows)  # first row = header
    for record in raw_rows:
        print(record)  # ['Alice', '30', '95']

Handling Different Delimiters

# Tab-separated values (TSV)
with open("data.tsv", newline="", encoding="utf-8") as tsv_file:
    tsv_reader = csv.DictReader(tsv_file, delimiter="\t")
    rows = list(tsv_reader)

# Semicolon-separated (common in European Excel exports)
with open("data.csv", newline="", encoding="utf-8") as semi_file:
    semi_reader = csv.DictReader(semi_file, delimiter=";")
    rows = list(semi_reader)

Writing CSV Files

import csv

# Records to persist, with an explicit column order.
rows = [
    {"name": "Alice", "age": 30, "score": 95},
    {"name": "Bob", "age": 25, "score": 82},
]
fieldnames = ["name", "age", "score"]

with open("output.csv", "w", newline="", encoding="utf-8") as out:
    dict_writer = csv.DictWriter(out, fieldnames=fieldnames)
    dict_writer.writeheader()     # header row first
    for record in rows:           # same result as writer.writerows(rows)
        dict_writer.writerow(record)

Writing Rows as Lists

# Write plain lists: one header row, then the data rows in bulk.
with open("output.csv", "w", newline="", encoding="utf-8") as out:
    list_writer = csv.writer(out)
    list_writer.writerow(["name", "age", "score"])  # header
    data_rows = [["Alice", 30, 95], ["Bob", 25, 82], ["Carol", 35, 88]]
    list_writer.writerows(data_rows)                # all data rows at once

Type Conversion — CSV Values Are Always Strings

CSV readers return everything as strings. Convert explicitly:

import csv

# CSV readers return every field as a string — convert explicitly.
with open("data.csv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    records = []
    for row in reader:
        records.append({
            "name": row["name"],
            "age": int(row["age"]),        # convert to int
            "score": float(row["score"]),  # convert to float
            # Case-insensitive bool: accepts "true", "True", "TRUE", etc.
            "active": row["active"].strip().lower() == "true",
        })

Filtering and Transforming CSV Data

import csv


def _letter_grade(score: float) -> str:
    """Map a numeric score to a letter grade (A/B/C/F)."""
    if score >= 90:
        return "A"
    if score >= 80:
        return "B"
    if score >= 70:
        return "C"
    return "F"


with open("students.csv", newline="", encoding="utf-8") as f:
    rows = list(csv.DictReader(f))

# Filter — only passing students
passing = [student for student in rows if float(student["score"]) >= 70]

# Sort by score descending
sorted_rows = sorted(rows, key=lambda student: float(student["score"]), reverse=True)

# Add a computed column
for student in rows:
    student["grade"] = _letter_grade(float(student["score"]))

# Write the transformed data; extrasaction="ignore" drops columns not listed.
with open("graded.csv", "w", newline="", encoding="utf-8") as f:
    fieldnames = ["name", "score", "grade"]
    writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
    writer.writeheader()
    writer.writerows(rows)

Handling Large CSV Files (Memory Efficient)

For files with millions of rows, don't load everything into memory:

import csv

def process_large_csv(input_path: str, output_path: str) -> dict:
    """Stream a CSV, append a "grade" column, and write the result.

    Processes one row at a time so memory stays O(1) regardless of file
    size. Rows whose "score" is missing or not numeric are skipped.

    Args:
        input_path: Path of the CSV to read (must have a header row).
        output_path: Path of the graded CSV to write.

    Returns:
        dict with counts: {"read": ..., "written": ..., "skipped": ...}.

    Raises:
        ValueError: If the input file is empty (no header row).
    """
    stats = {"read": 0, "written": 0, "skipped": 0}

    with open(input_path, newline="", encoding="utf-8") as fin, \
         open(output_path, "w", newline="", encoding="utf-8") as fout:

        reader = csv.DictReader(fin)
        # Empty file: fieldnames is None, and None + list would be a TypeError.
        if reader.fieldnames is None:
            raise ValueError(f"{input_path} has no header row")
        # Don't duplicate the column if the input already has "grade".
        fieldnames = list(reader.fieldnames)
        if "grade" not in fieldnames:
            fieldnames.append("grade")
        writer = csv.DictWriter(fout, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:  # one row at a time — O(1) memory
            stats["read"] += 1
            try:
                score = float(row["score"])
                row["grade"] = "A" if score >= 90 else "B" if score >= 80 else "C"
                writer.writerow(row)
                stats["written"] += 1
            # TypeError covers float(None) when the score cell is absent.
            except (ValueError, KeyError, TypeError):
                stats["skipped"] += 1

    return stats

Detecting Encoding

Files from Windows often use UTF-8 BOM or latin-1:

# UTF-8 with BOM (from Excel on Windows): utf-8-sig strips the BOM.
with open("data.csv", newline="", encoding="utf-8-sig") as bom_file:
    rows = list(csv.DictReader(bom_file))

# Latin-1 (Windows default for many locales)
with open("data.csv", newline="", encoding="latin-1") as latin_file:
    rows = list(csv.DictReader(latin_file))

When encoding is unknown, try chardet:

# pip install chardet   (third-party — run in your shell, not in Python)
import chardet

# Sample the first 10 KB in binary mode; more bytes improve confidence.
with open("data.csv", "rb") as f:
    result = chardet.detect(f.read(10000))
    encoding = result["encoding"]

print(f"Detected encoding: {encoding}")

CSV with pandas (For Heavy Data Work)

For complex analysis, pandas is much more powerful:

# pip install pandas   (third-party — run in your shell, not in Python)
import pandas as pd

# Read
df = pd.read_csv("data.csv")

# Filter
passing = df[df["score"] >= 70]

# Sort
sorted_df = df.sort_values("score", ascending=False)

# Add column
df["grade"] = df["score"].apply(
    lambda s: "A" if s >= 90 else "B" if s >= 80 else "C"
)

# Summary statistics
print(df["score"].describe())

# Write (index=False keeps the row index out of the file)
df.to_csv("output.csv", index=False)

Use the csv module for scripts, simple tasks, and zero-dependency code. Use pandas for analysis, large datasets, and complex transformations.

Common Mistakes

Mistake → Fix

- Forgetting newline="" → Always use newline="" when opening files for the csv module
- Assuming strings are numbers → Cast explicitly with int() or float()
- Wrong delimiter (, vs ; vs \t) → Specify delimiter= explicitly
- Encoding errors → Try utf-8-sig for Excel exports, latin-1 for old Windows files
- Loading a huge file into memory → Iterate row by row instead of calling list(reader)

Quick Reference

import csv

# Read as dicts
with open("f.csv", newline="", encoding="utf-8") as f:
    rows = list(csv.DictReader(f))

# Write dicts
with open("f.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["a", "b"])
    writer.writeheader()
    writer.writerows(rows)

# TSV — just change the delimiter
csv.DictReader(f, delimiter="\t")

# Excel export (UTF-8 BOM)
open("f.csv", encoding="utf-8-sig")

# Large files — stream rows instead of materializing a list
for row in csv.DictReader(f):
    process(row)

What's Next

Lesson 5: Environment Variables