JSON, YAML, ENV, and Config
What You'll Learn
How to work with the most common configuration and data formats: JSON, YAML, .env files, and environment variables.
JSON — Python's Most Common Data Format
The `json` module is part of Python's standard library — no install needed.
Reading JSON
import json
from pathlib import Path
# From a file: read the whole file as UTF-8 text, then parse that string
data = json.loads(Path("config.json").read_text(encoding="utf-8"))

# From a string: json.loads() ("load string") parses text already in memory
text = '{"name": "Alice", "age": 30}'
data = json.loads(text)

# Using open(): json.load() (no "s") parses directly from the file object
with open("config.json", encoding="utf-8") as f:
    data = json.load(f)
Writing JSON
import json
from pathlib import Path
data = {"name": "Alice", "age": 30, "active": True}

# To a file: serialize to a string first, then write it out as UTF-8.
# ensure_ascii=False keeps non-ASCII characters as-is instead of \uXXXX escapes.
Path("output.json").write_text(
    json.dumps(data, indent=2, ensure_ascii=False),
    encoding="utf-8"
)

# Using open(): json.dump() (no "s") writes straight to the file object
with open("output.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# To a string: json.dumps() ("dump string") returns the serialized text
text = json.dumps(data, indent=2)
JSON Type Mapping
| Python | JSON |
|---|---|
| dict | object {} |
| list, tuple | array [] |
| str | string |
| int, float | number |
| True / False | true / false |
| None | null |
Handling JSON Errors
import json
import sys
def load_json_safe(path: str) -> dict | None:
try:
with open(path, encoding="utf-8") as f:
return json.load(f)
except FileNotFoundError:
print(f"File not found: {path}", file=sys.stderr)
except json.JSONDecodeError as e:
print(f"Invalid JSON in {path}: {e}", file=sys.stderr)
return None
Custom JSON Serialization
import json
from datetime import datetime
from pathlib import Path
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also understands ``datetime`` and ``Path`` values."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        ``Path`` objects become their string form and ``datetime`` objects
        become ISO 8601 strings; everything else is handed to the base class.
        """
        if isinstance(obj, Path):
            return str(obj)
        if isinstance(obj, datetime):
            return obj.isoformat()
        # Unknown type: defer to JSONEncoder.default() for the standard handling.
        return super().default(obj)


data = {"created": datetime.now(), "path": Path("/tmp/file.txt")}
print(json.dumps(data, cls=CustomEncoder, indent=2))
YAML — Human-Friendly Config
YAML is more readable than JSON for configuration files. Install first:
pip install pyyaml
config.yaml:
database:
host: localhost
port: 5432
name: myapp
server:
host: 0.0.0.0
port: 8080
debug: false
allowed_origins:
- https://myapp.com
- https://api.myapp.com
Reading YAML
import yaml
from pathlib import Path
# Parse config.yaml into plain Python objects (nested dicts and lists here).
with open("config.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)  # always use safe_load, not load()

# Nested mappings become nested dicts; YAML sequences become lists.
print(config["database"]["host"])  # localhost
print(config["allowed_origins"])  # ['https://myapp.com', ...]
Writing YAML
import yaml
from pathlib import Path
data = {"name": "myapp", "version": "1.0", "features": ["auth", "api"]}

# safe_dump is the writing counterpart of safe_load: it emits only standard
# YAML tags, so the file can always be read back with yaml.safe_load().
# default_flow_style=False forces block style (one key per line);
# allow_unicode=True writes non-ASCII characters as-is instead of escaping them.
with open("config.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(data, f, default_flow_style=False, allow_unicode=True)
Environment Variables
Environment variables are the standard way to pass secrets and config to programs (especially in Docker/servers):
import os
# Get an env var (returns None if not set)
db_url = os.environ.get("DATABASE_URL")

# Get with a default. Environment values are always strings, so the default
# is given as a string too and the result is converted explicitly.
port = int(os.environ.get("PORT", "8080"))
# Only the text "true" (in any letter case) enables debug; anything else is False.
debug = os.environ.get("DEBUG", "false").lower() == "true"

# Require a variable (fail fast if missing).
# "not api_key" also rejects a variable that is set but empty.
api_key = os.environ.get("API_KEY")
if not api_key:
    raise RuntimeError("API_KEY environment variable is required")
Setting env vars in the shell:
export DATABASE_URL="postgresql://localhost/myapp"
export API_KEY="secret123"
python3 app.py
.env Files with python-dotenv
.env files store environment variables for local development:
pip install python-dotenv
.env file (never commit to git!):
DATABASE_URL=postgresql://localhost/myapp
API_KEY=secret123
DEBUG=true
PORT=8080
Loading in Python:
from dotenv import load_dotenv
import os
# Loads .env into os.environ automatically. By default, variables that are
# already set in the real environment are NOT overridden by the file.
load_dotenv()

# After loading, values are read like any other environment variable.
db_url = os.environ.get("DATABASE_URL")
debug = os.environ.get("DEBUG", "false").lower() == "true"
Add .env to .gitignore:
.env
.env.local
.env.*.local
Building a Config Class
Centralize all config in one place:
import os
from dataclasses import dataclass, field
from pathlib import Path
@dataclass(frozen=True)
class Config:
    """Immutable application settings read from environment variables.

    Each default comes from a ``default_factory``, so the environment is read
    every time ``Config()`` is instantiated. A plain default expression would
    be evaluated only once, when the class body is executed at import time,
    and would miss variables set afterwards (for example by loading a .env
    file before constructing the config).

    Any field can still be passed explicitly, e.g. ``Config(port=9000)``.
    """

    # Database
    db_url: str = field(
        default_factory=lambda: os.environ.get("DATABASE_URL", "sqlite:///local.db")
    )
    # Server
    host: str = field(default_factory=lambda: os.environ.get("HOST", "127.0.0.1"))
    port: int = field(default_factory=lambda: int(os.environ.get("PORT", "8080")))
    # App
    debug: bool = field(
        default_factory=lambda: os.environ.get("DEBUG", "false").lower() == "true"
    )
    log_level: str = field(
        default_factory=lambda: os.environ.get("LOG_LEVEL", "INFO").upper()
    )
    # Paths
    data_dir: Path = field(
        default_factory=lambda: Path(os.environ.get("DATA_DIR", "data"))
    )
    output_dir: Path = field(
        default_factory=lambda: Path(os.environ.get("OUTPUT_DIR", "output"))
    )

    def validate(self) -> None:
        """Fail fast on invalid config at startup.

        Raises:
            ValueError: If ``db_url`` is empty, ``port`` is outside 1-65535,
                or ``log_level`` is not DEBUG, INFO, WARNING or ERROR.
        """
        if not self.db_url:
            raise ValueError("DATABASE_URL must be set")
        if not 1 <= self.port <= 65535:
            raise ValueError(f"PORT must be 1–65535, got {self.port}")
        if self.log_level not in ("DEBUG", "INFO", "WARNING", "ERROR"):
            raise ValueError(f"Invalid LOG_LEVEL: {self.log_level}")
def get_config() -> Config:
    """Load the .env file, then build, validate and return a ``Config``.

    Raises:
        ValueError: Propagated from ``Config.validate()`` on invalid settings.
    """
    # Imported inside the function, so python-dotenv is only needed when
    # this function is actually called.
    from dotenv import load_dotenv

    load_dotenv()
    settings = Config()
    settings.validate()
    return settings
Layered Config (Defaults → File → Env Vars)
A robust pattern for production systems:
import os
import json
from pathlib import Path
from dataclasses import dataclass
@dataclass
class Config:
    """Mutable settings holder; the field defaults are the lowest-priority layer."""

    host: str = "localhost"
    port: int = 8080
    debug: bool = False


def _parse_bool(value: object) -> bool:
    """Interpret a real bool, or a string such as "true"/"TRUE", as a boolean."""
    if isinstance(value, str):
        return value.lower() == "true"
    return bool(value)


def load_config(config_file: str = "config.json") -> Config:
    """Load config with priority: defaults < file < environment.

    Args:
        config_file: Path to an optional JSON file holding a top-level object
            with any of the keys ``host``, ``port`` and ``debug``.

    Returns:
        A ``Config`` whose ``port`` is always an ``int`` and whose ``debug``
        is always a ``bool``, whichever layer supplied the value.

    Raises:
        ValueError: If the file is not valid JSON, does not contain a JSON
            object, or a ``port`` value (file or ``PORT``) is not an integer.
    """
    cfg = Config()  # start with defaults

    # Override with file if it exists
    path = Path(config_file)
    if path.exists():
        data = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            raise ValueError(
                f"{config_file} must contain a JSON object at the top level"
            )
        cfg.host = data.get("host", cfg.host)
        # Coerce so that "port": "9090" or "debug": "false" in the file does
        # not leave a string in an int/bool field.
        cfg.port = int(data.get("port", cfg.port))
        cfg.debug = _parse_bool(data.get("debug", cfg.debug))

    # Override with environment variables (highest priority)
    if "HOST" in os.environ:
        cfg.host = os.environ["HOST"]
    if "PORT" in os.environ:
        cfg.port = int(os.environ["PORT"])
    if "DEBUG" in os.environ:
        cfg.debug = _parse_bool(os.environ["DEBUG"])
    return cfg
Common Mistakes
| Mistake | Fix |
|---|---|
| json.load() with invalid JSON | Wrap in try/except json.JSONDecodeError |
| yaml.load() instead of yaml.safe_load() | Always use safe_load |
| Hardcoding secrets in code | Use env vars + .env |
| Committing .env to git | Add .env to .gitignore |
| os.environ["KEY"] when key may not exist | Use .get("KEY", default) |
Quick Reference
# JSON read (`path` is a pathlib.Path)
import json
data = json.loads(path.read_text(encoding="utf-8"))

# JSON write
path.write_text(json.dumps(data, indent=2), encoding="utf-8")

# YAML read (pip install pyyaml)
import yaml
with open("config.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Env vars
import os
value = os.environ.get("KEY", "default")

# .env file (pip install python-dotenv)
from dotenv import load_dotenv
load_dotenv()

# Config class. default_factory reads the environment when Config() is
# created (so after load_dotenv() has run), not when the class is defined.
from dataclasses import dataclass, field

@dataclass(frozen=True)
class Config:
    port: int = field(default_factory=lambda: int(os.environ.get("PORT", "8080")))