Xavier Collantes

Mastering JSON in Python: Cheatsheet

By Xavier Collantes

1/26/2024


Basic JSON Operations

Reading JSON Files

The fundamental approach uses json.load() for file objects:
🐍
Python3
import json

# Load a JSON document from disk into native Python objects.
# json.load() accepts any readable file-like object.
with open("data.json", "r") as fp:
    data = json.load(fp)
Snippet hosted by Xavier.

Reading JSON Strings

For JSON strings in memory, use json.loads():
🐍
Python3
# Parse JSON text already held in memory. Note the trailing "s":
# loads() means "load string", as opposed to load() for file objects.
json_string = '{"name": "Alice", "age": 30}'
data = json.loads(json_string)
print(data["name"])  # Output: Alice
Snippet hosted by Xavier.

Remember the 's' in `loads()` stands for 'string'. Use it for string data, not file objects.

Writing JSON Files

🐍
Python3
data = {"users": [{"id": 1, "name": "Alice"}], "count": 1}

# Serialize to disk: indent=2 pretty-prints; ensure_ascii=False keeps
# non-ASCII characters readable instead of \u-escaping them.
with open("output.json", "w") as fp:
    json.dump(data, fp, indent=2, ensure_ascii=False)
Snippet hosted by Xavier.

Advanced Error Handling

Robust JSON Reading with Error Recovery

🐍
Python3
import json
from pathlib import Path
from typing import Any, Optional


def safe_load_json(file_path: str, default: Any = None) -> Optional[dict]:
    """Load *file_path* as JSON, returning *default* on any failure.

    Missing files, malformed JSON, bad encodings, and unexpected errors
    are all reported to stdout rather than raised.
    """
    try:
        path = Path(file_path)
        if not path.exists():
            print(f"File not found: {file_path}")
            return default

        with path.open("r", encoding="utf-8") as handle:
            return json.load(handle)

    except json.JSONDecodeError as e:
        # JSONDecodeError carries the exact location of the syntax error.
        print(f"Invalid JSON in {file_path}: {e.msg} at line {e.lineno}, column {e.colno}")
        return default
    except UnicodeDecodeError as e:
        print(f"Encoding error in {file_path}: {e}")
        return default
    except Exception as e:
        # Last-resort guard so a config load never crashes the caller.
        print(f"Unexpected error reading {file_path}: {e}")
        return default


data = safe_load_json("config.json", default={})
Snippet hosted by Xavier.

Partial JSON Recovery

🐍
Python3
import json
from json.decoder import JSONDecodeError


def recover_partial_json(json_string: str) -> list:
    """Return every line of *json_string* that parses as a JSON value.

    Malformed lines are reported and skipped, which makes this useful
    for salvaging partially corrupted JSONL data.
    """
    recovered = []
    for line_num, raw in enumerate(json_string.strip().split('\n'), 1):
        candidate = raw.strip()
        if not candidate:
            continue  # ignore blank lines entirely

        try:
            recovered.append(json.loads(candidate))
        except JSONDecodeError as e:
            print(f"Skipping malformed JSON on line {line_num}: {e.msg}")

    return recovered


# Example with JSONL (JSON Lines) format
jsonl_data = """
{"id": 1, "name": "Alice"}
{"id": 2, "name": "Bob"
{"id": 3, "name": "Charlie"}
"""

recovered_objects = recover_partial_json(jsonl_data)
print(f"Recovered {len(recovered_objects)} valid objects")
Snippet hosted by Xavier.

Custom JSON Encoders and Decoders

Handling Non-Serializable Objects

🐍
Python3
import json
import uuid
from dataclasses import dataclass, asdict
from datetime import datetime, date
from decimal import Decimal
from enum import Enum


class Status(Enum):
    ACTIVE = "active"
    INACTIVE = "inactive"


@dataclass
class User:
    id: str
    name: str
    created_at: datetime
    balance: Decimal
    status: Status


class AdvancedJSONEncoder(json.JSONEncoder):
    """Custom encoder for complex Python objects."""

    def default(self, obj):
        # Order matters: datetime is a subclass of date, so test it first.
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, date):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, Enum):
            return obj.value
        if isinstance(obj, uuid.UUID):
            return str(obj)
        if hasattr(obj, '__dict__'):
            # Fallback: serialize arbitrary objects via their attribute dict;
            # nested values re-enter this encoder recursively.
            return obj.__dict__

        return super().default(obj)


user = User(
    id=str(uuid.uuid4()),
    name="Alice",
    created_at=datetime.now(),
    balance=Decimal("99.99"),
    status=Status.ACTIVE,
)

json_string = json.dumps(user, cls=AdvancedJSONEncoder, indent=2)
print(json_string)
Snippet hosted by Xavier.

Enums are automatically converted to strings using their `value` attribute.

Custom JSON Decoder

🐍
Python3
import json
import uuid
from datetime import datetime
from decimal import Decimal


def advanced_json_decoder(dct):
    """Custom decoder to reconstruct Python objects.

    Passed as ``object_hook`` to ``json.loads``; called once for each
    decoded JSON object. String values that look like ISO-8601 datetimes
    or canonical UUIDs are replaced in place with the matching Python
    objects; everything else is left untouched.
    """
    for key, value in dct.items():
        if isinstance(value, str):
            # Heuristic ISO-datetime detection: requires the 'T' separator
            # plus a timezone marker or a trailing ':00' seconds field.
            try:
                if 'T' in value and ('Z' in value or '+' in value or value.endswith(':00')):
                    dct[key] = datetime.fromisoformat(value.replace('Z', '+00:00'))
            except ValueError:
                pass

            # UUID detection: the canonical form is 36 chars with 4 hyphens.
            try:
                if len(value) == 36 and value.count('-') == 4:
                    dct[key] = uuid.UUID(value)
            except ValueError:
                pass

    return dct


# Self-contained example (the original referenced a json_string defined
# only in an earlier snippet).
json_string = '{"created_at": "2024-01-26T10:00:00+00:00"}'
loaded_data = json.loads(json_string, object_hook=advanced_json_decoder)
Snippet hosted by Xavier.

Using Default Decoder

🐍
Python3
1loaded_data = json.loads(json_string, object_hook=advanced_json_decoder)
2
Snippet hosted by Xavier.

Streaming JSON Processing for Large Files

JSONL (JSON Lines) Processing

🐍
Python3
def process_jsonl_file(file_path: str, batch_size: int = 1000):
    """Yield lists of parsed JSONL records, at most *batch_size* at a time.

    Malformed lines are reported and skipped; a final partial batch is
    yielded at end of file. Streaming keeps memory bounded regardless of
    file size.
    """
    batch = []

    with open(file_path, 'r') as file:
        for line_num, raw in enumerate(file, 1):
            try:
                batch.append(json.loads(raw.strip()))
            except json.JSONDecodeError as e:
                print(f"Skipping invalid JSON on line {line_num}: {e}")
                continue

            if len(batch) >= batch_size:
                yield batch
                batch = []

    # Flush whatever is left after the last full batch.
    if batch:
        yield batch
21
22
23for batch in process_jsonl_file("logs.jsonl", batch_size=500):
24    # Process batch of records
25    analyze_batch(batch)
26
Snippet hosted by Xavier.

Advanced Command-Line JSON Operations

Enhanced JSON Formatting

Bash
1# Pretty print with custom indentation
2python3 -m json.tool input.json --indent 4 > formatted.json
3
4# Compact JSON (remove whitespace)
5python3 -c "import json,sys;print(json.dumps(json.load(sys.stdin),separators=(',',':')))"
6
7# Sort keys while formatting
8python3 -m json.tool --sort-keys input.json
9
Snippet hosted by Xavier.

JSON Validation and Analysis

🐍
Python3
#!/usr/bin/env python3
"""JSON analysis tool; save as json_analyzer.py."""
import json
import sys
from collections import Counter
from pathlib import Path


def analyze_json(file_path):
    """Analyze JSON structure and provide statistics."""
    try:
        with open(file_path) as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error: {e}")
        return

    def walk(node, path=""):
        # Recursively describe each level of the document.
        if isinstance(node, dict):
            print(f"Object at {path or 'root'}: {len(node)} keys")
            for key, child in node.items():
                walk(child, f"{path}.{key}" if path else key)
        elif isinstance(node, list):
            print(f"Array at {path}: {len(node)} items")
            if node:  # only the first item's structure is sampled
                walk(node[0], f"{path}[0]")
        else:
            print(f"{path}: {type(node).__name__} = {str(node)[:50]}")

    walk(data)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python json_analyzer.py <file.json>")
        sys.exit(1)
    analyze_json(sys.argv[1])
37
Snippet hosted by Xavier.

JSON Path Queries

🐍
Python3
# Install with: pip install jsonpath-ng
from jsonpath_ng import parse
import json


def query_json_path(data, expression):
    """Return every value in *data* matched by the JSONPath *expression*."""
    compiled = parse(expression)
    results = []
    for match in compiled.find(data):
        results.append(match.value)
    return results


# Example data
data = {
    "users": [
        {"id": 1, "name": "Alice", "posts": [{"title": "Hello", "likes": 5}]},
        {"id": 2, "name": "Bob", "posts": [{"title": "World", "likes": 10}]}
    ]
}

# JSONPath queries
names = query_json_path(data, "$.users[*].name")  # All user names
high_likes = query_json_path(data, "$.users[*].posts[?(@.likes > 7)]")  # Posts with >7 likes
print(f"Names: {names}")
print(f"Popular posts: {high_likes}")
Snippet hosted by Xavier.

Real-World Patterns and Examples

Configuration Management

🐍
Python3
import json
import os
from pathlib import Path
from typing import Any, Dict, Optional


class ConfigManager:
    """Robust configuration management with environment override.

    Values from ``config_file`` (JSON) can be overridden per key by an
    environment variable named ``APP_<KEY>`` (upper-cased).
    """

    def __init__(self, config_file: str, schema: Optional[Dict[str, Any]] = None):
        self.config_file = Path(config_file)
        self.schema = schema or {}
        self._config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration with environment variable overrides."""
        config: Dict[str, Any] = {}
        if self.config_file.exists():
            with open(self.config_file) as f:
                config = json.load(f)

        # Only keys already present in the file can be overridden. The env
        # value is parsed as JSON when possible, else kept as a raw string.
        for key in config.keys():
            env_key = f"APP_{key.upper()}"
            if env_key in os.environ:
                try:
                    config[key] = json.loads(os.environ[env_key])
                except json.JSONDecodeError:
                    config[key] = os.environ[env_key]

        return config

    def get(self, key: str, default=None):
        """Return the configured value for *key*, or *default* if absent."""
        return self._config.get(key, default)

    def save(self):
        """Save current configuration back to file.

        NOTE(review): relies on AdvancedJSONEncoder from an earlier snippet
        being in scope at call time.
        """
        with open(self.config_file, 'w') as f:
            json.dump(self._config, f, indent=2, cls=AdvancedJSONEncoder)


config = ConfigManager('app_config.json')
database_url = config.get('database_url', 'sqlite:///default.db')
Snippet hosted by Xavier.

API Response Caching

🐍
Python3
import hashlib
import json
import time
from functools import wraps
from pathlib import Path
from typing import Optional, Callable, Any


def json_cache(cache_dir: str = "cache", ttl: int = 3600):
    """Decorator that caches a function's JSON-serializable result on disk.

    Args:
        cache_dir: Directory for cache files (created on demand).
        ttl: Maximum age in seconds before a cached entry is recomputed.
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # Build a STABLE digest of the call signature. The builtin
            # hash() is salted per process (PYTHONHASHSEED), so using it
            # would change cache file names between runs and silently
            # defeat a persistent on-disk cache.
            arg_repr = str(args) + str(sorted(kwargs.items()))
            digest = hashlib.sha256(arg_repr.encode("utf-8")).hexdigest()[:16]
            cache_key = f"{func.__name__}_{digest}"
            cache_file = Path(cache_dir) / f"{cache_key}.json"

            # Serve from cache while the file is younger than ttl.
            if cache_file.exists():
                cache_age = time.time() - cache_file.stat().st_mtime
                if cache_age < ttl:
                    with open(cache_file) as f:
                        cached_data = json.load(f)
                    print(f"Cache hit for {func.__name__}")
                    return cached_data

            # Cache miss (or stale): call through and persist the result.
            result = func(*args, **kwargs)

            # parents=True so nested cache_dir paths work too.
            cache_file.parent.mkdir(parents=True, exist_ok=True)

            with open(cache_file, 'w') as f:
                json.dump(result, f, cls=AdvancedJSONEncoder)

            return result
        return wrapper
    return decorator


@json_cache(ttl=1800)  # Cache for 30 minutes
def fetch_user_data(user_id: int) -> dict:
    """Simulated API call (requires the third-party ``requests`` package)."""
    import requests
    response = requests.get(f"https://api.example.com/users/{user_id}")
    return response.json()
Snippet hosted by Xavier.

Data Validation and Transformation

🐍
Python3
from typing import Dict, Any, List
import json
from datetime import datetime


class JSONValidator:
    """Validate and transform JSON data according to a simple schema."""

    # Maps schema type names to the Python types isinstance() should accept.
    _TYPE_MAP = {
        'string': str,
        'number': (int, float),
        'boolean': bool,
        'array': list,
        'object': dict
    }

    def __init__(self, schema: Dict[str, Any]):
        self.schema = schema

    def validate(self, data: Dict[str, Any]) -> tuple[bool, List[str]]:
        """Check *data* against the schema; return (is_valid, errors)."""
        # Required fields first, then per-field type checks.
        errors = [
            f"Missing required field: {field}"
            for field in self.schema.get('required', [])
            if field not in data
        ]

        for field, expected_type in self.schema.get('properties', {}).items():
            if field in data and not self._check_type(data[field], expected_type):
                errors.append(f"Field {field} has wrong type")

        return len(errors) == 0, errors

    def _check_type(self, value: Any, expected_type: str) -> bool:
        """True when value matches the schema type name (default: string)."""
        return isinstance(value, self._TYPE_MAP.get(expected_type, str))

    def transform(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *data* with schema-driven transforms applied."""
        transformed = data.copy()

        for field, transform_type in self.schema.get('transforms', {}).items():
            if field not in transformed or transform_type != 'datetime':
                continue
            raw = transformed[field]
            if isinstance(raw, str):
                try:
                    # Normalize trailing 'Z' to an explicit UTC offset.
                    transformed[field] = datetime.fromisoformat(
                        raw.replace('Z', '+00:00')
                    )
                except ValueError:
                    pass  # leave unparseable strings untouched

        return transformed

# Example usage
schema = {
    'required': ['id', 'name', 'email'],
    'properties': {
        'id': 'number',
        'name': 'string',
        'email': 'string',
        'created_at': 'string'
    },
    'transforms': {
        'created_at': 'datetime'
    }
}

validator = JSONValidator(schema)

user_data = {
    'id': 1,
    'name': 'Alice',
    'email': '[email protected]',
    'created_at': '2024-01-26T10:00:00Z'
}

is_valid, errors = validator.validate(user_data)
if is_valid:
    transformed_data = validator.transform(user_data)
    print(f"Created at: {transformed_data['created_at']}")
else:
    print(f"Validation errors: {errors}")
Snippet hosted by Xavier.

Common Pitfalls

JSON keys are always strings. Python dict keys like integers will be converted: `data = {1: 'one', 2: 'two'}` becomes `{'1': 'one', '2': 'two'}` after JSON serialization.

Use `ensure_ascii=False` when writing JSON with Unicode characters to preserve them properly instead of escaping to ASCII.

🐍
Python3
1data = {"message": "Hello δΈ–η•Œ"}
2json.dump(data, file, ensure_ascii=False, indent=2)
3
Snippet hosted by Xavier.

For large files, consider streaming parsers like `ijson` to parse JSON without loading the entire file into memory.

Bash
1pip install ijson
2
Snippet hosted by Xavier.
🐍
Python3
1import ijson
2
3# Parse large JSON without loading into memory
4with open('huge_file.json', 'rb') as file:
5    objects = ijson.items(file, 'data.item')
6    for obj in objects:
7        process(obj)
8
Snippet hosted by Xavier.

Never use `eval()` on JSON data as it can execute arbitrary code. Always use `json.loads()` for safe parsing.

🐍
Python3
1# NEVER do this
2data = eval(json_string)  # Dangerous!
3
4# Always do this
5data = json.loads(json_string)  # Safe
6
Snippet hosted by Xavier.

Be aware of recursion limits with deeply nested JSON. You may need to increase the recursion limit for very deep structures.

🐍
Python3
1import sys
2sys.setrecursionlimit(10000)  # Increase if needed
3
Snippet hosted by Xavier.

Advanced Command-Line Tools

JSON Diff Tool

🐍
Python3
#!/usr/bin/env python3
"""Compare two JSON files - save as json_diff.py"""
import json
import sys
from deepdiff import DeepDiff  # pip install deepdiff


def json_diff(file1, file2):
    """Print a structural diff of two JSON files (order-insensitive)."""
    with open(file1) as f1, open(file2) as f2:
        data1, data2 = json.load(f1), json.load(f2)

    diff = DeepDiff(data1, data2, ignore_order=True)

    if diff:
        # default=str lets DeepDiff's non-JSON types (sets, etc.) serialize.
        print(json.dumps(diff, indent=2, default=str))
    else:
        print("Files are identical")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python json_diff.py file1.json file2.json")
        sys.exit(1)
    json_diff(sys.argv[1], sys.argv[2])
Snippet hosted by Xavier.

JSON Schema Validator

Bash
1# Install jsonschema
2pip install jsonschema
3
4# Validate JSON against schema
5python3 -c "
6import json, sys
7from jsonschema import validate
8
9with open(sys.argv[1]) as f: data = json.load(f)
10with open(sys.argv[2]) as f: schema = json.load(f)
11
12try:
13    validate(data, schema)
14    print('Valid')
15except Exception as e:
16    print(f'Invalid: {e}')
17" data.json schema.json
18
Snippet hosted by Xavier.

Related Articles

Related by topics:

python
datascience
Streamlit Data Visualization Cheat Sheet

Lessons I learned from using the Streamlit data platform.

By Xavier Collantes — 1/26/2024
frontend
webdev
bi
+2

Home · Feedback