By Xavier Collantes
1/26/2024
json.load() for file objects:

import json

# Basic file reading
with open("data.json", "r") as file:
    data = json.load(file)
json.loads():

json_string = '{"name": "Alice", "age": 30}'
data = json.loads(json_string)
print(data["name"])  # Output: Alice

Remember the 's' in `loads()` stands for 'string'. Use it for string data, not file objects.
data = {"users": [{"id": 1, "name": "Alice"}], "count": 1}

# Write to file with formatting
with open("output.json", "w") as file:
    json.dump(data, file, indent=2, ensure_ascii=False)
import json
from pathlib import Path
from typing import Any, Optional


def safe_load_json(file_path: str, default: Any = None) -> Optional[dict]:
    """Safely load JSON with comprehensive error handling.

    Args:
        file_path: Path to the JSON file to read.
        default: Value returned when the file is missing or unreadable.

    Returns:
        The parsed JSON content, or ``default`` on any failure.
    """
    try:
        path = Path(file_path)
        if not path.exists():
            print(f"File not found: {file_path}")
            return default

        with open(path, "r", encoding="utf-8") as file:
            return json.load(file)

    except json.JSONDecodeError as e:
        # JSONDecodeError carries the exact parse position for diagnostics.
        print(f"Invalid JSON in {file_path}: {e.msg} at line {e.lineno}, column {e.colno}")
        return default
    except UnicodeDecodeError as e:
        print(f"Encoding error in {file_path}: {e}")
        return default
    except Exception as e:
        # Catch-all so a config read never crashes the caller; it gets `default`.
        print(f"Unexpected error reading {file_path}: {e}")
        return default


data = safe_load_json("config.json", default={})
import json
from json.decoder import JSONDecodeError


def recover_partial_json(json_string: str) -> list:
    """Attempt to recover individual JSON objects from malformed data.

    Treats the input as line-delimited JSON (JSONL): every line that
    parses is kept; lines that fail to parse are reported and skipped.

    Args:
        json_string: Raw text containing one JSON value per line.

    Returns:
        List of successfully parsed objects, in input order.
    """
    recovered = []
    lines = json_string.strip().split('\n')

    for line_num, line in enumerate(lines, 1):
        line = line.strip()
        if not line:
            continue

        try:
            obj = json.loads(line)
            recovered.append(obj)
        except JSONDecodeError as e:
            print(f"Skipping malformed JSON on line {line_num}: {e.msg}")

    return recovered


# Example with JSONL (JSON Lines) format
jsonl_data = """
{"id": 1, "name": "Alice"}
{"id": 2, "name": "Bob"
{"id": 3, "name": "Charlie"}
"""

recovered_objects = recover_partial_json(jsonl_data)
print(f"Recovered {len(recovered_objects)} valid objects")
import json
from datetime import datetime, date
from decimal import Decimal
from dataclasses import dataclass, asdict
from enum import Enum
import uuid


class Status(Enum):
    ACTIVE = "active"
    INACTIVE = "inactive"


@dataclass
class User:
    id: str
    name: str
    created_at: datetime
    balance: Decimal
    status: Status


class AdvancedJSONEncoder(json.JSONEncoder):
    """Custom encoder for complex Python objects."""

    def default(self, obj):
        # Check concrete types first; the __dict__ fallback at the end
        # catches dataclasses and plain objects. datetime must precede
        # date because datetime is a subclass of date.
        if isinstance(obj, datetime):
            return obj.isoformat()
        elif isinstance(obj, date):
            return obj.isoformat()
        elif isinstance(obj, Decimal):
            # NOTE: float() may lose precision; use str(obj) if exactness matters.
            return float(obj)
        elif isinstance(obj, Enum):
            return obj.value
        elif isinstance(obj, uuid.UUID):
            return str(obj)
        elif hasattr(obj, '__dict__'):
            return obj.__dict__

        return super().default(obj)


user = User(
    id=str(uuid.uuid4()),
    name="Alice",
    created_at=datetime.now(),
    balance=Decimal("99.99"),
    status=Status.ACTIVE
)

json_string = json.dumps(user, cls=AdvancedJSONEncoder, indent=2)
print(json_string)
Enums are automatically converted to strings using their `value` attribute.
import json
import uuid
from datetime import datetime
from decimal import Decimal


def advanced_json_decoder(dct):
    """Custom decoder (``object_hook``) to reconstruct Python objects.

    Scans string values in the decoded dict and converts, in place,
    ISO-8601 datetime strings to ``datetime`` and canonical UUID strings
    to ``uuid.UUID``. Returns the (mutated) dict.
    """
    for key, value in dct.items():
        if isinstance(value, str):
            # Try parsing as ISO datetime. Heuristic: must contain 'T'
            # plus a timezone marker or a ':00' seconds suffix.
            try:
                if 'T' in value and ('Z' in value or '+' in value or value.endswith(':00')):
                    dct[key] = datetime.fromisoformat(value.replace('Z', '+00:00'))
            except ValueError:
                pass

            # Try parsing as UUID (canonical form: 36 chars, 4 hyphens)
            try:
                if len(value) == 36 and value.count('-') == 4:
                    dct[key] = uuid.UUID(value)
            except ValueError:
                pass

    return dct


# `json_string` was produced by the AdvancedJSONEncoder example above.
loaded_data = json.loads(json_string, object_hook=advanced_json_decoder)
def process_jsonl_file(file_path: str, batch_size: int = 1000):
    """Efficiently process JSONL files in batches.

    Generator that yields lists of parsed records, at most ``batch_size``
    per batch; lines that fail to parse are reported and skipped.

    Args:
        file_path: Path to a JSON Lines file (one JSON value per line).
        batch_size: Maximum number of records per yielded batch.
    """
    batch = []

    with open(file_path, 'r') as file:
        for line_num, line in enumerate(file, 1):
            try:
                record = json.loads(line.strip())
                batch.append(record)

                if len(batch) >= batch_size:
                    yield batch
                    batch = []

            except json.JSONDecodeError as e:
                print(f"Skipping invalid JSON on line {line_num}: {e}")

    # Yield remaining records
    if batch:
        yield batch


# Guarded so importing this module does not try to open logs.jsonl.
if __name__ == "__main__":
    for batch in process_jsonl_file("logs.jsonl", batch_size=500):
        # Process batch of records
        analyze_batch(batch)
# Pretty print with custom indentation
python3 -m json.tool input.json --indent 4 > formatted.json

# Compact JSON (remove whitespace)
python3 -c "import json,sys;print(json.dumps(json.load(sys.stdin),separators=(',',':')))"

# Sort keys while formatting
python3 -m json.tool --sort-keys input.json
#!/usr/bin/env python3
"""JSON analysis tool; save as json_analyzer.py."""
import json
import sys
from collections import Counter
from pathlib import Path


def analyze_json(file_path):
    """Analyze JSON structure and print per-path statistics.

    Prints one summary line per object/array/scalar encountered while
    walking the structure; returns None. Load errors are printed, not raised.
    """
    try:
        with open(file_path) as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error: {e}")
        return

    def analyze_value(obj, path=""):
        # Recursive walk; `path` is the dotted location within the document.
        if isinstance(obj, dict):
            print(f"Object at {path or 'root'}: {len(obj)} keys")
            for key, value in obj.items():
                analyze_value(value, f"{path}.{key}" if path else key)
        elif isinstance(obj, list):
            print(f"Array at {path}: {len(obj)} items")
            if obj:  # Analyze first item structure only, as a sample
                analyze_value(obj[0], f"{path}[0]")
        else:
            type_name = type(obj).__name__
            print(f"{path}: {type_name} = {str(obj)[:50]}")

    analyze_value(data)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python json_analyzer.py <file.json>")
        sys.exit(1)
    analyze_json(sys.argv[1])
# Install with: pip install jsonpath-ng
from jsonpath_ng import parse
import json


def query_json_path(data, expression):
    """Query JSON data using JSONPath expressions.

    Args:
        data: Decoded JSON structure (dicts/lists).
        expression: JSONPath string, e.g. "$.users[*].name".

    Returns:
        List of all matched values (empty if nothing matches).
    """
    jsonpath_expr = parse(expression)
    matches = [match.value for match in jsonpath_expr.find(data)]
    return matches


# Example data
data = {
    "users": [
        {"id": 1, "name": "Alice", "posts": [{"title": "Hello", "likes": 5}]},
        {"id": 2, "name": "Bob", "posts": [{"title": "World", "likes": 10}]}
    ]
}

# JSONPath queries
names = query_json_path(data, "$.users[*].name")  # All user names
high_likes = query_json_path(data, "$.users[*].posts[?(@.likes > 7)]")  # Posts with >7 likes
print(f"Names: {names}")
print(f"Popular posts: {high_likes}")
import json
import os
from pathlib import Path
from typing import Dict, Any


class ConfigManager:
    """Robust configuration management with environment override.

    Loads a JSON config file, then overrides any top-level key with the
    environment variable ``APP_<KEY>`` (upper-cased) when it is set.
    """

    def __init__(self, config_file: str, schema: Dict[str, Any] = None):
        self.config_file = Path(config_file)
        self.schema = schema or {}
        self._config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration with environment variable overrides."""
        # Load from file (missing file means an empty base config)
        config = {}
        if self.config_file.exists():
            with open(self.config_file) as f:
                config = json.load(f)

        # Override with environment variables; only keys already present
        # in the file can be overridden.
        for key in config.keys():
            env_key = f"APP_{key.upper()}"
            if env_key in os.environ:
                # Try to parse as JSON first (numbers, booleans, lists),
                # falling back to the raw string.
                try:
                    config[key] = json.loads(os.environ[env_key])
                except json.JSONDecodeError:
                    config[key] = os.environ[env_key]

        return config

    def get(self, key: str, default=None):
        """Return the configured value for `key`, or `default`."""
        return self._config.get(key, default)

    def save(self):
        """Save current configuration back to file."""
        with open(self.config_file, 'w') as f:
            # AdvancedJSONEncoder is defined in the encoder section above.
            json.dump(self._config, f, indent=2, cls=AdvancedJSONEncoder)


config = ConfigManager('app_config.json')
database_url = config.get('database_url', 'sqlite:///default.db')
import json
import time
from functools import wraps
from pathlib import Path
from typing import Optional, Callable, Any


def json_cache(cache_dir: str = "cache", ttl: int = 3600):
    """Decorator that caches a function's JSON-serializable result on disk.

    Args:
        cache_dir: Directory where cache files are written.
        ttl: Freshness window in seconds; older entries are recomputed.
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # Create cache key from function name and arguments.
            # NOTE(review): str hashing is salted per process, so this cache
            # is only stable within a single interpreter run — fine here.
            cache_key = f"{func.__name__}_{hash(str(args) + str(sorted(kwargs.items())))}"
            cache_file = Path(cache_dir) / f"{cache_key}.json"

            # Check if cached version exists and is fresh
            if cache_file.exists():
                cache_age = time.time() - cache_file.stat().st_mtime
                if cache_age < ttl:
                    with open(cache_file) as f:
                        cached_data = json.load(f)
                    print(f"Cache hit for {func.__name__}")
                    return cached_data

            # Call function and cache result
            result = func(*args, **kwargs)

            # Ensure cache directory exists (parents=True handles nested dirs)
            cache_file.parent.mkdir(parents=True, exist_ok=True)

            # Save to cache
            with open(cache_file, 'w') as f:
                json.dump(result, f, cls=AdvancedJSONEncoder)

            return result
        return wrapper
    return decorator


@json_cache(ttl=1800)  # Cache for 30 minutes
def fetch_user_data(user_id: int) -> dict:
    # Simulate API call
    import requests
    response = requests.get(f"https://api.example.com/users/{user_id}")
    return response.json()
from typing import Dict, Any, List
import json
from datetime import datetime


class JSONValidator:
    """Validate and transform JSON data according to a lightweight schema.

    The schema dict supports three keys:
      - 'required':   list of field names that must be present
      - 'properties': mapping of field name -> JSON type name
      - 'transforms': mapping of field name -> transform ('datetime')
    """

    def __init__(self, schema: Dict[str, Any]):
        self.schema = schema

    def validate(self, data: Dict[str, Any]) -> tuple[bool, List[str]]:
        """Validate data against schema; return (is_valid, error messages)."""
        errors = []

        # Check required fields
        required = self.schema.get('required', [])
        for field in required:
            if field not in data:
                errors.append(f"Missing required field: {field}")

        # Check field types
        properties = self.schema.get('properties', {})
        for field, expected_type in properties.items():
            if field in data:
                if not self._check_type(data[field], expected_type):
                    errors.append(f"Field {field} has wrong type")

        return len(errors) == 0, errors

    def _check_type(self, value: Any, expected_type: str) -> bool:
        """Map a JSON type name to Python types and check `value` against it."""
        # NOTE(review): bool is a subclass of int, so True/False also pass
        # the 'number' check — acceptable for this simple validator.
        type_map = {
            'string': str,
            'number': (int, float),
            'boolean': bool,
            'array': list,
            'object': dict
        }
        expected = type_map.get(expected_type, str)
        return isinstance(value, expected)

    def transform(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Transform data according to schema rules (input dict is copied)."""
        transformed = data.copy()

        # Apply transformations
        transforms = self.schema.get('transforms', {})
        for field, transform_type in transforms.items():
            if field in transformed:
                if transform_type == 'datetime':
                    if isinstance(transformed[field], str):
                        try:
                            transformed[field] = datetime.fromisoformat(
                                transformed[field].replace('Z', '+00:00')
                            )
                        except ValueError:
                            # Leave the original string if it isn't ISO-8601
                            pass

        return transformed


# Example usage
schema = {
    'required': ['id', 'name', 'email'],
    'properties': {
        'id': 'number',
        'name': 'string',
        'email': 'string',
        'created_at': 'string'
    },
    'transforms': {
        'created_at': 'datetime'
    }
}

validator = JSONValidator(schema)

user_data = {
    'id': 1,
    'name': 'Alice',
    'email': '[email protected]',
    'created_at': '2024-01-26T10:00:00Z'
}

is_valid, errors = validator.validate(user_data)
if is_valid:
    transformed_data = validator.transform(user_data)
    print(f"Created at: {transformed_data['created_at']}")
else:
    print(f"Validation errors: {errors}")
JSON keys are always strings. Python dict keys like integers will be converted: `data = {1: 'one', 2: 'two'}` becomes `{'1': 'one', '2': 'two'}` after JSON serialization.
Use `ensure_ascii=False` when writing JSON with Unicode characters to preserve them properly instead of escaping to ASCII.
data = {"message": "Hello 世界"}
json.dump(data, file, ensure_ascii=False, indent=2)

For large files, consider streaming parsers like `ijson` to parse JSON without loading the entire file into memory.
import ijson

# Parse large JSON without loading into memory
with open('huge_file.json', 'rb') as file:
    objects = ijson.items(file, 'data.item')
    for obj in objects:
        process(obj)

Never use `eval()` on JSON data as it can execute arbitrary code. Always use `json.loads()` for safe parsing.
# NEVER do this
data = eval(json_string)  # Dangerous!

# Always do this
data = json.loads(json_string)  # Safe

Be aware of recursion limits with deeply nested JSON. You may need to increase the recursion limit for very deep structures.
import sys
sys.setrecursionlimit(10000)  # Increase if needed
#!/usr/bin/env python3
"""Compare two JSON files - save as json_diff.py"""
import json
import sys
from deepdiff import DeepDiff  # pip install deepdiff


def json_diff(file1, file2):
    """Print a structural, order-insensitive diff of two JSON files."""
    with open(file1) as f1, open(file2) as f2:
        data1 = json.load(f1)
        data2 = json.load(f2)

    diff = DeepDiff(data1, data2, ignore_order=True)

    if not diff:
        print("Files are identical")
    else:
        # default=str lets DeepDiff's non-JSON-native types serialize
        print(json.dumps(diff, indent=2, default=str))


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python json_diff.py file1.json file2.json")
        sys.exit(1)
    json_diff(sys.argv[1], sys.argv[2])
# Install jsonschema
pip install jsonschema

# Validate JSON against schema
python3 -c "
import json, sys
from jsonschema import validate

with open(sys.argv[1]) as f: data = json.load(f)
with open(sys.argv[2]) as f: schema = json.load(f)

try:
    validate(data, schema)
    print('Valid')
except Exception as e:
    print(f'Invalid: {e}')
" data.json schema.json

Related by topics: