Data Validation Patterns
Problem
Invalid data causes runtime errors, data corruption, and security vulnerabilities. Manual validation is error-prone, verbose, and difficult to maintain across complex data structures.
Solution
1. Pydantic Models with Field Validation
from pydantic import BaseModel, Field, EmailStr, HttpUrl, validator
from typing import Optional, List
from datetime import datetime
class User(BaseModel):
"""User model with automatic validation."""
id: int
username: str = Field(..., min_length=3, max_length=50, pattern=r'^[a-zA-Z0-9_]+$')
email: EmailStr
age: int = Field(..., ge=18, le=150, description="User age must be 18-150")
website: Optional[HttpUrl] = None
tags: List[str] = Field(default_factory=list, max_items=10)
created_at: datetime
user = User(
id=1,
username="john_doe",
email="john@example.com",
age=30,
website="https://example.com",
tags=["python", "developer"],
created_at=datetime.now()
)
try:
invalid_user = User(
id=1,
username="ab", # Too short!
email="invalid-email", # Invalid format!
age=15, # Too young!
created_at=datetime.now()
)
except ValidationError as e:
print(e.errors())
# [
# {'loc': ('username',), 'msg': 'ensure this value has at least 3 characters', 'type': 'value_error.any_str.min_length'},
# {'loc': ('email',), 'msg': 'value is not a valid email address', 'type': 'value_error.email'},
# {'loc': ('age',), 'msg': 'ensure this value is greater than or equal to 18', 'type': 'value_error.number.not_ge'}
# ]2. Custom Validators for Complex Rules
from pydantic import BaseModel, validator, root_validator
import re
class PasswordPolicy(BaseModel):
"""Password with strength validation."""
password: str
confirm_password: str
@validator('password')
def password_strength(cls, v):
"""Validate password complexity."""
if len(v) < 8:
raise ValueError('Password must be at least 8 characters')
if not re.search(r'[A-Z]', v):
raise ValueError('Password must contain uppercase letter')
if not re.search(r'[a-z]', v):
raise ValueError('Password must contain lowercase letter')
if not re.search(r'\d', v):
raise ValueError('Password must contain digit')
if not re.search(r'[!@#$%^&*(),.?":{}|<>]', v):
raise ValueError('Password must contain special character')
return v
@root_validator
def passwords_match(cls, values):
"""Validate passwords match."""
password = values.get('password')
confirm_password = values.get('confirm_password')
if password != confirm_password:
raise ValueError('Passwords do not match')
return values
valid_password = PasswordPolicy(
password='SecureP@ss123',
confirm_password='SecureP@ss123'
)
try:
invalid_password = PasswordPolicy(
password='weak',
confirm_password='weak'
)
except ValidationError as e:
print(e.errors())3. Nested Model Validation
from pydantic import BaseModel, conlist
from typing import List
class Address(BaseModel):
"""Address with validation."""
street: str = Field(..., min_length=5)
city: str = Field(..., min_length=2)
postal_code: str = Field(..., pattern=r'^\d{5}(-\d{4})?$')
country: str = Field(..., min_length=2, max_length=2)
class Company(BaseModel):
"""Company with nested address validation."""
name: str = Field(..., min_length=1, max_length=200)
registration_number: str
headquarters: Address
branches: conlist(Address, min_items=0, max_items=50) = []
@validator('registration_number')
def validate_registration(cls, v):
"""Validate registration number format."""
if not re.match(r'^REG-\d{6}$', v):
raise ValueError('Registration must be REG-XXXXXX format')
return v
company = Company(
name="Tech Corp",
registration_number="REG-123456",
headquarters=Address(
street="123 Main Street",
city="San Francisco",
postal_code="94105",
country="US"
),
branches=[
Address(
street="456 Market Street",
city="New York",
postal_code="10001",
country="US"
)
]
)4. Dynamic Validation with Field Validators
from pydantic import BaseModel, Field, validator
from typing import Optional
from enum import Enum
class UserRole(str, Enum):
ADMIN = "admin"
USER = "user"
GUEST = "guest"
class UserPermissions(BaseModel):
"""User with role-based validation."""
username: str
role: UserRole
can_delete: bool = False
can_edit: bool = False
can_view: bool = True
@validator('can_delete')
def admin_only_delete(cls, v, values):
"""Only admins can have delete permission."""
role = values.get('role')
if v and role != UserRole.ADMIN:
raise ValueError('Only admins can have delete permission')
return v
@validator('can_edit')
def user_or_admin_edit(cls, v, values):
"""Only users and admins can edit."""
role = values.get('role')
if v and role not in [UserRole.ADMIN, UserRole.USER]:
raise ValueError('Guests cannot have edit permission')
return v
admin = UserPermissions(
username="admin",
role=UserRole.ADMIN,
can_delete=True,
can_edit=True
)
user = UserPermissions(
username="user",
role=UserRole.USER,
can_edit=True
)
try:
invalid = UserPermissions(
username="guest",
role=UserRole.GUEST,
can_edit=True # Not allowed!
)
except ValidationError as e:
print(e.errors())5. Dataclass Validation with Pydantic
from pydantic.dataclasses import dataclass
from pydantic import validator
from dataclasses import field
@dataclass
class Point:
"""3D point with coordinate validation."""
x: float
y: float
z: float = 0.0
@validator('x', 'y', 'z')
def validate_coordinate(cls, v):
"""Ensure coordinates are within valid range."""
if not -1000 <= v <= 1000:
raise ValueError('Coordinate must be between -1000 and 1000')
return v
@dataclass
class Vector:
"""Vector with magnitude calculation."""
start: Point
end: Point
magnitude: Optional[float] = field(default=None, init=False)
def __post_init__(self):
"""Calculate magnitude after initialization."""
dx = self.end.x - self.start.x
dy = self.end.y - self.start.y
dz = self.end.z - self.start.z
self.magnitude = (dx**2 + dy**2 + dz**2) ** 0.5
vector = Vector(
start=Point(x=0, y=0, z=0),
end=Point(x=3, y=4, z=0)
)
print(f"Magnitude: {vector.magnitude}") # 5.06. JSON Schema and OpenAPI Integration
from pydantic import BaseModel, Field
import json
class Product(BaseModel):
"""Product with comprehensive documentation."""
id: int = Field(..., description="Unique product identifier")
name: str = Field(..., min_length=1, max_length=200, description="Product name")
price: float = Field(..., gt=0, description="Product price in USD")
in_stock: bool = Field(default=True, description="Stock availability")
class Config:
schema_extra = {
"example": {
"id": 1,
"name": "Laptop",
"price": 999.99,
"in_stock": True
}
}
schema = Product.schema()
print(json.dumps(schema, indent=2))How It Works
graph TD
A[Data Input] --> B[Pydantic Model]
B --> C{Field Type Check}
C -->|Invalid| D[ValidationError]
C -->|Valid| E{Field Constraints}
E -->|Invalid| D
E -->|Valid| F{Custom Validators}
F -->|Invalid| D
F -->|Valid| G{Root Validators}
G -->|Invalid| D
G -->|Valid| H[Validated Object]
style A fill:#0173B2
style B fill:#DE8F05
style F fill:#029E73
style H fill:#CC78BC
Validation Pipeline:
- Type Coercion: Convert input to declared types (str → int, str → datetime)
- Field Constraints: Check min/max, patterns, enums
- Field Validators: Run
@validatordecorated methods - Root Validators: Cross-field validation with
@root_validator - Result: Either validated object or ValidationError with details
Variations
Conditional Validation
from pydantic import BaseModel, validator
class ConditionalModel(BaseModel):
"""Model with conditional validation."""
is_company: bool
company_name: Optional[str] = None
individual_name: Optional[str] = None
@root_validator
def check_name_provided(cls, values):
"""Require appropriate name based on type."""
is_company = values.get('is_company')
company_name = values.get('company_name')
individual_name = values.get('individual_name')
if is_company and not company_name:
raise ValueError('Company name required for companies')
if not is_company and not individual_name:
raise ValueError('Individual name required for individuals')
return valuesValidation with External Data
from pydantic import BaseModel, validator
class UserRegistration(BaseModel):
"""User registration with database validation."""
username: str
email: EmailStr
@validator('username')
def username_not_taken(cls, v):
"""Verify username is available."""
# In real app, query database
taken_usernames = ['admin', 'root', 'test']
if v.lower() in taken_usernames:
raise ValueError('Username already taken')
return v
@validator('email')
def email_not_registered(cls, v):
"""Verify email not already registered."""
# In real app, query database
registered_emails = ['admin@example.com']
if v.lower() in registered_emails:
raise ValueError('Email already registered')
return vPre and Post Validation Processing
from pydantic import BaseModel, validator
class NormalizedUser(BaseModel):
"""User with automatic data normalization."""
username: str
email: str
@validator('username', pre=True)
def normalize_username(cls, v):
"""Normalize before validation."""
if isinstance(v, str):
return v.strip().lower()
return v
@validator('email', pre=True)
def normalize_email(cls, v):
"""Normalize email before validation."""
if isinstance(v, str):
return v.strip().lower()
return v
user = NormalizedUser(
username=" JohnDoe ", # Becomes "johndoe"
email=" JOHN@EXAMPLE.COM " # Becomes "john@example.com"
)Common Pitfalls
1. Mutating Values in Validators
Problem: Validators shouldn’t have side effects.
class BadModel(BaseModel):
value: int
@validator('value')
def log_value(cls, v):
# Side effect - writing to database/file
save_to_database(v) # Don't do this!
return v
class GoodModel(BaseModel):
value: int
@validator('value')
def validate_value(cls, v):
if v < 0:
raise ValueError('Value must be non-negative')
return v
model = GoodModel(value=42)
save_to_database(model.value) # Side effect after validation2. Forgetting to Return Values from Validators
Problem: Validator must return the value.
class BadModel(BaseModel):
name: str
@validator('name')
def validate_name(cls, v):
if len(v) < 3:
raise ValueError('Name too short')
# Forgot to return v!
class GoodModel(BaseModel):
name: str
@validator('name')
def validate_name(cls, v):
if len(v) < 3:
raise ValueError('Name too short')
return v # Must return!3. Using Validators for Data Transformation
Problem: Validators should validate, not transform (use pre=True cautiously).
class BadModel(BaseModel):
age: int
@validator('age')
def make_positive(cls, v):
return abs(v) # Silently changing data!
class GoodModel(BaseModel):
age: int
@validator('age')
def validate_positive(cls, v):
if v < 0:
raise ValueError('Age must be positive')
return v
class NormalizingModel(BaseModel):
age: int
@validator('age', pre=True)
def normalize_age(cls, v):
"""Explicitly normalize age string to int."""
if isinstance(v, str):
return int(v.strip())
return v4. Not Handling ValidationError Properly
Problem: Catching generic Exception hides validation details.
try:
user = User(**user_data)
except Exception as e: # Too broad!
print("Something went wrong")
from pydantic import ValidationError
try:
user = User(**user_data)
except ValidationError as e:
# Access detailed error information
for error in e.errors():
print(f"Field: {error['loc']}, Error: {error['msg']}")5. Over-validating in Production
Problem: Validating already-validated data wastes resources.
def process_user(user_dict: dict):
# Data already validated when created!
user = User(**user_dict) # Wasteful re-validation
return user.username
def process_user(user: User):
# Accept already-validated User object
return user.username
def load_from_database(user_dict: dict) -> User:
# Skip validation for database data
return User.construct(**user_dict)Related Patterns
Related Tutorial: See Intermediate Tutorial - Data Validation. Related How-To: See Build REST APIs. Related Cookbook: See Cookbook recipe “Validation Patterns”.
Last updated