pydantic
on this page
overview
pydantic - data validation library using python type hints
- runtime type validation and coercion
- json serialization and deserialization (yaml requires an extra library)
- schema generation (json schema, openapi)
- used by fastapi, langchain, many others
- core validation logic written in rust (pydantic-core, v2) for performance
when to use
use pydantic when:
- handling external data (apis, configs, user input)
- need automatic validation and serialization
- building apis with fastapi
- need json schema generation
- working with complex nested data
don’t use when:
- simple internal data structures suffice
- performance is critical (use dataclasses)
- need database orm features (use sqlalchemy)
installation
uv add pydantic
# with email validation
uv add 'pydantic[email]'
basic usage
simple model
from pydantic import BaseModel, Field
from typing import Optional, List
from datetime import datetime
# user record; every field is validated when the model is constructed
class User(BaseModel):
    name: str
    email: str
    age: int = Field(gt=0, le=150)  # accepted range: 0 < age <= 150
    tags: List[str] = []
    created_at: datetime = Field(default_factory=datetime.now)


# automatic validation
user = User(name="alice", email="alice@example.com", age=30)

# type coercion
user2 = User(
    name="bob",
    email="bob@example.com",
    age="25",  # string converted to int
)
validation with field
from pydantic import BaseModel, Field, EmailStr
from decimal import Decimal
# product whose constraints are declared per field through Field()
class Product(BaseModel):
    name: str = Field(min_length=1, max_length=100)
    price: Decimal = Field(
        gt=0,
        decimal_places=2,
        description="price in usd",
    )
    quantity: int = Field(default=0, ge=0)
    sku: str = Field(pattern=r"^[A-Z]{3}-\d{4}$")  # three capitals, dash, four digits
    email: EmailStr  # requires pydantic[email]
model configuration
from pydantic import BaseModel, ConfigDict
# model showing the most commonly used ConfigDict switches
class StrictUser(BaseModel):
    model_config = ConfigDict(
        # common configurations
        str_strip_whitespace=True,  # auto-strip strings
        validate_assignment=True,  # validate on attribute assignment
        use_enum_values=True,  # use enum values not names
        arbitrary_types_allowed=True,  # allow non-pydantic types
        # strict mode - no type coercion
        strict=True,
        # json serialization
        # note: json_encoders is a deprecated v1 carry-over in pydantic v2;
        # prefer @field_serializer for new code
        json_encoders={
            datetime: lambda v: v.isoformat()
        },
    )

    name: str
    age: int
validators
field validators
from pydantic import BaseModel, field_validator, model_validator
from typing import List
# user model with per-field validators
class User(BaseModel):
    username: str
    email: str
    age: int

    @field_validator('username')
    @classmethod
    def username_alphanumeric(cls, v: str) -> str:
        # reject non-alphanumeric usernames and normalise to lower case.
        # raise explicitly instead of using `assert`: assert statements are
        # stripped under `python -O`, which would silently disable the check
        if not v.isalnum():
            raise ValueError('must be alphanumeric')
        return v.lower()

    @field_validator('age')
    @classmethod
    def age_valid(cls, v: int) -> int:
        # accept ages in the inclusive range 0..150
        if v < 0 or v > 150:
            raise ValueError('age must be between 0 and 150')
        return v
model validators
# model-level rule: the two password fields must hold the same value
class Password(BaseModel):
    password: str
    confirm_password: str

    @model_validator(mode='after')
    def passwords_match(self) -> 'Password':
        # mode='after' runs on the constructed instance, so both
        # attributes are available here
        if self.password == self.confirm_password:
            return self
        raise ValueError('passwords do not match')
serialization
json serialization
# user model whose password never appears in serialized output
class User(BaseModel):
    id: int
    name: str
    email: str
    password: str = Field(exclude=True)  # exclude from serialization


user = User(id=1, name="alice", email="alice@example.com", password="secret")

# to dict (the call-time exclude is shown for illustration; the field is
# already excluded by Field(exclude=True))
data = user.model_dump(exclude={'password'})

# to json
json_str = user.model_dump_json(indent=2)

# from json - `json_str` cannot be fed back in: it has no password (excluded
# above) and password is a required field, so validation would fail.
# validate a payload that carries every required field instead
user2 = User.model_validate_json(
    '{"id": 2, "name": "bob", "email": "bob@example.com", "password": "secret"}'
)
custom serialization
from pydantic import BaseModel, field_serializer
from datetime import datetime
# event whose timestamp is emitted as an iso-8601 string
class Event(BaseModel):
    name: str
    timestamp: datetime

    @field_serializer('timestamp')
    def serialize_timestamp(self, timestamp: datetime, _info):
        # _info is accepted but not used by this serializer
        iso_text = timestamp.isoformat()
        return iso_text
computed fields
from pydantic import BaseModel, computed_field
from functools import cached_property
# rectangle exposing derived values as computed fields
class Rectangle(BaseModel):
    width: float
    height: float

    @computed_field
    @cached_property
    def area(self) -> float:
        # cached_property: computed on first access, then reused
        w, h = self.width, self.height
        return w * h

    @computed_field
    @property
    def perimeter(self) -> float:
        # plain property: recomputed on every access
        side_sum = self.width + self.height
        return 2 * side_sum
nested models
# postal address, reused by the models below
class Address(BaseModel):
    street: str
    city: str
    country: str
    zip_code: str


# company with a single embedded address
class Company(BaseModel):
    name: str
    address: Address


# employee nesting a company (which nests an address) plus a list of addresses
class Employee(BaseModel):
    name: str
    email: str
    company: Company
    addresses: List[Address]  # multiple addresses


# deep validation
employee_data = {
    "name": "alice",
    "email": "alice@example.com",
    "company": {
        "name": "acme corp",
        "address": {
            "street": "123 main st",
            "city": "anytown",
            "country": "usa",
            "zip_code": "12345",
        },
    },
    "addresses": [],
}

employee = Employee(**employee_data)
json schema generation
import json  # required by the json.dumps call at the end of this snippet


class User(BaseModel):
    """user account model"""

    # pydantic v2 takes `examples` (a list); the singular `example=` is an
    # extra keyword argument, which is deprecated
    id: int = Field(description="unique user id", examples=[123])
    username: str = Field(
        min_length=3,
        max_length=20,
        pattern=r"^[a-zA-Z0-9_]+$",
        description="unique username",
        examples=["alice_doe"],
    )
    email: str = Field(description="user email", examples=["alice@example.com"])
    is_active: bool = Field(default=True, description="account status")


# generate json schema
schema = User.model_json_schema()

# openapi compatible
print(json.dumps(schema, indent=2))
settings management
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional
# application settings; the config below points at a .env file
class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        case_sensitive=False,
        env_file='.env',
        env_file_encoding='utf-8',
    )

    # environment variables
    app_name: str = "my app"
    debug: bool = False
    database_url: str  # no default, so a value must be supplied
    redis_url: Optional[str] = None

    # nested config
    class DatabaseConfig(BaseModel):
        pool_size: int = 5
        echo: bool = False

    db: DatabaseConfig = DatabaseConfig()


# loads from environment and .env file
settings = Settings()
integration with dataclasses
from pydantic.dataclasses import dataclass
from pydantic import Field
# works like pydantic model but looks like dataclass
@dataclass
class Point:
    x: float = Field(le=180, ge=-180)
    y: float = Field(le=90, ge=-90)


point = Point(x=10.5, y=20.3)
performance considerations
from pydantic import BaseModel, ConfigDict
# model configured with the settings this guide suggests for performance
class OptimizedModel(BaseModel):
    model_config = ConfigDict(
        # performance optimizations
        validate_default=False,  # skip validation of defaults
        validate_return=False,  # skip validation of return values
        arbitrary_types_allowed=False,  # faster type checking
        # use when appropriate
        frozen=True,  # immutable model (hashable)
        extra='forbid',  # no extra fields allowed
    )

    id: int
    name: str
common patterns
api request/response models
from typing import Generic, TypeVar, Optional
from datetime import datetime
T = TypeVar('T')


# generic page envelope; T is the type of each item
class PaginatedResponse(BaseModel, Generic[T]):
    items: List[T]
    total: int
    page: int
    per_page: int


# payload accepted when creating a user
class UserCreateRequest(BaseModel):
    username: str
    email: str
    password: str


# payload returned to clients; note there is no password field
class UserResponse(BaseModel):
    model_config = ConfigDict(from_attributes=True)  # from orm

    id: int
    username: str
    email: str
    created_at: datetime
discriminated unions
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class Cat(BaseModel):
    type: Literal['cat']
    meow_volume: int


class Dog(BaseModel):
    type: Literal['dog']
    bark_volume: int


# declare the discriminator explicitly so pydantic selects the model from
# the 'type' value instead of trying each union member in turn
Animal = Annotated[Union[Cat, Dog], Field(discriminator='type')]

# a Union alias is not a model and has no .model_validate();
# a TypeAdapter validates arbitrary annotated types
animal_adapter = TypeAdapter(Animal)

# pydantic knows which model to use based on 'type'
cat = animal_adapter.validate_python({'type': 'cat', 'meow_volume': 10})
migration from dataclasses
# before - dataclass
from dataclasses import dataclass
@dataclass
class User:
    """plain dataclass: validation has to be written by hand."""

    name: str
    age: int

    def __post_init__(self):
        # dataclasses do not validate field values, so check manually.
        # zero is accepted, hence "non-negative" rather than "positive"
        if self.age < 0:
            raise ValueError("age must be non-negative")
# after - pydantic
from pydantic import BaseModel, Field
# the constraint is declared on the field; no __post_init__ hook is needed
class User(BaseModel):
    name: str
    age: int = Field(ge=0)  # automatic validation
comparison with alternatives
| feature | dataclasses | pydantic | attrs | sqlalchemy |
|---|---|---|---|---|
| validation | manual | automatic | plugins | db-level |
| serialization | basic | advanced | plugins | orm |
| schema generation | ✗ | ✓ | ✗ | db schema |
| performance | fastest | fast | fast | slower |
| type coercion | ✗ | ✓ | ✗ | ✓ |
best practices
do
- use `Field()` for constraints and documentation
- leverage type coercion for external data
- use `model_config` for consistent behavior
- create separate models for requests/responses
- use validators for complex business rules
don’t
- use for simple internal data (use dataclasses)
- modify `__init__` directly
- use mutable default arguments
- forget about `from_attributes=True` for orm models
- use `Any` type unless necessary
common pitfalls
mutable defaults
# works, but implicit: unlike plain python default arguments, pydantic
# copies a mutable default for each new instance, so the list is not
# shared between instances
class Model(BaseModel):
    items: List[str] = []


# preferred - the per-instance default is explicit
class Model(BaseModel):
    items: List[str] = Field(default_factory=list)
validation timing
# validate_assignment is switched on, so the ge=0 constraint is enforced
# on attribute assignment as well as at construction
class User(BaseModel):
    model_config = ConfigDict(validate_assignment=True)

    age: int = Field(ge=0)


user = User(age=25)
user.age = -5  # raises validation error only with validate_assignment=True
references
══════════════════════════════════════════════════════════════════