pydantic

overview

pydantic - data validation library using python type hints

  • runtime type validation and coercion
  • json serialization and deserialization (yaml needs a third-party parser)
  • schema generation (json schema, openapi)
  • used by fastapi, langchain, many others
  • core validation engine (pydantic-core) written in rust since v2, for performance

when to use

use pydantic when:

  • handling external data (apis, configs, user input)
  • need automatic validation and serialization
  • building apis with fastapi
  • need json schema generation
  • working with complex nested data

don’t use when:

  • simple internal data structures suffice
  • performance is critical (use dataclasses)
  • need database orm features (use sqlalchemy)

installation

uv add pydantic

# with email validation
uv add 'pydantic[email]'

basic usage

simple model

from pydantic import BaseModel, Field
from typing import Optional, List
from datetime import datetime

class User(BaseModel):
    # name and email have no default, so both must be supplied
    name: str
    email: str
    # constrained to 0 < age <= 150
    age: int = Field(gt=0, le=150)
    # optional; an empty list when omitted
    tags: List[str] = []
    # populated by calling datetime.now() when no value is passed
    created_at: datetime = Field(default_factory=datetime.now)

# automatic validation: each argument is checked against the field annotation
user = User(name="alice", email="alice@example.com", age=30)

# type coercion: the numeric string "25" is converted to the int 25
user2 = User(name="bob", email="bob@example.com", age="25")

validation with field

from pydantic import BaseModel, Field, EmailStr
from decimal import Decimal

class Product(BaseModel):
    # between 1 and 100 characters
    name: str = Field(min_length=1, max_length=100)
    # strictly positive, at most two decimal places
    price: Decimal = Field(gt=0, decimal_places=2, description="price in usd")
    # non-negative; 0 when omitted
    quantity: int = Field(ge=0, default=0)
    # three uppercase letters, a hyphen, then four digits (e.g. ABC-1234)
    sku: str = Field(pattern=r"^[A-Z]{3}-\d{4}$")
    email: EmailStr  # requires pydantic[email]

model configuration

from pydantic import BaseModel, ConfigDict

class StrictUser(BaseModel):
    # model-wide behaviour is configured through model_config
    model_config = ConfigDict(
        # common configurations
        str_strip_whitespace=True,  # auto-strip strings
        validate_assignment=True,   # validate on attribute assignment
        use_enum_values=True,      # use enum values not names
        arbitrary_types_allowed=True,  # allow non-pydantic types

        # strict mode - no type coercion
        strict=True,

        # json serialization: `json_encoders` is deprecated in pydantic v2 and
        # this model has no datetime field, so it is not set here; customise
        # output with @field_serializer instead (datetime values already
        # serialize to iso 8601 strings by default)
    )

    name: str
    age: int

validators

field validators

from pydantic import BaseModel, field_validator, model_validator
from typing import List

class User(BaseModel):
    username: str
    email: str
    age: int

    # field validators receive the already type-checked value and return the
    # (possibly transformed) value to store
    @field_validator('username')
    @classmethod
    def username_alphanumeric(cls, v: str) -> str:
        # raise explicitly instead of using assert: assert statements are
        # stripped under `python -O`, which would silently disable the check
        if not v.isalnum():
            raise ValueError('must be alphanumeric')
        # usernames are normalised to lower case
        return v.lower()

    @field_validator('age')
    @classmethod
    def age_valid(cls, v: int) -> int:
        # accepts the closed range 0..150
        if v < 0 or v > 150:
            raise ValueError('age must be between 0 and 150')
        return v

model validators

class Password(BaseModel):
    password: str
    confirm_password: str

    # mode='after' runs on the constructed instance, so both attributes
    # are available for the cross-field comparison
    @model_validator(mode='after')
    def passwords_match(self) -> 'Password':
        if self.password == self.confirm_password:
            return self
        raise ValueError('passwords do not match')

serialization

json serialization

class User(BaseModel):
    id: int
    name: str
    email: str
    # still required on input; only the serialized output omits it
    password: str = Field(exclude=True)  # exclude from serialization

user = User(id=1, name="alice", email="alice@example.com", password="secret")

# to dict (Field(exclude=True) already drops password; the per-call `exclude`
# is shown only to illustrate the option)
data = user.model_dump(exclude={'password'})

# to json
json_str = user.model_dump_json(indent=2)

# from json: json_str carries no password (it is excluded on output) and the
# field is required, so User.model_validate_json(json_str) would raise a
# ValidationError; parse a payload that contains every required field
user2 = User.model_validate_json(
    '{"id": 1, "name": "alice", "email": "alice@example.com", "password": "secret"}'
)

custom serialization

from pydantic import BaseModel, field_serializer
from datetime import datetime

class Event(BaseModel):
    name: str
    timestamp: datetime

    # controls how the `timestamp` field is emitted when the model is dumped
    @field_serializer('timestamp')
    def serialize_timestamp(self, timestamp: datetime, _info):
        # _info is accepted but not used by this serializer
        return timestamp.isoformat()

computed fields

from pydantic import BaseModel, computed_field
from functools import cached_property

class Rectangle(BaseModel):
    width: float
    height: float

    # computed fields are derived from other fields rather than supplied
    @computed_field
    @cached_property
    def area(self) -> float:
        # cached_property stores the first result on the instance, so the
        # value is not recomputed if width or height are reassigned later
        return self.width * self.height

    @computed_field
    @property
    def perimeter(self) -> float:
        # plain property: evaluated on every access
        return 2 * (self.width + self.height)

nested models

class Address(BaseModel):
    # all four parts are required strings
    street: str
    city: str
    country: str
    zip_code: str  # kept as a string, not a number

class Company(BaseModel):
    name: str
    # nested model: a dict passed here is validated as an Address
    address: Address

class Employee(BaseModel):
    name: str
    email: str
    # two levels of nesting: Employee -> Company -> Address
    company: Company
    # required (no default), but may be an empty list
    addresses: List[Address]  # multiple addresses

# deep validation: each nested dict is validated against the matching
# nested model (company -> Company, company.address -> Address)
employee_data = {
    "name": "alice",
    "email": "alice@example.com",
    "company": {
        "name": "acme corp",
        "address": {
            "street": "123 main st",
            "city": "anytown",
            "country": "usa",
            "zip_code": "12345",
        },
    },
    # an empty list satisfies List[Address]
    "addresses": [],
}

# keys are unpacked into keyword arguments
employee = Employee(**employee_data)

json schema generation

class User(BaseModel):
    """user account model"""

    # `examples` (a list) is the keyword pydantic v2 supports; the singular
    # `example` is an undeclared extra kwarg and emits a deprecation warning
    id: int = Field(description="unique user id", examples=[123])
    # 3-20 characters drawn from letters, digits and underscore
    username: str = Field(
        min_length=3,
        max_length=20,
        pattern=r"^[a-zA-Z0-9_]+$",
        description="unique username",
        examples=["alice_doe"],
    )
    email: str = Field(description="user email", examples=["alice@example.com"])
    is_active: bool = Field(default=True, description="account status")

import json  # needed for json.dumps below; not imported elsewhere in this example

# generate json schema (returned as a plain dict)
schema = User.model_json_schema()

# openapi compatible
print(json.dumps(schema, indent=2))

settings management

from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional

class Settings(BaseSettings):
    # where and how values are looked up
    model_config = SettingsConfigDict(
        env_file='.env',
        env_file_encoding='utf-8',
        case_sensitive=False
    )

    # environment variables
    app_name: str = "my app"
    debug: bool = False
    database_url: str  # no default: a value must be supplied
    redis_url: Optional[str] = None

    # nested config
    class DatabaseConfig(BaseModel):
        pool_size: int = 5
        echo: bool = False

    # default instance is built once, when the class body is executed
    db: DatabaseConfig = DatabaseConfig()

# loads from environment and .env file; database_url is declared without a
# default, so one of those sources has to provide it
settings = Settings()

integration with dataclasses

from pydantic.dataclasses import dataclass
from pydantic import Field

# pydantic's dataclass decorator (imported above), not the stdlib one
@dataclass
class Point:
    # x must lie in [-180, 180]
    x: float = Field(ge=-180, le=180)
    # y must lie in [-90, 90]
    y: float = Field(ge=-90, le=90)

# works like pydantic model but looks like dataclass: the Field constraints
# are checked when the instance is created
point = Point(x=10.5, y=20.3)

performance considerations

from pydantic import BaseModel, ConfigDict

class OptimizedModel(BaseModel):
    model_config = ConfigDict(
        # performance-related settings
        # NOTE(review): these three values appear to match pydantic's own
        # defaults, so they document intent rather than change behaviour -
        # confirm against the ConfigDict reference
        validate_default=False,  # skip validation of defaults
        validate_return=False,   # skip validation of return values
        arbitrary_types_allowed=False,  # reject non-pydantic types

        # behavioural options rather than speed-ups - use when appropriate
        frozen=True,  # immutable model (hashable)
        extra='forbid'  # no extra fields allowed
    )

    id: int
    name: str

common patterns

api request/response models

from typing import Generic, TypeVar, Optional
from datetime import datetime

T = TypeVar('T')

class PaginatedResponse(BaseModel, Generic[T]):
    # generic envelope: parametrise as PaginatedResponse[SomeModel] so that
    # each entry in `items` is validated as SomeModel
    items: List[T]
    total: int
    page: int
    per_page: int

class UserCreateRequest(BaseModel):
    # inbound payload: carries the password, has no id or timestamp yet
    username: str
    email: str
    password: str

class UserResponse(BaseModel):
    # from orm: allow building the model from an object's attributes
    model_config = ConfigDict(from_attributes=True)

    # outbound payload: no password field
    id: int
    username: str
    email: str
    created_at: datetime

discriminated unions

from typing import Union, Literal

class Cat(BaseModel):
    # tag field: only the literal string 'cat' is accepted
    type: Literal['cat']
    meow_volume: int

class Dog(BaseModel):
    # tag field: only the literal string 'dog' is accepted
    type: Literal['dog']
    bark_volume: int

from typing import Annotated

from pydantic import Field, TypeAdapter

# naming the discriminator makes pydantic pick the model from the 'type'
# value instead of trying each union member in turn
Animal = Annotated[Union[Cat, Dog], Field(discriminator='type')]

# pydantic knows which model to use based on 'type'; a union is not a
# BaseModel and has no model_validate(), so validation goes through a
# TypeAdapter
cat = TypeAdapter(Animal).validate_python({'type': 'cat', 'meow_volume': 10})

migration from dataclasses

# before - dataclass
from dataclasses import dataclass

@dataclass
class User:
    """plain dataclass: validation has to be written by hand."""

    name: str
    age: int

    def __post_init__(self) -> None:
        """reject negative ages.

        raises:
            ValueError: if age is below zero.
        """
        # zero is accepted, so the message says "non-negative", not "positive"
        if self.age < 0:
            raise ValueError("age must be non-negative")

# after - pydantic
from pydantic import BaseModel, Field

class User(BaseModel):
    name: str
    # ge=0 replaces the hand-written __post_init__ check (age >= 0)
    age: int = Field(ge=0)  # automatic validation

comparison with alternatives

feature            | dataclasses | pydantic  | attrs   | sqlalchemy
validation         | manual      | automatic | plugins | db-level
serialization      | basic       | advanced  | plugins | orm
schema generation  |             | yes       |         | db schema
performance        | fastest     | fast      | fast    | slower
type coercion      |             | yes       |         |

best practices

do

  • use Field() for constraints and documentation
  • leverage type coercion for external data
  • use model_config for consistent behavior
  • create separate models for requests/responses
  • use validators for complex business rules

don’t

  • use for simple internal data (use dataclasses)
  • modify __init__ directly
  • use mutable default arguments
  • forget about from_attributes=True for orm models
  • use Any type unless necessary

common pitfalls

mutable defaults

# discouraged: works, but relies on pydantic's implicit copying
class Model(BaseModel):
    items: List[str] = []  # not shared: pydantic copies a mutable default for each instance (plain classes and stdlib dataclasses do not)

# preferred: the per-instance default is explicit
class Model(BaseModel):
    items: List[str] = Field(default_factory=list)

validation timing

class User(BaseModel):
    # without this setting, constraints are only checked at construction time
    model_config = ConfigDict(validate_assignment=True)

    age: int = Field(ge=0)

user = User(age=25)
# the assignment below violates ge=0
user.age = -5  # raises validation error only with validate_assignment=True

references

══════════════════════════════════════════════════════════════════
on this page