#!/usr/bin/env python3
"""Benchmark dataclasses vs pydantic vs sqlalchemy.

Tests object creation, modification, and serialization performance, prints a
summary table, and writes the raw timings to
``results_python_<version>.json`` in the current directory.
"""

import json
import sys
import time
from dataclasses import dataclass, asdict
from typing import Optional
from datetime import datetime
import platform

try:
    from pydantic import BaseModel, Field
except ImportError:
    print("pydantic not installed")
    sys.exit(1)

try:
    from sqlalchemy import create_engine, String, Integer
    from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, Session
except ImportError:
    print("sqlalchemy not installed")
    sys.exit(1)

# Number of iterations for each test
ITERATIONS = 10000

# Number of objects reused by the modification and serialization tests.
_POOL_SIZE = 100

# Number of times each pooled object is serialized.
_SERIALIZATION_ROUNDS = 100

# Modification rounds for SQLAlchemy (reduced: every round ends in a commit).
_SQLALCHEMY_MODIFICATION_ROUNDS = 100


# 1. Dataclass implementation
@dataclass
class UserDataclass:
    """User record implemented as a standard-library dataclass."""

    id: int
    name: str
    email: str
    age: int
    created_at: Optional[datetime] = None

    def to_dict(self) -> dict:
        """Return the fields as a dict via ``dataclasses.asdict``."""
        return asdict(self)


# 2. Pydantic implementation
class UserPydantic(BaseModel):
    """User record implemented as a pydantic model (``age`` limited to 0..150)."""

    id: int
    name: str
    email: str
    age: int = Field(ge=0, le=150)
    created_at: Optional[datetime] = None

    def to_dict(self) -> dict:
        """Return the fields as a dict via ``model_dump``."""
        return self.model_dump()


# 3. SQLAlchemy implementation
class Base(DeclarativeBase):
    """Declarative base class for the SQLAlchemy models in this script."""


class UserSQLAlchemy(Base):
    """User record implemented as a SQLAlchemy ORM model on table ``users``."""

    __tablename__ = "users"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String(100))
    email: Mapped[str] = mapped_column(String(100))
    age: Mapped[int] = mapped_column(Integer)
    created_at: Mapped[Optional[datetime]] = mapped_column(default=None)

    def to_dict(self) -> dict:
        """Return the mapped columns as a plain dict."""
        return {
            "id": self.id,
            "name": self.name,
            "email": self.email,
            "age": self.age,
            "created_at": self.created_at,
        }


def _per_item(total_seconds, count):
    """Return the average time per item, or 0.0 when nothing was measured."""
    return total_seconds / count if count else 0.0


def _benchmark_in_memory(model_cls):
    """Time creation, modification and serialization for an in-memory model.

    Args:
        model_cls: Class accepting ``id``, ``name``, ``email`` and ``age``
            keyword arguments, exposing writable ``name``/``age`` attributes
            and a ``to_dict()`` method.

    Returns:
        Dict with the total and per-item timings (in seconds) for the
        creation, modification and serialization tests.
    """
    results = {}

    # Object creation
    start = time.perf_counter()
    for i in range(ITERATIONS):
        model_cls(
            id=i,
            name=f"User {i}",
            email=f"user{i}@example.com",
            age=25 + (i % 50),
        )
    elapsed = time.perf_counter() - start
    results["creation_time"] = elapsed
    results["creation_per_object"] = _per_item(elapsed, ITERATIONS)

    # Object modification
    users = [
        model_cls(
            id=i,
            name=f"User {i}",
            email=f"user{i}@example.com",
            age=25,
        )
        for i in range(_POOL_SIZE)
    ]

    start = time.perf_counter()
    for round_no in range(ITERATIONS):
        for user in users:
            # Keep the assigned name bounded in length. Re-prefixing the
            # previous name each round makes the string grow every round, so
            # the loop ends up timing string copying instead of attribute
            # assignment.
            user.name = f"User {user.id} (rev {round_no})"
            user.age = user.age + 1
    elapsed = time.perf_counter() - start
    results["modification_time"] = elapsed
    results["modification_per_operation"] = _per_item(
        elapsed, ITERATIONS * len(users)
    )

    # Serialization
    start = time.perf_counter()
    for user in users:
        for _ in range(_SERIALIZATION_ROUNDS):
            user.to_dict()
    elapsed = time.perf_counter() - start
    results["serialization_time"] = elapsed
    results["serialization_per_object"] = _per_item(
        elapsed, len(users) * _SERIALIZATION_ROUNDS
    )

    return results


def benchmark_dataclass():
    """Benchmark dataclass operations"""
    return _benchmark_in_memory(UserDataclass)


def benchmark_pydantic():
    """Benchmark pydantic operations"""
    return _benchmark_in_memory(UserPydantic)


def benchmark_sqlalchemy():
    """Benchmark sqlalchemy operations

    Creation and modification include the database work (inserts, updates and
    commits) against an in-memory SQLite database.

    Returns:
        Dict with the total and per-item timings (in seconds) for the
        creation, modification and serialization tests.
    """
    results = {}

    # Setup in-memory database
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)

    # Object creation (with database inserts)
    start = time.perf_counter()
    with Session(engine) as session:
        for i in range(ITERATIONS):
            session.add(
                UserSQLAlchemy(
                    id=i,
                    name=f"User {i}",
                    email=f"user{i}@example.com",
                    age=25 + (i % 50),
                )
            )
        session.commit()
    elapsed = time.perf_counter() - start
    results["creation_time"] = elapsed
    results["creation_per_object"] = _per_item(elapsed, ITERATIONS)

    with Session(engine) as session:
        users = session.query(UserSQLAlchemy).limit(_POOL_SIZE).all()

        # Object modification (with database updates)
        start = time.perf_counter()
        for round_no in range(_SQLALCHEMY_MODIFICATION_ROUNDS):
            for user in users:
                # Bounded-length name that still changes every round, so it
                # stays within the declared String(100) column size.
                user.name = f"User {user.id} (rev {round_no})"
                user.age = user.age + 1
            session.commit()
        elapsed = time.perf_counter() - start
        results["modification_time"] = elapsed
        results["modification_per_operation"] = _per_item(
            elapsed, _SQLALCHEMY_MODIFICATION_ROUNDS * len(users)
        )

        # Serialization. Runs while the session is still open so the loaded
        # instances can still reach the database if they need to.
        # NOTE(review): the preceding commit presumably expires the loaded
        # instances, so the first to_dict() per user would include a reload
        # from the database; confirm whether that should be part of the timing.
        start = time.perf_counter()
        for user in users:
            for _ in range(_SERIALIZATION_ROUNDS):
                user.to_dict()
        elapsed = time.perf_counter() - start
        results["serialization_time"] = elapsed
        results["serialization_per_object"] = _per_item(
            elapsed, len(users) * _SERIALIZATION_ROUNDS
        )

    return results


def main():
    """Run all benchmarks and save results"""
    python_version = platform.python_version()
    print(f"Running benchmarks on Python {python_version}")
    print(f"Iterations: {ITERATIONS}")
    print("-" * 50)

    results = {
        "python_version": python_version,
        "platform": platform.platform(),
        "iterations": ITERATIONS,
        "timestamp": datetime.now().isoformat(),
        "benchmarks": {},
    }

    # Run dataclass benchmark
    print("Benchmarking dataclasses...")
    results["benchmarks"]["dataclass"] = benchmark_dataclass()

    # Run pydantic benchmark
    print("Benchmarking pydantic...")
    results["benchmarks"]["pydantic"] = benchmark_pydantic()

    # Run sqlalchemy benchmark
    print("Benchmarking sqlalchemy...")
    results["benchmarks"]["sqlalchemy"] = benchmark_sqlalchemy()

    # Save results
    output_file = f"results_python_{python_version}.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"\nResults saved to {output_file}")

    # Print summary
    print("\nSummary (time per object in microseconds):")
    print("-" * 60)
    print(f"{'Library':<15} {'Creation':<15} {'Modification':<15} {'Serialization':<15}")
    print("-" * 60)
    for lib, data in results["benchmarks"].items():
        # Timings are stored in seconds; convert to microseconds for display.
        creation = data["creation_per_object"] * 1_000_000
        modification = data["modification_per_operation"] * 1_000_000
        serialization = data["serialization_per_object"] * 1_000_000
        print(f"{lib:<15} {creation:<15.2f} {modification:<15.2f} {serialization:<15.2f}")


if __name__ == "__main__":
    main()