您现在的位置是:网站首页 > 博客日记 >

python数据验证库-Pydantic

作者:YXN-python 阅读量:58 发布日期:2025-09-12

Pydantic 是一个强大的 Python 库,主要用于数据验证和设置管理。它通过 Python 类型注解来定义数据结构,并提供了自动的数据验证、序列化和文档生成功能。

Pydantic 特别适合用于:

  • API 请求/响应验证
  • 配置管理
  • 数据转换和序列化
  • ORM 集成

基础概念

Pydantic 的核心是模型(Model) - 通过继承 BaseModel 定义的类,使用 Python 类型注解来描述数据结构。

from pydantic import BaseModel

# Pydantic model: `id` has no default, so it must be supplied;
# `name` and `is_active` fall back to the defaults below when omitted.
class User(BaseModel):
    id: int
    name: str = "John Doe"
    is_active: bool = True

 

基本用法

创建模型实例

# Create an instance by unpacking a dictionary
user_data = {"id": 1, "name": "Alice"}
user = User(**user_data)

# Pass the arguments directly
user = User(id=1, name="Alice")

print(user.id)  # Output: 1
print(user.model_dump())  # Output: {'id': 1, 'name': 'Alice', 'is_active': True}

 

数据验证

# ValidationError must be imported before it can be caught; without this
# import the `except` clause itself fails with NameError.
from pydantic import ValidationError

try:
    user = User(id="not_an_int")  # raises a validation error: the string is not a valid int
except ValidationError as e:
    # Printing the exception shows which field failed and why.
    print(e)

 

字段类型和验证

基本类型

from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Set
from datetime import datetime

# Model combining scalar, container, datetime and optional field types.
class Product(BaseModel):
    name: str
    price: float
    tags: List[str] = []
    metadata: Dict[str, str] = {}
    # The default value is produced by calling datetime.now.
    created_at: datetime = Field(default_factory=datetime.now)
    is_available: Optional[bool] = None

 

字段验证器

from pydantic import BaseModel, validator, Field, field_validator, ValidationInfo

# Model with declarative constraints (Field) plus custom validators,
# written against the Pydantic v2 API.
class User(BaseModel):
    username: str = Field(..., min_length=3, max_length=50)
    # Pydantic v2 renamed the `regex` keyword to `pattern`; `regex=` is rejected.
    email: str = Field(..., pattern=r"[^@]+@[^@]+\.[^@]+")
    age: int = Field(..., ge=0, le=150)

    # `field_validator` replaces the deprecated v1 `validator`; the explicit
    # `@classmethod` goes underneath it.
    @field_validator('username')
    @classmethod
    def username_must_contain_letter(cls, v):
        # Reject usernames made up solely of non-alphabetic characters.
        if not any(c.isalpha() for c in v):
            raise ValueError('必须包含至少一个字母')
        return v

    @field_validator('age')
    @classmethod
    def age_must_be_adult_if_email_provided(cls, v, info: ValidationInfo):
        # `info.data` holds the fields validated so far (v1 passed them as
        # `values`); `email` is declared before `age`, so it is present here
        # whenever it passed validation.
        if 'email' in info.data and v < 18:
            raise ValueError('有邮箱的用户必须年满18岁')
        return v

 

模型配置

from datetime import datetime

from pydantic import BaseModel, ConfigDict

# Demonstrates commonly used `model_config` options.
class ConfigExample(BaseModel):
    model_config = ConfigDict(
        extra='allow',  # allow extra (undeclared) fields
        populate_by_name=True,  # accept the field name as well as its alias
        coerce_numbers_to_str=True,  # coerce numeric input to str for str fields
        use_enum_values=True,  # use enum values rather than enum members

        # NOTE: `exclude_none` is not a ConfigDict setting and has no effect
        # here. To drop None values when serializing, pass it at call time:
        # `obj.model_dump(exclude_none=True)`.

        # Custom JSON encoders.
        # NOTE(review): `json_encoders` is a v1 carry-over that Pydantic v2
        # marks as deprecated in favour of custom serializers - confirm
        # against the installed version.
        json_encoders={
            datetime: lambda v: v.timestamp(),
            set: list,
        },
        # Allow arbitrary (non-Pydantic) types as field annotations
        arbitrary_types_allowed=True,
        # Namespace prefixes that field names should not start with
        protected_namespaces=('protect_me_',),
    )

    name: str
 

高级特性

嵌套模型

# Model that is embedded as a field of another model below.
class Address(BaseModel):
    street: str
    city: str
    zip_code: str

# Outer model; the `address` field is itself a model (Address).
class User(BaseModel):
    name: str
    email: str
    address: Address

# Usage: the nested "address" dict supplies the data for the `address` field
user_data = {
    "name": "Alice",
    "email": "alice@example.com",
    "address": {
        "street": "123 Main St",
        "city": "Anytown",
        "zip_code": "12345"
    }
}

user = User(**user_data)

 

继承和组合

# Shared base: fields common to the user models that inherit from it.
class BaseUser(BaseModel):
    username: str
    email: str

# Inherits `username` and `email` from BaseUser and adds `password`.
class UserCreate(BaseUser):
    password: str

# Inherits `username` and `email` from BaseUser and adds `id` and `created_at`.
class UserResponse(BaseUser):
    id: int
    created_at: datetime

 

自定义根类型

from pydantic import BaseModel, RootModel

# Root model: the whole model value is the list held in `root`.
class IntList(RootModel):
    root: List[int]

# Usage: the list is passed positionally and read back through `.root`
int_list = IntList([1, 2, 3])
print(int_list.root)  # [1, 2, 3]

 

使用 TypeAdapter

from pydantic import TypeAdapter

# Validate data that is not a model (here a plain List[int])
IntListValidator = TypeAdapter(List[int])
validated_list = IntListValidator.validate_python([1, 2, 3])

 

实践示例

API 请求/响应模型

from pydantic import BaseModel, EmailStr
from typing import Optional
from datetime import datetime

# Request model
class UserCreateRequest(BaseModel):
    username: str
    email: EmailStr
    password: str

# Response model
class UserResponse(BaseModel):
    # Pydantic v2 configuration attribute; it replaces the v1-style inner
    # `class Config`, which v2 reports as deprecated. A plain dict is used so
    # the snippet needs no extra import (`ConfigDict(...)` is equivalent).
    # from_attributes allows building the model from ORM objects.
    model_config = {"from_attributes": True}

    id: int
    username: str
    email: EmailStr
    created_at: datetime
    is_active: bool

# Update model (partial update): every field is optional and defaults to None
class UserUpdateRequest(BaseModel):
    username: Optional[str] = None
    email: Optional[EmailStr] = None
    password: Optional[str] = None

 

设置管理

from pydantic import BaseModel, Field, RedisDsn, PostgresDsn
from typing import Optional

# Database section of the settings tree.
class DatabaseSettings(BaseModel):
    url: PostgresDsn
    # Constrained to the inclusive range 1..20, defaulting to 5.
    pool_size: int = Field(default=5, ge=1, le=20)
    echo: bool = False

# Redis section of the settings tree.
class RedisSettings(BaseModel):
    url: RedisDsn
    # Must be at least 1, defaulting to 5.
    timeout: int = Field(default=5, ge=1)

# Top-level settings model composed of the nested sections above.
class Settings(BaseModel):
    database: DatabaseSettings
    # Optional section; None when not provided.
    redis: Optional[RedisSettings] = None
    debug: bool = False
    host: str = "localhost"
    # Constrained to the inclusive range 1..65535, defaulting to 8000.
    port: int = Field(default=8000, ge=1, le=65535)

# 从环境变量加载
import os
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings

# Settings populated from the environment by pydantic-settings.
class AppSettings(BaseSettings):
    # The v1-only `Field(env=...)` keyword is not supported by
    # pydantic-settings v2, so it is dropped here. Environment variable names
    # are derived from the field names and matched case-insensitively by
    # default, so DATABASE_URL, REDIS_URL and DEBUG are still picked up.
    database_url: str
    redis_url: Optional[str] = None
    debug: bool = False

    @field_validator('database_url')
    @classmethod
    def validate_db_url(cls, v):
        # Only accept connection strings using the postgresql:// scheme.
        if not v.startswith('postgresql://'):
            raise ValueError('必须是PostgreSQL连接字符串')
        return v

# Values are loaded automatically from environment variables
settings = AppSettings()

 

复杂业务逻辑验证

from pydantic import BaseModel, Field, field_validator, model_validator
from typing import List, Optional
from datetime import datetime, date

# A single order line.
class OrderItem(BaseModel):
    product_id: int
    quantity: int
    price: float

    # `field_validator` replaces the deprecated v1 `validator`.
    @field_validator('quantity')
    @classmethod
    def quantity_must_be_positive(cls, v):
        if v <= 0:
            raise ValueError('数量必须大于0')
        return v

# An order whose `total_amount` is derived from its items and discount.
class Order(BaseModel):
    items: List[OrderItem]
    order_date: datetime = Field(default_factory=datetime.now)
    discount: Optional[float] = Field(None, ge=0, le=1)
    total_amount: Optional[float] = None

    # Runs after field validation (replaces the deprecated
    # `root_validator(pre=True)`): `items` are OrderItem instances at this
    # point, whether they were passed as dicts or as models, and `discount`
    # has already been range-checked.
    @model_validator(mode='after')
    def calculate_total(self):
        if self.items:
            subtotal = sum(item.quantity * item.price for item in self.items)
            # A missing or explicitly-None discount means "no discount".
            total = subtotal * (1 - (self.discount or 0))
            self.total_amount = round(total, 2)

        return self

    @field_validator('order_date')
    @classmethod
    def order_date_cannot_be_future(cls, v):
        # Compare against "now" in the value's own timezone (tzinfo is None
        # for naive values), so aware datetimes do not raise TypeError.
        if v > datetime.now(v.tzinfo):
            raise ValueError('订单日期不能是未来时间')
        return v

 

与 FastAPI 集成

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, EmailStr
from typing import List

# Application instance that the route decorators below register handlers on.
app = FastAPI()

# Request body accepted by the POST /users/ handler.
class UserCreate(BaseModel):
    username: str
    email: EmailStr
    password: str

# Response shape used as `response_model` by the handlers; has no `password` field.
class UserResponse(BaseModel):
    id: int
    username: str
    email: EmailStr

@app.post("/users/", response_model=UserResponse)
async def create_user(user: UserCreate):
    # Database work normally happens here.
    # NOTE(review): create_user_in_db is not defined in this snippet; it is
    # assumed to return an object exposing id/username/email attributes.
    db_user = create_user_in_db(user)
    # Read the fields through attribute access instead of unpacking
    # `__dict__`, which breaks for objects whose attributes are not stored
    # in the instance dict (properties, slots, lazily loaded ORM columns).
    return UserResponse.model_validate(db_user, from_attributes=True)

@app.get("/users/", response_model=List[UserResponse])
async def get_users(skip: int = 0, limit: int = 10):
    # NOTE(review): get_users_from_db is not defined in this snippet; it is
    # assumed to return an iterable of objects exposing id/username/email.
    users = get_users_from_db(skip, limit)
    # Attribute-based validation avoids relying on each object's `__dict__`.
    return [
        UserResponse.model_validate(user, from_attributes=True)
        for user in users
    ]

 

 

YXN-python

2025-09-12