from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class ContentPart(BaseModel):
    """A single part of extracted content.

    Only ``data`` and ``metadata`` (empty string / empty dict) and
    ``parentId`` (``None``) have defaults; ``id``, ``label``, ``typeGroup``
    and ``mimeType`` must be supplied by the caller.
    """

    id: str = Field(description="Unique content part identifier")
    # NOTE(review): presumably refers to the ``id`` of another ContentPart;
    # nothing in this model validates that reference.
    parentId: Optional[str] = Field(default=None, description="Optional parent content part id")
    label: str = Field(description="Human readable label of the part")
    # Free-form string: the values listed in the description are not enforced.
    typeGroup: str = Field(description="Logical type group: text, table, structure, binary, ...")
    mimeType: str = Field(description="MIME type of the part payload")
    data: str = Field(default="", description="Primary data payload, often extracted text")
    # default_factory gives every instance its own dict.
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part")


class ExtractedContent(BaseModel):
    """Result of an extraction: an id plus the list of extracted parts.

    ``parts`` defaults to a new empty list per instance and ``summary``
    defaults to ``None``; only ``id`` is required.
    """

    id: str = Field(description="Extraction id or source document id")
    parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
    summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")