19 lines
1 KiB
Python
19 lines
1 KiB
Python
from typing import Any, Dict, List, Optional
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class ContentPart(BaseModel):
    # One part of some content: an identifier, an optional parent identifier,
    # a label, a type group, a MIME type, a string payload and free-form
    # metadata.
    #
    # Documented with comments rather than a class docstring on purpose:
    # pydantic copies a model's docstring into the generated JSON schema
    # description, which would change the schema this model emits.
    # NOTE(review): parentId presumably links parts into a hierarchy —
    # confirm against the code that builds these objects.

    # Required identifier of the part.
    id: str = Field(
        description="Unique content part identifier",
    )

    # Identifier of the parent part; None when not supplied.
    parentId: Optional[str] = Field(
        None,
        description="Optional parent content part id",
    )

    # Required display label.
    label: str = Field(
        description="Human readable label of the part",
    )

    # Required type-group name (free-form string, not validated here).
    typeGroup: str = Field(
        description="Logical type group: text, table, structure, binary, ...",
    )

    # Required MIME type string (not validated here).
    mimeType: str = Field(
        description="MIME type of the part payload",
    )

    # Payload as a string; defaults to the empty string when omitted.
    data: str = Field(
        "",
        description="Primary data payload, often extracted text",
    )

    # Per-instance dict built by default_factory, so instances do not share
    # one default object.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Arbitrary metadata for the part",
    )
|
|
|
|
|
|
class ExtractedContent(BaseModel):
    # Container model: an identifier, a list of ContentPart entries and an
    # optional summary mapping.
    #
    # Documented with comments rather than a class docstring on purpose:
    # pydantic copies a model's docstring into the generated JSON schema
    # description, which would change the schema this model emits.

    # Required identifier.
    id: str = Field(
        description="Extraction id or source document id",
    )

    # Parts held by this object; a new empty list is created per instance
    # when the field is omitted.
    parts: List[ContentPart] = Field(
        default_factory=list,
        description="List of extracted parts",
    )

    # Free-form summary mapping; None when not supplied.
    summary: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional extraction summary",
    )
|
|
|