diff --git a/env_dev.env b/env_dev.env index da72e528..95b2b91e 100644 --- a/env_dev.env +++ b/env_dev.env @@ -73,7 +73,7 @@ Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJK # Debug Configuration APP_DEBUG_CHAT_WORKFLOW_ENABLED = True -APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat +APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug # Manadate Pre-Processing Servers PREPROCESS_ALTHAUS_CHAT_SECRET = (empty) \ No newline at end of file diff --git a/env_prod.env b/env_prod.env index c7699b03..0daaff02 100644 --- a/env_prod.env +++ b/env_prod.env @@ -73,7 +73,7 @@ Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1F # Debug Configuration APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE -APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat# Development Environment Configuration +APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat # Manadate Pre-Processing Servers PREPROCESS_ALTHAUS_CHAT_SECRET = kj823u90209mj020394jp2msakhfkjashjkf \ No newline at end of file diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py index c26bdaf2..50bcf3ca 100644 --- a/modules/aicore/aicorePluginAnthropic.py +++ b/modules/aicore/aicorePluginAnthropic.py @@ -26,8 +26,10 @@ class AiAnthropic(BaseConnectorAi): self.apiKey = self.config["apiKey"] # HttpClient for API calls + # Timeout set to 600 seconds (10 minutes) for complex requests that may take longer + # Document generation and complex AI operations can take significantly longer self.httpClient = httpx.AsyncClient( - timeout=120.0, # Longer timeout for complex requests + timeout=600.0, headers={ "x-api-key": self.apiKey, "anthropic-version": "2023-06-01", # Anthropic API Version @@ -42,6 +44,8 @@ class AiAnthropic(BaseConnectorAi): return "anthropic" def getModels(self) -> List[AiModel]: + return [] # TODO: DEBUG TO TURN ON AFTER TESTING + """Get all available Anthropic models.""" return [ AiModel( diff --git a/modules/aicore/aicorePluginOpenai.py 
b/modules/aicore/aicorePluginOpenai.py index 7f7e3c70..cc45edc0 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -29,8 +29,10 @@ class AiOpenai(BaseConnectorAi): self.apiKey = self.config["apiKey"] # HttpClient for API calls + # Timeout set to 600 seconds (10 minutes) for complex requests that may take longer + # AiService calls can take significantly longer due to prompt building and processing overhead self.httpClient = httpx.AsyncClient( - timeout=120.0, # Longer timeout for complex requests + timeout=600.0, headers={ "Authorization": f"Bearer {self.apiKey}", "Content-Type": "application/json" @@ -332,8 +334,9 @@ class AiOpenai(BaseConnectorAi): } # Create a separate client for DALL-E API calls + # Timeout set to 600 seconds (10 minutes) for complex image generation requests dalle_client = httpx.AsyncClient( - timeout=120.0, + timeout=600.0, headers={ "Authorization": f"Bearer {self.apiKey}", "Content-Type": "application/json" diff --git a/modules/aicore/aicorePluginPerplexity.py b/modules/aicore/aicorePluginPerplexity.py index 86e06898..2a6f0890 100644 --- a/modules/aicore/aicorePluginPerplexity.py +++ b/modules/aicore/aicorePluginPerplexity.py @@ -27,7 +27,7 @@ class AiPerplexity(BaseConnectorAi): # HttpClient for API calls self.httpClient = httpx.AsyncClient( - timeout=120.0, # Longer timeout for complex requests + timeout=600.0, # Timeout set to 600 seconds (10 minutes) for complex requests that may take longer headers={ "Authorization": f"Bearer {self.apiKey}", "Content-Type": "application/json", diff --git a/modules/datamodels/__init__.py b/modules/datamodels/__init__.py index e1adfd1d..7d73660e 100644 --- a/modules/datamodels/__init__.py +++ b/modules/datamodels/__init__.py @@ -12,4 +12,4 @@ from . import datamodelNeutralizer as neutralizer from . import datamodelChat as chat from . import datamodelFiles as files from . import datamodelVoice as voice -from . import datamodelUtils as utils +from . 
import datamodelUtils as utils \ No newline at end of file diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py index 1da6c65f..4a64217d 100644 --- a/modules/datamodels/datamodelAi.py +++ b/modules/datamodels/datamodelAi.py @@ -1,9 +1,11 @@ from typing import Optional, List, Dict, Any, Callable, TYPE_CHECKING, Tuple -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict from enum import Enum # Import ContentPart for runtime use (needed for Pydantic model rebuilding) from modules.datamodels.datamodelExtraction import ContentPart +# Import JSON utilities for safe conversion +from modules.shared.jsonUtils import extractJsonString, tryParseJson, repairBrokenJson # Operation Types class OperationTypeEnum(str, Enum): @@ -109,8 +111,7 @@ class AiModel(BaseModel): version: Optional[str] = Field(default=None, description="Model version") lastUpdated: Optional[str] = Field(default=None, description="Last update timestamp") - class Config: - arbitraryTypesAllowed = True # Allow Callable type + model_config = ConfigDict(arbitrary_types_allowed=True) # Allow Callable type class SelectionRule(BaseModel): @@ -172,8 +173,7 @@ class AiModelCall(BaseModel): model: Optional[AiModel] = Field(default=None, description="The AI model being called") options: AiCallOptions = Field(default_factory=AiCallOptions, description="Additional model-specific options") - class Config: - arbitraryTypesAllowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) class AiModelResponse(BaseModel): @@ -189,8 +189,7 @@ class AiModelResponse(BaseModel): tokensUsed: Optional[Dict[str, int]] = Field(default=None, description="Token usage (input, output, total)") metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional model-specific metadata") - class Config: - arbitraryTypesAllowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) # Structured prompt models for specialized operations @@ -203,9 
+202,6 @@ class AiCallPromptWebSearch(BaseModel): maxNumberPages: Optional[int] = Field(default=10, description="Maximum number of pages to search (default: 10)") language: Optional[str] = Field(default=None, description="Language code (lowercase, e.g., de, en, fr)") researchDepth: Optional[str] = Field(default="general", description="Research depth: fast (maxDepth=1), general (maxDepth=2), deep (maxDepth=3)") - - class Config: - pass class AiCallPromptWebCrawl(BaseModel): @@ -215,9 +211,6 @@ class AiCallPromptWebCrawl(BaseModel): url: str = Field(description="Single URL to crawl") maxDepth: Optional[int] = Field(default=2, description="Maximum number of hops from starting page (default: 2)") maxWidth: Optional[int] = Field(default=10, description="Maximum pages to crawl per level (default: 10)") - - class Config: - pass class AiCallPromptImage(BaseModel): @@ -227,7 +220,39 @@ class AiCallPromptImage(BaseModel): size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1792x1024, 1024x1792)") quality: Optional[str] = Field(default="standard", description="Image quality (standard, hd)") style: Optional[str] = Field(default="vivid", description="Image style (vivid, natural)") - - class Config: - pass + + +class AiProcessParameters(BaseModel): + """Parameters for AI processing action.""" + aiPrompt: str = Field(description="AI instruction prompt") + contentParts: Optional[List[ContentPart]] = Field( + None, + description="Already-extracted content parts (required if documents need to be processed)" + ) + resultType: str = Field( + default="txt", + description="Output file extension (txt, json, pdf, docx, xlsx, etc.)" + ) + + +# NOTE: DocumentData, AiResponseMetadata, and AiResponse are defined in datamodelWorkflow.py +# Import them from there if needed: from modules.datamodels.datamodelWorkflow import DocumentData, AiResponseMetadata, AiResponse + + +class JsonAccumulationState(BaseModel): + """State for JSON string accumulation during 
iterative AI generation.""" + accumulatedJsonString: str = Field(description="Raw accumulated JSON string") + isAccumulationMode: bool = Field(description="True if we're accumulating fragments") + lastParsedResult: Optional[Dict[str, Any]] = Field( + default=None, + description="Last successfully parsed result (for prompt context)" + ) + allSections: List[Dict[str, Any]] = Field( + default_factory=list, + description="Sections extracted so far (for prompt context)" + ) + kpis: List[Dict[str, Any]] = Field( + default_factory=list, + description="KPI definitions with current values: [{id, description, jsonPath, targetValue, currentValue}, ...]" + ) diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py index c748c44a..4a678c8b 100644 --- a/modules/datamodels/datamodelChat.py +++ b/modules/datamodels/datamodelChat.py @@ -61,6 +61,12 @@ class ChatLog(BaseModel): performance: Optional[Dict[str, Any]] = Field( None, description="Performance metrics" ) + parentId: Optional[str] = Field( + None, description="Parent log entry ID for hierarchical display" + ) + operationId: Optional[str] = Field( + None, description="Operation ID to group related log entries" + ) registerModelLabels( @@ -264,7 +270,6 @@ registerModelLabels( class WorkflowModeEnum(str, Enum): - WORKFLOW_ACTIONPLAN = "Actionplan" WORKFLOW_DYNAMIC = "Dynamic" WORKFLOW_AUTOMATION = "Automation" @@ -273,7 +278,6 @@ registerModelLabels( "WorkflowModeEnum", {"en": "Workflow Mode", "fr": "Mode de workflow"}, { - "WORKFLOW_ACTIONPLAN": {"en": "Actionplan", "fr": "Actionplan"}, "WORKFLOW_DYNAMIC": {"en": "Dynamic", "fr": "Dynamique"}, "WORKFLOW_AUTOMATION": {"en": "Automation", "fr": "Automatisation"}, }, @@ -281,125 +285,27 @@ registerModelLabels( class ChatWorkflow(BaseModel): - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), - description="Primary key", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - mandateId: str = Field( - 
description="ID of the mandate this workflow belongs to", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - status: str = Field( - description="Current status of the workflow", - frontend_type="select", - frontend_readonly=False, - frontend_required=False, - frontend_options=[ + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + mandateId: str = Field(description="ID of the mandate this workflow belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + status: str = Field(default="running", description="Current status of the workflow", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [ {"value": "running", "label": {"en": "Running", "fr": "En cours"}}, {"value": "completed", "label": {"en": "Completed", "fr": "Terminé"}}, {"value": "stopped", "label": {"en": "Stopped", "fr": "Arrêté"}}, {"value": "error", "label": {"en": "Error", "fr": "Erreur"}}, - ], - ) - name: Optional[str] = Field( - None, - description="Name of the workflow", - frontend_type="text", - frontend_readonly=False, - frontend_required=True, - ) - currentRound: int = Field( - description="Current round number", - frontend_type="integer", - frontend_readonly=True, - frontend_required=False, - ) - currentTask: int = Field( - default=0, - description="Current task number", - frontend_type="integer", - frontend_readonly=True, - frontend_required=False, - ) - currentAction: int = Field( - default=0, - description="Current action number", - frontend_type="integer", - frontend_readonly=True, - frontend_required=False, - ) - totalTasks: int = Field( - default=0, - description="Total number of tasks in the workflow", - frontend_type="integer", - frontend_readonly=True, - frontend_required=False, - ) - 
totalActions: int = Field( - default=0, - description="Total number of actions in the workflow", - frontend_type="integer", - frontend_readonly=True, - frontend_required=False, - ) - lastActivity: float = Field( - default_factory=getUtcTimestamp, - description="Timestamp of last activity (UTC timestamp in seconds)", - frontend_type="timestamp", - frontend_readonly=True, - frontend_required=False, - ) - startedAt: float = Field( - default_factory=getUtcTimestamp, - description="When the workflow started (UTC timestamp in seconds)", - frontend_type="timestamp", - frontend_readonly=True, - frontend_required=False, - ) - logs: List[ChatLog] = Field( - default_factory=list, - description="Workflow logs", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - messages: List[ChatMessage] = Field( - default_factory=list, - description="Messages in the workflow", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - stats: List[ChatStat] = Field( - default_factory=list, - description="Workflow statistics list", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - tasks: list = Field( - default_factory=list, - description="List of tasks in the workflow", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - workflowMode: WorkflowModeEnum = Field( - default=WorkflowModeEnum.WORKFLOW_DYNAMIC, - description="Workflow mode selector", - frontend_type="select", - frontend_readonly=False, - frontend_required=False, - frontend_options=[ - { - "value": WorkflowModeEnum.WORKFLOW_ACTIONPLAN.value, - "label": {"en": "Actionplan", "fr": "Actionplan"}, - }, + ]}) + name: Optional[str] = Field(None, description="Name of the workflow", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": True}) + currentRound: int = Field(default=0, description="Current round number", json_schema_extra={"frontend_type": "integer", "frontend_readonly": 
True, "frontend_required": False}) + currentTask: int = Field(default=0, description="Current task number", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) + currentAction: int = Field(default=0, description="Current action number", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) + totalTasks: int = Field(default=0, description="Total number of tasks in the workflow", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) + totalActions: int = Field(default=0, description="Total number of actions in the workflow", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) + lastActivity: float = Field(default_factory=getUtcTimestamp, description="Timestamp of last activity (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) + startedAt: float = Field(default_factory=getUtcTimestamp, description="When the workflow started (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) + logs: List[ChatLog] = Field(default_factory=list, description="Workflow logs", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + messages: List[ChatMessage] = Field(default_factory=list, description="Messages in the workflow", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + stats: List[ChatStat] = Field(default_factory=list, description="Workflow statistics list", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + tasks: list = Field(default_factory=list, description="List of tasks in the workflow", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, 
"frontend_required": False}) + workflowMode: WorkflowModeEnum = Field(default=WorkflowModeEnum.WORKFLOW_DYNAMIC, description="Workflow mode selector", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [ { "value": WorkflowModeEnum.WORKFLOW_DYNAMIC.value, "label": {"en": "Dynamic", "fr": "Dynamique"}, @@ -408,22 +314,37 @@ class ChatWorkflow(BaseModel): "value": WorkflowModeEnum.WORKFLOW_AUTOMATION.value, "label": {"en": "Automation", "fr": "Automatisation"}, }, - ], - ) - maxSteps: int = Field( - default=5, - description="Maximum number of iterations in react mode", - frontend_type="integer", - frontend_readonly=False, - frontend_required=False, - ) - expectedFormats: Optional[List[str]] = Field( - None, - description="List of expected file format extensions from user request (e.g., ['xlsx', 'pdf']). Extracted during intent analysis.", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) + ]}) + maxSteps: int = Field(default=10, description="Maximum number of iterations in dynamic mode", json_schema_extra={"frontend_type": "integer", "frontend_readonly": False, "frontend_required": False}) + expectedFormats: Optional[List[str]] = Field(None, description="List of expected file format extensions from user request (e.g., ['xlsx', 'pdf']). 
Extracted during intent analysis.", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + + # Helper methods for execution state management + def getRoundIndex(self) -> int: + """Get current round index""" + return self.currentRound + + def getTaskIndex(self) -> int: + """Get current task index""" + return self.currentTask + + def getActionIndex(self) -> int: + """Get current action index""" + return self.currentAction + + def incrementRound(self): + """Increment round when new user input received""" + self.currentRound += 1 + self.currentTask = 0 + self.currentAction = 0 + + def incrementTask(self): + """Increment task when starting new task in current round""" + self.currentTask += 1 + self.currentAction = 0 + + def incrementAction(self): + """Increment action when executing new action in current task""" + self.currentAction += 1 registerModelLabels( @@ -475,6 +396,14 @@ class ActionDocument(BaseModel): documentName: str = Field(description="Name of the document") documentData: Any = Field(description="Content/data of the document") mimeType: str = Field(description="MIME type of the document") + sourceJson: Optional[Dict[str, Any]] = Field( + None, + description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)" + ) + validationMetadata: Optional[Dict[str, Any]] = Field( + None, + description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)" + ) registerModelLabels( @@ -885,7 +814,7 @@ registerModelLabels( class TaskContext(BaseModel): taskStep: TaskStep - workflow: Optional["ChatWorkflow"] = None + workflow: Optional[ChatWorkflow] = None workflowId: Optional[str] = None availableDocuments: Optional[str] = "No documents available" availableConnections: Optional[list[str]] = Field(default_factory=list) @@ -900,6 +829,27 @@ class TaskContext(BaseModel): failedActions: Optional[list] = Field(default_factory=list) successfulActions: Optional[list] 
= Field(default_factory=list) criteriaProgress: Optional[dict] = None + + # Stage 2 context fields (NEW) + actionObjective: Optional[str] = Field(None, description="Objective for current action") + parametersContext: Optional[str] = Field(None, description="Context for parameter generation") + learnings: Optional[list[str]] = Field(default_factory=list, description="Learnings from previous actions") + stage1Selection: Optional[dict] = Field(None, description="Stage 1 selection data") + nextActionGuidance: Optional[Dict[str, Any]] = Field(None, description="Guidance for the next action from previous refinement") + + def updateFromSelection(self, selection: Any): + """Update context from Stage 1 selection + + Args: + selection: ActionDefinition instance from Stage 1 + """ + from modules.datamodels.datamodelWorkflow import ActionDefinition + + if isinstance(selection, ActionDefinition): + self.actionObjective = selection.actionObjective + self.parametersContext = selection.parametersContext + self.learnings = selection.learnings if selection.learnings else [] + self.stage1Selection = selection.model_dump() def getDocumentReferences(self) -> List[str]: docs = [] @@ -936,6 +886,16 @@ class ReviewResult(BaseModel): userMessage: Optional[str] = Field( None, description="User-friendly message in user's language" ) + # NEW: Concrete next action guidance (when status is "continue") + nextAction: Optional[str] = Field( + None, description="Specific action to execute next (e.g., 'ai.convert', 'ai.process', 'ai.reformat')" + ) + nextActionParameters: Optional[Dict[str, Any]] = Field( + None, description="Parameters for the next action (e.g., {'fromFormat': 'json', 'toFormat': 'csv'})" + ) + nextActionObjective: Optional[str] = Field( + None, description="What this specific action will achieve" + ) registerModelLabels( @@ -973,8 +933,7 @@ registerModelLabels( }, ) -# Resolve forward references -TaskContext.update_forward_refs() +# Forward references resolved automatically since 
ChatWorkflow is defined above class PromptPlaceholder(BaseModel): @@ -1013,71 +972,20 @@ registerModelLabels( class AutomationDefinition(BaseModel): - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), - description="Primary key", - frontend_type="text", - frontend_readonly=True, - frontend_required=False - ) - mandateId: str = Field( - description="Mandate ID", - frontend_type="text", - frontend_readonly=True, - frontend_required=False - ) - label: str = Field( - description="User-friendly name", - frontend_type="text", - frontend_required=True - ) - schedule: str = Field( - description="Cron schedule pattern", - frontend_type="select", - frontend_options=[ + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + mandateId: str = Field(description="Mandate ID", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + label: str = Field(description="User-friendly name", json_schema_extra={"frontend_type": "text", "frontend_required": True}) + schedule: str = Field(description="Cron schedule pattern", json_schema_extra={"frontend_type": "select", "frontend_required": True, "frontend_options": [ {"value": "0 */4 * * *", "label": {"en": "Every 4 hours", "fr": "Toutes les 4 heures"}}, {"value": "0 22 * * *", "label": {"en": "Daily at 22:00", "fr": "Quotidien à 22:00"}}, {"value": "0 10 * * 1", "label": {"en": "Weekly Monday 10:00", "fr": "Hebdomadaire lundi 10:00"}} - ], - frontend_required=True - ) - template: str = Field( - description="JSON template with placeholders (format: {{KEY:PLACEHOLDER_NAME}})", - frontend_type="textarea", - frontend_required=True - ) - placeholders: Dict[str, str] = Field( - default_factory=dict, - description="Dictionary of placeholder key/value pairs (e.g., {'connectionName': 'MyConnection', 'sharepointFolderNameSource': '/folder/path', 
'webResearchUrl': 'https://...', 'webResearchPrompt': '...', 'documentPrompt': '...'})", - frontend_type="text" - ) - active: bool = Field( - default=False, - description="Whether automation should be launched in event handler", - frontend_type="checkbox", - frontend_required=False - ) - eventId: Optional[str] = Field( - None, - description="Event ID from event management (None if not registered)", - frontend_type="text", - frontend_readonly=True, - frontend_required=False - ) - status: Optional[str] = Field( - None, - description="Status: 'active' if event is registered, 'inactive' if not (computed, readonly)", - frontend_type="text", - frontend_readonly=True, - frontend_required=False - ) - executionLogs: List[Dict[str, Any]] = Field( - default_factory=list, - description="List of execution logs, each containing timestamp, workflowId, status, and messages", - frontend_type="text", - frontend_readonly=True, - frontend_required=False - ) + ]}) + template: str = Field(description="JSON template with placeholders (format: {{KEY:PLACEHOLDER_NAME}})", json_schema_extra={"frontend_type": "textarea", "frontend_required": True}) + placeholders: Dict[str, str] = Field(default_factory=dict, description="Dictionary of placeholder key/value pairs (e.g., {'connectionName': 'MyConnection', 'sharepointFolderNameSource': '/folder/path', 'webResearchUrl': 'https://...', 'webResearchPrompt': '...', 'documentPrompt': '...'})", json_schema_extra={"frontend_type": "text"}) + active: bool = Field(default=False, description="Whether automation should be launched in event handler", json_schema_extra={"frontend_type": "checkbox", "frontend_required": False}) + eventId: Optional[str] = Field(None, description="Event ID from event management (None if not registered)", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + status: Optional[str] = Field(None, description="Status: 'active' if event is registered, 'inactive' if not (computed, 
readonly)", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + executionLogs: List[Dict[str, Any]] = Field(default_factory=list, description="List of execution logs, each containing timestamp, workflowId, status, and messages", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) registerModelLabels( diff --git a/modules/datamodels/datamodelDocref.py b/modules/datamodels/datamodelDocref.py new file mode 100644 index 00000000..0ad3d2bb --- /dev/null +++ b/modules/datamodels/datamodelDocref.py @@ -0,0 +1,118 @@ +""" +Document reference models for typed document references in workflows. +""" + +from typing import List, Optional +from pydantic import BaseModel, Field +from modules.shared.attributeUtils import registerModelLabels + + +class DocumentReference(BaseModel): + """Base class for document references""" + pass + + +class DocumentListReference(DocumentReference): + """Reference to a document list via message label""" + messageId: Optional[str] = Field(None, description="Optional message ID for cross-round references") + label: str = Field(description="Document list label") + + def to_string(self) -> str: + """Convert to string format: docList:messageId:label or docList:label""" + if self.messageId: + return f"docList:{self.messageId}:{self.label}" + return f"docList:{self.label}" + + +class DocumentItemReference(DocumentReference): + """Reference to a specific document item""" + documentId: str = Field(description="Document ID") + fileName: Optional[str] = Field(None, description="Optional file name") + + def to_string(self) -> str: + """Convert to string format: docItem:documentId:fileName or docItem:documentId""" + if self.fileName: + return f"docItem:{self.documentId}:{self.fileName}" + return f"docItem:{self.documentId}" + + +class DocumentReferenceList(BaseModel): + """List of document references with conversion methods""" + references: List[DocumentReference] 
= Field( + default_factory=list, + description="List of document references" + ) + + def to_string_list(self) -> List[str]: + """Convert all references to string list""" + return [ref.to_string() for ref in self.references] + + @classmethod + def from_string_list(cls, stringList: List[str]) -> "DocumentReferenceList": + """Parse string list to typed references + + Supports formats: + - docList:label + - docList:messageId:label + - docItem:documentId + - docItem:documentId:fileName + """ + references = [] + + for refStr in stringList: + if not refStr or not isinstance(refStr, str): + continue + + refStr = refStr.strip() + + # Parse docList: references + if refStr.startswith("docList:"): + parts = refStr[8:].split(":", 1) # Remove "docList:" prefix + if len(parts) == 2: + # docList:messageId:label + messageId, label = parts + references.append(DocumentListReference(messageId=messageId, label=label)) + elif len(parts) == 1 and parts[0]: + # docList:label + references.append(DocumentListReference(label=parts[0])) + + # Parse docItem: references + elif refStr.startswith("docItem:"): + parts = refStr[8:].split(":", 1) # Remove "docItem:" prefix + if len(parts) == 2: + # docItem:documentId:fileName + documentId, fileName = parts + references.append(DocumentItemReference(documentId=documentId, fileName=fileName)) + elif len(parts) == 1 and parts[0]: + # docItem:documentId + references.append(DocumentItemReference(documentId=parts[0])) + + # Unknown format - skip or log warning + else: + # Try to parse as simple string (backward compatibility) + # Assume it's a label if it doesn't match known patterns + if refStr: + references.append(DocumentListReference(label=refStr)) + + return cls(references=references) + + +registerModelLabels( + "DocumentReference", + {"en": "Document Reference", "fr": "Référence de document"}, + { + "messageId": {"en": "Message ID", "fr": "ID du message"}, + "label": {"en": "Label", "fr": "Étiquette"}, + "documentId": {"en": "Document ID", "fr": "ID 
du document"}, + "fileName": {"en": "File Name", "fr": "Nom du fichier"}, + }, +) + +registerModelLabels( + "DocumentReferenceList", + {"en": "Document Reference List", "fr": "Liste de références de documents"}, + { + "references": {"en": "References", "fr": "Références"}, + }, +) + diff --git a/modules/datamodels/datamodelExtraction.py b/modules/datamodels/datamodelExtraction.py index 5a530cab..ebfe2944 100644 --- a/modules/datamodels/datamodelExtraction.py +++ b/modules/datamodels/datamodelExtraction.py @@ -1,9 +1,6 @@ -from typing import Any, Dict, List, Optional, Literal, TYPE_CHECKING +from typing import Any, Dict, List, Optional, Literal from pydantic import BaseModel, Field -if TYPE_CHECKING: - from modules.datamodels.datamodelAi import OperationTypeEnum - class ContentPart(BaseModel): id: str = Field(description="Unique content part identifier") @@ -67,7 +64,6 @@ class ExtractionOptions(BaseModel): # Core extraction parameters prompt: str = Field(description="Extraction prompt for AI processing") - operationType: 'OperationTypeEnum' = Field(description="Type of operation for AI processing") processDocumentsIndividually: bool = Field(default=True, description="Process each document separately") # Image processing parameters @@ -85,7 +81,4 @@ class ExtractionOptions(BaseModel): # Additional processing options enableParallelProcessing: bool = Field(default=True, description="Enable parallel processing of chunks") - maxConcurrentChunks: int = Field(default=5, ge=1, le=20, description="Maximum number of chunks to process concurrently") - - class Config: - arbitraryTypesAllowed = True # Allow OperationTypeEnum import \ No newline at end of file + maxConcurrentChunks: int = Field(default=5, ge=1, le=20, description="Maximum number of chunks to process concurrently") \ No newline at end of file diff --git a/modules/datamodels/datamodelFiles.py b/modules/datamodels/datamodelFiles.py index 32e8d445..106bac96 100644 --- a/modules/datamodels/datamodelFiles.py +++ 
b/modules/datamodels/datamodelFiles.py @@ -9,13 +9,13 @@ import base64 class FileItem(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", frontend_type="text", frontend_readonly=True, frontend_required=False) - mandateId: str = Field(description="ID of the mandate this file belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) - fileName: str = Field(description="Name of the file", frontend_type="text", frontend_readonly=False, frontend_required=True) - mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False) - fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False) - fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False) - creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + mandateId: str = Field(description="ID of the mandate this file belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + fileName: str = Field(description="Name of the file", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": True}) + mimeType: str = Field(description="MIME type of the file", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + fileHash: str = Field(description="Hash of the file", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + fileSize: int 
= Field(description="Size of the file in bytes", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) + creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) registerModelLabels( "FileItem", diff --git a/modules/datamodels/datamodelNeutralizer.py b/modules/datamodels/datamodelNeutralizer.py index 60894dff..b1f2b411 100644 --- a/modules/datamodels/datamodelNeutralizer.py +++ b/modules/datamodels/datamodelNeutralizer.py @@ -7,13 +7,13 @@ from modules.shared.attributeUtils import registerModelLabels class DataNeutraliserConfig(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the configuration", frontend_type="text", frontend_readonly=True, frontend_required=False) - mandateId: str = Field(description="ID of the mandate this configuration belongs to", frontend_type="text", frontend_readonly=True, frontend_required=True) - userId: str = Field(description="ID of the user who created this configuration", frontend_type="text", frontend_readonly=True, frontend_required=True) - enabled: bool = Field(default=True, description="Whether data neutralization is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) - namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False) - sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False) - sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False) + id: str = 
Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the configuration", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + mandateId: str = Field(description="ID of the mandate this configuration belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + userId: str = Field(description="ID of the user who created this configuration", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + enabled: bool = Field(default=True, description="Whether data neutralization is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False}) + namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", json_schema_extra={"frontend_type": "textarea", "frontend_readonly": False, "frontend_required": False}) + sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": False}) + sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": False}) registerModelLabels( "DataNeutraliserConfig", {"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"}, @@ -29,12 +29,12 @@ registerModelLabels( ) class DataNeutralizerAttributes(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the attribute mapping (used as UID in neutralized files)", frontend_type="text", frontend_readonly=True, frontend_required=False) - mandateId: str = Field(description="ID of the mandate this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=True) - userId: 
str = Field(description="ID of the user who created this attribute", frontend_type="text", frontend_readonly=True, frontend_required=True) - originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True) - fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) - patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True) + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the attribute mapping (used as UID in neutralized files)", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + mandateId: str = Field(description="ID of the mandate this attribute belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + userId: str = Field(description="ID of the user who created this attribute", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + originalText: str = Field(description="Original text that was neutralized", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) registerModelLabels( "DataNeutralizerAttributes", {"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"}, diff --git a/modules/datamodels/datamodelPagination.py 
b/modules/datamodels/datamodelPagination.py index b1bc3bc6..b7338585 100644 --- a/modules/datamodels/datamodelPagination.py +++ b/modules/datamodels/datamodelPagination.py @@ -5,7 +5,7 @@ All models use camelStyle naming convention for consistency with frontend. """ from typing import List, Dict, Any, Optional, Generic, TypeVar -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict import math T = TypeVar('T') @@ -76,6 +76,5 @@ class PaginatedResponse(BaseModel, Generic[T]): items: List[T] = Field(..., description="Array of items for current page") pagination: Optional[PaginationMetadata] = Field(..., description="Pagination metadata (None if pagination not applied)") - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) diff --git a/modules/datamodels/datamodelSecurity.py b/modules/datamodels/datamodelSecurity.py index e5a1e8a4..6803638e 100644 --- a/modules/datamodels/datamodelSecurity.py +++ b/modules/datamodels/datamodelSecurity.py @@ -1,7 +1,7 @@ """Security models: Token and AuthEvent.""" from typing import Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict from modules.shared.attributeUtils import registerModelLabels from modules.shared.timeUtils import getUtcTimestamp from .datamodelUam import AuthAuthority @@ -47,8 +47,7 @@ class Token(BaseModel): None, description="Mandate ID for tenant scoping of the token" ) - class Config: - use_enum_values = True + model_config = ConfigDict(use_enum_values=True) registerModelLabels( @@ -75,60 +74,14 @@ registerModelLabels( class AuthEvent(BaseModel): - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), - description="Unique ID of the auth event", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - userId: str = Field( - description="ID of the user this event belongs to", - frontend_type="text", - frontend_readonly=True, - 
frontend_required=True, - ) - eventType: str = Field( - description="Type of authentication event (e.g., 'login', 'logout', 'token_refresh')", - frontend_type="text", - frontend_readonly=True, - frontend_required=True, - ) - timestamp: float = Field( - default_factory=getUtcTimestamp, - description="Unix timestamp when the event occurred", - frontend_type="datetime", - frontend_readonly=True, - frontend_required=True, - ) - ipAddress: Optional[str] = Field( - default=None, - description="IP address from which the event originated", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - userAgent: Optional[str] = Field( - default=None, - description="User agent string from the request", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) - success: bool = Field( - default=True, - description="Whether the authentication event was successful", - frontend_type="boolean", - frontend_readonly=True, - frontend_required=True, - ) - details: Optional[str] = Field( - default=None, - description="Additional details about the event", - frontend_type="text", - frontend_readonly=True, - frontend_required=False, - ) + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the auth event", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + userId: str = Field(description="ID of the user this event belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + eventType: str = Field(description="Type of authentication event (e.g., 'login', 'logout', 'token_refresh')", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + timestamp: float = Field(default_factory=getUtcTimestamp, description="Unix timestamp when the event occurred", json_schema_extra={"frontend_type": "datetime", "frontend_readonly": True, "frontend_required": True}) + ipAddress: 
Optional[str] = Field(default=None, description="IP address from which the event originated", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + userAgent: Optional[str] = Field(default=None, description="User agent string from the request", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + success: bool = Field(default=True, description="Whether the authentication event was successful", json_schema_extra={"frontend_type": "boolean", "frontend_readonly": True, "frontend_required": True}) + details: Optional[str] = Field(default=None, description="Additional details about the event", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) registerModelLabels( diff --git a/modules/datamodels/datamodelUam.py b/modules/datamodels/datamodelUam.py index a889b4ae..4a9c10aa 100644 --- a/modules/datamodels/datamodelUam.py +++ b/modules/datamodels/datamodelUam.py @@ -25,15 +25,35 @@ class ConnectionStatus(str, Enum): PENDING = "pending" class Mandate(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the mandate", frontend_type="text", frontend_readonly=True, frontend_required=False) - name: str = Field(description="Name of the mandate", frontend_type="text", frontend_readonly=False, frontend_required=True) - language: str = Field(default="en", description="Default language of the mandate", frontend_type="select", frontend_readonly=False, frontend_required=True, frontend_options=[ - {"value": "de", "label": {"en": "Deutsch", "fr": "Allemand"}}, - {"value": "en", "label": {"en": "English", "fr": "Anglais"}}, - {"value": "fr", "label": {"en": "Français", "fr": "Français"}}, - {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}}, - ]) - enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, 
frontend_required=False) + id: str = Field( + default_factory=lambda: str(uuid.uuid4()), + description="Unique ID of the mandate", + json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False} + ) + name: str = Field( + description="Name of the mandate", + json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": True} + ) + language: str = Field( + default="en", + description="Default language of the mandate", + json_schema_extra={ + "frontend_type": "select", + "frontend_readonly": False, + "frontend_required": True, + "frontend_options": [ + {"value": "de", "label": {"en": "Deutsch", "fr": "Allemand"}}, + {"value": "en", "label": {"en": "English", "fr": "Anglais"}}, + {"value": "fr", "label": {"en": "Français", "fr": "Français"}}, + {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}}, + ] + } + ) + enabled: bool = Field( + default=True, + description="Indicates whether the mandate is enabled", + json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False} + ) registerModelLabels( "Mandate", {"en": "Mandate", "fr": "Mandat"}, @@ -46,31 +66,31 @@ registerModelLabels( ) class UserConnection(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the connection", frontend_type="text", frontend_readonly=True, frontend_required=False) - userId: str = Field(description="ID of the user this connection belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) - authority: AuthAuthority = Field(description="Authentication authority", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[ + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the connection", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + userId: str = Field(description="ID of the user this 
connection belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + authority: AuthAuthority = Field(description="Authentication authority", json_schema_extra={"frontend_type": "select", "frontend_readonly": True, "frontend_required": False, "frontend_options": [ {"value": "local", "label": {"en": "Local", "fr": "Local"}}, {"value": "google", "label": {"en": "Google", "fr": "Google"}}, {"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}}, - ]) - externalId: str = Field(description="User ID in the external system", frontend_type="text", frontend_readonly=True, frontend_required=False) - externalUsername: str = Field(description="Username in the external system", frontend_type="text", frontend_readonly=False, frontend_required=False) - externalEmail: Optional[EmailStr] = Field(None, description="Email in the external system", frontend_type="email", frontend_readonly=False, frontend_required=False) - status: ConnectionStatus = Field(default=ConnectionStatus.ACTIVE, description="Connection status", frontend_type="select", frontend_readonly=False, frontend_required=False, frontend_options=[ + ]}) + externalId: str = Field(description="User ID in the external system", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + externalUsername: str = Field(description="Username in the external system", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": False}) + externalEmail: Optional[EmailStr] = Field(None, description="Email in the external system", json_schema_extra={"frontend_type": "email", "frontend_readonly": False, "frontend_required": False}) + status: ConnectionStatus = Field(default=ConnectionStatus.ACTIVE, description="Connection status", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [ {"value": "active", "label": {"en": "Active", 
"fr": "Actif"}}, {"value": "inactive", "label": {"en": "Inactive", "fr": "Inactif"}}, {"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}}, {"value": "pending", "label": {"en": "Pending", "fr": "En attente"}}, - ]) - connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) - lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) - expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) - tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[ + ]}) + connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) + lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) + expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) + tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", json_schema_extra={"frontend_type": "select", "frontend_readonly": True, "frontend_required": False, "frontend_options": [ {"value": 
"active", "label": {"en": "Active", "fr": "Actif"}}, {"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}}, {"value": "none", "label": {"en": "None", "fr": "Aucun"}}, - ]) - tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + ]}) + tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) registerModelLabels( "UserConnection", {"en": "User Connection", "fr": "Connexion utilisateur"}, @@ -91,28 +111,28 @@ registerModelLabels( ) class User(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the user", frontend_type="text", frontend_readonly=True, frontend_required=False) - username: str = Field(description="Username for login", frontend_type="text", frontend_readonly=False, frontend_required=True) - email: Optional[EmailStr] = Field(None, description="Email address of the user", frontend_type="email", frontend_readonly=False, frontend_required=True) - fullName: Optional[str] = Field(None, description="Full name of the user", frontend_type="text", frontend_readonly=False, frontend_required=False) - language: str = Field(default="en", description="Preferred language of the user", frontend_type="select", frontend_readonly=False, frontend_required=True, frontend_options=[ + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the user", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + username: str = Field(description="Username for login", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": True}) + email: Optional[EmailStr] = Field(None, description="Email address of the user", 
json_schema_extra={"frontend_type": "email", "frontend_readonly": False, "frontend_required": True}) + fullName: Optional[str] = Field(None, description="Full name of the user", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": False}) + language: str = Field(default="en", description="Preferred language of the user", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True, "frontend_options": [ {"value": "de", "label": {"en": "Deutsch", "fr": "Allemand"}}, {"value": "en", "label": {"en": "English", "fr": "Anglais"}}, {"value": "fr", "label": {"en": "Français", "fr": "Français"}}, {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}}, - ]) - enabled: bool = Field(default=True, description="Indicates whether the user is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) - privilege: UserPrivilege = Field(default=UserPrivilege.USER, description="Permission level", frontend_type="select", frontend_readonly=False, frontend_required=True, frontend_options=[ + ]}) + enabled: bool = Field(default=True, description="Indicates whether the user is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False}) + privilege: UserPrivilege = Field(default=UserPrivilege.USER, description="Permission level", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True, "frontend_options": [ {"value": "user", "label": {"en": "User", "fr": "Utilisateur"}}, {"value": "admin", "label": {"en": "Admin", "fr": "Administrateur"}}, {"value": "sysadmin", "label": {"en": "SysAdmin", "fr": "Administrateur système"}}, - ]) - authenticationAuthority: AuthAuthority = Field(default=AuthAuthority.LOCAL, description="Primary authentication authority", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[ + ]}) + authenticationAuthority: 
AuthAuthority = Field(default=AuthAuthority.LOCAL, description="Primary authentication authority", json_schema_extra={"frontend_type": "select", "frontend_readonly": True, "frontend_required": False, "frontend_options": [ {"value": "local", "label": {"en": "Local", "fr": "Local"}}, {"value": "google", "label": {"en": "Google", "fr": "Google"}}, {"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}}, - ]) - mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) + ]}) + mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) registerModelLabels( "User", {"en": "User", "fr": "Utilisateur"}, diff --git a/modules/datamodels/datamodelUtils.py b/modules/datamodels/datamodelUtils.py index 67a42534..4f1c69c2 100644 --- a/modules/datamodels/datamodelUtils.py +++ b/modules/datamodels/datamodelUtils.py @@ -6,10 +6,10 @@ import uuid class Prompt(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", frontend_type="text", frontend_readonly=True, frontend_required=False) - mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) - content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True) - name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True) + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + mandateId: str = Field(description="ID of the mandate this prompt belongs to", json_schema_extra={"frontend_type": "text", 
"frontend_readonly": True, "frontend_required": False}) + content: str = Field(description="Content of the prompt", json_schema_extra={"frontend_type": "textarea", "frontend_readonly": False, "frontend_required": True}) + name: str = Field(description="Name of the prompt", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": True}) registerModelLabels( "Prompt", {"en": "Prompt", "fr": "Invite"}, diff --git a/modules/datamodels/datamodelVoice.py b/modules/datamodels/datamodelVoice.py index 1ab47f15..10e820c6 100644 --- a/modules/datamodels/datamodelVoice.py +++ b/modules/datamodels/datamodelVoice.py @@ -7,16 +7,16 @@ import uuid class VoiceSettings(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", frontend_type="text", frontend_readonly=True, frontend_required=False) - userId: str = Field(description="ID of the user these settings belong to", frontend_type="text", frontend_readonly=True, frontend_required=True) - mandateId: str = Field(description="ID of the mandate these settings belong to", frontend_type="text", frontend_readonly=True, frontend_required=True) - sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", frontend_type="select", frontend_readonly=False, frontend_required=True) - ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", frontend_type="select", frontend_readonly=False, frontend_required=True) - ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True) - translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) - targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False) - creationDate: 
float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) - lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) + userId: str = Field(description="ID of the user these settings belong to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + mandateId: str = Field(description="ID of the mandate these settings belong to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True}) + sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True}) + ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True}) + ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True}) + translationEnabled: bool = Field(default=True, description="Whether translation is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False}) + targetLanguage: str = Field(default="en-US", description="Target language for translation", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False}) + creationDate: float = Field(default_factory=getUtcTimestamp, 
description="Date when the settings were created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) + lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) registerModelLabels( diff --git a/modules/datamodels/datamodelWorkflow.py b/modules/datamodels/datamodelWorkflow.py new file mode 100644 index 00000000..4cf940f7 --- /dev/null +++ b/modules/datamodels/datamodelWorkflow.py @@ -0,0 +1,467 @@ +""" +Workflow execution models for action definitions, AI responses, and workflow-level structures. +""" + +from typing import Dict, Any, List, Optional, TYPE_CHECKING +from pydantic import BaseModel, Field +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.jsonUtils import extractJsonString, tryParseJson, repairBrokenJson + +# Import DocumentReferenceList at runtime (needed for ActionDefinition) +from modules.datamodels.datamodelDocref import DocumentReferenceList + +# Forward references for circular imports (use string annotations) +if TYPE_CHECKING: + from modules.datamodels.datamodelChat import ChatDocument, ActionResult + from modules.datamodels.datamodelExtraction import ExtractionOptions + + +class ActionDefinition(BaseModel): + """Action definition with selection and parameters from planning phase""" + + # Core action selection (Stage 1) + action: str = Field(description="Compound action name (method.action)") + actionObjective: str = Field(description="Objective for this action") + userMessage: Optional[str] = Field( + None, + description="User-friendly message in user's language explaining what this action will do (generated by AI in prompts)" + ) + parametersContext: Optional[str] = Field( + None, + description="Context for parameter generation" + ) + 
learnings: List[str] = Field( + default_factory=list, + description="Learnings from previous actions" + ) + + # Resources (ALWAYS defined in Stage 1 if action needs them) + documentList: Optional[DocumentReferenceList] = Field( + None, + description="Document references (ALWAYS defined in Stage 1 if action needs documents)" + ) + connectionReference: Optional[str] = Field( + None, + description="Connection reference (ALWAYS defined in Stage 1 if action needs connection)" + ) + + # Parameters (may be defined in Stage 1 OR Stage 2, depending on action and actionObjective) + parameters: Optional[Dict[str, Any]] = Field( + None, + description="Action-specific parameters (generated in Stage 2 for complex actions, or inferred from actionObjective for simple actions)" + ) + + def hasParameters(self) -> bool: + """Check if parameters have been generated (Stage 2 complete or inferred)""" + return self.parameters is not None + + def needsStage2(self) -> bool: + """Determine if Stage 2 parameter generation is needed (generic, deterministic check) + + Generic logic (works for any action, dynamically added or removed): + - If parameters are already set → Stage 2 not needed + - If parameters are None → Stage 2 needed (to generate parameters from actionObjective and context) + + Note: Stage 1 always defines documentList and connectionReference if the action needs them. + Stage 2 only generates the action-specific parameters dictionary. + """ + # Generic check: if parameters are not set, Stage 2 is needed + return self.parameters is None + + def updateFromStage1StringReferences(self, stringRefs: Optional[List[str]], connectionRef: Optional[str]): + """Update documentList and connectionReference from Stage 1 string references + + Called when Stage 1 AI returns string references that need to be converted to typed models. 
+ """ + if stringRefs: + self.documentList = DocumentReferenceList.from_string_list(stringRefs) + if connectionRef: + self.connectionReference = connectionRef + + +class AiResponseMetadata(BaseModel): + """Metadata for AI response (varies by operation type).""" + + # Document Generation Metadata + title: Optional[str] = Field(None, description="Document title") + filename: Optional[str] = Field(None, description="Document filename") + + # Operation-Specific Metadata + operationType: Optional[str] = Field(None, description="Type of operation performed") + schemaVersion: Optional[str] = Field(None, description="Schema version (e.g., 'parameters_v1')", alias="schema") + extractionMethod: Optional[str] = Field(None, description="Method used for extraction") + sourceDocuments: Optional[List[str]] = Field(None, description="Source document references") + + # Additional metadata (for extensibility) + additionalData: Optional[Dict[str, Any]] = Field(None, description="Additional operation-specific metadata") + + +class DocumentData(BaseModel): + """Single document in response""" + documentName: str = Field(description="Document name") + documentData: Any = Field(description="Document data (can be str, bytes, dict, etc.)") + mimeType: str = Field(description="MIME type of the document") + sourceJson: Optional[Dict[str, Any]] = Field( + None, + description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)" + ) + + +class ExtractContentParameters(BaseModel): + """Parameters for extraction action. + + This model is defined together with the `methodAi.extractContent()` action function. + All action parameter models follow this pattern: defined in the same module as the action. + However, since this is a workflow-level model used across the system, it's defined here. 
+ """ + documentList: DocumentReferenceList = Field(description="Document references to extract content from") + extractionOptions: Optional[Any] = Field( # ExtractionOptions - forward reference + None, + description="Extraction options (determined dynamically based on task and document characteristics)" + ) + + +class AiResponse(BaseModel): + """Unified response from all AI calls (planning, text, documents)""" + + content: str = Field(description="Response content (JSON string for planning, text for analysis, unified JSON for documents)") + metadata: Optional[AiResponseMetadata] = Field( + None, + description="Response metadata (varies by operation type)" + ) + documents: Optional[List[DocumentData]] = Field( + None, + description="Generated documents (only for document generation operations)" + ) + + def toJson(self) -> Dict[str, Any]: + """ + Convert AI response content to JSON using enhanced stabilizing failsafe conversion methods. + Centralizes AI result to JSON conversion in one place. + + Uses methods from jsonUtils: + - tryParseJson() - Safe parsing with error handling + - repairBrokenJson() - Repairs broken/incomplete JSON + - extractJsonString() - Extracts JSON from text with code fences + + Returns: + Dict containing the parsed JSON content, or a safe fallback structure if parsing fails. 
+ - If content is valid JSON dict: returns the dict directly + - If content is valid JSON list: wraps in {"data": [...]} + - If content is broken JSON: attempts repair using repairBrokenJson() + - If all parsing fails: returns {"content": "...", "parseError": True} + """ + # If content is already a dict, return it directly + if isinstance(self.content, dict): + return self.content + + # If content is already a list, wrap it + if isinstance(self.content, list): + return {"data": self.content} + + # Convert to string if needed + contentStr = str(self.content) if not isinstance(self.content, str) else self.content + + # First, try to extract JSON from text (handles code fences, etc.) + extractedJson = extractJsonString(contentStr) + + # Try to parse as JSON (returns tuple: obj, error, cleaned_str) + parsedJson, parseError, _ = tryParseJson(extractedJson) + + if parsedJson is not None and parseError is None: + # If it's a dict, return directly + if isinstance(parsedJson, dict): + return parsedJson + # If it's a list, wrap in dict + elif isinstance(parsedJson, list): + return {"data": parsedJson} + + # Try to repair broken JSON + repairedJson = repairBrokenJson(contentStr) + if repairedJson: + # repairBrokenJson returns Optional[Dict[str, Any]] - always a dict or None + if isinstance(repairedJson, dict): + return repairedJson + + # All parsing failed - return safe fallback + contentStr = str(self.content) if not isinstance(self.content, str) else self.content + return {"content": contentStr, "parseError": True} + + +# Workflow-level models + +class RequestContext(BaseModel): + """Normalized request context from user input""" + + originalPrompt: str = Field(description="Original user prompt") + documents: List[Any] = Field( # ChatDocument - forward reference + default_factory=list, + description="Documents provided by user" + ) + userLanguage: str = Field(description="User's language") + detectedComplexity: str = Field( + description="Complexity level: simple, moderate, 
complex" + ) + requiresDocuments: bool = Field(default=False, description="Whether request requires documents") + requiresWebResearch: bool = Field(default=False, description="Whether request requires web research") + requiresAnalysis: bool = Field(default=False, description="Whether request requires analysis") + expectedOutputFormat: Optional[str] = Field(None, description="Expected output format") + expectedOutputType: Optional[str] = Field(None, description="Expected output type: answer, document, analysis") + + +class UnderstandingResult(BaseModel): + """Result from initial understanding phase (combined AI call)""" + + parameters: Dict[str, Any] = Field( + default_factory=dict, + description="Basic parameters (language, format, detail level)" + ) + intention: Dict[str, Any] = Field( + default_factory=dict, + description="User intention (primaryGoal, secondaryGoals, intentionType)" + ) + context: Dict[str, Any] = Field( + default_factory=dict, + description="Extracted context (topics, requirements, constraints)" + ) + documentReferences: List[Dict[str, Any]] = Field( + default_factory=list, + description="Document references with purpose and relevance" + ) + tasks: List["TaskDefinition"] = Field( # Forward reference + default_factory=list, + description="Task definitions with deliverables" + ) + + +class TaskDefinition(BaseModel): + """Task definition from understanding phase""" + + id: str = Field(description="Task identifier") + objective: str = Field(description="Task objective") + deliverable: Dict[str, Any] = Field( + description="Deliverable specification (type, format, style, detailLevel)" + ) + requiresWebResearch: bool = Field(default=False, description="Whether task requires web research") + requiresDocumentAnalysis: bool = Field(default=False, description="Whether task requires document analysis") + requiresContentGeneration: bool = Field(default=True, description="Whether task requires content generation") + requiredDocuments: List[str] = Field( + 
default_factory=list, + description="Document references needed for this task" + ) + extractionOptions: Optional[Any] = Field( # ExtractionOptions - forward reference + None, + description="Extraction options for document processing (determined dynamically based on task and document characteristics)" + ) + + +class TaskResult(BaseModel): + """Result from task execution""" + + taskId: str = Field(description="Task identifier") + actionResult: Any = Field(description="ActionResult from task execution") # ActionResult - forward reference + + +# Register model labels for UI +registerModelLabels( + "RequestContext", + {"en": "Request Context", "fr": "Contexte de la demande"}, + { + "originalPrompt": {"en": "Original Prompt", "fr": "Invite originale"}, + "documents": {"en": "Documents", "fr": "Documents"}, + "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"}, + "detectedComplexity": {"en": "Detected Complexity", "fr": "Complexité détectée"}, + "requiresDocuments": {"en": "Requires Documents", "fr": "Nécessite des documents"}, + "requiresWebResearch": {"en": "Requires Web Research", "fr": "Nécessite une recherche web"}, + "requiresAnalysis": {"en": "Requires Analysis", "fr": "Nécessite une analyse"}, + "expectedOutputFormat": {"en": "Expected Output Format", "fr": "Format de sortie attendu"}, + "expectedOutputType": {"en": "Expected Output Type", "fr": "Type de sortie attendu"}, + }, +) + +registerModelLabels( + "UnderstandingResult", + {"en": "Understanding Result", "fr": "Résultat de compréhension"}, + { + "parameters": {"en": "Parameters", "fr": "Paramètres"}, + "intention": {"en": "Intention", "fr": "Intention"}, + "context": {"en": "Context", "fr": "Contexte"}, + "documentReferences": {"en": "Document References", "fr": "Références de documents"}, + "tasks": {"en": "Tasks", "fr": "Tâches"}, + }, +) + +registerModelLabels( + "TaskDefinition", + {"en": "Task Definition", "fr": "Définition de tâche"}, + { + "id": {"en": "Task ID", "fr": "ID de la 
tâche"}, + "objective": {"en": "Objective", "fr": "Objectif"}, + "deliverable": {"en": "Deliverable", "fr": "Livrable"}, + "requiresWebResearch": {"en": "Requires Web Research", "fr": "Nécessite une recherche web"}, + "requiresDocumentAnalysis": {"en": "Requires Document Analysis", "fr": "Nécessite une analyse de documents"}, + "requiresContentGeneration": {"en": "Requires Content Generation", "fr": "Nécessite une génération de contenu"}, + "requiredDocuments": {"en": "Required Documents", "fr": "Documents requis"}, + "extractionOptions": {"en": "Extraction Options", "fr": "Options d'extraction"}, + }, +) + +registerModelLabels( + "TaskResult", + {"en": "Task Result", "fr": "Résultat de tâche"}, + { + "taskId": {"en": "Task ID", "fr": "ID de la tâche"}, + "actionResult": {"en": "Action Result", "fr": "Résultat de l'action"}, + }, +) + +registerModelLabels( + "RequestContext", + {"en": "Request Context", "fr": "Contexte de la demande"}, + { + "originalPrompt": {"en": "Original Prompt", "fr": "Invite originale"}, + "documents": {"en": "Documents", "fr": "Documents"}, + "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"}, + "detectedComplexity": {"en": "Detected Complexity", "fr": "Complexité détectée"}, + "requiresDocuments": {"en": "Requires Documents", "fr": "Nécessite des documents"}, + "requiresWebResearch": {"en": "Requires Web Research", "fr": "Nécessite une recherche web"}, + "requiresAnalysis": {"en": "Requires Analysis", "fr": "Nécessite une analyse"}, + "expectedOutputFormat": {"en": "Expected Output Format", "fr": "Format de sortie attendu"}, + "expectedOutputType": {"en": "Expected Output Type", "fr": "Type de sortie attendu"}, + }, +) + +registerModelLabels( + "UnderstandingResult", + {"en": "Understanding Result", "fr": "Résultat de compréhension"}, + { + "parameters": {"en": "Parameters", "fr": "Paramètres"}, + "intention": {"en": "Intention", "fr": "Intention"}, + "context": {"en": "Context", "fr": "Contexte"}, + 
"documentReferences": {"en": "Document References", "fr": "Références de documents"}, + "tasks": {"en": "Tasks", "fr": "Tâches"}, + }, +) + +registerModelLabels( + "TaskDefinition", + {"en": "Task Definition", "fr": "Définition de tâche"}, + { + "id": {"en": "Task ID", "fr": "ID de la tâche"}, + "objective": {"en": "Objective", "fr": "Objectif"}, + "deliverable": {"en": "Deliverable", "fr": "Livrable"}, + "requiresWebResearch": {"en": "Requires Web Research", "fr": "Nécessite une recherche web"}, + "requiresDocumentAnalysis": {"en": "Requires Document Analysis", "fr": "Nécessite une analyse de documents"}, + "requiresContentGeneration": {"en": "Requires Content Generation", "fr": "Nécessite une génération de contenu"}, + "requiredDocuments": {"en": "Required Documents", "fr": "Documents requis"}, + "extractionOptions": {"en": "Extraction Options", "fr": "Options d'extraction"}, + }, +) + +registerModelLabels( + "TaskResult", + {"en": "Task Result", "fr": "Résultat de tâche"}, + { + "taskId": {"en": "Task ID", "fr": "ID de la tâche"}, + "actionResult": {"en": "Action Result", "fr": "Résultat de l'action"}, + }, +) + +# Register model labels for UI +registerModelLabels( + "ActionDefinition", + {"en": "Action Definition", "fr": "Définition d'action"}, + { + "action": {"en": "Action", "fr": "Action"}, + "actionObjective": {"en": "Action Objective", "fr": "Objectif de l'action"}, + "parametersContext": {"en": "Parameters Context", "fr": "Contexte des paramètres"}, + "learnings": {"en": "Learnings", "fr": "Apprentissages"}, + "documentList": {"en": "Document List", "fr": "Liste de documents"}, + "connectionReference": {"en": "Connection Reference", "fr": "Référence de connexion"}, + "parameters": {"en": "Parameters", "fr": "Paramètres"}, + }, +) + +registerModelLabels( + "AiResponse", + {"en": "AI Response", "fr": "Réponse IA"}, + { + "content": {"en": "Content", "fr": "Contenu"}, + "metadata": {"en": "Metadata", "fr": "Métadonnées"}, + "documents": {"en": "Documents", 
"fr": "Documents"}, + }, +) + +registerModelLabels( + "AiResponseMetadata", + {"en": "AI Response Metadata", "fr": "Métadonnées de réponse IA"}, + { + "title": {"en": "Title", "fr": "Titre"}, + "filename": {"en": "Filename", "fr": "Nom de fichier"}, + "operationType": {"en": "Operation Type", "fr": "Type d'opération"}, + "schemaVersion": {"en": "Schema Version", "fr": "Version du schéma"}, + "extractionMethod": {"en": "Extraction Method", "fr": "Méthode d'extraction"}, + "sourceDocuments": {"en": "Source Documents", "fr": "Documents sources"}, + }, +) + +registerModelLabels( + "DocumentData", + {"en": "Document Data", "fr": "Données de document"}, + { + "documentName": {"en": "Document Name", "fr": "Nom du document"}, + "documentData": {"en": "Document Data", "fr": "Données du document"}, + "mimeType": {"en": "MIME Type", "fr": "Type MIME"}, + }, +) + +registerModelLabels( + "RequestContext", + {"en": "Request Context", "fr": "Contexte de requête"}, + { + "originalPrompt": {"en": "Original Prompt", "fr": "Invite originale"}, + "documents": {"en": "Documents", "fr": "Documents"}, + "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"}, + "detectedComplexity": {"en": "Detected Complexity", "fr": "Complexité détectée"}, + "requiresDocuments": {"en": "Requires Documents", "fr": "Nécessite des documents"}, + "requiresWebResearch": {"en": "Requires Web Research", "fr": "Nécessite une recherche web"}, + "requiresAnalysis": {"en": "Requires Analysis", "fr": "Nécessite une analyse"}, + }, +) + +registerModelLabels( + "UnderstandingResult", + {"en": "Understanding Result", "fr": "Résultat de compréhension"}, + { + "parameters": {"en": "Parameters", "fr": "Paramètres"}, + "intention": {"en": "Intention", "fr": "Intention"}, + "context": {"en": "Context", "fr": "Contexte"}, + "documentReferences": {"en": "Document References", "fr": "Références de documents"}, + "tasks": {"en": "Tasks", "fr": "Tâches"}, + }, +) + +registerModelLabels( + "TaskDefinition", + 
{"en": "Task Definition", "fr": "Définition de tâche"}, + { + "id": {"en": "ID", "fr": "ID"}, + "objective": {"en": "Objective", "fr": "Objectif"}, + "deliverable": {"en": "Deliverable", "fr": "Livrable"}, + "requiresWebResearch": {"en": "Requires Web Research", "fr": "Nécessite une recherche web"}, + "requiresDocumentAnalysis": {"en": "Requires Document Analysis", "fr": "Nécessite une analyse de document"}, + "requiresContentGeneration": {"en": "Requires Content Generation", "fr": "Nécessite une génération de contenu"}, + "requiredDocuments": {"en": "Required Documents", "fr": "Documents requis"}, + "extractionOptions": {"en": "Extraction Options", "fr": "Options d'extraction"}, + }, +) + +registerModelLabels( + "TaskResult", + {"en": "Task Result", "fr": "Résultat de tâche"}, + { + "taskId": {"en": "Task ID", "fr": "ID de tâche"}, + "actionResult": {"en": "Action Result", "fr": "Résultat d'action"}, + }, +) + diff --git a/modules/features/chatPlayground/mainChatPlayground.py b/modules/features/chatPlayground/mainChatPlayground.py index fc148e56..7489608b 100644 --- a/modules/features/chatPlayground/mainChatPlayground.py +++ b/modules/features/chatPlayground/mainChatPlayground.py @@ -16,7 +16,7 @@ async def chatStart(currentUser: User, userInput: UserInputRequest, workflowMode currentUser: Current user userInput: User input request workflowId: Optional workflow ID to continue existing workflow - workflowMode: "Actionplan" for traditional task planning, "Dynamic" for iterative dynamic-style processing, "Template" for template-based processing + workflowMode: "Dynamic" for iterative dynamic-style processing, "Automation" for automated workflow execution Example usage for Dynamic mode: workflow = await chatStart(currentUser, userInput, workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC) diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index 18673987..7dc7db6b 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ 
b/modules/interfaces/interfaceAiObjects.py @@ -262,11 +262,17 @@ class AiObjects: logger.info(f"✅ Image content part processed successfully with model: {model.name}") # Convert to AiCallResponse format + # Note: AiModelResponse doesn't have priceUsd, and processingTime can be None + # Calculate processing time if not provided (fallback to 0.0) + processingTime = getattr(modelResponse, 'processingTime', None) + if processingTime is None: + processingTime = 0.0 + return AiCallResponse( content=modelResponse.content, modelName=model.name, - priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0, - processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0, + priceUsd=0.0, # Price will be calculated elsewhere if needed + processingTime=processingTime, bytesSent=0, # Will be calculated elsewhere bytesReceived=0, # Will be calculated elsewhere errorCount=0 diff --git a/modules/routes/routeChatPlayground.py b/modules/routes/routeChatPlayground.py index 8f23f7fd..3307ac7a 100644 --- a/modules/routes/routeChatPlayground.py +++ b/modules/routes/routeChatPlayground.py @@ -39,7 +39,7 @@ def getServiceChat(currentUser: User): async def start_workflow( request: Request, workflowId: Optional[str] = Query(None, description="Optional ID of the workflow to continue"), - workflowMode: WorkflowModeEnum = Query(..., description="Workflow mode: 'Actionplan', 'Dynamic', or 'Template' (mandatory)"), + workflowMode: WorkflowModeEnum = Query(..., description="Workflow mode: 'Dynamic' or 'Automation' (mandatory)"), userInput: UserInputRequest = Body(...), currentUser: User = Depends(getCurrentUser) ) -> ChatWorkflow: @@ -48,7 +48,7 @@ async def start_workflow( Corresponds to State 1 in the state machine documentation. 
Args: - workflowMode: "Actionplan" for traditional task planning, "Dynamic" for iterative dynamic-style processing, "Template" for template-based processing + workflowMode: "Dynamic" for iterative dynamic-style processing, "Automation" for automated workflow execution """ try: # Start or continue workflow using playground controller diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py index b72f4fa3..be7f6476 100644 --- a/modules/routes/routeSecurityMsft.py +++ b/modules/routes/routeSecurityMsft.py @@ -132,6 +132,80 @@ async def login( detail=f"Failed to initiate Microsoft login: {str(e)}" ) +@router.get("/adminconsent/callback") +async def adminconsent_callback( + admin_consent: Optional[str] = Query(None), + tenant: Optional[str] = Query(None), + error: Optional[str] = Query(None), + error_description: Optional[str] = Query(None), + request: Request = None +) -> HTMLResponse: + """Handle Microsoft Admin Consent callback""" + try: + if error: + logger.error(f"Admin consent error: {error} - {error_description}") + return HTMLResponse( + content=f""" + + Admin Consent Failed + +

Admin Consent Failed

+

Error: {error}

+

Description: {error_description or 'No description provided'}

+

Please contact your administrator.

+ + + """, + status_code=400 + ) + + if admin_consent == "True" and tenant: + logger.info(f"Admin consent granted for tenant: {tenant}") + return HTMLResponse( + content=f""" + + Admin Consent Successful + +

Admin Consent Successful

+

The application has been granted admin consent for tenant: {tenant}

+

All users in this tenant can now use the application without individual consent.

+

You can close this window.

+ + + + """ + ) + else: + logger.warning(f"Admin consent callback received unexpected parameters: admin_consent={admin_consent}, tenant={tenant}") + return HTMLResponse( + content=f""" + + Admin Consent Status + +

Admin Consent Status

+

Admin Consent: {admin_consent or 'Not provided'}

+

Tenant: {tenant or 'Not provided'}

+ + + """ + ) + except Exception as e: + logger.error(f"Error in admin consent callback: {str(e)}", exc_info=True) + return HTMLResponse( + content=f""" + + Admin Consent Error + +

Error Processing Admin Consent

+

{str(e)}

+ + + """, + status_code=500 + ) + @router.get("/auth/callback") async def auth_callback(code: str, state: str, request: Request, response: Response) -> HTMLResponse: """Handle Microsoft OAuth callback""" diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index e03b15cd..57f81aa7 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -2,17 +2,22 @@ import json import logging import re import time -from typing import Dict, Any, List, Optional, Tuple, Union -from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument +from typing import Dict, Any, List, Optional, Tuple +from modules.datamodels.datamodelChat import PromptPlaceholder from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum +from modules.datamodels.datamodelExtraction import ContentPart +from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData from modules.interfaces.interfaceAiObjects import AiObjects from modules.shared.jsonUtils import ( extractJsonString, repairBrokenJson, extractSectionsFromDocument, - buildContinuationContext + buildContinuationContext, + parseJsonWithModel ) +from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler +from modules.datamodels.datamodelAi import JsonAccumulationState logger = logging.getLogger(__name__) @@ -30,7 +35,7 @@ class AiService: """ self.services = serviceCenter # Only depend on interfaces - self.aiObjects = None # Will be initialized in create() or _ensureAiObjectsInitialized() + self.aiObjects = None # Will be initialized in create() or ensureAiObjectsInitialized() # Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready self.extractionService = None @@ -43,7 +48,7 @@ class 
AiService: logger.info("Initializing ExtractionService...") self.extractionService = ExtractionService(self.services) - async def _ensureAiObjectsInitialized(self): + async def ensureAiObjectsInitialized(self): """Ensure aiObjects is initialized and submodules are ready.""" if self.aiObjects is None: logger.info("Lazy initializing AiObjects...") @@ -138,25 +143,11 @@ Respond with ONLY a JSON object in this exact format: response = await self.aiObjects.call(request) - # Parse AI response + # Parse AI response using structured parsing with AiCallOptions model try: - jsonStart = response.content.find('{') - jsonEnd = response.content.rfind('}') + 1 - if jsonStart != -1 and jsonEnd > jsonStart: - analysis = json.loads(response.content[jsonStart:jsonEnd]) - - # Map string values to enums - operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse')) - priority = PriorityEnum(analysis.get('priority', 'balanced')) - processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic')) - - return AiCallOptions( - operationType=operationType, - priority=priority, - processingMode=processingMode, - compressPrompt=analysis.get('compressPrompt', True), - compressContext=analysis.get('compressContext', True) - ) + # Use parseJsonWithModel to parse response into AiCallOptions (handles enum conversion automatically) + analysis = parseJsonWithModel(response.content, AiCallOptions) + return analysis except Exception as e: logger.warning(f"Failed to parse AI analysis response: {e}") @@ -177,8 +168,9 @@ Respond with ONLY a JSON object in this exact format: debugPrefix: str = "ai_call", promptBuilder: Optional[callable] = None, promptArgs: Optional[Dict[str, Any]] = None, - operationId: Optional[str] = None - ) -> str: + operationId: Optional[str] = None, + userPrompt: Optional[str] = None + ) -> str: """ Shared core function for AI calls with repair-based looping system. Automatically repairs broken JSON and continues generation seamlessly. 
@@ -199,36 +191,54 @@ Respond with ONLY a JSON object in this exact format: allSections = [] # Accumulate all sections across iterations lastRawResponse = None # Store last raw JSON response for continuation documentMetadata = None # Store document metadata (title, filename) from first iteration + accumulationState = None # Track accumulation state for string accumulation + + # Get parent log ID for iteration operations + parentLogId = None + if operationId: + parentLogId = self.services.chat.getOperationLogId(operationId) while iteration < maxIterations: iteration += 1 - # Update progress for iteration start + # Create separate operation for each iteration with parent reference + iterationOperationId = None if operationId: - if iteration == 1: - self.services.chat.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}") - else: - # For continuation iterations, show progress incrementally - baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4) # Progress from 0.5 to 0.9 over maxIterations iterations - self.services.chat.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})") + iterationOperationId = f"{operationId}_iter_{iteration}" + self.services.chat.progressLogStart( + iterationOperationId, + "AI Call", + f"Iteration {iteration}", + "", + parentId=parentLogId + ) # Build iteration prompt - if len(allSections) > 0 and promptBuilder and promptArgs: + # CRITICAL: Build continuation prompt if we have sections OR if we have a previous response (even if broken) + # This ensures continuation prompts are built even when JSON is so broken that no sections can be extracted + if (len(allSections) > 0 or lastRawResponse) and promptBuilder and promptArgs: # This is a continuation - build continuation context with raw JSON and rebuild prompt continuationContext = buildContinuationContext(allSections, lastRawResponse) if not lastRawResponse: logger.warning(f"Iteration {iteration}: No 
previous response available for continuation!") + # Filter promptArgs to only include parameters that buildGenerationPrompt accepts + # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext + filteredPromptArgs = { + k: v for k, v in promptArgs.items() + if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content'] + } + # Rebuild prompt with continuation context using the provided prompt builder - iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext) + iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext) else: # First iteration - use original prompt iterationPrompt = prompt # Make AI call try: - if operationId and iteration == 1: - self.services.chat.progressLogUpdate(operationId, 0.51, "Calling AI model") + if iterationOperationId: + self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model") request = AiCallRequest( prompt=iterationPrompt, context="", @@ -245,12 +255,8 @@ Respond with ONLY a JSON object in this exact format: result = response.content # Update progress after AI call - if operationId: - if iteration == 1: - self.services.chat.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})") - else: - progress = 0.6 + (min(iteration - 1, 10) * 0.03) - self.services.chat.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})") + if iterationOperationId: + self.services.chat.progressLogUpdate(iterationOperationId, 0.6, "AI response received") # Write raw AI response to debug file if iteration == 1: @@ -258,54 +264,221 @@ Respond with ONLY a JSON object in this exact format: else: self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") - # Emit stats for this iteration - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - f"ai.call.{debugPrefix}.iteration_{iteration}" - ) + # 
Emit stats for this iteration (only if workflow exists and has id) + if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id: + try: + self.services.chat.storeWorkflowStat( + self.services.workflow, + response, + f"ai.call.{debugPrefix}.iteration_{iteration}" + ) + except Exception as statError: + # Don't break the main loop if stat storage fails + logger.warning(f"Failed to store workflow stat: {str(statError)}") + + # Check for error response using generic error detection (errorCount > 0 or modelName == "error") + if hasattr(response, 'errorCount') and response.errorCount > 0: + errorMsg = f"Iteration {iteration}: Error response detected (errorCount={response.errorCount}), stopping loop: {result[:200] if result else 'empty'}" + logger.error(errorMsg) + break + + if hasattr(response, 'modelName') and response.modelName == "error": + errorMsg = f"Iteration {iteration}: Error response detected (modelName=error), stopping loop: {result[:200] if result else 'empty'}" + logger.error(errorMsg) + break if not result or not result.strip(): logger.warning(f"Iteration {iteration}: Empty response, stopping") break + # Check if this is a text response (not document generation) + # Text responses don't need JSON parsing - return immediately after first successful response + isTextResponse = (promptBuilder is None and promptArgs is None) or debugPrefix == "text" + + if isTextResponse: + # For text responses, return the text immediately - no JSON parsing needed + logger.info(f"Iteration {iteration}: Text response received, returning immediately") + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, True) + return result + # Store raw response for continuation (even if broken) lastRawResponse = result - # Check for complete_response flag in raw response (before parsing) - import re - if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE): - pass # Flag detected, will stop in 
_shouldContinueGeneration - # Extract sections from response (handles both valid and broken JSON) - extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(result, iteration, debugPrefix) + # Only for document generation (JSON responses) + # CRITICAL: Pass allSections and accumulationState to enable string accumulation + extractedSections, wasJsonComplete, parsedResult, accumulationState = self._extractSectionsFromResponse( + result, iteration, debugPrefix, allSections, accumulationState + ) + + # Define KPIs if we just entered accumulation mode (iteration 1, incomplete JSON) + if accumulationState and accumulationState.isAccumulationMode and iteration == 1 and not accumulationState.kpis: + logger.info(f"Iteration {iteration}: Defining KPIs for accumulation tracking") + continuationContext = buildContinuationContext(allSections, result) + # Pass raw response string from first iteration for KPI definition + kpiDefinitions = await self._defineKpisFromPrompt( + userPrompt or prompt, + result, # Pass raw JSON string from first iteration + continuationContext, + debugPrefix + ) + # Initialize KPIs with currentValue = 0 + accumulationState.kpis = [{**kpi, "currentValue": 0} for kpi in kpiDefinitions] + logger.info(f"Defined {len(accumulationState.kpis)} KPIs: {[kpi.get('id') for kpi in accumulationState.kpis]}") + + # Extract and validate KPIs (if in accumulation mode with KPIs defined) + if accumulationState and accumulationState.isAccumulationMode and accumulationState.kpis: + # For KPI extraction, prefer accumulated JSON string over repaired JSON + # because repairBrokenJson may lose data (e.g., empty rows array when JSON is incomplete) + updatedKpis = [] + + # First try to extract from parsedResult (repaired JSON) + if parsedResult: + try: + updatedKpis = JsonResponseHandler.extractKpiValuesFromJson( + parsedResult, + accumulationState.kpis + ) + # Check if we got meaningful values (non-zero) + hasValidValues = any(kpi.get("currentValue", 
0) > 0 for kpi in updatedKpis) + if not hasValidValues and accumulationState.accumulatedJsonString: + # Repaired JSON has empty values, try accumulated string + logger.debug("Repaired JSON has empty KPI values, trying accumulated JSON string") + updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( + accumulationState.accumulatedJsonString, + accumulationState.kpis + ) + except Exception as e: + logger.debug(f"Error extracting KPIs from parsedResult: {e}") + updatedKpis = [] + + # If no parsedResult or extraction failed, try accumulated string + if not updatedKpis and accumulationState.accumulatedJsonString: + try: + updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( + accumulationState.accumulatedJsonString, + accumulationState.kpis + ) + except Exception as e: + logger.debug(f"Error extracting KPIs from accumulated JSON string: {e}") + updatedKpis = [] + + if updatedKpis: + shouldProceed, reason = JsonResponseHandler.validateKpiProgression( + accumulationState, + updatedKpis + ) + + if not shouldProceed: + logger.warning(f"Iteration {iteration}: KPI validation failed: {reason}") + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, False) + if operationId: + self.services.chat.progressLogUpdate(operationId, 0.9, f"KPI validation failed: {reason} ({iteration} iterations)") + break + + # Update KPIs in accumulation state + accumulationState.kpis = updatedKpis + logger.info(f"Iteration {iteration}: KPIs updated: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}") + + # Check if all KPIs completed + allCompleted = True + for kpi in updatedKpis: + targetValue = kpi.get("targetValue", 0) + currentValue = kpi.get("currentValue", 0) + if currentValue < targetValue: + allCompleted = False + break + + if allCompleted: + logger.info(f"Iteration {iteration}: All KPIs completed, finishing accumulation") + wasJsonComplete = True # Mark as complete to exit loop + + # CRITICAL: Handle JSON 
fragments (continuation content) + # Fragment merging happens inside _extractSectionsFromResponse + # If merge fails (returns wasJsonComplete=True), stop iterations and complete JSON + if not extractedSections and allSections: + if wasJsonComplete: + # Merge failed - stop iterations, complete JSON with available data + logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - Stopping iterations, completing JSON with available data") + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, False) + if operationId: + self.services.chat.progressLogUpdate(operationId, 0.9, f"Merge failed, completing JSON ({iteration} iterations)") + break + + # Fragment was detected and merged successfully + logger.info(f"Iteration {iteration}: JSON fragment detected and merged, continuing") + # Don't break - fragment was merged, continue to get more content if needed + # Check if we should continue based on JSON completeness + shouldContinue = self._shouldContinueGeneration( + allSections, + iteration, + wasJsonComplete, + result + ) + if shouldContinue: + if iterationOperationId: + self.services.chat.progressLogUpdate(iterationOperationId, 0.8, "Fragment merged, continuing") + self.services.chat.progressLogFinish(iterationOperationId, True) + continue + else: + # Done - fragment was merged and JSON is complete + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, True) + if operationId: + self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, fragment merged)") + logger.info(f"Generation complete after {iteration} iterations: fragment merged") + break # Extract document metadata from first iteration if available if iteration == 1 and parsedResult and not documentMetadata: documentMetadata = self._extractDocumentMetadata(parsedResult) # Update progress after parsing - if operationId: + if iterationOperationId: if extractedSections: - 
self.services.chat.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})") + self.services.chat.progressLogUpdate(iterationOperationId, 0.8, f"Extracted {len(extractedSections)} sections") if not extractedSections: - # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry - if iteration > 1 and not wasJsonComplete: - logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt") + # CRITICAL: If JSON was incomplete/broken, continue even if no sections extracted + # This allows the AI to retry and complete the broken JSON + if not wasJsonComplete: + logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt") continue - # Otherwise, stop if no sections - logger.warning(f"Iteration {iteration}: No sections extracted, stopping") + # If JSON was complete but no sections extracted - check if it was a fragment + # Fragments are handled above, so if we get here and it's complete, it's an error + logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping") break - # Add new sections to accumulator - allSections.extend(extractedSections) + # Merge new sections with existing sections intelligently + # This handles the STANDARD CASE: broken JSON iterations must be merged together + # The break can occur anywhere - in any section, at any depth + allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration) + + # Log merged sections for debugging + merged_json_str = json.dumps(allSections, indent=2, ensure_ascii=False) + self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}") # Check if we should continue (completion detection) - if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result): + # 
Simple logic: JSON completeness determines continuation + shouldContinue = self._shouldContinueGeneration( + allSections, + iteration, + wasJsonComplete, + result + ) + + if shouldContinue: + # Finish iteration operation (will continue with next iteration) + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, True) continue else: - # Done - build final result + # Done - finish iteration and update main operation + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, True) if operationId: self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)") logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections") @@ -313,11 +486,17 @@ Respond with ONLY a JSON object in this exact format: except Exception as e: logger.error(f"Error in AI call iteration {iteration}: {str(e)}") + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, False) break if iteration >= maxIterations: logger.warning(f"AI call stopped after maximum iterations ({maxIterations})") + # CRITICAL: Complete any incomplete structures in sections before building final result + # This ensures JSON is properly closed even if merge failed or iterations stopped early + allSections = JsonResponseHandler.completeIncompleteStructures(allSections) + # Build final result from accumulated sections final_result = self._buildFinalResultFromSections(allSections, documentMetadata) @@ -326,63 +505,200 @@ Respond with ONLY a JSON object in this exact format: return final_result + # JSON merging logic moved to subJsonResponseHandling.py + + async def _defineKpisFromPrompt( + self, + userPrompt: str, + rawJsonString: Optional[str], + continuationContext: Dict[str, Any], + debugPrefix: str = "kpi" + ) -> List[Dict[str, Any]]: + """ + Make separate AI call to define KPIs based on user prompt and incomplete JSON. 
+ + Args: + userPrompt: Original user prompt + rawJsonString: Raw JSON string from first iteration response + continuationContext: Continuation context (not used for JSON, kept for compatibility) + + Returns: + List of KPI definitions: [{"id": str, "description": str, "jsonPath": str, "targetValue": int}, ...] + """ + # Use raw JSON string from first iteration response + if rawJsonString: + # Remove markdown code fences if present + from modules.shared.jsonUtils import stripCodeFences + incompleteJson = stripCodeFences(rawJsonString.strip()) + else: + incompleteJson = "Not available" + + kpiDefinitionPrompt = f"""Analyze the user request and incomplete JSON to define KPIs (Key Performance Indicators) for tracking progress. + +User Request: +{userPrompt} + +Delivered JSON part: +{incompleteJson} + +Task: Define which JSON items should be tracked to measure completion progress. + +IMPORTANT: Analyze the Delivered JSON part structure to understand what is being tracked: +1. Identify the structure type (table with rows, list with items, etc.) +2. Determine what the jsonPath actually counts (number of rows, number of items, etc.) +3. Calculate targetValue based on what is being tracked, NOT the total quantity requested + +For each trackable item, provide: +- id: Unique identifier (use descriptive name) +- description: What this KPI measures (be specific about what is counted) +- jsonPath: Path to extract value from JSON (use dot notation with array indices, e.g., "documents[0].sections[1].elements[0].rows") +- targetValue: Target value to reach (integer) - MUST match what jsonPath actually tracks (rows count, items count, etc.) 
+ +Return ONLY valid JSON in this format: +{{ + "kpis": [ + {{ + "id": "unique_id", + "description": "Description of what is measured", + "jsonPath": "path.to.value", + "targetValue": 0 + }} + ] +}} + +If no trackable items can be identified, return: {{"kpis": []}} +""" + + try: + request = AiCallRequest( + prompt=kpiDefinitionPrompt, + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.SPEED, + processingMode=ProcessingModeEnum.BASIC + ) + ) + + # Write KPI definition prompt to debug file + self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt") + + response = await self.aiObjects.call(request) + + # Write KPI definition response to debug file + self.services.utils.writeDebugFile(response.content, f"{debugPrefix}_kpi_definition_response") + + # Parse response + extracted = extractJsonString(response.content) + kpiResponse = json.loads(extracted) + + kpiDefinitions = kpiResponse.get("kpis", []) + logger.info(f"Defined {len(kpiDefinitions)} KPIs for tracking") + + return kpiDefinitions + + except Exception as e: + logger.warning(f"Failed to define KPIs: {e}, continuing without KPI tracking") + return [] + def _extractSectionsFromResponse( self, result: str, iteration: int, - debugPrefix: str - ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]: + debugPrefix: str, + allSections: List[Dict[str, Any]] = None, + accumulationState: Optional[JsonAccumulationState] = None + ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]: """ Extract sections from AI response, handling both valid and broken JSON. - Uses repair mechanism for broken JSON. - Checks for "complete_response": true flag to determine completion. 
- Returns (sections, wasJsonComplete, parsedResult) - """ - # First, try to parse as valid JSON - try: - extracted = extractJsonString(result) - parsed_result = json.loads(extracted) - - # Check if AI marked response as complete - isComplete = parsed_result.get("complete_response", False) == True - - # Extract sections from parsed JSON - sections = extractSectionsFromDocument(parsed_result) - - # If AI marked as complete, always return as complete - if isComplete: - return sections, True, parsed_result - - # If in continuation mode (iteration > 1), continuation responses are expected to be fragments - # A fragment with 0 extractable sections means JSON is incomplete - need another iteration - if len(sections) == 0 and iteration > 1: - return sections, False, parsed_result # Mark as incomplete so loop continues - - # First iteration with 0 sections means empty response - stop - if len(sections) == 0: - return sections, True, parsed_result # Complete but empty - - return sections, True, parsed_result # JSON was complete with sections - - except json.JSONDecodeError as e: - # Broken JSON - try repair mechanism (normal in iterative generation) - self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}") - - # Try to repair - repaired_json = repairBrokenJson(result) - - if repaired_json: - # Extract sections from repaired JSON - sections = extractSectionsFromDocument(repaired_json) - return sections, False, repaired_json # JSON was broken but repaired - else: - # Repair failed - log error - logger.error(f"Iteration {iteration}: All repair strategies failed") - return [], False, None - except Exception as e: - logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}") - return [], False, None + NEW BEHAVIOR: + - First iteration: Check if complete, if not start accumulation + - Subsequent iterations: Accumulate strings, parse when complete + + Returns: + Tuple of: + - sections: Extracted sections + - 
wasJsonComplete: True if JSON is complete + - parsedResult: Parsed JSON object + - updatedAccumulationState: Updated accumulation state (None if not in accumulation mode) + """ + if allSections is None: + allSections = [] + + if iteration == 1: + # First iteration - check if complete + parsed = None + try: + extracted = extractJsonString(result) + parsed = json.loads(extracted) + + # Check completeness + if JsonResponseHandler.isJsonComplete(parsed): + # Complete JSON - no accumulation needed + sections = extractSectionsFromDocument(parsed) + logger.info(f"Iteration 1: Complete JSON detected, no accumulation needed") + return sections, True, parsed, None # No accumulation + except Exception: + pass + + # Incomplete - try to extract partial sections from broken JSON + logger.info(f"Iteration 1: Incomplete JSON detected, attempting to extract partial sections") + + partialSections = [] + if parsed: + # Try to extract sections from parsed (even if incomplete) + partialSections = extractSectionsFromDocument(parsed) + else: + # Try to repair broken JSON and extract sections + try: + repaired = repairBrokenJson(result) + if repaired: + partialSections = extractSectionsFromDocument(repaired) + parsed = repaired # Use repaired version for accumulation state + except Exception: + pass # If repair fails, continue with empty sections + + + # Define KPIs (async call - need to handle this) + # For now, create accumulation state without KPIs, will be updated after async call + accumulationState = JsonAccumulationState( + accumulatedJsonString=result, + isAccumulationMode=True, + lastParsedResult=parsed, + allSections=partialSections, + kpis=[] + ) + + # Note: KPI definition will be done in the caller (async context) + return partialSections, False, parsed, accumulationState + + else: + # Subsequent iterations - accumulate + if accumulationState and accumulationState.isAccumulationMode: + accumulated, sections, isComplete, parsedResult = \ + 
JsonResponseHandler.accumulateAndParseJsonFragments( + accumulationState.accumulatedJsonString, + result, + allSections, + iteration + ) + + # Update accumulation state + accumulationState.accumulatedJsonString = accumulated + accumulationState.lastParsedResult = parsedResult + accumulationState.allSections = allSections + sections if sections else allSections + accumulationState.isAccumulationMode = not isComplete + + # Log accumulated JSON for debugging + if parsedResult: + accumulated_json_str = json.dumps(parsedResult, indent=2, ensure_ascii=False) + self.services.utils.writeDebugFile(accumulated_json_str, f"{debugPrefix}_accumulated_json_iteration_{iteration}.json") + + return sections, isComplete, parsedResult, accumulationState + else: + # No accumulation mode - process normally (shouldn't happen) + logger.warning(f"Iteration {iteration}: No accumulation state but iteration > 1") + return [], False, None, None def _shouldContinueGeneration( self, @@ -392,33 +708,76 @@ Respond with ONLY a JSON object in this exact format: rawResponse: str = None ) -> bool: """ - Determine if generation should continue based on JSON completeness, complete_response flag, and task completion. - Returns True if we should continue, False if done. + Determine if AI generation loop should continue. + + CRITICAL: This is ONLY about AI Loop Completion, NOT Action DoD! + Action DoD is checked AFTER the AI Loop completes in _refineDecide. + + Simple logic: + - If JSON parsing failed or incomplete → continue (needs more content) + - If JSON parses successfully and is complete → stop (all content delivered) + - Loop detection prevents infinite loops + + CRITICAL: JSON completeness is determined by parsing, NOT by last character check! + Returns True if we should continue, False if AI Loop is done. 
""" if len(allSections) == 0: return True # No sections yet, continue - # Check for complete_response flag in raw response - if rawResponse: - import re - if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE): - logger.info(f"Iteration {iteration}: AI marked response as complete (complete_response flag detected)") - return False - - # If JSON was complete, stop (AI should have set complete_response if task is done) - # For continuation iterations (iteration > 1), if JSON is complete but no flag was set, - # stop to prevent infinite loops - AI had a chance to set the flag - if wasJsonComplete: - if iteration > 1: - # Continuation mode: JSON complete without flag means we're likely done - # Stop to prevent infinite loops - logger.info(f"Iteration {iteration}: JSON complete without complete_response flag - stopping") - return False - # First iteration with complete JSON - done - return False - else: - # JSON was incomplete/broken - continue + # CRITERION 1: If JSON was incomplete/broken (parsing failed or incomplete) - continue to repair/complete + if not wasJsonComplete: + logger.info(f"Iteration {iteration}: JSON incomplete/broken - continuing to complete") return True + + # CRITERION 2: JSON is complete (parsed successfully) - check for loop detection + if self._isStuckInLoop(allSections, iteration): + logger.warning(f"Iteration {iteration}: Detected potential infinite loop - stopping AI loop") + return False + + # JSON is complete and not stuck in loop - done + logger.info(f"Iteration {iteration}: JSON complete - AI loop done") + return False + + def _isStuckInLoop( + self, + allSections: List[Dict[str, Any]], + iteration: int + ) -> bool: + """ + Detect if we're stuck in a loop (same content being repeated). + + Generic approach: Check if recent iterations are adding minimal or duplicate content. 
+ """ + if iteration < 3: + return False # Need at least 3 iterations to detect a loop + + if len(allSections) == 0: + return False + + # Check if last section is very small (might be stuck) + lastSection = allSections[-1] + elements = lastSection.get("elements", []) + + if isinstance(elements, list) and elements: + lastElem = elements[-1] if elements else {} + else: + lastElem = elements if isinstance(elements, dict) else {} + + # Check content size of last section + lastSectionSize = 0 + if isinstance(lastElem, dict): + for key, value in lastElem.items(): + if isinstance(value, str): + lastSectionSize += len(value) + elif isinstance(value, list): + lastSectionSize += len(str(value)) + + # If last section is very small and we've done many iterations, might be stuck + if lastSectionSize < 100 and iteration > 10: + logger.warning(f"Potential loop detected: iteration {iteration}, last section size {lastSectionSize}") + return True + + return False def _extractDocumentMetadata( self, @@ -502,13 +861,13 @@ Respond with ONLY a JSON object in this exact format: Args: prompt: The planning prompt placeholders: Optional list of placeholder replacements - debugType: Optional debug file type identifier (e.g., 'taskplan', 'actionplan', 'intentanalysis') + debugType: Optional debug file type identifier (e.g., 'taskplan', 'dynamic', 'intentanalysis') If not provided, defaults to 'plan' Returns: Planning JSON response """ - await self._ensureAiObjectsInitialized() + await self.ensureAiObjectsInitialized() # Planning calls always use static parameters options = AiCallOptions( @@ -541,60 +900,66 @@ Respond with ONLY a JSON object in this exact format: self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") return result - # Document Generation AI Call - async def callAiDocuments( + async def callAiContent( self, prompt: str, - documents: Optional[List[ChatDocument]] = None, - options: Optional[AiCallOptions] = None, + options: AiCallOptions, + contentParts: 
Optional[List[ContentPart]] = None, outputFormat: Optional[str] = None, - title: Optional[str] = None - ) -> Union[str, Dict[str, Any]]: + title: Optional[str] = None, + parentOperationId: Optional[str] = None # Parent operation ID for hierarchical logging + ) -> AiResponse: """ - Document generation AI call for all non-planning calls. - Uses the current unified path with extraction and generation. + Unified AI content processing method (replaces callAiDocuments and callAiText). Args: prompt: The main prompt for the AI call - documents: Optional list of documents to process - options: AI call configuration options - outputFormat: Optional output format for document generation + contentParts: Optional list of already-extracted content parts (preferred) + options: AI call configuration options (REQUIRED - operationType must be set) + outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx') title: Optional title for generated documents + parentOperationId: Optional parent operation ID for hierarchical logging Returns: - AI response as string, or dict with documents if outputFormat is specified + AiResponse with content, metadata, and optional documents """ - await self._ensureAiObjectsInitialized() + await self.ensureAiObjectsInitialized() # Create separate operationId for detailed progress tracking workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" - aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}" + aiOperationId = f"ai_content_{workflowId}_{int(time.time())}" - # Start progress tracking for this operation + # Get parent log ID if parent operation exists + parentLogId = None + if parentOperationId: + parentLogId = self.services.chat.getOperationLogId(parentOperationId) + + # Start progress tracking with parent reference self.services.chat.progressLogStart( aiOperationId, - "AI call with documents", - "Document Generation", - f"Format: {outputFormat or 'text'}" + "AI 
content processing", + "Content Processing", + f"Format: {outputFormat or 'text'}", + parentId=parentLogId ) try: - if options is None or (hasattr(options, 'operationType') and options.operationType is None): - # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None) - self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters") - options = await self._analyzePromptAndCreateOptions(prompt) + # Default outputFormat to "txt" if not specified (unified path - all formats handled the same way) + if not outputFormat: + outputFormat = "txt" - # Check operationType FIRST - some operations need direct routing (before document generation checks) + # Extraction is now separate - contentParts must be extracted before calling + # Require operationType to be set before calling opType = getattr(options, "operationType", None) + if not opType: + # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE + options.operationType = OperationTypeEnum.DATA_GENERATE + opType = OperationTypeEnum.DATA_GENERATE - # Handle image generation requests directly via generic path - isImageRequest = (opType == OperationTypeEnum.IMAGE_GENERATE) - - if isImageRequest: - # Image generation uses generic call path but bypasses document generation pipeline + # Handle IMAGE_GENERATE operations + if opType == OperationTypeEnum.IMAGE_GENERATE: self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") - # Call via generic path (no looping for images) request = AiCallRequest( prompt=prompt, context="", @@ -603,62 +968,56 @@ Respond with ONLY a JSON object in this exact format: response = await self.aiObjects.call(request) - # Extract image data from response if response.content: - # For base64 format, return in expected format - if outputFormat == "base64": - result = { - "success": True, - "image_data": response.content, - "documents": [{ - "documentName": "generated_image.png", - 
"documentData": response.content, - "mimeType": "image/png", - "title": title or "Generated Image" - }] - } - else: - # Return raw content for other formats - result = response.content + # Build document data for image + imageDoc = DocumentData( + documentName="generated_image.png", + documentData=response.content, + mimeType="image/png" + ) + + metadata = AiResponseMetadata( + title=title or "Generated Image", + operationType=opType.value + ) - # Emit stats for image generation self.services.chat.storeWorkflowStat( self.services.workflow, response, - f"ai.generate.image" + "ai.generate.image" ) self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated") self.services.chat.progressLogFinish(aiOperationId, True) - return result + + return AiResponse( + content=response.content, + metadata=metadata, + documents=[imageDoc] + ) else: errorMsg = f"No image data returned: {response.content}" logger.error(f"Error in AI image generation: {errorMsg}") self.services.chat.progressLogFinish(aiOperationId, False) - return {"success": False, "error": errorMsg} + raise ValueError(errorMsg) - # Handle WEB_SEARCH and WEB_CRAWL operations - route directly to connectors - # These operations require raw JSON prompts that connectors parse directly - # Must check BEFORE document generation to avoid wrapping the prompt - isWebOperation = (opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL) - - if isWebOperation: - # Web operations: prompt is already structured JSON (AiCallPromptWebSearch/WebCrawl) - # Route directly through centralized AI call - model selector chooses appropriate connector - # Connector parses the JSON prompt and executes the operation + # Handle WEB_SEARCH and WEB_CRAWL operations + if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL: self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}") request = AiCallRequest( - prompt=prompt, # Pass raw JSON prompt 
unchanged - connector will parse it + prompt=prompt, # Raw JSON prompt - connector will parse it context="", options=options ) response = await self.aiObjects.call(request) - # Extract result from response if response.content: - # Emit stats for web operation + metadata = AiResponseMetadata( + operationType=opType.value + ) + self.services.chat.storeWorkflowStat( self.services.workflow, response, @@ -667,177 +1026,246 @@ Respond with ONLY a JSON object in this exact format: self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed") self.services.chat.progressLogFinish(aiOperationId, True) - return response.content + + return AiResponse( + content=response.content, + metadata=metadata + ) else: errorMsg = f"No content returned from {opType.name}: {response.content}" logger.error(f"Error in {opType.name}: {errorMsg}") self.services.chat.progressLogFinish(aiOperationId, False) - return {"success": False, "error": errorMsg} + raise ValueError(errorMsg) + + # Handle document generation (outputFormat always set, defaults to "txt") + # Unified path: all formats (txt, docx, xlsx, pdf, etc.) 
handled the same way + # outputFormat is always set now (defaults to "txt" if not specified) # CRITICAL: For document generation with JSON templates, NEVER compress the prompt - # Compressing would truncate the template structure and confuse the AI - if outputFormat: # Document generation with structured output - if not options: - options = AiCallOptions() - options.compressPrompt = False # JSON templates must NOT be truncated - options.compressContext = False # Context also should not be compressed + options.compressPrompt = False + options.compressContext = False - # Handle document generation with specific output format using unified approach - if outputFormat: - # Use unified generation method for all document generation - if documents and len(documents) > 0: - self.services.chat.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents") - extracted_content = await self.callAiText(prompt, documents, options, aiOperationId) - else: - self.services.chat.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation") - extracted_content = None + # Process contentParts for generation prompt (if provided) + # Use generic _callWithContentParts() which handles all content types (images, text, etc.) 
+ # This automatically processes images with vision models and merges all results + if contentParts: + # Filter out binary/other parts that shouldn't be processed + processableParts = [] + skippedParts = [] + for p in contentParts: + if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))): + processableParts.append(p) + else: + skippedParts.append(p) - self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - # First call without continuation context - generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None) + if skippedParts: + logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation") - # Prepare prompt builder arguments for continuation - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": extracted_content - } - - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON") - # Parse the generated JSON (extract fenced/embedded JSON first) - try: - extracted_json = self.services.utils.jsonExtractString(generated_json) - generated_data = json.loads(extracted_json) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse generated JSON: {str(e)}") - logger.error(f"JSON content length: {len(generated_json)}") - logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}") - logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}") + if processableParts: + # Count 
images for progress update + imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))]) + if imageCount > 0: + self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models") - # Write the problematic JSON to debug file - self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") + # Build proper extraction prompt using buildExtractionPrompt + # This creates a focused extraction prompt, not the user's generation prompt + from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt - self.services.chat.progressLogFinish(aiOperationId, False) - return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"} - - # Extract title and filename from generated document structure - extractedTitle = title # Default to user-provided title - extractedFilename = None - if isinstance(generated_data, dict) and "documents" in generated_data: - documents = generated_data["documents"] - if isinstance(documents, list) and len(documents) > 0: - firstDoc = documents[0] - if isinstance(firstDoc, dict): - # Extract title from document (preferred over user-provided title) - if firstDoc.get("title"): - extractedTitle = firstDoc["title"] - # Extract filename from document - if firstDoc.get("filename"): - extractedFilename = firstDoc["filename"] - - # Ensure metadata contains the extracted title for renderers - if "metadata" not in generated_data: - generated_data["metadata"] = {} - if extractedTitle: - generated_data["metadata"]["title"] = extractedTitle - - self.services.chat.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format") - # Render to final format using the existing renderer - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - # Pass extracted title to renderer (will 
use metadata.title if available) - rendered_content, mime_type = await generationService.renderReport( - generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self + # Determine renderer for format-specific guidelines + renderer = None + if outputFormat: + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generationService = GenerationService(self.services) + renderer = generationService.getRendererForFormat(outputFormat) + except Exception as e: + logger.debug(f"Could not get renderer for format {outputFormat}: {e}") + + extractionPrompt = await buildExtractionPrompt( + outputFormat=outputFormat or "txt", + userPrompt=prompt, # User's prompt as context for what to extract + title=title or "Document", + aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None, + services=self.services, + renderer=renderer ) - # Use extracted filename if available, otherwise generate from title or use generic - if extractedFilename: - documentName = extractedFilename - elif extractedTitle and extractedTitle != "Generated Document": - # Sanitize title for filename - sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle) - sanitized = re.sub(r"_+", "_", sanitized).strip("_") - if sanitized: - # Ensure correct extension - if not sanitized.lower().endswith(f".{outputFormat}"): - documentName = f"{sanitized}.{outputFormat}" - else: - documentName = sanitized + logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt") + + # Use DATA_EXTRACT operation type for extraction + extractionOptions = AiCallOptions( + operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction + compressPrompt=options.compressPrompt, + compressContext=options.compressContext + ) + + extractionRequest = AiCallRequest( + prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt + context="", + options=extractionOptions, + 
contentParts=processableParts + ) + + # Write debug file for extraction prompt (all parts) + self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt") + + # Call generic content parts processor - handles images, text, chunking, merging + extractionResponse = await self.aiObjects.call(extractionRequest) + + # Write debug file for extraction response + if extractionResponse.content: + self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response") + else: + self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response") + logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})") + + # Use extracted content directly for generation prompt + if extractionResponse.errorCount == 0 and extractionResponse.content: + # The extracted content is already merged and ready to use + content_for_generation = extractionResponse.content + logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation") + else: + # Extraction failed - use placeholders + logger.warning(f"Content extraction failed, using placeholders") + placeholderParts = [] + for p in processableParts: + placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]") + content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None + else: + content_for_generation = None + logger.debug("No processable parts found in contentParts") + else: + content_for_generation = None + + self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") + from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt + + generation_prompt = await buildGenerationPrompt( + outputFormat, prompt, title, content_for_generation, None + ) + + promptArgs = { + "outputFormat": outputFormat, + 
"userPrompt": prompt, + "title": title, + "extracted_content": content_for_generation + } + + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") + # Extract user prompt from promptArgs for task completion analysis + userPrompt = None + if promptArgs: + userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") + + generated_json = await self._callAiWithLooping( + generation_prompt, + options, + "document_generation", + buildGenerationPrompt, + promptArgs, + aiOperationId, + userPrompt=userPrompt + ) + + self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON") + try: + extracted_json = self.services.utils.jsonExtractString(generated_json) + generated_data = json.loads(extracted_json) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse generated JSON: {str(e)}") + self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(f"Generated content is not valid JSON: {str(e)}") + + # Extract title and filename from generated document structure + extractedTitle = title + extractedFilename = None + if isinstance(generated_data, dict) and "documents" in generated_data: + docs = generated_data["documents"] + if isinstance(docs, list) and len(docs) > 0: + firstDoc = docs[0] + if isinstance(firstDoc, dict): + if firstDoc.get("title"): + extractedTitle = firstDoc["title"] + if firstDoc.get("filename"): + extractedFilename = firstDoc["filename"] + + # Ensure metadata contains the extracted title + if "metadata" not in generated_data: + generated_data["metadata"] = {} + if extractedTitle: + generated_data["metadata"]["title"] = extractedTitle + + # Create separate operation for content rendering + renderOperationId = f"{aiOperationId}_render" + renderParentLogId = self.services.chat.getOperationLogId(aiOperationId) + self.services.chat.progressLogStart( + renderOperationId, + "Content 
Rendering", + "Rendering", + f"Format: {outputFormat}", + parentId=renderParentLogId + ) + + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generationService = GenerationService(self.services) + self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format") + rendered_content, mime_type = await generationService.renderReport( + generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self + ) + self.services.chat.progressLogFinish(renderOperationId, True) + + # Determine document name + if extractedFilename: + documentName = extractedFilename + elif extractedTitle and extractedTitle != "Generated Document": + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if sanitized: + if not sanitized.lower().endswith(f".{outputFormat}"): + documentName = f"{sanitized}.{outputFormat}" else: - documentName = f"generated.{outputFormat}" + documentName = sanitized else: documentName = f"generated.{outputFormat}" - - # Build result in the expected format - result = { - "success": True, - "content": generated_data, - "documents": [{ - "documentName": documentName, - "documentData": rendered_content, - "mimeType": mime_type, - "title": extractedTitle or "Generated Document" - }], - "is_multi_file": False, - "format": outputFormat, - "title": extractedTitle or title, - "split_strategy": "single", - "total_documents": 1, - "processed_documents": 1 - } - - # Log AI response for debugging - self.services.utils.writeDebugFile(str(result), "document_generation_response", documents) - - self.services.chat.progressLogFinish(aiOperationId, True) - return result + else: + documentName = f"generated.{outputFormat}" - except Exception as e: - logger.error(f"Error rendering document: {str(e)}") - self.services.chat.progressLogFinish(aiOperationId, False) - return {"success": False, "error": f"Rendering failed: {str(e)}"} - 
- # Handle text calls (no output format specified) - self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call") - if documents: - # Use document processing for text calls with documents - result = await self.callAiText(prompt, documents, options, aiOperationId) - else: - # Use shared core function for direct text calls - result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId) - - self.services.chat.progressLogFinish(aiOperationId, True) - return result + # Build document data + docData = DocumentData( + documentName=documentName, + documentData=rendered_content, + mimeType=mime_type, + sourceJson=generated_data # Preserve source JSON for structure validation + ) + + metadata = AiResponseMetadata( + title=extractedTitle or title or "Generated Document", + filename=extractedFilename, + operationType=opType.value if opType else None + ) + + # Write JSON with proper formatting (not str() which can truncate) + jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False) + self.services.utils.writeDebugFile(jsonStr, "document_generation_response") + self.services.chat.progressLogFinish(aiOperationId, True) + + return AiResponse( + content=json.dumps(generated_data), + metadata=metadata, + documents=[docData] + ) + + except Exception as e: + logger.error(f"Error rendering document: {str(e)}") + if renderOperationId: + self.services.chat.progressLogFinish(renderOperationId, False) + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(f"Rendering failed: {str(e)}") except Exception as e: - logger.error(f"Error in callAiDocuments: {str(e)}") + logger.error(f"Error in callAiContent: {str(e)}") self.services.chat.progressLogFinish(aiOperationId, False) raise - async def callAiText( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions, - operationId: Optional[str] = None - ) -> str: - """ - Handle text calls with document processing through 
ExtractionService. - UNIFIED PROCESSING: Always use per-chunk processing for consistency. - """ - await self._ensureAiObjectsInitialized() - return await self.extractionService.processDocumentsPerChunk(documents, prompt, self.aiObjects, options, operationId) - diff --git a/modules/services/serviceAi/subJsonResponseHandling.py b/modules/services/serviceAi/subJsonResponseHandling.py new file mode 100644 index 00000000..f04484d7 --- /dev/null +++ b/modules/services/serviceAi/subJsonResponseHandling.py @@ -0,0 +1,1526 @@ +""" +JSON Response Handling Module + +Handles merging of JSON responses from multiple AI iterations, including: +- Section merging with intelligent overlap detection +- JSON fragment detection and merging +- Deep recursive structure merging +- Overlap detection for complex nested structures +- String accumulation for iterative JSON generation +""" +import json +import logging +import re +from typing import Dict, Any, List, Optional, Tuple + +from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument +from modules.datamodels.datamodelAi import JsonAccumulationState + +logger = logging.getLogger(__name__) + + +class JsonResponseHandler: + """Handles JSON response merging and fragment detection for iterative AI generation.""" + + @staticmethod + def mergeSectionsIntelligently( + existingSections: List[Dict[str, Any]], + newSections: List[Dict[str, Any]], + iteration: int + ) -> List[Dict[str, Any]]: + """ + Intelligently merge sections from multiple iterations. + + This is a GENERIC merging strategy that handles broken JSON iterations. + The break can occur anywhere - in any section, at any depth. + + Merging strategies (in order of priority): + 1. Same Section ID: Merge sections with identical IDs + 2. Same Content-Type + Position: If last section is incomplete and new section continues it + 3. Same Order: Merge sections with same order value + 4. 
Structural Analysis: Detect continuation based on content structure + + Args: + existingSections: Sections accumulated from previous iterations + newSections: Sections extracted from current iteration + iteration: Current iteration number + + Returns: + Merged list of sections + """ + if not newSections: + return existingSections + + if not existingSections: + return newSections + + mergedSections = existingSections.copy() + + for newSection in newSections: + merged = False + + # Strategy 1: Same Section ID - merge directly + newSectionId = newSection.get("id") + if newSectionId: + for i, existingSection in enumerate(mergedSections): + if existingSection.get("id") == newSectionId: + # Merge sections with same ID + mergedSections[i] = JsonResponseHandler.mergeSectionContent( + existingSection, newSection, iteration + ) + merged = True + logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'") + break + + if merged: + continue + + # Strategy 2: Same Content-Type + Position (continuation detection) + # Check if last section is incomplete and new section continues it + if mergedSections: + lastSection = mergedSections[-1] + lastContentType = lastSection.get("content_type") + newContentType = newSection.get("content_type") + + if lastContentType == newContentType: + # Same content type - check if last section is incomplete + if JsonResponseHandler.isSectionIncomplete(lastSection): + # Last section is incomplete, merge with new section + mergedSections[-1] = JsonResponseHandler.mergeSectionContent( + lastSection, newSection, iteration + ) + merged = True + logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})") + continue + + # Strategy 3: Same Order value + newOrder = newSection.get("order") + if newOrder is not None: + for i, existingSection in enumerate(mergedSections): + existingOrder = existingSection.get("order") + if existingOrder is not None and existingOrder == newOrder: + # Merge sections with 
same order + mergedSections[i] = JsonResponseHandler.mergeSectionContent( + existingSection, newSection, iteration + ) + merged = True + logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}") + break + + if merged: + continue + + # Strategy 4: Structural Analysis - detect continuation + # For code_block and table: if last section matches new section type, merge them + if mergedSections: + lastSection = mergedSections[-1] + lastContentType = lastSection.get("content_type") + newContentType = newSection.get("content_type") + + # Both are code blocks - merge them + if lastContentType == "code_block" and newContentType == "code_block": + mergedSections[-1] = JsonResponseHandler.mergeSectionContent( + lastSection, newSection, iteration + ) + merged = True + logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis") + continue + + # Both are tables - merge them (common case for broken JSON iterations) + if lastContentType == "table" and newContentType == "table": + mergedSections[-1] = JsonResponseHandler.mergeSectionContent( + lastSection, newSection, iteration + ) + merged = True + logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis") + continue + + # No merge strategy matched - add as new section + if not merged: + mergedSections.append(newSection) + logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})") + + return mergedSections + + @staticmethod + def isSectionIncomplete(section: Dict[str, Any]) -> bool: + """ + Check if a section is incomplete (broken at the end). 
+ + This detects incomplete sections based on content analysis: + - Code blocks: ends mid-line, ends with comma, ends with incomplete structure + - Text sections: ends mid-sentence, ends with incomplete structure + - Other types: check for incomplete elements + """ + contentType = section.get("content_type", "") + elements = section.get("elements", []) + + if not elements: + return False + + # Handle list of elements + if isinstance(elements, list) and len(elements) > 0: + lastElement = elements[-1] + else: + lastElement = elements + + if not isinstance(lastElement, dict): + return False + + # Check code_block for incomplete code + if contentType == "code_block": + code = lastElement.get("code", "") + if code: + # Check if code ends incompletely: + # - Ends with comma (incomplete CSV line) + # - Ends with number but no newline (incomplete line) + # - Ends mid-token (e.g., "23431,23" - incomplete number) + codeStripped = code.rstrip() + if codeStripped: + # Check for incomplete patterns + if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')): + # Ends with comma or has comma but no final newline - likely incomplete + return True + # Check if last line is incomplete (doesn't end with newline and has partial content) + if not code.endswith('\n') and codeStripped: + # No final newline - might be incomplete + # More sophisticated: check if last number is complete + lastLine = codeStripped.split('\n')[-1] + if lastLine and ',' in lastLine: + # Has commas but might be incomplete + parts = lastLine.split(',') + if parts and len(parts[-1]) < 5: # Last part is very short - might be incomplete + return True + + # Check table for incomplete rows + if contentType == "table": + rows = lastElement.get("rows", []) + if rows: + # Check if last row is incomplete (ends with incomplete data) + lastRow = rows[-1] if isinstance(rows, list) else [] + if isinstance(lastRow, list) and lastRow: + # CRITICAL: Check if last row doesn't have expected number 
of columns (if headers exist) + # This is the PRIMARY indicator of incomplete table rows + headers = lastElement.get("headers", []) + if headers and isinstance(headers, list): + expectedCols = len(headers) + if len(lastRow) < expectedCols: + logger.debug(f"Table section incomplete: last row has {len(lastRow)} columns, expected {expectedCols}") + return True + # Also check if last row ends with incomplete data (e.g., incomplete string) + lastCell = lastRow[-1] if lastRow else "" + if isinstance(lastCell, str): + # If last cell is incomplete (ends with quote or is very short), section might be incomplete + if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell): + logger.debug(f"Table section incomplete: last cell appears incomplete: '{lastCell}'") + return True + # Additional check: if last row has fewer cells than previous rows, it's likely incomplete + if len(rows) > 1: + prevRow = rows[-2] if isinstance(rows, list) and len(rows) > 1 else [] + if isinstance(prevRow, list) and len(prevRow) > len(lastRow): + logger.debug(f"Table section incomplete: last row has {len(lastRow)} cells, previous row has {len(prevRow)}") + return True + + # Check paragraph/text for incomplete sentences + if contentType in ["paragraph", "heading"]: + text = lastElement.get("text", "") + if text: + # Simple heuristic: if doesn't end with sentence-ending punctuation + textStripped = text.rstrip() + if textStripped and not textStripped[-1] in '.!?': + # Might be incomplete, but this is less reliable + # Only mark as incomplete if very short (likely cut off) + if len(textStripped) < 20: + return True + + # Check lists for incomplete items + if contentType in ["bullet_list", "numbered_list"]: + items = lastElement.get("items", []) + if items and isinstance(items, list): + # Check if last item is incomplete (very short or ends with incomplete string) + lastItem = items[-1] if items else None + if isinstance(lastItem, str) and len(lastItem) < 3: + return True + + # Check image for 
incomplete base64 data + if contentType == "image": + imageData = lastElement.get("base64Data", "") + if imageData: + # Base64 strings should end with padding ('=' or '==') + # If it doesn't, it might be incomplete + stripped = imageData.rstrip() + if stripped and not stripped.endswith(('=', '==')): + # Check if it's a valid base64 character sequence that was cut off + if len(stripped) > 0 and stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=': + return True + # If length is not a multiple of 4 (base64 requirement), it might be incomplete + if len(stripped) % 4 != 0: + return True + + # GENERIC CHECK: Recursively analyze structure for incompleteness + # This works for ANY structure: arrays, objects, nested, primitives + return JsonResponseHandler._isStructureIncomplete(lastElement) + + @staticmethod + def _isStructureIncomplete(structure: Any, max_depth: int = 10) -> bool: + """ + GENERIC recursive check for incomplete structures. + + Detects incompleteness by analyzing patterns: + - Arrays: Last item shorter than previous items, incomplete patterns + - Objects: Last object has fewer keys than pattern, incomplete values + - Strings: Very short, ends abruptly, incomplete patterns + - Nested: Recursively checks nested structures + + Works for ANY JSON structure of any depth/complexity. 
+ """ + if max_depth <= 0: + return False + + # Arrays/Lists - check for incomplete patterns + if isinstance(structure, list): + if len(structure) == 0: + return False + + # Check if last item is incomplete compared to previous items + last_item = structure[-1] + + # If we have previous items, compare structure + if len(structure) > 1: + prev_item = structure[-2] + + # If last item is a list and previous is a list, check length + if isinstance(last_item, list) and isinstance(prev_item, list): + if len(last_item) < len(prev_item): + return True # Last row/item has fewer elements - likely incomplete + + # If last item is a dict and previous is a dict, check keys + if isinstance(last_item, dict) and isinstance(prev_item, dict): + if len(last_item) < len(prev_item): + return True # Last object has fewer keys - likely incomplete + + # Recursively check last item for incompleteness + if JsonResponseHandler._isStructureIncomplete(last_item, max_depth - 1): + return True + + # Objects/Dicts - check for incomplete values + elif isinstance(structure, dict): + for key, value in structure.items(): + # Recursively check each value + if JsonResponseHandler._isStructureIncomplete(value, max_depth - 1): + return True + + # Check for incomplete strings + if isinstance(value, str): + # Very short strings might be incomplete + if len(value) > 0 and len(value) < 3: + return True + # Strings ending with incomplete patterns (comma, quote, etc.) 
+ stripped = value.rstrip() + if stripped and stripped.endswith((',', '"', '\\')): + return True + + # Strings - check for incomplete patterns + elif isinstance(structure, str): + # Very short strings might be incomplete + if len(structure) > 0 and len(structure) < 3: + return True + # Strings ending with incomplete patterns + stripped = structure.rstrip() + if stripped and stripped.endswith((',', '"', '\\')): + return True + + return False + + @staticmethod + def mergeSectionContent( + existingSection: Dict[str, Any], + newSection: Dict[str, Any], + iteration: int + ) -> Dict[str, Any]: + """ + Merge content from two sections. + + Handles different content types: + - code_block: Append code, handle overlaps, merge incomplete lines + - paragraph/heading: Append text + - table: Merge rows + - list: Merge items + - Other: Merge elements + """ + contentType = existingSection.get("content_type", "") + existingElements = existingSection.get("elements", []) + newElements = newSection.get("elements", []) + + if not newElements: + return existingSection + + # Handle list of elements + if isinstance(existingElements, list): + existingElem = existingElements[-1] if existingElements else {} + else: + existingElem = existingElements + + if isinstance(newElements, list): + newElem = newElements[0] if newElements else {} + else: + newElem = newElements + + if not isinstance(existingElem, dict) or not isinstance(newElem, dict): + return existingSection + + # Merge based on content type + if contentType == "code_block": + existingCode = existingElem.get("code", "") + newCode = newElem.get("code", "") + + if existingCode and newCode: + mergedCode = JsonResponseHandler.mergeCodeBlocks(existingCode, newCode, iteration) + existingElem["code"] = mergedCode + # Preserve language from existing or new + if "language" not in existingElem and "language" in newElem: + existingElem["language"] = newElem["language"] + + elif contentType in ["paragraph", "heading"]: + existingText = 
existingElem.get("text", "") + newText = newElem.get("text", "") + + if existingText and newText: + # Append text with space if needed + if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n': + mergedText = existingText.rstrip() + " " + newText.lstrip() + else: + mergedText = existingText.rstrip() + "\n" + newText.lstrip() + existingElem["text"] = mergedText + + elif contentType == "table": + # Merge table rows with sophisticated overlap detection + existingRows = existingElem.get("rows", []) + newRows = newElem.get("rows", []) + if existingRows and newRows: + # Use sophisticated overlap detection that handles multiple overlapping rows + mergedRows = JsonResponseHandler.mergeRowsWithOverlap(existingRows, newRows, iteration) + existingElem["rows"] = mergedRows + logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(mergedRows)}") + elif newRows: + # If existing has no rows but new does, use new rows + existingElem["rows"] = newRows + # Preserve headers from existing (or use new if existing has none) + if not existingElem.get("headers") and newElem.get("headers"): + existingElem["headers"] = newElem["headers"] + # Preserve caption from existing (or use new if existing has none) + if not existingElem.get("caption") and newElem.get("caption"): + existingElem["caption"] = newElem.get("caption") + + elif contentType in ["bullet_list", "numbered_list"]: + # Merge list items with sophisticated overlap detection + existingItems = existingElem.get("items", []) + newItems = newElem.get("items", []) + if existingItems and newItems: + mergedItems = JsonResponseHandler.mergeItemsWithOverlap(existingItems, newItems, iteration) + existingElem["items"] = mergedItems + elif newItems: + existingElem["items"] = newItems + + elif contentType == "image": + # Images are typically complete - if new image is provided, replace existing + # But check if existing image data is incomplete (e.g., base64 string 
cut off) + existingImageData = existingElem.get("base64Data", "") + newImageData = newElem.get("base64Data", "") + if existingImageData and newImageData: + # If existing image data doesn't end with valid base64 padding, it might be incomplete + # Base64 padding is '=' or '==' at the end + if not existingImageData.rstrip().endswith(('=', '==')): + # Existing image might be incomplete - merge by appending new data + # This handles cases where base64 string was cut off + existingElem["base64Data"] = existingImageData + newImageData + logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data") + else: + # Existing image is complete - replace with new (or keep existing if new is empty) + if newImageData: + existingElem["base64Data"] = newImageData + elif newImageData: + existingElem["base64Data"] = newImageData + # Preserve other image metadata + if not existingElem.get("altText") and newElem.get("altText"): + existingElem["altText"] = newElem["altText"] + if not existingElem.get("caption") and newElem.get("caption"): + existingElem["caption"] = newElem["caption"] + + else: + # GENERIC FALLBACK: Use deep recursive merging for complex nested structures + # This handles any content type with arbitrary depth and complexity + merged_element = JsonResponseHandler.mergeDeepStructures( + existingElem, + newElem, + iteration, + f"section.{contentType}" + ) + existingElem = merged_element + + # Update section with merged content + mergedSection = existingSection.copy() + if isinstance(existingElements, list): + # Update the last element in the list with merged content + if existingElements: + existingElements[-1] = existingElem + mergedSection["elements"] = existingElements + else: + mergedSection["elements"] = existingElem + + # Preserve metadata from new section if missing in existing + if "order" not in mergedSection and "order" in newSection: + mergedSection["order"] = newSection["order"] + + return mergedSection + + @staticmethod + def 
mergeCodeBlocks(existingCode: str, newCode: str, iteration: int) -> str: + """ + Merge two code blocks intelligently, handling overlaps and incomplete lines. + """ + if not existingCode: + return newCode + if not newCode: + return existingCode + + existingLines = existingCode.rstrip().split('\n') + newLines = newCode.strip().split('\n') + + if not existingLines or not newLines: + return existingCode + "\n" + newCode + + lastExistingLine = existingLines[-1].strip() + firstNewLine = newLines[0].strip() + + # Strategy 1: Exact overlap - remove duplicate line + if lastExistingLine == firstNewLine: + newLines = newLines[1:] + logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge") + + # Strategy 2: Incomplete line merge + # If last existing line ends with comma or is incomplete, merge with first new line + elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5): + # Last line is incomplete - merge with first new line + # Remove trailing comma from existing line + mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip() + existingLines[-1] = mergedLine + newLines = newLines[1:] + logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation") + + # Strategy 3: Partial overlap detection + # Check if first new line starts with the end of last existing line + elif ',' in lastExistingLine and ',' in firstNewLine: + lastExistingParts = lastExistingLine.split(',') + firstNewParts = firstNewLine.split(',') + + # Check for overlap: if last part of existing matches first part of new + if lastExistingParts and firstNewParts: + lastExistingPart = lastExistingParts[-1].strip() + firstNewPart = firstNewParts[0].strip() + + # If they match, there's overlap + if lastExistingPart == firstNewPart and len(lastExistingParts) > 1: + # Remove overlapping part from new line + newLines[0] = ','.join(firstNewParts[1:]) + logger.debug(f"Iteration {iteration}: Removed partial overlap 
@staticmethod
def detectAndParseJsonFragment(
    result: str,
    allSections: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
    """
    Decide whether a response is a JSON fragment and, if so, parse it.

    A response counts as a fragment when it parses as JSON and is NOT a dict
    carrying a "documents" or "sections" key. Any other parsed value (list,
    dict, primitive) is treated as continuation content.

    Args:
        result: Raw response text
        allSections: Sections accumulated so far, used to pick the merge target

    Returns:
        ``{"fragment_data": <parsed JSON>, "target_section_id": <id or None>}``
        for a fragment; None for a full document structure or when extraction
        or parsing raises.
    """
    try:
        parsed = json.loads(extractJsonString(result))

        # A dict with one of the top-level document keys is a full document
        looksLikeDocument = isinstance(parsed, dict) and (
            "documents" in parsed or "sections" in parsed
        )
        if looksLikeDocument:
            return None

        # Everything else is a fragment; it targets the last incomplete section
        target_section_id = JsonResponseHandler.findLastIncompleteSectionId(allSections)

        logger.info(f"Detected GENERIC JSON fragment (type: {type(parsed).__name__}), target: {target_section_id}")

        fragmentInfo = {
            "fragment_data": parsed,
            "target_section_id": target_section_id
        }
        return fragmentInfo

    except Exception as e:
        logger.error(f"Error detecting JSON fragment: {e}")
        logger.debug(f"Fragment detection failed for result: {result[:500]}...")

    return None
+ """ + try: + extracted = extractJsonString(result) + parsed = json.loads(extracted) + + # GENERIC fragment detection: Check if it's NOT a full document structure + is_full_document = False + if isinstance(parsed, dict): + # Full document structure has "documents" or "sections" keys + if "documents" in parsed or "sections" in parsed: + is_full_document = True + + # If it's a full document structure, it's not a fragment + if is_full_document: + return None + + # Otherwise, it's a fragment (can be ANY structure: array, object, primitive, nested) + # Find target: last incomplete section (generic, regardless of content type) + target_section_id = JsonResponseHandler.findLastIncompleteSectionId(allSections) + + logger.info(f"Detected GENERIC JSON fragment (type: {type(parsed).__name__}), target: {target_section_id}") + + return { + "fragment_data": parsed, # Can be ANY JSON structure + "target_section_id": target_section_id + } + + except Exception as e: + logger.error(f"Error detecting JSON fragment: {e}") + logger.debug(f"Fragment detection failed for result: {result[:500]}...") + + return None + + @staticmethod + def findLastIncompleteSectionId( + allSections: List[Dict[str, Any]] + ) -> Optional[str]: + """ + GENERIC: Find the last incomplete section (regardless of content type). + + This is fully generic - works for ANY content type, ANY structure. + Returns the ID of the last section that is incomplete, or None if all are complete. 
@staticmethod
def mergeFragmentIntoSection(
    fragment: Dict[str, Any],
    allSections: List[Dict[str, Any]],
    iteration: int
) -> Optional[List[Dict[str, Any]]]:
    """
    Merge a JSON fragment into the last element of its target section.

    Target lookup:
    1. The section whose "id" equals ``fragment["target_section_id"]``.
    2. Otherwise the last section reported incomplete by isSectionIncomplete.
    When neither yields a section the merge fails.

    The fragment is merged into the target's last element with
    mergeDeepStructures; an empty dict element is appended first when the
    section has no elements or its last element is not a dict.
    ``allSections`` and the target section are not mutated.

    Args:
        fragment: Dict with "fragment_data" and "target_section_id"
        allSections: Sections accumulated so far
        iteration: Iteration number, used for logging and the debug file name

    Returns:
        A new section list with the target replaced by its merged copy, or
        None when the fragment has no data or no target section is found.
    """
    fragment_data = fragment.get("fragment_data")
    target_section_id = fragment.get("target_section_id")

    if fragment_data is None:
        logger.error(f"Iteration {iteration}: ❌ Fragment has no fragment_data - merge FAILED")
        return None

    # Track the target by index so an empty (falsy) section dict is still a valid hit
    target_index = -1

    if target_section_id:
        for i, section in enumerate(allSections):
            if section.get("id") == target_section_id:
                target_index = i
                break

    # Target not found by ID - fall back to the last incomplete section
    if target_index < 0:
        for i in range(len(allSections) - 1, -1, -1):
            if JsonResponseHandler.isSectionIncomplete(allSections[i]):
                target_index = i
                break

    if target_index < 0:
        logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - No target section found for fragment!")
        # f-string formatting: a section without an id must not raise while reporting the failure
        available = [f"{s.get('id')} ({s.get('content_type', 'unknown')})" for s in allSections]
        logger.error(f"Iteration {iteration}: Available sections: {available}")
        return None

    target_section = allSections[target_index]
    # Report the section actually merged into, which differs from the requested id on fallback
    resolved_id = target_section.get("id", target_section_id)

    merged_section = target_section.copy()
    elements = merged_section.get("elements", [])

    if isinstance(elements, list):
        # Copy: the shallow section copy still shares the list with the original section
        elements = list(elements)
    else:
        elements = [elements] if elements else []

    # The merge target must be a dict element; add an empty one when missing
    if not elements or not isinstance(elements[-1], dict):
        elements.append({})

    merged_element = JsonResponseHandler.mergeDeepStructures(
        elements[-1],
        fragment_data,
        iteration,
        f"section.{resolved_id}.fragment"
    )

    elements[-1] = merged_element
    merged_section["elements"] = elements

    # New list, so the next iteration works on the accumulated result
    merged_sections = allSections.copy()
    merged_sections[target_index] = merged_section

    logger.info(f"Iteration {iteration}: ✅ Merged GENERIC fragment (type: {type(fragment_data).__name__}) into section '{resolved_id}'")

    # Best-effort debug dump; failures here must not fail the merge
    try:
        from modules.shared.debugLogger import writeDebugFile
        merged_json_str = json.dumps(merged_sections, indent=2, ensure_ascii=False)
        writeDebugFile(merged_json_str, f"merged_json_iteration_{iteration}.json")
    except Exception as e:
        logger.debug(f"Iteration {iteration}: Failed to write merged JSON debug file: {e}")

    return merged_sections
+ # Update elements with merged content + elements[-1] = merged_element + merged_section["elements"] = elements + + # Update allSections (this ensures accumulative merging - merged data is used for next iteration) + merged_sections = allSections.copy() + merged_sections[target_index] = merged_section + + logger.info(f"Iteration {iteration}: ✅ Merged GENERIC fragment (type: {type(fragment_data).__name__}) into section '{target_section_id}'") + + # Log merged JSON for debugging + try: + from modules.shared.debugLogger import writeDebugFile + merged_json_str = json.dumps(merged_sections, indent=2, ensure_ascii=False) + writeDebugFile(merged_json_str, f"merged_json_iteration_{iteration}.json") + except Exception as e: + logger.debug(f"Iteration {iteration}: Failed to write merged JSON debug file: {e}") + + return merged_sections + + @staticmethod + def completeIncompleteStructures(allSections: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Complete any incomplete structures in sections by ensuring proper JSON structure. + + This ensures JSON is properly closed even if merge failed or iterations stopped early. + Works generically for ANY structure type - recursively processes all nested structures. + + Returns sections with completed structures. + """ + completed_sections = [] + for section in allSections: + completed_section = JsonResponseHandler._completeStructure(section) + completed_sections.append(completed_section) + return completed_sections + + @staticmethod + def _completeStructure(structure: Any) -> Any: + """ + Recursively complete incomplete structures by ensuring arrays/objects are properly structured. + Works generically for ANY JSON structure - no specific logic for content types. 
+ """ + if isinstance(structure, dict): + completed = {} + for key, value in structure.items(): + completed[key] = JsonResponseHandler._completeStructure(value) + return completed + elif isinstance(structure, list): + completed = [] + for item in structure: + completed.append(JsonResponseHandler._completeStructure(item)) + return completed + else: + # Primitive value - return as is + return structure + + @staticmethod + def getContentTypeForFragment(fragment_type: str) -> str: + """Map fragment type to content type.""" + mapping = { + "table_rows": "table", + "table_element": "table", + "code_lines": "code_block", + "code_element": "code_block", + "list_items": "bullet_list" + } + return mapping.get(fragment_type, "paragraph") + + @staticmethod + def deepCompare(obj1: Any, obj2: Any, max_depth: int = 10) -> bool: + """ + Deep recursive comparison of two JSON-serializable objects. + Handles nested structures of any depth and complexity. + + Args: + obj1: First object to compare + obj2: Second object to compare + max_depth: Maximum recursion depth to prevent infinite loops + + Returns: + True if objects are deeply equal, False otherwise + """ + if max_depth <= 0: + return False + + # Type check + if type(obj1) != type(obj2): + return False + + # Primitive types + if isinstance(obj1, (str, int, float, bool, type(None))): + return obj1 == obj2 + + # Lists/arrays - compare element by element + if isinstance(obj1, list): + if len(obj1) != len(obj2): + return False + return all(JsonResponseHandler.deepCompare(item1, item2, max_depth - 1) + for item1, item2 in zip(obj1, obj2)) + + # Dicts/objects - compare key by key + if isinstance(obj1, dict): + if set(obj1.keys()) != set(obj2.keys()): + return False + return all(JsonResponseHandler.deepCompare(obj1[key], obj2[key], max_depth - 1) + for key in obj1.keys()) + + # Fallback for other types + return obj1 == obj2 + + @staticmethod + def findLongestCommonSuffix( + existing_list: List[Any], + new_list: List[Any], + min_overlap: 
int = 1 + ) -> int: + """ + Find the longest common suffix of existing_list that matches a prefix of new_list. + + This handles cases where multiple elements overlap: + - existing: [A, B, C, D] + - new: [C, D, E, F] + - overlap: [C, D] (length 2) + + Returns the length of the overlap (0 if no overlap found). + """ + if not existing_list or not new_list: + return 0 + + max_overlap = min(len(existing_list), len(new_list)) + + # Try all possible overlap lengths (from longest to shortest) + for overlap_len in range(max_overlap, min_overlap - 1, -1): + existing_suffix = existing_list[-overlap_len:] + new_prefix = new_list[:overlap_len] + + # Deep compare suffix and prefix + if all(JsonResponseHandler.deepCompare(existing_suffix[i], new_prefix[i]) + for i in range(overlap_len)): + return overlap_len + + return 0 + + @staticmethod + def findPartialOverlap( + existing_item: Any, + new_item: Any + ) -> Tuple[bool, Optional[Any]]: + """ + Detect if new_item completes an incomplete existing_item. + + Handles cases like: + - existing: ["37643", "37649", "37657", "37663", "37691", "37693", "37699", "37717", "37747", "376"] + - new: ["37643", "37649", ...] + + Returns (is_partial_overlap, merged_item) if partial overlap detected, else (False, None). + """ + # Check if both are lists + if isinstance(existing_item, list) and isinstance(new_item, list): + if not existing_item or not new_item: + return False, None + + # Check if last element of existing is incomplete and matches first of new + last_existing = existing_item[-1] + first_new = new_item[0] + + # If last existing is a string and first new is a string + if isinstance(last_existing, str) and isinstance(first_new, str): + # Check if last existing is incomplete (very short, ends with number, etc.) 
+ if len(last_existing) < 10 and first_new.startswith(last_existing): + # Partial overlap - merge them + merged_last = last_existing + first_new[len(last_existing):] + merged_item = existing_item[:-1] + [merged_last] + new_item[1:] + return True, merged_item + + # Check if last existing is incomplete list and first new completes it + if isinstance(last_existing, list) and isinstance(first_new, list): + if len(last_existing) < len(first_new): + # Check if last existing is prefix of first new + if first_new[:len(last_existing)] == last_existing: + # Merge: replace incomplete last with complete first + merged_item = existing_item[:-1] + [first_new] + new_item[1:] + return True, merged_item + + # Check if existing is incomplete string and new completes it + if isinstance(existing_item, str) and isinstance(new_item, str): + if len(existing_item) < 50 and new_item.startswith(existing_item): + # Partial overlap + merged = existing_item + new_item[len(existing_item):] + return True, merged + + return False, None + + @staticmethod + def mergeRowsWithOverlap( + existing_rows: List[List[str]], + new_rows: List[List[str]], + iteration: int + ) -> List[List[str]]: + """ + Merge table rows with sophisticated overlap detection. + Handles multiple overlapping rows and partial overlaps. 
+ """ + if not new_rows: + return existing_rows + if not existing_rows: + return new_rows + + # Strategy 1: Find longest common suffix/prefix overlap + overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_rows, new_rows, min_overlap=1) + if overlap_len > 0: + logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping table rows, removing duplicates") + return existing_rows + new_rows[overlap_len:] + + # Strategy 2: Check for partial overlap in last row + if len(existing_rows) > 0 and len(new_rows) > 0: + last_existing = existing_rows[-1] + first_new = new_rows[0] + + is_partial, merged_row = JsonResponseHandler.findPartialOverlap(last_existing, first_new) + if is_partial: + logger.debug(f"Iteration {iteration}: Found partial overlap in table rows, merging") + return existing_rows[:-1] + [merged_row] + new_rows[1:] + + # Strategy 3: Simple first/last comparison (fallback) + if isinstance(existing_rows[-1], list) and isinstance(new_rows[0], list): + if list(existing_rows[-1]) == list(new_rows[0]): + logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)") + return existing_rows + new_rows[1:] + + # No overlap detected - append all new rows + return existing_rows + new_rows + + @staticmethod + def mergeItemsWithOverlap( + existing_items: List[str], + new_items: List[str], + iteration: int + ) -> List[str]: + """ + Merge list items with sophisticated overlap detection. + Handles multiple overlapping items and partial overlaps. 
+ """ + if not new_items: + return existing_items + if not existing_items: + return new_items + + # Strategy 1: Find longest common suffix/prefix overlap + overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_items, new_items, min_overlap=1) + if overlap_len > 0: + logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping list items, removing duplicates") + return existing_items + new_items[overlap_len:] + + # Strategy 2: Check for partial overlap in last item + if len(existing_items) > 0 and len(new_items) > 0: + is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing_items[-1], new_items[0]) + if is_partial: + logger.debug(f"Iteration {iteration}: Found partial overlap in list items, merging") + return existing_items[:-1] + [merged_item] + new_items[1:] + + # Strategy 3: Simple first/last comparison (fallback) + if existing_items[-1] == new_items[0]: + logger.debug(f"Iteration {iteration}: Removed duplicate list item (exact match)") + return existing_items + new_items[1:] + + # No overlap detected - append all new items + return existing_items + new_items + + @staticmethod + def mergeDeepStructures( + existing: Any, + new: Any, + iteration: int, + path: str = "root" + ) -> Any: + """ + FULLY GENERIC recursive merge for ANY JSON structure of arbitrary depth/complexity. + + Handles ALL cases generically: + 1. Arrays/Lists: Overlap detection (suffix/prefix), partial overlap, no overlap (continuation) + 2. Objects/Dicts: Key-by-key merge with overlap detection for nested structures + 3. Primitives: Equality check, replacement if different + 4. 
Nested structures: Recursively handles any depth/complexity + + Overlap detection strategies (all generic): + - Array overlap: Finds longest common suffix/prefix, handles partial overlaps + - Object overlap: Detected recursively through key matching and deep comparison + - No overlap: Appends/merges continuation content after cut-off point + + CRITICAL: Fully generic - no specific logic for content types. + Works for ANY JSON structure: arrays, objects, nested, primitives, any combination. + """ + # Type check + if type(existing) != type(new): + # Types don't match - return new (replacement) + logger.debug(f"Iteration {iteration}: Types don't match at {path} ({type(existing).__name__} vs {type(new).__name__}), replacing") + return new + + # Lists/arrays - GENERIC merge with overlap detection + if isinstance(existing, list) and isinstance(new, list): + if not new: + return existing + if not existing: + return new + + # Strategy 1: Find longest common suffix/prefix overlap (handles multiple overlapping elements) + overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing, new, min_overlap=1) + if overlap_len > 0: + logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping elements at {path}, removing duplicates") + return existing + new[overlap_len:] + + # Strategy 2: Check for partial overlap in last element (incomplete element completion) + if len(existing) > 0 and len(new) > 0: + is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing[-1], new[0]) + if is_partial: + logger.debug(f"Iteration {iteration}: Found partial overlap at {path}, merging incomplete element") + return existing[:-1] + [merged_item] + new[1:] + + # Strategy 3: No overlap detected - continuation after cut-off point + # This handles the case where new data starts exactly after the cut-off + logger.debug(f"Iteration {iteration}: No overlap at {path}, appending continuation content ({len(new)} items)") + return existing + new + + # Dicts/objects - GENERIC 
merge with recursive overlap detection + if isinstance(existing, dict) and isinstance(new, dict): + merged = existing.copy() + + # Check for object-level overlap: if new object is subset/superset of existing + # This handles cases where same object structure appears in both + existing_keys = set(existing.keys()) + new_keys = set(new.keys()) + + # If new is subset of existing and values match, it's overlap (skip) + if new_keys.issubset(existing_keys): + all_match = True + for key in new_keys: + if not JsonResponseHandler.deepCompare(existing[key], new[key]): + all_match = False + break + if all_match: + logger.debug(f"Iteration {iteration}: Object at {path} is subset overlap, skipping") + return existing + + # Merge key-by-key with recursive overlap detection + for key, new_value in new.items(): + if key in merged: + # Key exists - merge recursively (handles nested overlap detection) + merged[key] = JsonResponseHandler.mergeDeepStructures( + merged[key], + new_value, + iteration, + f"{path}.{key}" + ) + else: + # New key - add it (continuation content) + merged[key] = new_value + logger.debug(f"Iteration {iteration}: Added new key '{key}' at {path} (continuation)") + + return merged + + # Primitives - equality check + if existing == new: + return existing + # Different primitive values - return new (continuation/replacement) + logger.debug(f"Iteration {iteration}: Primitive at {path} differs, using new value") + return new + + @staticmethod + def cleanEncodingIssues(jsonString: str) -> str: + """ + GENERIC function to remove problematic encoding parts from JSON string. + + Works for ANY JSON structure - removes problematic characters/bytes. 
+ + Args: + jsonString: JSON string that may have encoding issues + + Returns: + Cleaned JSON string + """ + try: + # Try to decode/encode to detect issues + jsonString.encode('utf-8').decode('utf-8') + return jsonString + except UnicodeError: + # Remove problematic parts + cleaned = jsonString.encode('utf-8', errors='ignore').decode('utf-8', errors='ignore') + logger.warning("Removed encoding issues from JSON string") + return cleaned + + @staticmethod + def mergeJsonStringsWithOverlap( + accumulated: str, + newFragment: str + ) -> str: + """ + GENERIC function to merge two JSON strings, handling overlaps intelligently. + + Works for ANY JSON structure - no specific logic for content types. + + Overlap scenarios (all handled generically): + - Exact continuation: newFragment starts exactly where accumulated ends + - Partial overlap: newFragment overlaps with end of accumulated + - Full overlap: newFragment is subset of accumulated + + Strategy: + 1. Find longest common suffix/prefix match (string-based comparison) + 2. Remove duplicate content + 3. 
Concatenate remaining parts + + Args: + accumulated: Previously accumulated JSON string + newFragment: New fragment string to append + + Returns: + Combined JSON string with overlaps removed + """ + if not accumulated: + return newFragment + if not newFragment: + return accumulated + + # Find longest common suffix/prefix match + # Try different overlap lengths (from longest to shortest) + # Overlaps can be as small as 1 character, so we check all possible lengths + maxOverlapLen = min(len(accumulated), len(newFragment)) + + # Start from maximum possible overlap down to 1 character + # This ensures we find the longest overlap, even if it's just 1 character + for overlapLen in range(maxOverlapLen, 0, -1): + accumulatedSuffix = accumulated[-overlapLen:] + newFragmentPrefix = newFragment[:overlapLen] + + if accumulatedSuffix == newFragmentPrefix: + # Found overlap - remove duplicate part + logger.debug(f"Found overlap of {overlapLen} characters, removing duplicate") + return accumulated + newFragment[overlapLen:] + + # No overlap found - simple concatenation + return accumulated + newFragment + + @staticmethod + def isJsonComplete(parsedJson: Dict[str, Any]) -> bool: + """ + GENERIC function to check if parsed JSON structure is complete. + + Works for ANY JSON structure - no specific logic for content types. 
+ + Completeness checks (all generic): + - All arrays are properly closed + - All objects are properly closed + - No incomplete structures + - Recursive validation of nested structures + + Args: + parsedJson: Parsed JSON object + + Returns: + True if JSON is complete, False otherwise + """ + def _checkStructureComplete(obj: Any, depth: int = 0) -> bool: + """Recursively check if structure is complete.""" + if depth > 50: # Prevent infinite recursion + return True + + if isinstance(obj, dict): + # Check all values recursively + for value in obj.values(): + if not _checkStructureComplete(value, depth + 1): + return False + return True + elif isinstance(obj, list): + # Check all items recursively + for item in obj: + if not _checkStructureComplete(item, depth + 1): + return False + return True + else: + # Primitive value - always complete + return True + + try: + return _checkStructureComplete(parsedJson) + except Exception as e: + logger.debug(f"Error checking JSON completeness: {e}") + return False + + @staticmethod + def finalizeJson(parsedJson: Dict[str, Any]) -> Dict[str, Any]: + """ + GENERIC function to finalize complete JSON by adding missing closing elements and repairing corruption. + + Works for ANY JSON structure - no specific logic for content types. + + Steps (all generic): + 1. Analyze structure for missing closing elements (recursively) + 2. Add closing brackets/braces where needed + 3. Repair any remaining corruption + 4. Validate final structure + + Args: + parsedJson: Parsed JSON object that needs finalization + + Returns: + Finalized JSON object + """ + # For now, just return as-is since parsing succeeded + # If needed, can add logic to check for incomplete structures + # and add closing elements + return parsedJson + + @staticmethod + def extractKpiValuesFromJson( + parsedJson: Dict[str, Any], + kpis: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """ + Extract current KPI values from parsed JSON and update KPI objects. 
+ + Args: + parsedJson: Parsed JSON object + kpis: List of KPI objects (will be updated with currentValue) + + Returns: + Updated list of KPI objects with currentValue set + """ + updatedKpis = [] + + for kpi in kpis: + kpiId = kpi.get("id") + jsonPath = kpi.get("jsonPath") + + if not kpiId or not jsonPath: + continue + + # Create copy of KPI object + updatedKpi = kpi.copy() + + try: + # Extract value using JSON path + # Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows" + value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath) + + # Handle None (path doesn't exist - incomplete JSON) + if value is None: + updatedKpi["currentValue"] = kpi.get("currentValue", 0) + logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}") + # Count items/rows/elements based on type + elif isinstance(value, list): + updatedKpi["currentValue"] = len(value) + logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items") + elif isinstance(value, (int, float)): + updatedKpi["currentValue"] = int(value) + logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: numeric value {int(value)}") + else: + updatedKpi["currentValue"] = 0 + logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: non-list/non-numeric value, set to 0") + + except Exception as e: + logger.warning(f"Error extracting KPI {kpiId} from path {jsonPath}: {e}") + updatedKpi["currentValue"] = kpi.get("currentValue", 0) + + updatedKpis.append(updatedKpi) + + return updatedKpis + + @staticmethod + def extractKpiValuesFromIncompleteJson( + jsonString: str, + kpis: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """ + Extract KPI values from incomplete JSON string. + Uses existing JSON completion function to close incomplete structures, then extracts KPIs. 
+ + Args: + jsonString: Incomplete JSON string + kpis: List of KPI objects + + Returns: + Updated list of KPI objects with currentValue set + """ + updatedKpis = [] + + for kpi in kpis: + kpiId = kpi.get("id") + jsonPath = kpi.get("jsonPath") + + if not kpiId or not jsonPath: + continue + + updatedKpi = kpi.copy() + + try: + # Use existing JSON completion function to close incomplete structures + from modules.shared.jsonUtils import extractJsonString, closeJsonStructures + + # Extract JSON string and complete it with missing closing elements + extracted = extractJsonString(jsonString) + completed = closeJsonStructures(extracted) + + # Parse completed JSON + parsed = json.loads(completed) + + # Extract value using path + value = JsonResponseHandler._extractValueByPath(parsed, jsonPath) + + # Handle None (path doesn't exist - incomplete JSON) + if value is None: + updatedKpi["currentValue"] = kpi.get("currentValue", 0) + logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}") + # Count items/rows/elements based on type + elif isinstance(value, list): + updatedKpi["currentValue"] = len(value) + logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items") + elif isinstance(value, (int, float)): + updatedKpi["currentValue"] = int(value) + logger.debug(f"Extracted KPI {kpiId} from completed JSON: numeric value {int(value)}") + else: + updatedKpi["currentValue"] = 0 + logger.debug(f"Extracted KPI {kpiId} from completed JSON: non-list/non-numeric value, set to 0") + + except Exception as e: + logger.warning(f"Error extracting KPI {kpiId} from incomplete JSON: {e}") + updatedKpi["currentValue"] = kpi.get("currentValue", 0) + + updatedKpis.append(updatedKpi) + + return updatedKpis + + @staticmethod + def _extractValueByPath(obj: Any, path: str) -> Any: + """ + Extract value from object using dot-notation path with array indices. 
@staticmethod
def _extractValueByPath(obj: Any, path: str) -> Any:
    """
    Extract value from object using dot-notation path with array indices.

    Each dot-separated segment is either a plain dict key ("items") or a key
    followed by one or more list indices ("sections[0]", "rows[1][2]"). A
    segment may also consist of indices only ("[0]") to index the current list.

    Examples:
        "sections[0].elements[0].items"
        "sections[0].elements[0].rows[1][2]"

    Args:
        obj: Parsed JSON value (dict / list / scalar) to walk
        path: Dot-notation path as described above

    Returns:
        The value found at the path, or None if the path doesn't exist
        (missing key, index out of range or negative, non-integer index,
        or a type mismatch along the way) - for incomplete JSON handling.
    """
    current = obj

    for part in path.split('.'):
        key, bracket, remainder = part.partition('[')
        indexTexts = []

        if bracket and ']' in remainder:
            # Collect every consecutive "[n]" group directly after the key;
            # anything after the last group is ignored.
            segment = bracket + remainder
            while segment.startswith('[') and ']' in segment:
                closePos = segment.index(']')
                indexTexts.append(segment[1:closePos])
                segment = segment[closePos + 1:]
        else:
            # No complete "[...]" group - the whole segment is a plain dict key
            key = part

        if key or not indexTexts:
            # Handle dict access
            if not isinstance(current, dict):
                return None  # Can't access key on non-dict
            current = current.get(key)
            if current is None:
                return None  # Key doesn't exist

        for indexText in indexTexts:
            try:
                index = int(indexText)
            except ValueError:
                return None  # Malformed index such as "[x]" or "[]"

            if not isinstance(current, list):
                return None  # Not a list, can't index
            if not 0 <= index < len(current):
                return None  # Index out of range - incomplete JSON
            current = current[index]

    return current
@staticmethod
def validateKpiProgression(
    accumulationState: "JsonAccumulationState",
    updatedKpis: List[Dict[str, Any]]
) -> Tuple[bool, str]:
    """
    Validate KPI progression from parsed JSON.

    Checks run in this order; the first one that matches decides the result:
    1. No KPIs in the accumulation state      -> (True, "No KPIs defined")
    2. Any KPI went backwards                 -> (False, "KPI <id> went backwards")
    3. Every updated KPI reached targetValue  -> (True, "All KPIs completed")
    4. At least ONE KPI increased, or a KPI not seen before has a value > 0
                                              -> (True, "Progress detected")
    5. Otherwise                              -> (False, "No progress")

    A "currentValue" / "targetValue" that is missing, None or otherwise
    non-numeric is treated as 0, so a KPI definition carrying an explicit null
    cannot crash the comparison with a TypeError.

    Args:
        accumulationState: Current accumulation state (contains kpis)
        updatedKpis: Updated KPI objects with currentValue set

    Returns:
        Tuple of (shouldProceed, reason)
    """
    if not accumulationState.kpis:
        # No KPIs defined - always proceed
        return True, "No KPIs defined"

    def _asNumber(rawValue: Any) -> Any:
        # dict.get(key, 0) still yields None when the key exists with a null value
        return rawValue if isinstance(rawValue, (int, float)) else 0

    # Build dict of last values for comparison
    lastValues = {kpi.get("id"): _asNumber(kpi.get("currentValue")) for kpi in accumulationState.kpis}
    logger.debug(f"KPI validation: lastValues = {lastValues}")
    logger.debug(f"KPI validation: updatedKpis = {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")

    # Check if any KPI went backwards
    for updatedKpi in updatedKpis:
        kpiId = updatedKpi.get("id")
        currentValue = _asNumber(updatedKpi.get("currentValue"))

        if kpiId in lastValues and currentValue < lastValues[kpiId]:
            logger.warning(f"KPI {kpiId} went BACKWARDS: {lastValues[kpiId]} → {currentValue}")
            return False, f"KPI {kpiId} went backwards"

    # Check if all KPIs are completed.
    # NOTE(review): an empty updatedKpis list also counts as "all completed"
    # (vacuous truth), as before - confirm this is what callers expect.
    stillOpen = any(
        _asNumber(updatedKpi.get("currentValue")) < _asNumber(updatedKpi.get("targetValue"))
        for updatedKpi in updatedKpis
    )
    if not stillOpen:
        logger.info("All KPIs completed")
        return True, "All KPIs completed"

    # Check if at least one KPI progressed (stop at the first one found)
    atLeastOneProgressed = False
    for updatedKpi in updatedKpis:
        kpiId = updatedKpi.get("id")
        currentValue = _asNumber(updatedKpi.get("currentValue"))

        if kpiId in lastValues:
            lastValue = lastValues[kpiId]
            if currentValue > lastValue:
                atLeastOneProgressed = True
                logger.info(f"KPI {kpiId} progressed: {lastValue} → {currentValue}")
                break
        elif currentValue > 0:
            # First time seeing this KPI - if it has a value, it's progress
            atLeastOneProgressed = True
            logger.info(f"KPI {kpiId} initialized: {currentValue}")
            break

    if not atLeastOneProgressed:
        logger.warning(f"No KPIs progressed. Last values: {lastValues}, Current values: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
        return False, "No progress"

    return True, "Progress detected"
@staticmethod
def accumulateAndParseJsonFragments(
    accumulatedJsonString: str,
    newFragmentString: str,
    allSections: List[Dict[str, Any]],
    iteration: int
) -> Tuple[str, List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
    """
    Merge a new JSON fragment into the accumulated string and try to parse it.

    Flow:
    1. Clean encoding issues from both strings and merge them with overlap handling
    2. Parse the merged string
    3. Parse succeeded and JSON is complete   -> finalize, extract final sections
    4. Parse succeeded but JSON is incomplete -> extract partial sections
    5. Parse failed                           -> repair, extract sections from the repair
    6. Repair failed                          -> hand back the previous accumulated string

    Args:
        accumulatedJsonString: Previously accumulated JSON string
        newFragmentString: New fragment string from current iteration
        allSections: Sections extracted so far (not read by this function)
        iteration: Current iteration number (used in log messages only)

    Returns:
        Tuple of:
        - accumulatedJsonString: Merged string, or the unchanged input string if repair failed
        - sections: Extracted sections (empty list if repair failed)
        - isComplete: True only if the merged JSON parsed and is complete
        - parsedResult: Finalized / parsed / repaired JSON object, or None if repair failed
    """
    handler = JsonResponseHandler

    # Clean both sides first, then concatenate with overlap detection
    mergedText = handler.mergeJsonStringsWithOverlap(
        handler.cleanEncodingIssues(accumulatedJsonString),
        handler.cleanEncodingIssues(newFragmentString)
    )

    try:
        document = json.loads(extractJsonString(mergedText))

        if not handler.isJsonComplete(document):
            # Parseable but not complete yet - report what is there so far
            partialSections = extractSectionsFromDocument(document)
            logger.info(f"Iteration {iteration}: JSON accumulation incomplete but parseable, extracted {len(partialSections)} partial sections")
            return mergedText, partialSections, False, document

        # Complete JSON - finalize before extracting the sections
        finalDocument = handler.finalizeJson(document)
        finalSections = extractSectionsFromDocument(finalDocument)
        logger.info(f"Iteration {iteration}: JSON accumulation complete, extracted {len(finalSections)} sections")
        return mergedText, finalSections, True, finalDocument

    except json.JSONDecodeError:
        # Still broken - try to repair the merged string
        repairedDocument = repairBrokenJson(mergedText)

        if not repairedDocument:
            # Repair failed: fall back to the string as it was BEFORE this
            # fragment was merged, so previously accumulated data is not lost
            logger.warning(f"Iteration {iteration}: Repair failed, continuing with previous accumulated data")
            return accumulatedJsonString, [], False, None

        repairedSections = extractSectionsFromDocument(repairedDocument)
        logger.info(f"Iteration {iteration}: JSON accumulation repaired, extracted {len(repairedSections)} sections")
        return mergedText, repairedSections, False, repairedDocument
List[str]) -> List[ChatDocument]: - """Get ChatDocuments from a list of document references using all three formats.""" + def getChatDocumentsFromDocumentList(self, documentList) -> List[ChatDocument]: + """Get ChatDocuments from a DocumentReferenceList. + + Args: + documentList: DocumentReferenceList (required) + + Returns: + List[ChatDocument]: List of ChatDocument objects + """ + from modules.datamodels.datamodelDocref import DocumentReferenceList + + if not isinstance(documentList, DocumentReferenceList): + logger.error(f"getChatDocumentsFromDocumentList: Invalid documentList type: {type(documentList)}. Expected DocumentReferenceList.") + return [] + + # Convert to string list for processing + stringRefs = documentList.to_string_list() + try: # Use self.services.workflow which is the ChatWorkflow object (stable during workflow execution) workflow = self.services.workflow @@ -31,7 +47,7 @@ class ChatService: workflowId = workflow.id if hasattr(workflow, 'id') else 'NO_ID' workflowObjId = id(workflow) - logger.debug(f"getChatDocumentsFromDocumentList: input documentList = {documentList}") + logger.debug(f"getChatDocumentsFromDocumentList: input documentList = {stringRefs}") logger.debug(f"getChatDocumentsFromDocumentList: using workflow.id = {workflowId}, workflow object id = {workflowObjId}") # Root cause analysis: Verify workflow.messages integrity and detect workflow changes @@ -72,13 +88,20 @@ class ChatService: logger.debug(f"getChatDocumentsFromDocumentList: unable to enumerate messages for debug: {e}") allDocuments = [] - for docRef in documentList: + for docRef in stringRefs: if docRef.startswith("docItem:"): - # docItem:: - extract ID and find document + # docItem:: or docItem: (filename is optional) + # ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct) + # Both formats are supported: docItem: and docItem:: parts = docRef.split(':') if len(parts) >= 2: - docId = parts[1] - # Find the document by ID + docId 
= parts[1] # This should be the documentId (UUID) + docFound = False + + # ALWAYS try to match by documentId first (regardless of number of parts) + # This handles both formats: + # - docItem: (without filename - still works) + # - docItem:: (with filename - preferred) for message in workflow.messages: # Validate message belongs to this workflow msgWorkflowId = getattr(message, 'workflowId', None) @@ -88,9 +111,42 @@ class ChatService: if message.documents: for doc in message.documents: if doc.id == docId: - docName = getattr(doc, 'fileName', 'unknown') allDocuments.append(doc) + docFound = True + logger.debug(f"Matched document reference '{docRef}' to document {doc.id} (fileName: {getattr(doc, 'fileName', 'unknown')}) by documentId") break + if docFound: + break + + # Fallback: If not found by documentId and it looks like a filename (has file extension), try filename matching + # This handles cases where AI incorrectly generates docItem:filename.docx + if not docFound and '.' in docId and len(parts) == 2: + # Format: docItem:filename (AI generated wrong format) - try to match by filename + filename = parts[1] + logger.warning(f"Document reference '{docRef}' not found by documentId, attempting to match by filename: {filename}") + + for message in workflow.messages: + # Validate message belongs to this workflow + msgWorkflowId = getattr(message, 'workflowId', None) + if not msgWorkflowId or msgWorkflowId != workflowId: + continue + + if message.documents: + for doc in message.documents: + docFileName = getattr(doc, 'fileName', '') + # Match filename exactly or by base name (without path) + if docFileName == filename or docFileName.endswith(filename): + allDocuments.append(doc) + docFound = True + logger.info(f"Matched document reference '{docRef}' to document {doc.id} by filename {docFileName}") + break + if docFound: + break + + if not docFound: + logger.error(f"Could not resolve document reference '{docRef}' - no document found with filename '{filename}'") + elif 
not docFound: + logger.error(f"Could not resolve document reference '{docRef}' - no document found with documentId '{docId}'") elif docRef.startswith("docList:"): # docList::