diff --git a/app.py b/app.py
index a167503c..9ace64b5 100644
--- a/app.py
+++ b/app.py
@@ -16,6 +16,7 @@ from datetime import datetime
from modules.shared.configuration import APP_CONFIG
from modules.shared.eventManagement import eventManager
from modules.features import featuresLifecycle as featuresLifecycle
+from modules.interfaces.interfaceDbAppObjects import getRootInterface
class DailyRotatingFileHandler(RotatingFileHandler):
"""
@@ -275,15 +276,21 @@ instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
async def lifespan(app: FastAPI):
logger.info("Application is starting up")
+ # Get event user for feature lifecycle (system-level user for background operations)
+ rootInterface = getRootInterface()
+ eventUser = rootInterface.getUserByUsername("event")
+ if not eventUser:
+ logger.error("Could not get event user - some features may not start properly")
+
# --- Init Managers ---
- await featuresLifecycle.start()
+ await featuresLifecycle.start(eventUser)
eventManager.start()
yield
# --- Stop Managers ---
eventManager.stop()
- await featuresLifecycle.stop()
+ await featuresLifecycle.stop(eventUser)
logger.info("Application has been shut down")
diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py
index 715772d0..faead52a 100644
--- a/modules/connectors/connectorVoiceGoogle.py
+++ b/modules/connectors/connectorVoiceGoogle.py
@@ -403,6 +403,61 @@ class ConnectorGoogleSpeech:
"error": str(e)
}
+ async def detectLanguage(self, text: str) -> Dict:
+ """
+ Detect the language of text using Google Cloud Translation API.
+
+ Args:
+ text: Text to detect language for
+
+ Returns:
+ Dict containing detected language code and confidence
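+
+        Example (illustrative call and result):
+            result = await connector.detectLanguage("Guten Morgen zusammen")
+            # -> {"success": True, "language": "de", "confidence": 1.0}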
+ """
+ try:
+ if not text.strip():
+ logger.warning("⚠️ Empty text provided for language detection")
+ return {
+ "success": False,
+ "language": "",
+ "error": "Empty text provided"
+ }
+
+ # Use a sample of the text (middle 1000 bytes or full text if smaller)
+ textBytes = text.encode('utf-8')
+ if len(textBytes) > 1000:
+ # Take 1000 bytes from the middle
+ startPos = (len(textBytes) - 1000) // 2
+ textSample = textBytes[startPos:startPos + 1000].decode('utf-8', errors='ignore')
+ else:
+ textSample = text
+
+ logger.info(f"🔍 Detecting language for text sample: '{textSample[:100]}...'")
+
+ # Use translation API with auto-detection (source_language=None)
+ result = self.translate_client.translate(
+ textSample,
+ source_language=None, # Auto-detect
+ target_language='en' # Dummy target, we only need detection
+ )
+
+ detectedLanguage = result.get('detectedSourceLanguage', '')
+
+ logger.info(f"✅ Language detected: {detectedLanguage}")
+
+ return {
+ "success": True,
+ "language": detectedLanguage,
+ "confidence": 1.0 # Google Translation API doesn't provide confidence, assume high
+ }
+
+ except Exception as e:
+ logger.error(f"❌ Google Cloud Language Detection error: {e}")
+ return {
+ "success": False,
+ "language": "",
+ "error": str(e)
+ }
+
async def speechToTranslatedText(self, audioContent: bytes,
fromLanguage: str = "de-DE",
toLanguage: str = "en") -> Dict:
diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py
index 4a678c8b..967e0d9f 100644
--- a/modules/datamodels/datamodelChat.py
+++ b/modules/datamodels/datamodelChat.py
@@ -62,7 +62,7 @@ class ChatLog(BaseModel):
None, description="Performance metrics"
)
parentId: Optional[str] = Field(
- None, description="Parent log entry ID for hierarchical display"
+        None, description="ID (operationId) of the parent operation, used for hierarchical display"
)
operationId: Optional[str] = Field(
None, description="Operation ID to group related log entries"
@@ -828,6 +828,7 @@ class TaskContext(BaseModel):
failurePatterns: Optional[list[str]] = Field(default_factory=list)
failedActions: Optional[list] = Field(default_factory=list)
successfulActions: Optional[list] = Field(default_factory=list)
+ executedActions: Optional[list] = Field(default_factory=list, description="List of executed actions with action name, parameters, and step number")
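+    # Illustrative entry shape (not enforced by this model):
+    #   {"action": "...", "parameters": {...}, "step": 1}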
criteriaProgress: Optional[dict] = None
# Stage 2 context fields (NEW)
diff --git a/modules/features/automation/__init__.py b/modules/features/automation/__init__.py
new file mode 100644
index 00000000..1cc8a344
--- /dev/null
+++ b/modules/features/automation/__init__.py
@@ -0,0 +1,12 @@
+"""
+Automation feature - handles automated workflow execution and scheduling.
+
+Moved from interfaces/interfaceDbChatObjects.py to follow proper architectural separation:
+- Interface layer: Data access only (getAutomationDefinition, etc.)
+- Feature layer: Business logic and orchestration (executeAutomation, syncAutomationEvents)
+"""
+
+from .mainAutomation import executeAutomation, syncAutomationEvents, createAutomationEventHandler
+
+__all__ = ['executeAutomation', 'syncAutomationEvents', 'createAutomationEventHandler']
+
diff --git a/modules/features/automation/mainAutomation.py b/modules/features/automation/mainAutomation.py
new file mode 100644
index 00000000..c0534229
--- /dev/null
+++ b/modules/features/automation/mainAutomation.py
@@ -0,0 +1,287 @@
+"""
+Main automation service - handles automation workflow execution and scheduling.
+
+Moved from interfaces/interfaceDbChatObjects.py to follow proper architectural separation.
+"""
+
+import logging
+import json
+from typing import Dict, Any
+
+from modules.datamodels.datamodelChat import ChatWorkflow, UserInputRequest, WorkflowModeEnum, AutomationDefinition
+from modules.shared.timeUtils import getUtcTimestamp
+from modules.shared.eventManagement import eventManager
+from modules.services import getInterface as getServices
+from modules.features.chatPlayground.mainChatPlayground import chatStart
+from .subAutomationUtils import parseScheduleToCron, planToPrompt, replacePlaceholders
+
+logger = logging.getLogger(__name__)
+
+
+async def executeAutomation(automationId: str, chatInterface) -> ChatWorkflow:
+ """Execute automation workflow immediately (test mode) with placeholder replacement.
+
+ Args:
+ automationId: ID of automation to execute
+ chatInterface: ChatObjects interface instance for data access
+
+ Returns:
+ ChatWorkflow instance created by automation execution
+ """
+ executionStartTime = getUtcTimestamp()
+ executionLog = {
+ "timestamp": executionStartTime,
+ "workflowId": None,
+ "status": "running",
+ "messages": []
+ }
+
+ try:
+ # 1. Load automation definition
+ automation = chatInterface.getAutomationDefinition(automationId)
+ if not automation:
+ raise ValueError(f"Automation {automationId} not found")
+
+ executionLog["messages"].append(f"Started execution at {executionStartTime}")
+
+ # 2. Replace placeholders in template to generate plan
+ template = automation.get("template", "")
+ placeholders = automation.get("placeholders", {})
+ planJson = replacePlaceholders(template, placeholders)
+ try:
+ plan = json.loads(planJson)
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse plan JSON after placeholder replacement: {str(e)}")
+ logger.error(f"Template: {template[:500]}...")
+ logger.error(f"Placeholders: {placeholders}")
+ logger.error(f"Generated planJson (first 1000 chars): {planJson[:1000]}")
+ logger.error(f"Error position: line {e.lineno}, column {e.colno}, char {e.pos}")
+            if e.pos is not None:
+ start = max(0, e.pos - 100)
+ end = min(len(planJson), e.pos + 100)
+ logger.error(f"Context around error: ...{planJson[start:end]}...")
+ raise ValueError(f"Invalid JSON after placeholder replacement: {str(e)}")
+ executionLog["messages"].append("Template placeholders replaced successfully")
+
+ # 3. Get user who created automation
+ creatorUserId = automation.get("_createdBy")
+
+ # CRITICAL: Automation MUST run as creator user only, or fail
+ if not creatorUserId:
+ errorMsg = f"Automation {automationId} has no creator user (_createdBy field missing). Cannot execute automation."
+ logger.error(errorMsg)
+ executionLog["messages"].append(errorMsg)
+ raise ValueError(errorMsg)
+
+ # Get user from database using services
+ services = getServices(chatInterface.currentUser, None)
+ creatorUser = services.interfaceDbApp.getUser(creatorUserId)
+ if not creatorUser:
+ raise ValueError(f"Creator user {creatorUserId} not found")
+
+ executionLog["messages"].append(f"Using creator user: {creatorUserId}")
+
+ # 4. Create UserInputRequest from plan
+ # Embed plan JSON in prompt for TemplateMode to extract
+ promptText = planToPrompt(plan)
+ planJsonStr = json.dumps(plan)
+    # Append the plan JSON after the prompt text so TemplateMode can extract it
+ promptWithPlan = f"{promptText}\n\n\n{planJsonStr}\n"
+
+ userInput = UserInputRequest(
+ prompt=promptWithPlan,
+ listFileId=[],
+ userLanguage=creatorUser.language or "en"
+ )
+
+ executionLog["messages"].append("Starting workflow execution")
+
+ # 5. Start workflow using chatStart
+ workflow = await chatStart(
+ currentUser=creatorUser,
+ userInput=userInput,
+ workflowMode=WorkflowModeEnum.WORKFLOW_AUTOMATION,
+ workflowId=None
+ )
+
+ executionLog["workflowId"] = workflow.id
+ executionLog["status"] = "completed"
+ executionLog["messages"].append(f"Workflow {workflow.id} started successfully")
+ logger.info(f"Started workflow {workflow.id} with plan containing {len(plan.get('tasks', []))} tasks (plan embedded in userInput)")
+
+ # Set workflow name with "automated" prefix
+ automationLabel = automation.get("label", "Unknown Automation")
+ workflowName = f"automated: {automationLabel}"
+ workflow = chatInterface.updateWorkflow(workflow.id, {"name": workflowName})
+ logger.info(f"Set workflow {workflow.id} name to: {workflowName}")
+
+ # Update automation with execution log
+ executionLogs = automation.get("executionLogs", [])
+ executionLogs.append(executionLog)
+ # Keep only last 50 executions
+ if len(executionLogs) > 50:
+ executionLogs = executionLogs[-50:]
+
+ chatInterface.db.recordModify(
+ AutomationDefinition,
+ automationId,
+ {"executionLogs": executionLogs}
+ )
+
+ return workflow
+ except Exception as e:
+ # Log error to execution log
+ executionLog["status"] = "error"
+ executionLog["messages"].append(f"Error: {str(e)}")
+
+ # Update automation with execution log even on error
+ try:
+ automation = chatInterface.getAutomationDefinition(automationId)
+ if automation:
+ executionLogs = automation.get("executionLogs", [])
+ executionLogs.append(executionLog)
+ if len(executionLogs) > 50:
+ executionLogs = executionLogs[-50:]
+ chatInterface.db.recordModify(
+ AutomationDefinition,
+ automationId,
+ {"executionLogs": executionLogs}
+ )
+ except Exception as logError:
+ logger.error(f"Error saving execution log: {str(logError)}")
+
+ raise
+
+
+async def syncAutomationEvents(chatInterface, eventUser) -> Dict[str, Any]:
+ """Automation event handler - syncs scheduler with all active automations.
+
+ Args:
+ chatInterface: ChatObjects interface instance for data access
+ eventUser: System-level event user for accessing automations
+
+ Returns:
+ Dictionary with sync results (synced count and event IDs)
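+
+        Example return value (illustrative):
+            {"synced": 1, "events": {"<automationId>": "automation.<automationId>"}}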
+ """
+ # Get all automation definitions (for current mandate)
+ allAutomations = chatInterface.db.getRecordset(AutomationDefinition)
+ filtered = chatInterface._uam(AutomationDefinition, allAutomations)
+
+ registeredEvents = {}
+
+ for automation in filtered:
+ automationId = automation.get("id")
+ isActive = automation.get("active", False)
+ currentEventId = automation.get("eventId")
+ schedule = automation.get("schedule")
+
+ if not schedule:
+ logger.warning(f"Automation {automationId} has no schedule, skipping")
+ continue
+
+ try:
+ # Parse schedule to cron kwargs
+ cronKwargs = parseScheduleToCron(schedule)
+
+ if isActive:
+ # Remove existing event if present (handles schedule changes)
+ if currentEventId:
+ try:
+ eventManager.remove(currentEventId)
+ except Exception as e:
+ logger.warning(f"Error removing old event {currentEventId}: {str(e)}")
+
+ # Register new event
+ newEventId = f"automation.{automationId}"
+
+ # Create event handler function
+ handler = createAutomationEventHandler(automationId, eventUser)
+
+ # Register cron job
+ eventManager.registerCron(
+ jobId=newEventId,
+ func=handler,
+ cronKwargs=cronKwargs,
+ replaceExisting=True
+ )
+
+ # Update automation with new eventId
+ if currentEventId != newEventId:
+ chatInterface.db.recordModify(
+ AutomationDefinition,
+ automationId,
+ {"eventId": newEventId}
+ )
+
+ registeredEvents[automationId] = newEventId
+ else:
+ # Remove event if exists
+ if currentEventId:
+ try:
+ eventManager.remove(currentEventId)
+ chatInterface.db.recordModify(
+ AutomationDefinition,
+ automationId,
+ {"eventId": None}
+ )
+ except Exception as e:
+ logger.warning(f"Error removing event {currentEventId}: {str(e)}")
+ except Exception as e:
+ logger.error(f"Error syncing automation {automationId}: {str(e)}")
+
+ return {
+ "synced": len(registeredEvents),
+ "events": registeredEvents
+ }
+
+
+def createAutomationEventHandler(automationId: str, eventUser):
+ """Create event handler function for a specific automation.
+
+ Args:
+ automationId: ID of automation to create handler for
+ eventUser: System-level event user for accessing automations (captured in closure)
+
+ Returns:
+ Async handler function for scheduled automation execution
+ """
+ async def handler():
+ try:
+ if not eventUser:
+ logger.error("Event user not available for automation execution")
+ return
+
+ # Get services for event user (provides access to interfaces)
+ eventServices = getServices(eventUser, None)
+
+ # Load automation using event user context
+ automation = eventServices.interfaceDbChat.getAutomationDefinition(automationId)
+ if not automation or not automation.get("active"):
+ logger.warning(f"Automation {automationId} not found or not active, skipping execution")
+ return
+
+ # Get creator user
+ creatorUserId = automation.get("_createdBy")
+ if not creatorUserId:
+ logger.error(f"Automation {automationId} has no creator user")
+ return
+
+            # Look up the creator user via the event user's services
+ creatorUser = eventServices.interfaceDbApp.getUser(creatorUserId)
+ if not creatorUser:
+ logger.error(f"Creator user {creatorUserId} not found for automation {automationId}")
+ return
+
+ # Get services for creator user (provides access to interfaces)
+ creatorServices = getServices(creatorUser, None)
+
+ # Execute automation with creator user's context
+ # executeAutomation is in same module, so we can call it directly
+ await executeAutomation(automationId, creatorServices.interfaceDbChat)
+ logger.info(f"Successfully executed automation {automationId} as user {creatorUserId}")
+ except Exception as e:
+ logger.error(f"Error executing automation {automationId}: {str(e)}")
+
+ return handler
+
diff --git a/modules/features/automation/subAutomationUtils.py b/modules/features/automation/subAutomationUtils.py
new file mode 100644
index 00000000..f1948ffa
--- /dev/null
+++ b/modules/features/automation/subAutomationUtils.py
@@ -0,0 +1,108 @@
+"""
+Utility functions for automation feature.
+
+Moved from interfaces/interfaceDbChatObjects.py.
+"""
+
+import json
+from typing import Dict, Any
+
+
+def parseScheduleToCron(schedule: str) -> Dict[str, Any]:
+ """Parse schedule string to cron kwargs for APScheduler"""
+ parts = schedule.split()
+ if len(parts) != 5:
+ raise ValueError(f"Invalid schedule format: {schedule}")
+
+ return {
+ "minute": parts[0],
+ "hour": parts[1],
+ "day": parts[2],
+ "month": parts[3],
+ "day_of_week": parts[4]
+ }
+
+
+def planToPrompt(plan: Dict) -> str:
+ """Convert plan structure to prompt string for workflow execution"""
+ return plan.get("userMessage", plan.get("overview", "Execute automation workflow"))
+
+
+def replacePlaceholders(template: str, placeholders: Dict[str, str]) -> str:
+ """Replace placeholders in template with actual values. Placeholder format: {{KEY:PLACEHOLDER_NAME}}"""
+ result = template
+ for placeholderName, value in placeholders.items():
+ pattern = f"{{{{KEY:{placeholderName}}}}}"
+
+ # Check if placeholder is in an array context like ["{{KEY:...}}"]
+ # If value is a JSON array/dict, we should replace the entire ["{{KEY:...}}"] with the array
+ arrayPattern = f'["{pattern}"]'
+ if arrayPattern in result:
+ # Check if value is a JSON array/dict
+ isArrayValue = False
+ arrayValue = None
+
+ if isinstance(value, (list, dict)):
+ isArrayValue = True
+ arrayValue = json.dumps(value)
+ elif isinstance(value, str):
+ try:
+ parsed = json.loads(value)
+ if isinstance(parsed, (list, dict)):
+ isArrayValue = True
+ arrayValue = value # Already valid JSON string
+ except (json.JSONDecodeError, ValueError):
+ pass
+
+ if isArrayValue:
+ # Replace ["{{KEY:...}}"] with the array value
+ result = result.replace(arrayPattern, arrayValue)
+ continue # Skip the regular replacement below
+
+ # Regular replacement - check if in quoted context
+ patternStart = result.find(pattern)
+ isQuoted = False
+ if patternStart > 0:
+ charBefore = result[patternStart - 1] if patternStart > 0 else None
+ patternEnd = patternStart + len(pattern)
+ charAfter = result[patternEnd] if patternEnd < len(result) else None
+ if charBefore == '"' and charAfter == '"':
+ isQuoted = True
+
+ # Handle different value types
+ if isinstance(value, (list, dict)):
+ # Python list/dict - convert to JSON
+ replacement = json.dumps(value)
+ elif isinstance(value, str):
+ # String value - check if it's a JSON string representing list/dict
+ try:
+ parsed = json.loads(value)
+ if isinstance(parsed, (list, dict)):
+ # It's a JSON string of a list/dict
+ if isQuoted:
+ # In quoted context, escape the JSON string
+ escaped = json.dumps(value)
+ replacement = escaped[1:-1] # Remove outer quotes
+ else:
+ # In unquoted context, use JSON directly
+ replacement = value
+ else:
+ # It's a JSON string of a primitive
+ if isQuoted:
+ escaped = json.dumps(value)
+ replacement = escaped[1:-1]
+ else:
+ replacement = value
+ except (json.JSONDecodeError, ValueError):
+ # Not valid JSON - treat as plain string
+ if isQuoted:
+ escaped = json.dumps(value)
+ replacement = escaped[1:-1]
+ else:
+ replacement = value
+ else:
+ # Numbers, booleans, None - convert to string
+ replacement = str(value)
+ result = result.replace(pattern, replacement)
+ return result
+
diff --git a/modules/features/chatAlthaus/COMPONENT_DIAGRAM.md b/modules/features/chatAlthaus/COMPONENT_DIAGRAM.md
deleted file mode 100644
index 5ae3edae..00000000
--- a/modules/features/chatAlthaus/COMPONENT_DIAGRAM.md
+++ /dev/null
@@ -1,211 +0,0 @@
-# Component Diagram: Althaus Customer Chatbot
-
-## Overview
-
-This diagram shows the high-level architecture of the Althaus chatbot application with all involved components, data flows, and communication paths.
-
-## Component Diagram
-
-```mermaid
-graph TB
-    subgraph "PowerOn Chat UI"
-        ChatUI[Chat Interface]
-    end
-
-    subgraph "PowerOn Platform"
-        Gateway[Gateway Backend<br/>Event Scheduler & Data Query API]
-        GatewayDB[(PostgreSQL)]
-        AIServices[Dynamic AI, Tavily]
-    end
-
-    subgraph "Tenant althaus-ag.ch"
-        subgraph "PowerOn PreProcessing"
-            PreProcessing[Pre-Processing Service]
-            PreProcessingDB[(PostgreSQL<br/>Memory DB)]
-        end
-
-        subgraph "MSFT Services"
-            PowerBI[Power BI]
-            TenantServices[Azure DC, DNA Center]
-        end
-    end
-
-    %% Main communication
-    ChatUI -->|"Data Queries<br/>User/Password Auth"| Gateway
-    Gateway -->|"SQL Queries<br/>X-PP-API-Key"| PreProcessing
-    Gateway -->|"Config Update<br/>Daily 01:00 UTC"| PreProcessing
-
-    %% Data flow
-    PowerBI -->|"Raw Data"| PreProcessing
-    PreProcessing --> PreProcessingDB
-    PreProcessingDB -->|"Query Results"| Gateway
-    Gateway --> ChatUI
-    Gateway --> GatewayDB
-
-    %% Styling
-    classDef platform fill:#e1f5ff,stroke:#01579b,stroke-width:2px
-    classDef frontend fill:#f3e5f5,stroke:#4a148c,stroke-width:2px
-    classDef preprocessing fill:#fff3e0,stroke:#e65100,stroke-width:2px
-    classDef customer fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px
-    classDef database fill:#fce4ec,stroke:#880e4f,stroke-width:2px
-
-    class Gateway,AIServices platform
-    class ChatUI frontend
-    class PreProcessing preprocessing
-    class PowerBI,TenantServices customer
-    class GatewayDB,PreProcessingDB database
-```
-
-## Component Descriptions
-
-### 1. Gateway Backend (gateway.poweron-center.net)
-
-**Main components:**
-- **FastAPI Application**: Central backend application of the PowerOn Platform
-- **Event Scheduler (chatAlthaus)**:
-  - Daily scheduler at 01:00 UTC
-  - Sends configuration updates to the Pre-Processing Service
-  - Uses the `X-PP-API-Key` header for authentication
-- **Configuration Management**:
-  - Manages secrets and environment variables
-  - Encrypts/decrypts secrets
-  - Supports multiple environments (dev, int, prod)
-- **Data Query API**:
-  - `POST /api/v1/dataquery/query` - execute an SQL query
-  - `GET /api/v1/dataquery/schema` - retrieve the database schema
-  - `GET /api/v1/dataquery/schema/{table_name}` - retrieve a table schema
-- **PostgreSQL Database**: Central database for gateway data
-
-**Technology:**
-- Python/FastAPI
-- PostgreSQL
-- APScheduler for event management
-
-**External AI services:**
-- **Dynamic AI**: LLM service for AI requests
-- **Tavily**: Web search service for web research
-
-### 2. PowerOn Chat UI (althaus-chat.poweron-center.net)
-
-**Main components:**
-- **React Application**: Frontend interface for the chatbot
-- **Authentication**: User/password-based authentication with JWT tokens
-
-**Communication:**
-- Uses the 3 Data Query endpoints of the Gateway
-- Authenticates to the Gateway with user/password
-- Receives responses via the Gateway API
-
-**Technology:**
-- React
-- REST API calls
-
-### 3. Tenant althaus-ag.ch
-
-#### 3.1 PowerOn PreProcessing
-
-**Main components:**
-- **FastAPI Application**: Pre-processing service in the customer's Azure tenant
-- **Pre-Processing API**:
-  - `POST /api/v1/dataprocessor/update-db-with-config` - update the database with the configuration
-  - Authentication: `X-PP-API-Key` header
-- **PostgreSQL Memory Database**:
-  - Stores processed data
-  - Used by the chat for queries
-
-**Data flow:**
-- Receives raw data from the Power BI semantic model
-- Processes data according to configured steps (keep, fillna, to_numeric, dropna, etc.)
-- Stores processed data in the memory database
-- Answers SQL queries from the Gateway
-
-**Technology:**
-- Python/FastAPI
-- PostgreSQL
-- Azure App Service (in the customer tenant althaus-ag.ch)
-
-#### 3.2 MSFT Services
-
-**Power BI semantic model:**
-- Data source for raw data
-- Read by the Pre-Processing Service
-
-**Azure domain controller:**
-- Authentication service
-- Used by the Gateway for authentication
-
-**DNA Center:**
-- Network management service
-- Used by the Gateway
-
-## Data Flow
-
-### 1. Data Refresh (Scheduled)
-```
-Power BI semantic model (tenant althaus-ag.ch)
-    → PowerOn PreProcessing (processes data)
-    → PostgreSQL Memory DB (stores processed data)
-
-Gateway Event Scheduler (daily at 01:00 UTC)
-    → POST /api/v1/dataprocessor/update-db-with-config
-    → PowerOn PreProcessing (updates configuration)
-```
-
-### 2. Chat Interaction (User Request)
-```
-PowerOn Chat UI
-    → POST /api/v1/dataquery/query (with user/password auth)
-    → Gateway Data Query API
-    → POST /api/v1/dataquery/query (with X-PP-API-Key)
-    → PowerOn PreProcessing
-    → PostgreSQL Memory DB (executes query)
-    → PowerOn PreProcessing (returns results)
-    → Gateway Data Query API
-    → PowerOn Chat UI (displays response)
-```
-
-### 3. AI Integration
-```
-PowerOn Chat UI
-    → Gateway (relays AI requests)
-    → Dynamic AI & Tavily (in the PowerOn Platform)
-    → Gateway (combines results)
-    → PowerOn Chat UI (displays response)
-```
-
-## Authentication
-
-### Gateway → PowerOn PreProcessing
-- **Header**: `X-PP-API-Key`
-- **Value**: From the Gateway config (`PREPROCESS_ALTHAUS_CHAT_SECRET`)
-- **Used by**: Event Scheduler and Data Query API
-
-### PowerOn Chat UI → Gateway
-- **Method**: User/password
-- **Token**: JWT token (after successful authentication)
-- **Used by**: All API calls from the chat frontend
-
-### Additional Authentication
-- The Gateway uses the Azure domain controller for additional authentication
-- Different API endpoints may use different authentication mechanisms
-
-## Deployment
-
-- **PowerOn Platform**: gateway.poweron-center.net
-- **PowerOn Chat UI**: althaus-chat.poweron-center.net
-- **PowerOn PreProcessing**: Azure App Service in the customer tenant (althaus-ag.ch)
-  - URL: `poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net`
-- **Tenant althaus-ag.ch**: Contains PowerOn PreProcessing and MSFT services (Power BI, Azure DC, DNA Center) in the Althaus AG Azure tenant
-
-## Configuration
-
-### Gateway Config Keys
-- `PREPROCESS_ALTHAUS_CHAT_SECRET`: API key for the Pre-Processing Service
-- `APP_ENV_TYPE`: Environment (dev, int, prod)
-- Additional gateway-specific settings
-
-### Pre-Processing Config
-- The configuration is defined as JSON in the Gateway code
-- Sent daily at 01:00 UTC to the Pre-Processing Service
-- Defines the tables, columns, and processing steps
-
diff --git a/modules/features/featuresLifecycle.py b/modules/features/featuresLifecycle.py
index d1ee20ad..1557db6f 100644
--- a/modules/features/featuresLifecycle.py
+++ b/modules/features/featuresLifecycle.py
@@ -1,24 +1,37 @@
import logging
-from modules.interfaces.interfaceDbAppObjects import getRootInterface
+from modules.services import getInterface as getServices
logger = logging.getLogger(__name__)
-async def start() -> None:
- """ Start feature triggers and background managers """
-
- # Provide Event User
- rootInterface = getRootInterface()
- eventUser = rootInterface.getUserByUsername("event")
+async def start(eventUser) -> None:
+ """ Start feature triggers and background managers
+
+ Args:
+ eventUser: System-level event user for background operations (provided by app.py)
+ """
# Feature Automation Events
if eventUser:
try:
- from modules.interfaces.interfaceDbChatObjects import getInterface as getChatInterface
- chatInterface = getChatInterface(eventUser)
- await chatInterface.syncAutomationEvents()
+ from modules.features.automation import syncAutomationEvents
+ from modules.shared.callbackRegistry import callbackRegistry
+
+ # Get services for event user (provides access to interfaces)
+ services = getServices(eventUser, None)
+
+ # Register callback for automation changes
+ async def onAutomationChanged(chatInterface):
+ """Callback triggered when automations are created/updated/deleted."""
+ await syncAutomationEvents(chatInterface, eventUser)
+
+ callbackRegistry.register('automation.changed', onAutomationChanged)
+ logger.info("Registered automation change callback")
+
+ # Initial sync on startup - use interface from services
+ await syncAutomationEvents(services.interfaceDbChat, eventUser)
logger.info("Automation events synced on startup")
except Exception as e:
- logger.error(f"Error syncing automation events on startup: {str(e)}")
+ logger.error(f"Error setting up automation events on startup: {str(e)}")
# Don't fail startup if automation sync fails
# Feature SyncDelta
@@ -36,8 +49,21 @@ async def start() -> None:
-async def stop() -> None:
- """ Stop feature triggers and background managers """
+async def stop(eventUser) -> None:
+ """ Stop feature triggers and background managers
+
+ Args:
+ eventUser: System-level event user (provided by app.py)
+ """
+
+    # Automation callback cleanup
+    # Note: the callback reference registered in start() would need to be stored
+    # to unregister it here. For now, callbacks remain registered, which is
+    # acceptable at shutdown.
+    logger.info("Automation callbacks remain registered (will be cleaned up on process exit)")
# Feature ...
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index 7dc7db6b..3cc4d2a5 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -75,15 +75,7 @@ class AiObjects:
# AI for Extraction, Processing, Generation
- async def call(self, request: AiCallRequest, progressCallback=None) -> AiCallResponse:
- """Call AI model for text generation with model-aware chunking."""
- # Handle content parts (unified path)
- if hasattr(request, 'contentParts') and request.contentParts:
- return await self._callWithContentParts(request, progressCallback)
- # Handle traditional text/context calls
- return await self._callWithTextContext(request)
-
- async def _callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
+ async def callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
"""Call AI model for traditional text/context calls with fallback mechanism."""
prompt = request.prompt
context = request.context or ""
@@ -148,412 +140,6 @@ class AiObjects:
errorCount=1
)
- async def _callWithContentParts(self, request: AiCallRequest, progressCallback=None) -> AiCallResponse:
- """Process content parts with model-aware chunking (unified for single and multiple parts)."""
- prompt = request.prompt
- options = request.options
- contentParts = request.contentParts
-
- # Get failover models
- availableModels = modelRegistry.getAvailableModels()
- failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
-
- if not failoverModelList:
- return self._createErrorResponse("No suitable models found", 0, 0)
-
- # Process each content part
- allResults = []
- for contentPart in contentParts:
- partResult = await self._processContentPartWithFallback(contentPart, prompt, options, failoverModelList, progressCallback)
- allResults.append(partResult)
-
- # Merge all results
- mergedContent = self._mergePartResults(allResults)
-
- return AiCallResponse(
- content=mergedContent,
- modelName="multiple",
- priceUsd=sum(r.priceUsd for r in allResults),
- processingTime=sum(r.processingTime for r in allResults),
- bytesSent=sum(r.bytesSent for r in allResults),
- bytesReceived=sum(r.bytesReceived for r in allResults),
- errorCount=sum(r.errorCount for r in allResults)
- )
-
- async def _processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList, progressCallback=None) -> AiCallResponse:
- """Process a single content part with model-aware chunking and fallback."""
- lastError = None
-
- # Check if this is an image - Vision models need special handling
- isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))
-
- # Determine the correct operation type based on content type
- # Images should use IMAGE_ANALYSE, not the generic operation type
- actualOperationType = options.operationType
- if isImage:
- actualOperationType = OperationTypeEnum.IMAGE_ANALYSE
- # Get vision-capable models for images
- availableModels = modelRegistry.getAvailableModels()
- visionFailoverList = modelSelector.getFailoverModelList(prompt, "", AiCallOptions(operationType=actualOperationType), availableModels)
- if visionFailoverList:
- logger.debug(f"Using {len(visionFailoverList)} vision-capable models for image processing")
- failoverModelList = visionFailoverList
-
- for attempt, model in enumerate(failoverModelList):
- try:
- logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
-
- # Special handling for images with Vision models
- if isImage and hasattr(model, 'functionCall'):
- # Call model's functionCall directly (for Vision models this is callAiImage)
- from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions as AiCallOpts
-
- try:
- # Validate and prepare image data
- if not contentPart.data:
- raise ValueError("Image content part has no data")
-
- # Ensure mimeType is valid
- mimeType = contentPart.mimeType or "image/jpeg"
- if not mimeType.startswith("image/"):
- raise ValueError(f"Invalid mimeType for image: {mimeType}")
-
- # Prepare base64 data
- if isinstance(contentPart.data, str):
- # Already base64 encoded - validate it
- try:
- base64.b64decode(contentPart.data, validate=True)
- base64Data = contentPart.data
- except Exception as e:
- raise ValueError(f"Invalid base64 data in contentPart: {str(e)}")
- elif isinstance(contentPart.data, bytes):
- # Binary data - encode to base64
- base64Data = base64.b64encode(contentPart.data).decode('utf-8')
- else:
- raise ValueError(f"Unsupported data type for image: {type(contentPart.data)}")
-
- # Create data URL
- imageDataUrl = f"data:{mimeType};base64,{base64Data}"
-
- modelCall = AiModelCall(
- messages=[
- {
- "role": "user",
- "content": [
- {"type": "text", "text": prompt or ""},
- {
- "type": "image_url",
- "image_url": {
- "url": imageDataUrl
- }
- }
- ]
- }
- ],
- model=model,
- options=AiCallOpts(operationType=actualOperationType)
- )
-
- modelResponse = await model.functionCall(modelCall)
-
- if not modelResponse.success:
- raise ValueError(f"Model call failed: {modelResponse.error}")
-
- logger.info(f"✅ Image content part processed successfully with model: {model.name}")
-
- # Convert to AiCallResponse format
- # Note: AiModelResponse doesn't have priceUsd, and processingTime can be None
- # Calculate processing time if not provided (fallback to 0.0)
- processingTime = getattr(modelResponse, 'processingTime', None)
- if processingTime is None:
- processingTime = 0.0
-
- return AiCallResponse(
- content=modelResponse.content,
- modelName=model.name,
- priceUsd=0.0, # Price will be calculated elsewhere if needed
- processingTime=processingTime,
- bytesSent=0, # Will be calculated elsewhere
- bytesReceived=0, # Will be calculated elsewhere
- errorCount=0
- )
- except Exception as e:
- # Image processing failed with this model
- lastError = e
- logger.warning(f"❌ Image processing failed with model {model.name}: {str(e)}")
-
- # If this is not the last model, try the next one
- if attempt < len(failoverModelList) - 1:
- logger.info(f"🔄 Trying next fallback model for image processing...")
- continue
- else:
- # All models failed
- logger.error(f"💥 All {len(failoverModelList)} models failed for image processing")
- raise
-
- # For non-image parts, check if part fits in model context
- # Calculate available space accounting for prompt, system message, and output reservation
- partSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0
-
- # Use same calculation as _chunkContentPart to determine actual available space
- modelContextTokens = model.contextLength
- modelMaxOutputTokens = model.maxTokens
-
- # Reserve tokens for prompt, system message, output, and message overhead
- promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
- systemMessageTokens = 10 # ~40 bytes = 10 tokens
- outputTokens = modelMaxOutputTokens
- messageOverheadTokens = 100
- totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens
-
- # Available tokens for content (with 80% safety margin)
- availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
- if availableContentTokens < 100:
- availableContentTokens = max(100, int(modelContextTokens * 0.1))
-
- # Convert to bytes (1 token ≈ 4 bytes)
- availableContentBytes = availableContentTokens * 4
-
- logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes (contextLength={modelContextTokens} tokens, reserved={totalReservedTokens:.0f} tokens)")
-
- if partSize <= availableContentBytes:
- # Part fits - call AI directly
- response = await self._callWithModel(model, prompt, contentPart.data, options)
- logger.info(f"✅ Content part processed successfully with model: {model.name}")
- return response
- else:
- # Part too large - chunk it (pass prompt to account for it in chunk size calculation)
- chunks = await self._chunkContentPart(contentPart, model, options, prompt)
- if not chunks:
- raise ValueError(f"Failed to chunk content part for model {model.name}")
-
- logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}")
-
- # Log progress if callback provided
- if progressCallback:
- progressCallback(0.0, f"Starting to process {len(chunks)} chunks")
-
- # Process each chunk
- chunkResults = []
- for idx, chunk in enumerate(chunks):
- chunkNum = idx + 1
- chunkData = chunk.get('data', '')
- chunkSize = len(chunkData.encode('utf-8')) if chunkData else 0
- logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}, chunk size: {chunkSize} bytes")
-
- # Calculate and log progress
- if progressCallback:
- progress = chunkNum / len(chunks)
- progressCallback(progress, f"Processing chunk {chunkNum}/{len(chunks)}")
-
- try:
- chunkResponse = await self._callWithModel(model, prompt, chunkData, options)
- chunkResults.append(chunkResponse)
- logger.info(f"✅ Chunk {chunkNum}/{len(chunks)} processed successfully")
-
- # Log completion progress
- if progressCallback:
- progressCallback(chunkNum / len(chunks), f"Chunk {chunkNum}/{len(chunks)} processed")
- except Exception as e:
- logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}")
- raise
-
- # Merge chunk results
- mergedContent = self._mergeChunkResults(chunkResults)
- totalPrice = sum(r.priceUsd for r in chunkResults)
- totalTime = sum(r.processingTime for r in chunkResults)
- totalBytesSent = sum(r.bytesSent for r in chunkResults)
- totalBytesReceived = sum(r.bytesReceived for r in chunkResults)
- totalErrors = sum(r.errorCount for r in chunkResults)
-
- logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
- return AiCallResponse(
- content=mergedContent,
- modelName=model.name,
- priceUsd=totalPrice,
- processingTime=totalTime,
- bytesSent=totalBytesSent,
- bytesReceived=totalBytesReceived,
- errorCount=totalErrors
- )
-
- except Exception as e:
- lastError = e
- error_msg = str(e) if str(e) else f"{type(e).__name__}"
- error_detail = f"❌ Model {model.name} failed for content part: {error_msg}"
- if hasattr(e, 'detail') and e.detail:
- error_detail += f" | Detail: {e.detail}"
- if hasattr(e, 'status_code'):
- error_detail += f" | Status: {e.status_code}"
- logger.warning(error_detail, exc_info=True)
-
- if attempt < len(failoverModelList) - 1:
- logger.info(f"🔄 Trying next failover model...")
- continue
- else:
- logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
- break
-
- # All models failed
- return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)
-
- async def _chunkContentPart(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
- """Chunk a content part based on model capabilities, accounting for prompt, system message overhead, and maxTokens output."""
- # Calculate model-specific chunk sizes
- modelContextTokens = model.contextLength # Total context in tokens
- modelMaxOutputTokens = model.maxTokens # Maximum output tokens
-
- # Reserve tokens for:
- # 1. Prompt (user message)
- promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
-
- # 2. System message wrapper ("Context from documents:\n")
- systemMessageTokens = 10 # ~40 bytes = 10 tokens
-
- # 3. Max output tokens (model will reserve space for completion)
- outputTokens = modelMaxOutputTokens
-
- # 4. JSON structure and message overhead (~100 tokens)
- messageOverheadTokens = 100
-
- # Total reserved tokens = input overhead + output reservation
- totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens
-
- # Available tokens for content = context length - reserved tokens
- # Use 80% of available for safety margin
- availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
-
- # Ensure we have at least some space
- if availableContentTokens < 100:
- logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens")
- availableContentTokens = max(100, int(modelContextTokens * 0.1)) # Fallback to 10% of context
-
- # Convert tokens to bytes (1 token ≈ 4 bytes)
- availableContentBytes = availableContentTokens * 4
-
- logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)")
-
- # Use 70% of available content bytes for text chunks (conservative)
- textChunkSize = int(availableContentBytes * 0.7)
- imageChunkSize = int(availableContentBytes * 0.8) # 80% for image chunks
-
- # Build chunking options
- chunkingOptions = {
- "textChunkSize": textChunkSize,
- "imageChunkSize": imageChunkSize,
- "maxSize": availableContentBytes,
- "chunkAllowed": True
- }
-
- # Get appropriate chunker
- from modules.services.serviceExtraction.subRegistry import ChunkerRegistry
- chunkerRegistry = ChunkerRegistry()
- chunker = chunkerRegistry.resolve(contentPart.typeGroup)
-
- if not chunker:
- logger.warning(f"No chunker found for typeGroup: {contentPart.typeGroup}")
- return []
-
- # Chunk the content part
- try:
- chunks = chunker.chunk(contentPart, chunkingOptions)
- logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part")
- return chunks
- except Exception as e:
- logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
- return []
-
- def _mergePartResults(self, partResults: List[AiCallResponse]) -> str:
- """Merge part results using the existing sophisticated merging system."""
- if not partResults:
- return ""
-
- # Convert AiCallResponse results to ContentParts for merging
- from modules.datamodels.datamodelExtraction import ContentPart
- from modules.services.serviceExtraction.subUtils import makeId
-
- content_parts = []
- for i, result in enumerate(partResults):
- if result.content:
- content_part = ContentPart(
- id=str(uuid.uuid4()),
- parentId=None,
- label=f"ai_result_{i}",
- typeGroup="text", # Default to text for AI results
- mimeType="text/plain",
- data=result.content,
- metadata={
- "aiResult": True,
- "modelName": result.modelName,
- "priceUsd": result.priceUsd,
- "processingTime": result.processingTime,
- "bytesSent": result.bytesSent,
- "bytesReceived": result.bytesReceived
- }
- )
- content_parts.append(content_part)
-
- # Use existing merging system
- merge_strategy = MergeStrategy(
- useIntelligentMerging=True,
- groupBy="typeGroup",
- orderBy="id",
- mergeType="concatenate"
- )
-
- merged_parts = applyMerging(content_parts, merge_strategy)
-
- # Convert merged parts back to final string
- final_content = "\n\n".join([part.data for part in merged_parts])
-
- logger.info(f"Merged {len(partResults)} AI results using existing merging system")
- return final_content.strip()
-
- def _mergeChunkResults(self, chunkResults: List[AiCallResponse]) -> str:
- """Merge chunk results using the existing sophisticated merging system."""
- if not chunkResults:
- return ""
-
- # Convert AiCallResponse results to ContentParts for merging
-
- content_parts = []
- for i, result in enumerate(chunkResults):
- if result.content:
- content_part = ContentPart(
- id=str(uuid.uuid4()),
- parentId=None,
- label=f"chunk_result_{i}",
- typeGroup="text", # Default to text for AI results
- mimeType="text/plain",
- data=result.content,
- metadata={
- "aiResult": True,
- "chunk": True,
- "modelName": result.modelName,
- "priceUsd": result.priceUsd,
- "processingTime": result.processingTime,
- "bytesSent": result.bytesSent,
- "bytesReceived": result.bytesReceived
- }
- )
- content_parts.append(content_part)
-
- # Use existing merging system
- merge_strategy = MergeStrategy(
- useIntelligentMerging=True,
- groupBy="typeGroup",
- orderBy="id",
- mergeType="concatenate"
- )
-
- merged_parts = applyMerging(content_parts, merge_strategy)
-
- # Convert merged parts back to final string
- final_content = "\n\n".join([part.data for part in merged_parts])
-
- logger.info(f"Merged {len(chunkResults)} chunk results using existing merging system")
- return final_content.strip()
-
def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
"""Create an error response."""
return AiCallResponse(
@@ -659,64 +245,4 @@ class AiObjects:
return [model.displayName for model in models]
-def applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[ContentPart]:
- """Apply merging strategy to parts with intelligent token-aware merging."""
- logger.debug(f"applyMerging called with {len(parts)} parts")
-
- # Import merging dependencies
- from modules.services.serviceExtraction.merging.mergerText import TextMerger
- from modules.services.serviceExtraction.merging.mergerTable import TableMerger
- from modules.services.serviceExtraction.merging.mergerDefault import DefaultMerger
- from modules.services.serviceExtraction.subMerger import IntelligentTokenAwareMerger
-
- # Check if intelligent merging is enabled
- if strategy.useIntelligentMerging:
- modelCapabilities = strategy.capabilities or {}
- subMerger = IntelligentTokenAwareMerger(modelCapabilities)
-
- # Use intelligent merging for all parts
- merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")
-
- # Calculate and log optimization stats
- stats = subMerger.calculateOptimizationStats(parts, merged)
- logger.info(f"🧠 Intelligent merging stats: {stats}")
- logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")
-
- return merged
-
- # Fallback to traditional merging
- textMerger = TextMerger()
- tableMerger = TableMerger()
- defaultMerger = DefaultMerger()
-
- # Group by typeGroup
- textParts = [p for p in parts if p.typeGroup == "text"]
- tableParts = [p for p in parts if p.typeGroup == "table"]
- structureParts = [p for p in parts if p.typeGroup == "structure"]
- otherParts = [p for p in parts if p.typeGroup not in ("text", "table", "structure")]
-
- logger.debug(f"Grouped - text: {len(textParts)}, table: {len(tableParts)}, structure: {len(structureParts)}, other: {len(otherParts)}")
-
- merged: List[ContentPart] = []
-
- if textParts:
- textMerged = textMerger.merge(textParts, strategy)
- logger.debug(f"TextMerger merged {len(textParts)} parts into {len(textMerged)} parts")
- merged.extend(textMerged)
- if tableParts:
- tableMerged = tableMerger.merge(tableParts, strategy)
- logger.debug(f"TableMerger merged {len(tableParts)} parts into {len(tableMerged)} parts")
- merged.extend(tableMerged)
- if structureParts:
- # For now, treat structure like text
- structureMerged = textMerger.merge(structureParts, strategy)
- logger.debug(f"StructureMerger merged {len(structureParts)} parts into {len(structureMerged)} parts")
- merged.extend(structureMerged)
- if otherParts:
- otherMerged = defaultMerger.merge(otherParts, strategy)
- logger.debug(f"DefaultMerger merged {len(otherParts)} parts into {len(otherMerged)} parts")
- merged.extend(otherMerged)
-
- logger.debug(f"applyMerging returning {len(merged)} parts")
- return merged
diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py
index 0b217bd1..de4abc7e 100644
--- a/modules/interfaces/interfaceDbChatObjects.py
+++ b/modules/interfaces/interfaceDbChatObjects.py
@@ -37,6 +37,136 @@ logger = logging.getLogger(__name__)
# Singleton factory for Chat instances
_chatInterfaces = {}
+
+def storeDebugMessageAndDocuments(message, currentUser) -> None:
+ """
+ Store message and documents (metadata and file bytes) for debugging purposes.
+    Structure: {log_dir}/debug/messages/{timestamp}_m_{round}_{task}_{action}/{documents_label}/
+ - message.json, message_text.txt
+ - document_###_metadata.json
+ - document_###_ (actual file bytes)
+
+ Args:
+ message: ChatMessage object to store
+ currentUser: Current user for component interface access
+ """
+ try:
+ import os
+ from datetime import datetime, UTC
+ from modules.shared.debugLogger import _getBaseDebugDir, _ensureDir
+ from modules.interfaces.interfaceDbComponentObjects import getInterface
+
+ # Create base debug directory (use base debug dir, not prompts subdirectory)
+ baseDebugDir = _getBaseDebugDir()
+ debug_root = os.path.join(baseDebugDir, 'messages')
+ _ensureDir(debug_root)
+
+ # Generate timestamp
+ timestamp = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
+
+        # Create message folder name: {timestamp}_m_{round}_{task}_{action}
+ # Use actual values from message, not defaults
+ round_str = str(message.roundNumber) if message.roundNumber is not None else "0"
+ task_str = str(message.taskNumber) if message.taskNumber is not None else "0"
+ action_str = str(message.actionNumber) if message.actionNumber is not None else "0"
+ message_folder = f"{timestamp}_m_{round_str}_{task_str}_{action_str}"
+
+ message_path = os.path.join(debug_root, message_folder)
+ os.makedirs(message_path, exist_ok=True)
+
+        # Store message data (the dict is built manually below for compatibility)
+ message_file = os.path.join(message_path, "message.json")
+ with open(message_file, "w", encoding="utf-8") as f:
+ # Convert message to dict manually to avoid model_dump() issues
+ message_dict = {
+ "id": message.id,
+ "workflowId": message.workflowId,
+ "parentMessageId": message.parentMessageId,
+ "message": message.message,
+ "role": message.role,
+ "status": message.status,
+ "sequenceNr": message.sequenceNr,
+ "publishedAt": message.publishedAt,
+ "roundNumber": message.roundNumber,
+ "taskNumber": message.taskNumber,
+ "actionNumber": message.actionNumber,
+ "documentsLabel": message.documentsLabel,
+ "actionId": message.actionId,
+ "actionMethod": message.actionMethod,
+ "actionName": message.actionName,
+ "success": message.success,
+ "documents": []
+ }
+ json.dump(message_dict, f, indent=2, ensure_ascii=False, default=str)
+
+ # Store message content as text
+ if message.message:
+ message_text_file = os.path.join(message_path, "message_text.txt")
+ with open(message_text_file, "w", encoding="utf-8") as f:
+ f.write(str(message.message))
+
+ # Store documents if provided
+ if message.documents and len(message.documents) > 0:
+            # Group documents under the message-level documentsLabel (one group per message)
+ documents_by_label = {}
+ for doc in message.documents:
+ label = message.documentsLabel or 'default'
+ if label not in documents_by_label:
+ documents_by_label[label] = []
+ documents_by_label[label].append(doc)
+
+ # Create subfolder for each document label
+ for label, docs in documents_by_label.items():
+ # Sanitize label for filesystem
+ safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
+ safe_label = safe_label.replace(' ', '_')
+ if not safe_label:
+ safe_label = "default"
+
+ label_folder = os.path.join(message_path, safe_label)
+ _ensureDir(label_folder)
+
+ # Store each document
+ for i, doc in enumerate(docs):
+ # Create document metadata file
+ doc_meta = {
+ "id": doc.id,
+ "messageId": doc.messageId,
+ "fileId": doc.fileId,
+ "fileName": doc.fileName,
+ "fileSize": doc.fileSize,
+ "mimeType": doc.mimeType,
+ "roundNumber": doc.roundNumber,
+ "taskNumber": doc.taskNumber,
+ "actionNumber": doc.actionNumber,
+ "actionId": doc.actionId
+ }
+
+ doc_meta_file = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
+ with open(doc_meta_file, "w", encoding="utf-8") as f:
+ json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)
+
+ # Also store the actual file bytes next to metadata for debugging
+ try:
+ componentInterface = getInterface(currentUser)
+ file_bytes = componentInterface.getFileData(doc.fileId)
+ if file_bytes:
+ # Build a safe filename preserving original name
+ safe_name = doc.fileName or f"document_{i+1:03d}"
+ # Avoid path traversal
+ safe_name = os.path.basename(safe_name)
+ doc_file_path = os.path.join(label_folder, f"document_{i+1:03d}_" + safe_name)
+ with open(doc_file_path, "wb") as df:
+ df.write(file_bytes)
+                    except Exception:
+                        # Best-effort: skip the file bytes if they cannot be read
+                        pass
+
+    except Exception:
+ # Silent fail - don't break main flow
+ pass
+
class ChatObjects:
"""
Interface to Chat database and AI Connectors.
@@ -440,7 +570,7 @@ class ChatObjects:
allWorkflows = self.db.getRecordset(ChatWorkflow)
filteredWorkflows = self._uam(ChatWorkflow, allWorkflows)
- # If no pagination requested, return all items
+ # If no pagination requested, return all items (no sorting - frontend handles it)
if pagination is None:
return filteredWorkflows
@@ -448,7 +578,7 @@ class ChatObjects:
if pagination.filters:
filteredWorkflows = self._applyFilters(filteredWorkflows, pagination.filters)
- # Apply sorting (in order of sortFields)
+ # Apply sorting (in order of sortFields) - only if provided by frontend
if pagination.sort:
filteredWorkflows = self._applySorting(filteredWorkflows, pagination.sort)
@@ -893,7 +1023,6 @@ class ChatObjects:
)
# Debug: Store message and documents for debugging - only if debug enabled
- from modules.shared.debugLogger import storeDebugMessageAndDocuments
storeDebugMessageAndDocuments(chat_message, self.currentUser)
return chat_message
@@ -1550,8 +1679,8 @@ class ChatObjects:
if createdAutomation.get("executionLogs") is None:
createdAutomation["executionLogs"] = []
- # Trigger sync (async, don't wait)
- asyncio.create_task(self.syncAutomationEvents())
+ # Trigger automation change callback (async, don't wait)
+ asyncio.create_task(self._notifyAutomationChanged())
return createdAutomation
except Exception as e:
@@ -1581,8 +1710,8 @@ class ChatObjects:
if updatedAutomation.get("executionLogs") is None:
updatedAutomation["executionLogs"] = []
- # Trigger sync (async, don't wait)
- asyncio.create_task(self.syncAutomationEvents())
+ # Trigger automation change callback (async, don't wait)
+ asyncio.create_task(self._notifyAutomationChanged())
return updatedAutomation
except Exception as e:
@@ -1611,374 +1740,22 @@ class ChatObjects:
# Delete automation from database
self.db.recordDelete(AutomationDefinition, automationId)
- # Trigger sync (async, don't wait)
- asyncio.create_task(self.syncAutomationEvents())
+ # Trigger automation change callback (async, don't wait)
+ asyncio.create_task(self._notifyAutomationChanged())
return True
except Exception as e:
logger.error(f"Error deleting automation definition: {str(e)}")
raise
- def _replacePlaceholders(self, template: str, placeholders: Dict[str, str]) -> str:
- """Replace placeholders in template with actual values. Placeholder format: {{KEY:PLACEHOLDER_NAME}}"""
- result = template
- for placeholderName, value in placeholders.items():
- pattern = f"{{{{KEY:{placeholderName}}}}}"
-
- # Check if placeholder is in an array context like ["{{KEY:...}}"]
- # If value is a JSON array/dict, we should replace the entire ["{{KEY:...}}"] with the array
- arrayPattern = f'["{pattern}"]'
- if arrayPattern in result:
- # Check if value is a JSON array/dict
- isArrayValue = False
- arrayValue = None
-
- if isinstance(value, (list, dict)):
- isArrayValue = True
- arrayValue = json.dumps(value)
- elif isinstance(value, str):
- try:
- parsed = json.loads(value)
- if isinstance(parsed, (list, dict)):
- isArrayValue = True
- arrayValue = value # Already valid JSON string
- except (json.JSONDecodeError, ValueError):
- pass
-
- if isArrayValue:
- # Replace ["{{KEY:...}}"] with the array value
- result = result.replace(arrayPattern, arrayValue)
- continue # Skip the regular replacement below
-
- # Regular replacement - check if in quoted context
- patternStart = result.find(pattern)
- isQuoted = False
- if patternStart > 0:
- charBefore = result[patternStart - 1] if patternStart > 0 else None
- patternEnd = patternStart + len(pattern)
- charAfter = result[patternEnd] if patternEnd < len(result) else None
- if charBefore == '"' and charAfter == '"':
- isQuoted = True
-
- # Handle different value types
- if isinstance(value, (list, dict)):
- # Python list/dict - convert to JSON
- replacement = json.dumps(value)
- elif isinstance(value, str):
- # String value - check if it's a JSON string representing list/dict
- try:
- parsed = json.loads(value)
- if isinstance(parsed, (list, dict)):
- # It's a JSON string of a list/dict
- if isQuoted:
- # In quoted context, escape the JSON string
- escaped = json.dumps(value)
- replacement = escaped[1:-1] # Remove outer quotes
- else:
- # In unquoted context, use JSON directly
- replacement = value
- else:
- # It's a JSON string of a primitive
- if isQuoted:
- escaped = json.dumps(value)
- replacement = escaped[1:-1]
- else:
- replacement = value
- except (json.JSONDecodeError, ValueError):
- # Not valid JSON - treat as plain string
- if isQuoted:
- escaped = json.dumps(value)
- replacement = escaped[1:-1]
- else:
- replacement = value
- else:
- # Numbers, booleans, None - convert to string
- replacement = str(value)
- result = result.replace(pattern, replacement)
- return result
-
- def _parseScheduleToCron(self, schedule: str) -> Dict[str, Any]:
- """Parse schedule string to cron kwargs for APScheduler"""
- parts = schedule.split()
- if len(parts) != 5:
- raise ValueError(f"Invalid schedule format: {schedule}")
-
- return {
- "minute": parts[0],
- "hour": parts[1],
- "day": parts[2],
- "month": parts[3],
- "day_of_week": parts[4]
- }
-
- async def executeAutomation(self, automationId: str) -> ChatWorkflow:
- """Execute automation workflow immediately (test mode) with placeholder replacement"""
- executionStartTime = getUtcTimestamp()
- executionLog = {
- "timestamp": executionStartTime,
- "workflowId": None,
- "status": "running",
- "messages": []
- }
-
+ async def _notifyAutomationChanged(self):
+ """Notify registered callbacks about automation changes (decoupled from features)."""
try:
- # 1. Load automation definition
- automation = self.getAutomationDefinition(automationId)
- if not automation:
- raise ValueError(f"Automation {automationId} not found")
-
- executionLog["messages"].append(f"Started execution at {executionStartTime}")
-
- # 2. Replace placeholders in template to generate plan
- template = automation.get("template", "")
- placeholders = automation.get("placeholders", {})
- planJson = self._replacePlaceholders(template, placeholders)
- try:
- plan = json.loads(planJson)
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse plan JSON after placeholder replacement: {str(e)}")
- logger.error(f"Template: {template[:500]}...")
- logger.error(f"Placeholders: {placeholders}")
- logger.error(f"Generated planJson (first 1000 chars): {planJson[:1000]}")
- logger.error(f"Error position: line {e.lineno}, column {e.colno}, char {e.pos}")
- if e.pos:
- start = max(0, e.pos - 100)
- end = min(len(planJson), e.pos + 100)
- logger.error(f"Context around error: ...{planJson[start:end]}...")
- raise ValueError(f"Invalid JSON after placeholder replacement: {str(e)}")
- executionLog["messages"].append("Template placeholders replaced successfully")
-
- # 3. Get user who created automation
- creator_user_id = automation.get("_createdBy")
-
- # If _createdBy is missing, try to fix it by setting it to current user
- # This handles automations created before _createdBy was required
- if not creator_user_id:
- logger.warning(f"Automation {automationId} has no creator user, setting to current user {self.userId}")
- try:
- # Update the automation to set _createdBy
- self.db.recordModify(
- AutomationDefinition,
- automationId,
- {"_createdBy": self.userId}
- )
- creator_user_id = self.userId
- automation["_createdBy"] = self.userId
- logger.info(f"Fixed automation {automationId} by setting _createdBy to {self.userId}")
- executionLog["messages"].append(f"Fixed missing _createdBy field, set to user {self.userId}")
- except Exception as e:
- logger.error(f"Error fixing automation {automationId}: {str(e)}")
- raise ValueError(f"Automation {automationId} has no creator user and could not be fixed")
-
- # Get user from database
- from modules.interfaces.interfaceDbAppObjects import getInterface as getAppInterface
- appInterface = getAppInterface(self.currentUser)
- creator_user = appInterface.getUser(creator_user_id)
- if not creator_user:
- raise ValueError(f"Creator user {creator_user_id} not found")
-
- executionLog["messages"].append(f"Using creator user: {creator_user_id}")
-
- # 4. Create UserInputRequest from plan
- # Embed plan JSON in prompt for TemplateMode to extract
- promptText = self._planToPrompt(plan)
- planJson = json.dumps(plan)
- # Embed plan as JSON comment so TemplateMode can extract it
- promptWithPlan = f"{promptText}\n\n\n{planJson}\n"
-
- userInput = UserInputRequest(
- prompt=promptWithPlan,
- listFileId=[],
- userLanguage=creator_user.language or "en"
- )
-
- executionLog["messages"].append("Starting workflow execution")
-
- # 5. Start workflow using chatStart
- from modules.features.chatPlayground.mainChatPlayground import chatStart
-
- workflow = await chatStart(
- currentUser=creator_user,
- userInput=userInput,
- workflowMode=WorkflowModeEnum.WORKFLOW_AUTOMATION,
- workflowId=None
- )
-
- executionLog["workflowId"] = workflow.id
- executionLog["status"] = "completed"
- executionLog["messages"].append(f"Workflow {workflow.id} started successfully")
- logger.info(f"Started workflow {workflow.id} with plan containing {len(plan.get('tasks', []))} tasks (plan embedded in userInput)")
-
- # Set workflow name with "automated" prefix
- automationLabel = automation.get("label", "Unknown Automation")
- workflowName = f"automated: {automationLabel}"
- workflow = self.updateWorkflow(workflow.id, {"name": workflowName})
- logger.info(f"Set workflow {workflow.id} name to: {workflowName}")
-
- # Update automation with execution log
- executionLogs = automation.get("executionLogs", [])
- executionLogs.append(executionLog)
- # Keep only last 50 executions
- if len(executionLogs) > 50:
- executionLogs = executionLogs[-50:]
-
- self.db.recordModify(
- AutomationDefinition,
- automationId,
- {"executionLogs": executionLogs}
- )
-
- return workflow
+ from modules.shared.callbackRegistry import callbackRegistry
+ # Trigger callbacks without knowing which features are listening
+ await callbackRegistry.trigger('automation.changed', self)
except Exception as e:
- # Log error to execution log
- executionLog["status"] = "error"
- executionLog["messages"].append(f"Error: {str(e)}")
-
- # Update automation with execution log even on error
- try:
- automation = self.getAutomationDefinition(automationId)
- if automation:
- executionLogs = automation.get("executionLogs", [])
- executionLogs.append(executionLog)
- if len(executionLogs) > 50:
- executionLogs = executionLogs[-50:]
- self.db.recordModify(
- AutomationDefinition,
- automationId,
- {"executionLogs": executionLogs}
- )
- except Exception as logError:
- logger.error(f"Error saving execution log: {str(logError)}")
-
- raise
-
- def _planToPrompt(self, plan: Dict) -> str:
- """Convert plan structure to prompt string for workflow execution"""
- return plan.get("userMessage", plan.get("overview", "Execute automation workflow"))
-
- async def syncAutomationEvents(self) -> Dict[str, Any]:
- """Automation event handler - syncs scheduler with all active automations."""
- from modules.shared.eventManagement import eventManager
-
- # Get all automation definitions (for current mandate)
- allAutomations = self.db.getRecordset(AutomationDefinition)
- filtered = self._uam(AutomationDefinition, allAutomations)
-
- registered_events = {}
-
- for automation in filtered:
- automation_id = automation.get("id")
- is_active = automation.get("active", False)
- current_event_id = automation.get("eventId")
- schedule = automation.get("schedule")
-
- if not schedule:
- logger.warning(f"Automation {automation_id} has no schedule, skipping")
- continue
-
- try:
- # Parse schedule to cron kwargs
- cron_kwargs = self._parseScheduleToCron(schedule)
-
- if is_active:
- # Remove existing event if present (handles schedule changes)
- if current_event_id:
- try:
- eventManager.remove(current_event_id)
- except Exception as e:
- logger.warning(f"Error removing old event {current_event_id}: {str(e)}")
-
- # Register new event
- new_event_id = f"automation.{automation_id}"
-
- # Create event handler function
- handler = self._createAutomationEventHandler(automation_id)
-
- # Register cron job
- eventManager.registerCron(
- jobId=new_event_id,
- func=handler,
- cronKwargs=cron_kwargs,
- replaceExisting=True
- )
-
- # Update automation with new eventId
- if current_event_id != new_event_id:
- self.db.recordModify(
- AutomationDefinition,
- automation_id,
- {"eventId": new_event_id}
- )
-
- registered_events[automation_id] = new_event_id
- else:
- # Remove event if exists
- if current_event_id:
- try:
- eventManager.remove(current_event_id)
- self.db.recordModify(
- AutomationDefinition,
- automation_id,
- {"eventId": None}
- )
- except Exception as e:
- logger.warning(f"Error removing event {current_event_id}: {str(e)}")
- except Exception as e:
- logger.error(f"Error syncing automation {automation_id}: {str(e)}")
-
- return {
- "synced": len(registered_events),
- "events": registered_events
- }
-
- def _createAutomationEventHandler(self, automationId: str):
- """Create event handler function for a specific automation"""
- async def handler():
- try:
- # Get event user to access automation (event user can access all automations)
- from modules.interfaces.interfaceDbAppObjects import getRootInterface
- from modules.interfaces.interfaceDbAppObjects import getInterface as getAppInterface
- from modules.interfaces.interfaceDbChatObjects import getInterface as getChatInterface
-
- rootInterface = getRootInterface()
- eventUser = rootInterface.getUserByUsername("event")
-
- if not eventUser:
- logger.error("Could not get event user for automation execution")
- return
-
- # Create ChatObjects interface for event user (to access automation)
- eventInterface = getChatInterface(eventUser)
-
- # Load automation using event user context
- automation = eventInterface.getAutomationDefinition(automationId)
- if not automation or not automation.get("active"):
- logger.warning(f"Automation {automationId} not found or not active, skipping execution")
- return
-
- # Get creator user
- creator_user_id = automation.get("_createdBy")
- if not creator_user_id:
- logger.error(f"Automation {automationId} has no creator user")
- return
-
- # Get creator user from database
- appInterface = getAppInterface(eventUser)
- creator_user = appInterface.getUser(creator_user_id)
- if not creator_user:
- logger.error(f"Creator user {creator_user_id} not found for automation {automationId}")
- return
-
- # Create ChatObjects interface for creator user
- creatorInterface = getChatInterface(creator_user)
-
- # Execute automation with creator user's context
- await creatorInterface.executeAutomation(automationId)
- logger.info(f"Successfully executed automation {automationId} as user {creator_user_id}")
- except Exception as e:
- logger.error(f"Error executing automation {automationId}: {str(e)}")
-
- return handler
+ logger.error(f"Error notifying automation change: {str(e)}")
def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects':
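modules/shared/callbackRegistry.py itself is not shown in this diff; a minimal sketch of the interface that _notifyAutomationChanged assumes (an async trigger keyed by event name, with features registering their own handlers) could look like the following - the actual implementation may differ:

# Hypothetical sketch of the registry shape assumed by callbackRegistry.trigger();
# the real modules/shared/callbackRegistry.py may differ.
from typing import Any, Awaitable, Callable, Dict, List

class CallbackRegistry:
    def __init__(self) -> None:
        self._callbacks: Dict[str, List[Callable[..., Awaitable[None]]]] = {}

    def register(self, eventName: str, callback: Callable[..., Awaitable[None]]) -> None:
        # Features register interest; interfaces never import feature modules
        self._callbacks.setdefault(eventName, []).append(callback)

    async def trigger(self, eventName: str, *args: Any) -> None:
        # Fan out to every handler registered for this event name
        for callback in self._callbacks.get(eventName, []):
            await callback(*args)

callbackRegistry = CallbackRegistry()

This keeps interfaceDbChatObjects decoupled: it only emits 'automation.changed', and whichever feature cares (here, the new automation feature) registers a handler at startup.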
diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py
index 87cb1413..cf3a1f12 100644
--- a/modules/interfaces/interfaceVoiceObjects.py
+++ b/modules/interfaces/interfaceVoiceObjects.py
@@ -99,6 +99,44 @@ class VoiceObjects:
# Translation Operations
+ async def detectLanguage(self, text: str) -> Dict[str, Any]:
+ """
+ Detect the language of text using Google Cloud Translation API.
+
+ Args:
+ text: Text to detect language for
+
+ Returns:
+ Dict containing detected language code and confidence
+ """
+ try:
+ logger.info(f"🔍 Language detection request: '{text[:100]}...'")
+
+ if not text.strip():
+ return {
+ "success": False,
+ "language": "",
+ "error": "Empty text provided"
+ }
+
+ connector = self._getGoogleSpeechConnector()
+ result = await connector.detectLanguage(text)
+
+ if result["success"]:
+ logger.info(f"✅ Language detected: {result['language']}")
+ else:
+ logger.warning(f"⚠️ Language detection failed: {result.get('error', 'Unknown error')}")
+
+ return result
+
+ except Exception as e:
+ logger.error(f"❌ Language detection error: {e}")
+ return {
+ "success": False,
+ "language": "",
+ "error": str(e)
+ }
+
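A minimal usage sketch for the new method; the getInterface factory and currentUser are assumptions following the pattern of the other interface modules:

# Hypothetical caller; getInterface and currentUser come from the surrounding app code.
voiceInterface = getInterface(currentUser)
result = await voiceInterface.detectLanguage("Guten Morgen, wie geht es Ihnen?")
if result["success"]:
    print(result["language"])   # e.g. "de"
else:
    print(result["error"])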
async def translateText(self, text: str, sourceLanguage: str = "de",
targetLanguage: str = "en") -> Dict[str, Any]:
"""
diff --git a/modules/routes/routeAdminAutomationEvents.py b/modules/routes/routeAdminAutomationEvents.py
index bb4a233f..dcac4f27 100644
--- a/modules/routes/routeAdminAutomationEvents.py
+++ b/modules/routes/routeAdminAutomationEvents.py
@@ -86,15 +86,21 @@ async def sync_all_automation_events(
requireSysadmin(currentUser)
try:
- chatInterface = interfaceDbChatObjects.getInterface(currentUser)
+ from modules.interfaces.interfaceDbChatObjects import getInterface as getChatInterface
+ from modules.interfaces.interfaceDbAppObjects import getRootInterface
+ from modules.features.automation import syncAutomationEvents
- if not hasattr(chatInterface, 'syncAutomationEvents'):
+ chatInterface = getChatInterface(currentUser)
+ # Get event user for sync operation (routes can import from interfaces)
+ rootInterface = getRootInterface()
+ eventUser = rootInterface.getUserByUsername("event")
+ if not eventUser:
raise HTTPException(
- status_code=501,
- detail="Automation methods not available"
+ status_code=500,
+ detail="Event user not available"
)
- result = await chatInterface.syncAutomationEvents()
+ result = await syncAutomationEvents(chatInterface, eventUser)
return {
"success": True,
"synced": result.get("synced", 0),
diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py
index 7f33b19c..605feff7 100644
--- a/modules/routes/routeVoiceGoogle.py
+++ b/modules/routes/routeVoiceGoogle.py
@@ -115,6 +115,48 @@ async def speech_to_text(
detail=f"Speech-to-text processing failed: {str(e)}"
)
+@router.post("/detect-language")
+async def detect_language(
+ text: str = Form(...),
+ currentUser: User = Depends(getCurrentUser)
+):
+ """Detect the language of text using Google Cloud Translation API."""
+ try:
+ logger.info(f"🔍 Language detection request: '{text[:100]}...'")
+
+ if not text.strip():
+ raise HTTPException(
+ status_code=400,
+ detail="Empty text provided for language detection"
+ )
+
+ # Get voice interface
+ voiceInterface = _getVoiceInterface(currentUser)
+
+ # Perform language detection
+ result = await voiceInterface.detectLanguage(text)
+
+ if result["success"]:
+ return {
+ "success": True,
+ "language": result["language"],
+ "confidence": result.get("confidence", 1.0)
+ }
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Language detection failed: {result.get('error', 'Unknown error')}"
+ )
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"❌ Language detection error: {e}")
+ raise HTTPException(
+ status_code=500,
+ detail=f"Language detection processing failed: {str(e)}"
+ )
+
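For reference, a client-side sketch of calling the new endpoint; the base URL, route prefix, and bearer token are assumptions, not taken from this diff:

# Hypothetical client for POST /detect-language; URL prefix and auth are assumptions.
import requests

resp = requests.post(
    "http://localhost:8000/voice/google/detect-language",
    data={"text": "Bonjour tout le monde"},
    headers={"Authorization": "Bearer <token>"},
)
resp.raise_for_status()
print(resp.json())  # e.g. {"success": True, "language": "fr", "confidence": 0.99}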
@router.post("/translate")
async def translate_text(
text: str = Form(...),
diff --git a/modules/services/__init__.py b/modules/services/__init__.py
index 3e33d208..84ef638a 100644
--- a/modules/services/__init__.py
+++ b/modules/services/__init__.py
@@ -84,6 +84,9 @@ class Services:
from .serviceWeb.mainServiceWeb import WebService
self.web = PublicService(WebService(self))
+ from .serviceSecurity.mainServiceSecurity import SecurityService
+ self.security = PublicService(SecurityService(self))
+
def getInterface(user: User, workflow: ChatWorkflow) -> Services:
return Services(user, workflow)
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 57f81aa7..87afd365 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -48,6 +48,18 @@ class AiService:
logger.info("Initializing ExtractionService...")
self.extractionService = ExtractionService(self.services)
+ async def callAi(self, request: AiCallRequest, progressCallback=None):
+ """Router: handles content parts via extractionService, text context via interface.
+
+ Replaces direct calls to self.aiObjects.call() to route content parts processing
+ through serviceExtraction layer.
+ """
+ if hasattr(request, 'contentParts') and request.contentParts:
+ return await self.extractionService.processContentPartsWithAi(
+ request, self.aiObjects, progressCallback
+ )
+ return await self.aiObjects.callWithTextContext(request)
+
async def ensureAiObjectsInitialized(self):
"""Ensure aiObjects is initialized and submodules are ready."""
if self.aiObjects is None:
@@ -141,7 +153,7 @@ Respond with ONLY a JSON object in this exact format:
)
)
- response = await self.aiObjects.call(request)
+ response = await self.callAi(request)
# Parse AI response using structured parsing with AiCallOptions model
try:
@@ -193,10 +205,8 @@ Respond with ONLY a JSON object in this exact format:
documentMetadata = None # Store document metadata (title, filename) from first iteration
accumulationState = None # Track accumulation state for string accumulation
- # Get parent log ID for iteration operations
- parentLogId = None
- if operationId:
- parentLogId = self.services.chat.getOperationLogId(operationId)
+ # Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID)
+ parentOperationId = operationId # Use the parent's operationId directly
while iteration < maxIterations:
iteration += 1
@@ -210,7 +220,7 @@ Respond with ONLY a JSON object in this exact format:
"AI Call",
f"Iteration {iteration}",
"",
- parentId=parentLogId
+ parentOperationId=parentOperationId
)
# Build iteration prompt
@@ -223,11 +233,14 @@ Respond with ONLY a JSON object in this exact format:
logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
# Filter promptArgs to only include parameters that buildGenerationPrompt accepts
- # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext
+ # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services
filteredPromptArgs = {
k: v for k, v in promptArgs.items()
- if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content']
+ if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services']
}
+ # Always include services if available
+ if not filteredPromptArgs.get('services') and hasattr(self, 'services'):
+ filteredPromptArgs['services'] = self.services
# Rebuild prompt with continuation context using the provided prompt builder
iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext)
@@ -251,12 +264,23 @@ Respond with ONLY a JSON object in this exact format:
else:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
- response = await self.aiObjects.call(request)
+ response = await self.callAi(request)
result = response.content
- # Update progress after AI call
+ # Track bytes for progress reporting
+ bytesReceived = len(result.encode('utf-8')) if result else 0
+ totalBytesSoFar = sum(len(section.get('content', '').encode('utf-8')) if isinstance(section.get('content'), str) else 0 for section in allSections) + bytesReceived
+
+ # Update progress after AI call with byte information
if iterationOperationId:
- self.services.chat.progressLogUpdate(iterationOperationId, 0.6, "AI response received")
+ # Format bytes for display (kB or MB)
+ if totalBytesSoFar < 1024:
+ bytesDisplay = f"{totalBytesSoFar}B"
+ elif totalBytesSoFar < 1024 * 1024:
+ bytesDisplay = f"{totalBytesSoFar / 1024:.1f}kB"
+ else:
+ bytesDisplay = f"{totalBytesSoFar / (1024 * 1024):.1f}MB"
+ self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})")
# Write raw AI response to debug file
if iteration == 1:
@@ -457,8 +481,24 @@ Respond with ONLY a JSON object in this exact format:
# The break can occur anywhere - in any section, at any depth
allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration)
- # Log merged sections for debugging
+ # Calculate total bytes in merged content for progress display
merged_json_str = json.dumps(allSections, indent=2, ensure_ascii=False)
+ totalBytesGenerated = len(merged_json_str.encode('utf-8'))
+
+ # Update main operation with byte progress
+ if operationId:
+ # Format bytes for display
+ if totalBytesGenerated < 1024:
+ bytesDisplay = f"{totalBytesGenerated}B"
+ elif totalBytesGenerated < 1024 * 1024:
+ bytesDisplay = f"{totalBytesGenerated / 1024:.1f}kB"
+ else:
+ bytesDisplay = f"{totalBytesGenerated / (1024 * 1024):.1f}MB"
+ # Estimate progress based on iterations (rough estimate)
+ estimatedProgress = min(0.9, 0.4 + (iteration * 0.1))
+ self.services.chat.progressLogUpdate(operationId, estimatedProgress, f"Pipeline: {bytesDisplay} (iteration {iteration})")
+
+ # Log merged sections for debugging
self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}")
# Check if we should continue (completion detection)
@@ -473,14 +513,40 @@ Respond with ONLY a JSON object in this exact format:
if shouldContinue:
# Finish iteration operation (will continue with next iteration)
if iterationOperationId:
+ # Show byte progress in iteration completion
+ iterBytes = len(result.encode('utf-8')) if result else 0
+ if iterBytes < 1024:
+ iterBytesDisplay = f"{iterBytes}B"
+ elif iterBytes < 1024 * 1024:
+ iterBytesDisplay = f"{iterBytes / 1024:.1f}kB"
+ else:
+ iterBytesDisplay = f"{iterBytes / (1024 * 1024):.1f}MB"
+ self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Completed ({iterBytesDisplay})")
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
else:
# Done - finish iteration and update main operation
if iterationOperationId:
+ # Show final byte count
+ finalBytes = len(merged_json_str.encode('utf-8'))
+ if finalBytes < 1024:
+ finalBytesDisplay = f"{finalBytes}B"
+ elif finalBytes < 1024 * 1024:
+ finalBytesDisplay = f"{finalBytes / 1024:.1f}kB"
+ else:
+ finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB"
+ self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Complete ({finalBytesDisplay})")
self.services.chat.progressLogFinish(iterationOperationId, True)
if operationId:
- self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
+ # Show final size in main operation
+ finalBytes = len(merged_json_str.encode('utf-8'))
+ if finalBytes < 1024:
+ finalBytesDisplay = f"{finalBytes}B"
+ elif finalBytes < 1024 * 1024:
+ finalBytesDisplay = f"{finalBytes / 1024:.1f}kB"
+ else:
+ finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB"
+ self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete: {finalBytesDisplay} ({iteration} iterations, {len(allSections)} sections)")
logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections")
break
@@ -582,7 +648,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
# Write KPI definition prompt to debug file
self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt")
- response = await self.aiObjects.call(request)
+ response = await self.callAi(request)
# Write KPI definition response to debug file
self.services.utils.writeDebugFile(response.content, f"{debugPrefix}_kpi_definition_response")
@@ -895,7 +961,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
# Debug: persist prompt/response for analysis with context-specific naming
debugPrefix = debugType if debugType else "plan"
self.services.utils.writeDebugFile(fullPrompt, f"{debugPrefix}_prompt")
- response = await self.aiObjects.call(request)
+ response = await self.aiObjects.callWithTextContext(request)
result = response.content or ""
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
return result
@@ -929,10 +995,8 @@ If no trackable items can be identified, return: {{"kpis": []}}
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
aiOperationId = f"ai_content_{workflowId}_{int(time.time())}"
- # Get parent log ID if parent operation exists
- parentLogId = None
- if parentOperationId:
- parentLogId = self.services.chat.getOperationLogId(parentOperationId)
+ # Use parent operation ID directly (parentId should be operationId, not log entry ID)
+ # parentOperationId is already the operationId of the parent
# Start progress tracking with parent reference
self.services.chat.progressLogStart(
@@ -940,7 +1004,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
"AI content processing",
"Content Processing",
f"Format: {outputFormat or 'text'}",
- parentId=parentLogId
+ parentOperationId=parentOperationId
)
try:
@@ -966,7 +1030,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
options=options
)
- response = await self.aiObjects.call(request)
+ response = await self.callAi(request)
if response.content:
# Build document data for image
@@ -1011,7 +1075,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
options=options
)
- response = await self.aiObjects.call(request)
+ response = await self.callAi(request)
if response.content:
metadata = AiResponseMetadata(
@@ -1046,7 +1110,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
options.compressContext = False
# Process contentParts for generation prompt (if provided)
- # Use generic _callWithContentParts() which handles all content types (images, text, etc.)
+ # Use generic callWithContentParts() which handles all content types (images, text, etc.)
# This automatically processes images with vision models and merges all results
if contentParts:
# Filter out binary/other parts that shouldn't be processed
@@ -1110,7 +1174,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
# Call generic content parts processor - handles images, text, chunking, merging
- extractionResponse = await self.aiObjects.call(extractionRequest)
+ extractionResponse = await self.callAi(extractionRequest)
# Write debug file for extraction response
if extractionResponse.content:
@@ -1141,14 +1205,15 @@ If no trackable items can be identified, return: {{"kpis": []}}
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
generation_prompt = await buildGenerationPrompt(
- outputFormat, prompt, title, content_for_generation, None
+ outputFormat, prompt, title, content_for_generation, None, self.services
)
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
- "extracted_content": content_for_generation
+ "extracted_content": content_for_generation,
+ "services": self.services
}
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
@@ -1157,6 +1222,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
if promptArgs:
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
+ # Track generation progress - the looping function will update with byte progress
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
@@ -1167,7 +1233,16 @@ If no trackable items can be identified, return: {{"kpis": []}}
userPrompt=userPrompt
)
- self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
+ # Calculate final size for completion message
+ finalSize = len(generated_json.encode('utf-8')) if generated_json else 0
+ if finalSize < 1024:
+ finalSizeDisplay = f"{finalSize}B"
+ elif finalSize < 1024 * 1024:
+ finalSizeDisplay = f"{finalSize / 1024:.1f}kB"
+ else:
+ finalSizeDisplay = f"{finalSize / (1024 * 1024):.1f}MB"
+
+ self.services.chat.progressLogUpdate(aiOperationId, 0.7, f"Parsing generated JSON ({finalSizeDisplay})")
try:
extracted_json = self.services.utils.jsonExtractString(generated_json)
generated_data = json.loads(extracted_json)
@@ -1198,13 +1273,13 @@ If no trackable items can be identified, return: {{"kpis": []}}
# Create separate operation for content rendering
renderOperationId = f"{aiOperationId}_render"
- renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
+ # Use aiOperationId directly as parentOperationId (operationId, not log entry ID)
self.services.chat.progressLogStart(
renderOperationId,
"Content Rendering",
"Rendering",
f"Format: {outputFormat}",
- parentId=renderParentLogId
+ parentOperationId=aiOperationId
)
try:
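The B/kB/MB formatting above is repeated in several hunks of this file; a small helper could consolidate it. A sketch (the name formatBytes is hypothetical, not part of this diff):

# Hypothetical helper consolidating the repeated byte-display logic in this file.
def formatBytes(numBytes: int) -> str:
    if numBytes < 1024:
        return f"{numBytes}B"
    if numBytes < 1024 * 1024:
        return f"{numBytes / 1024:.1f}kB"
    return f"{numBytes / (1024 * 1024):.1f}MB"

# Usage (illustrative):
# self.services.chat.progressLogUpdate(operationId, 0.6, f"AI response received ({formatBytes(totalBytesSoFar)})")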
diff --git a/modules/services/serviceChat/mainServiceChat.py b/modules/services/serviceChat/mainServiceChat.py
index 9ff148a8..cb05279f 100644
--- a/modules/services/serviceChat/mainServiceChat.py
+++ b/modules/services/serviceChat/mainServiceChat.py
@@ -3,7 +3,6 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelUam import User, UserConnection
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage, ChatStat, ChatLog
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
-from modules.security.tokenManager import TokenManager
from modules.shared.progressLogger import ProgressLogger
logger = logging.getLogger(__name__)
@@ -306,9 +305,9 @@ class ChatService:
token = None
token_status = "unknown"
try:
- # Get a fresh token via TokenManager convenience method
+ # Get a fresh token via security service
logger.debug(f"Getting fresh token for connection {connection.id}")
- token = TokenManager().getFreshToken(connection.id)
+ token = self.services.security.getFreshToken(connection.id)
if token:
if hasattr(token, 'expiresAt') and token.expiresAt:
current_time = self.services.utils.timestampGetUtc()
@@ -389,7 +388,7 @@ class ChatService:
Token object or None if not found/expired
"""
try:
- return TokenManager().getFreshToken(connectionId)
+ return self.services.security.getFreshToken(connectionId)
except Exception as e:
logger.error(f"Error getting fresh token for connection {connectionId}: {str(e)}")
return None
@@ -1016,10 +1015,19 @@ class ChatService:
def createProgressLogger(self) -> ProgressLogger:
return ProgressLogger(self.services)
- def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentId: Optional[str] = None):
- """Wrapper for ProgressLogger.startOperation"""
+ def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None):
+ """Wrapper for ProgressLogger.startOperation
+
+ Args:
+ operationId: Unique identifier for the operation
+ serviceName: Name of the service
+ actionName: Name of the action
+ context: Additional context information
+ parentOperationId: Optional parent operation ID (operationId of parent operation)
+ The parentId in ChatLog will be set to this parentOperationId
+ """
progressLogger = self._getProgressLogger()
- return progressLogger.startOperation(operationId, serviceName, actionName, context, parentId)
+ return progressLogger.startOperation(operationId, serviceName, actionName, context, parentOperationId)
def progressLogUpdate(self, operationId: str, progress: float, statusUpdate: str = ""):
"""Wrapper for ProgressLogger.updateOperation"""
diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py
index d8db9922..e8249180 100644
--- a/modules/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/services/serviceExtraction/mainServiceExtraction.py
@@ -3,13 +3,15 @@ import uuid
import logging
import time
import asyncio
+import base64
from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult
from modules.datamodels.datamodelChat import ChatDocument
-from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions
+from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall
from modules.aicore.aicoreModelRegistry import modelRegistry
+from modules.aicore.aicoreModelSelector import modelSelector
logger = logging.getLogger(__name__)
@@ -32,13 +34,21 @@ class ExtractionService:
if model is None or model.calculatePriceUsd is None:
raise RuntimeError(f"FATAL: Required internal model '{modelDisplayName}' is not available. Check connector registration.")
- def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
+ def extractContent(
+ self,
+ documents: List[ChatDocument],
+ options: ExtractionOptions,
+ operationId: Optional[str] = None,
+ parentOperationId: Optional[str] = None
+ ) -> List[ContentExtracted]:
"""
Extract content from a list of ChatDocument objects.
Args:
documents: List of ChatDocument objects to extract content from
options: Extraction options including maxSize, chunkAllowed, mergeStrategy, etc.
+ operationId: Optional operation ID for progress logging (parent operation)
+ parentOperationId: Optional parent operation ID for hierarchical logging
Returns:
List of ContentExtracted objects, one per input document
@@ -50,125 +60,172 @@ class ExtractionService:
from modules.interfaces.interfaceDbComponentObjects import getInterface
dbInterface = getInterface()
+ totalDocs = len(documents)
+
for i, doc in enumerate(documents):
- logger.info(f"=== DOCUMENT {i}: {doc.fileName} ===")
+ logger.info(f"=== DOCUMENT {i + 1}/{totalDocs}: {doc.fileName} ===")
logger.info(f"Initial MIME type: {doc.mimeType}")
+ # Create child operation for this document if parent operationId is provided
+ docOperationId = None
+ if operationId:
+ docOperationId = f"{operationId}_doc_{i}"
+ self.services.chat.progressLogStart(
+ docOperationId,
+ "Extracting Document",
+ f"Document {i + 1}/{totalDocs}",
+ doc.fileName[:50] + "..." if len(doc.fileName) > 50 else doc.fileName,
+ parentOperationId=operationId # Use operationId as parent (not parentOperationId)
+ )
+
# Start timing for this document
startTime = time.time()
- # Resolve raw bytes for this document using interface
- documentBytes = dbInterface.getFileData(doc.fileId)
- if not documentBytes:
- raise ValueError(f"No file data found for fileId={doc.fileId}")
-
- # Convert ChatDocument to the format expected by runExtraction
- documentData = {
- "id": doc.id,
- "bytes": documentBytes,
- "fileName": doc.fileName,
- "mimeType": doc.mimeType
- }
-
- ec = runExtraction(
- extractorRegistry=self._extractorRegistry,
- chunkerRegistry=self._chunkerRegistry,
- documentBytes=documentData["bytes"],
- fileName=documentData["fileName"],
- mimeType=documentData["mimeType"],
- options=options
- )
-
- # Log content parts metadata
- logger.debug(f"Content parts: {len(ec.parts)}")
- for j, part in enumerate(ec.parts):
- logger.debug(f" Part {j}: {part.typeGroup} ({part.mimeType}) - {len(part.data) if part.data else 0} chars")
- if part.metadata:
- logger.debug(f" Metadata: {part.metadata}")
-
- # Attach document id and MIME type to parts if missing
- for p in ec.parts:
- if "documentId" not in p.metadata:
- p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
- if "documentMimeType" not in p.metadata:
- p.metadata["documentMimeType"] = documentData["mimeType"]
-
- # Log chunking information
- chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
- if chunkedParts:
- logger.debug(f"=== CHUNKING RESULTS ===")
- logger.debug(f"Total parts: {len(ec.parts)}")
- logger.debug(f"Chunked parts: {len(chunkedParts)}")
- for chunk in chunkedParts:
- logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
- else:
- logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
-
- # Calculate timing and emit stats
- endTime = time.time()
- processingTime = endTime - startTime
- bytesSent = len(documentBytes)
- bytesReceived = sum(len(part.data) if part.data else 0 for part in ec.parts)
-
- # Emit stats for extraction operation
-
- # Use internal extraction model for pricing
- modelDisplayName = "Internal Document Extractor"
- model = modelRegistry.getModel(modelDisplayName)
- # Hard fail if model is missing; caller must ensure connectors are registered
- if model is None or model.calculatePriceUsd is None:
- raise RuntimeError(f"Pricing model not available: {modelDisplayName}")
- priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
-
- # Create AiCallResponse with real calculation
- # Use model.name for the response (API identifier), not displayName
- aiResponse = AiCallResponse(
- content="", # No content for extraction stats needed
- modelName=model.name,
- priceUsd=priceUsd,
- processingTime=processingTime,
- bytesSent=bytesSent,
- bytesReceived=bytesReceived,
- errorCount=0
- )
-
- self.services.chat.storeWorkflowStat(
- self.services.workflow,
- aiResponse,
- f"extraction.process.{doc.mimeType}"
- )
-
- # Write extraction results to debug file
try:
- from modules.shared.debugLogger import writeDebugFile
- import json
- # Create summary of extraction results for debug
- extractionSummary = {
- "documentName": doc.fileName,
- "documentMimeType": doc.mimeType,
- "partsCount": len(ec.parts),
- "parts": []
- }
- for part in ec.parts:
- partSummary = {
- "typeGroup": part.typeGroup,
- "mimeType": part.mimeType,
- "label": part.label,
- "dataLength": len(part.data) if part.data else 0,
- "metadata": part.metadata
- }
- # Include data preview for small parts (first 500 chars)
- if part.data and len(part.data) <= 500:
- partSummary["dataPreview"] = part.data[:500]
- elif part.data:
- partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
- extractionSummary["parts"].append(partSummary)
+ if docOperationId:
+ self.services.chat.progressLogUpdate(docOperationId, 0.1, "Loading document data")
- writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
- except Exception as e:
- logger.debug(f"Failed to write extraction debug file: {str(e)}")
+ # Resolve raw bytes for this document using interface
+ documentBytes = dbInterface.getFileData(doc.fileId)
+ if not documentBytes:
+ if docOperationId:
+ self.services.chat.progressLogFinish(docOperationId, False)
+ raise ValueError(f"No file data found for fileId={doc.fileId}")
+
+ if docOperationId:
+ self.services.chat.progressLogUpdate(docOperationId, 0.2, "Running extraction pipeline")
+
+ # Convert ChatDocument to the format expected by runExtraction
+ documentData = {
+ "id": doc.id,
+ "bytes": documentBytes,
+ "fileName": doc.fileName,
+ "mimeType": doc.mimeType
+ }
+
+ ec = runExtraction(
+ extractorRegistry=self._extractorRegistry,
+ chunkerRegistry=self._chunkerRegistry,
+ documentBytes=documentData["bytes"],
+ fileName=documentData["fileName"],
+ mimeType=documentData["mimeType"],
+ options=options
+ )
+
+ if docOperationId:
+ self.services.chat.progressLogUpdate(docOperationId, 0.7, f"Extracted {len(ec.parts)} parts")
+
+ # Log content parts metadata
+ logger.debug(f"Content parts: {len(ec.parts)}")
+ for j, part in enumerate(ec.parts):
+ logger.debug(f" Part {j + 1}/{len(ec.parts)}: {part.typeGroup} ({part.mimeType}) - {len(part.data) if part.data else 0} chars")
+ if part.metadata:
+ logger.debug(f" Metadata: {part.metadata}")
- results.append(ec)
+ # Attach document id and MIME type to parts if missing
+ for p in ec.parts:
+ if "documentId" not in p.metadata:
+ p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
+ if "documentMimeType" not in p.metadata:
+ p.metadata["documentMimeType"] = documentData["mimeType"]
+
+ # Log chunking information
+ chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
+ if chunkedParts:
+ logger.debug(f"=== CHUNKING RESULTS ===")
+ logger.debug(f"Total parts: {len(ec.parts)}")
+ logger.debug(f"Chunked parts: {len(chunkedParts)}")
+ for chunk in chunkedParts:
+ logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
+ else:
+ logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
+
+ if docOperationId:
+ self.services.chat.progressLogUpdate(docOperationId, 0.9, f"Processing complete: {len(ec.parts)} parts extracted")
+
+ # Calculate timing and emit stats
+ endTime = time.time()
+ processingTime = endTime - startTime
+ bytesSent = len(documentBytes)
+ bytesReceived = sum(len(part.data) if part.data else 0 for part in ec.parts)
+
+ # Emit stats for extraction operation
+
+ # Use internal extraction model for pricing
+ modelDisplayName = "Internal Document Extractor"
+ model = modelRegistry.getModel(modelDisplayName)
+ # Hard fail if model is missing; caller must ensure connectors are registered
+ if model is None or model.calculatePriceUsd is None:
+ if docOperationId:
+ self.services.chat.progressLogFinish(docOperationId, False)
+ raise RuntimeError(f"Pricing model not available: {modelDisplayName}")
+ priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
+
+ # Create AiCallResponse with real calculation
+ # Use model.name for the response (API identifier), not displayName
+ aiResponse = AiCallResponse(
+ content="", # No content for extraction stats needed
+ modelName=model.name,
+ priceUsd=priceUsd,
+ processingTime=processingTime,
+ bytesSent=bytesSent,
+ bytesReceived=bytesReceived,
+ errorCount=0
+ )
+
+ self.services.chat.storeWorkflowStat(
+ self.services.workflow,
+ aiResponse,
+ f"extraction.process.{doc.mimeType}"
+ )
+
+ # Write extraction results to debug file
+ try:
+ from modules.shared.debugLogger import writeDebugFile
+ import json
+ # Create summary of extraction results for debug
+ extractionSummary = {
+ "documentName": doc.fileName,
+ "documentMimeType": doc.mimeType,
+ "partsCount": len(ec.parts),
+ "parts": []
+ }
+ for part in ec.parts:
+ partSummary = {
+ "typeGroup": part.typeGroup,
+ "mimeType": part.mimeType,
+ "label": part.label,
+ "dataLength": len(part.data) if part.data else 0,
+ "metadata": part.metadata
+ }
+ # Include data preview for small parts (first 500 chars)
+ if part.data and len(part.data) <= 500:
+ partSummary["dataPreview"] = part.data[:500]
+ elif part.data:
+ partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
+ extractionSummary["parts"].append(partSummary)
+
+ writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
+ except Exception as e:
+ logger.debug(f"Failed to write extraction debug file: {str(e)}")
+
+ results.append(ec)
+
+ # Finish document operation successfully
+ if docOperationId:
+ self.services.chat.progressLogFinish(docOperationId, True)
+
+ except Exception as e:
+ logger.error(f"Error extracting content from document {i + 1}/{totalDocs} ({doc.fileName}): {str(e)}")
+ if docOperationId:
+ try:
+ self.services.chat.progressLogFinish(docOperationId, False)
+ except Exception:
+ pass # Don't fail on progress logging errors
+ # Continue with next document instead of failing completely
+ # This allows parallel processing to continue even if one document fails
+ continue
return results
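With the per-document child operations above, the progress log forms a hierarchy encoded in flat IDs; an illustrative tree (IDs are examples, not from the diff):

# Illustrative operation hierarchy (example IDs):
# extract_wf42_1700000000               <- operationId (parent)
#     extract_wf42_1700000000_doc_0     <- per-document child, parentOperationId = parent
#     extract_wf42_1700000000_doc_1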
@@ -479,7 +536,8 @@ class ExtractionService:
# Extract content WITHOUT chunking
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.1, f"Extracting content from {len(documents)} documents")
- extractionResult = self.extractContent(documents, extractionOptions)
+ # Pass operationId as parentOperationId for hierarchical logging
+ extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=parentOperationId)
if not isinstance(extractionResult, list):
if operationId:
@@ -489,16 +547,14 @@ class ExtractionService:
# Process parts (not chunks) with model-aware AI calls
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.3, f"Processing {len(extractionResult)} extracted content parts")
- # Get parent log ID for part operations
- parentLogId = None
- if operationId:
- parentLogId = self.services.chat.getOperationLogId(operationId)
- partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentLogId)
+ # Use parent operation ID directly (parentId should be operationId, not log entry ID)
+ parentOperationId = operationId # Use the parent's operationId directly
+ partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentOperationId)
# Merge results using existing merging system
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.9, f"Merging {len(partResults)} part results")
- mergedContent = self._mergePartResults(partResults, options)
+ mergedContent = self.mergePartResults(partResults, options)
# Save merged extraction content to debug
self.services.utils.writeDebugFile(mergedContent or '', "extraction_merged_text")
@@ -520,7 +576,7 @@ class ExtractionService:
aiObjects: Any,
options: Optional[AiCallOptions] = None,
operationId: Optional[str] = None,
- parentLogId: Optional[str] = None
+ parentOperationId: Optional[str] = None
) -> List[PartResult]:
"""Process content parts with model-aware chunking and proper mapping."""
@@ -567,7 +623,7 @@ class ExtractionService:
"Content Processing",
f"Part {part_index + 1}",
f"Type: {part.typeGroup}",
- parentId=parentLogId
+ parentOperationId=parentOperationId
)
try:
@@ -660,54 +716,473 @@ class ExtractionService:
logger.info(f"Completed processing {len(processedResults)} parts")
return processedResults
- def _mergePartResults(
+ def _convertToContentParts(
+ self, partResults: Union[List[PartResult], List[AiCallResponse]]
+ ) -> List[ContentPart]:
+ """Convert part results to ContentParts (internal helper for consolidation).
+
+ Handles both PartResult (from extraction workflow) and AiCallResponse (from content parts processing).
+ """
+ content_parts = []
+
+ if not partResults:
+ return content_parts
+
+ # Detect input type and convert accordingly
+ if isinstance(partResults[0], PartResult):
+ # Existing logic for PartResult (from processDocumentsPerChunk)
+ for part_result in partResults:
+ content_part = ContentPart(
+ id=part_result.originalPart.id,
+ parentId=part_result.originalPart.parentId,
+ label=part_result.originalPart.label,
+ typeGroup=part_result.originalPart.typeGroup, # Use original typeGroup
+ mimeType=part_result.originalPart.mimeType,
+ data=part_result.aiResult, # Use AI result as data
+ metadata={
+ **part_result.originalPart.metadata,
+ "aiResult": True,
+ "partIndex": part_result.partIndex,
+ "documentId": part_result.documentId,
+ "processingTime": part_result.processingTime,
+ "success": part_result.metadata.get("success", False)
+ }
+ )
+ content_parts.append(content_part)
+ elif isinstance(partResults[0], AiCallResponse):
+ # Logic from interfaceAiObjects (from content parts processing)
+ for i, result in enumerate(partResults):
+ if result.content:
+ content_part = ContentPart(
+ id=str(uuid.uuid4()),
+ parentId=None,
+ label=f"ai_result_{i}",
+ typeGroup="text", # Default to text for AI results
+ mimeType="text/plain",
+ data=result.content,
+ metadata={
+ "aiResult": True,
+ "modelName": result.modelName,
+ "priceUsd": result.priceUsd,
+ "processingTime": result.processingTime,
+ "bytesSent": result.bytesSent,
+ "bytesReceived": result.bytesReceived
+ }
+ )
+ content_parts.append(content_part)
+
+ return content_parts
+
+ def mergePartResults(
self,
- partResults: List[PartResult],
+ partResults: Union[List[PartResult], List[AiCallResponse]],
options: Optional[AiCallOptions] = None
- ) -> str:
- """Merge part results using existing sophisticated merging system."""
+ ) -> str:
+ """Unified merge for both PartResult and AiCallResponse.
+
+ Consolidated from both interfaceAiObjects.py and existing serviceExtraction method.
+ """
if not partResults:
return ""
- # Convert PartResults back to ContentParts for existing merger system
- content_parts = []
- for part_result in partResults:
- # Create ContentPart from PartResult with proper typeGroup
- content_part = ContentPart(
- id=part_result.originalPart.id,
- parentId=part_result.originalPart.parentId,
- label=part_result.originalPart.label,
- typeGroup=part_result.originalPart.typeGroup, # Use original typeGroup
- mimeType=part_result.originalPart.mimeType,
- data=part_result.aiResult, # Use AI result as data
- metadata={
- **part_result.originalPart.metadata,
- "aiResult": True,
- "partIndex": part_result.partIndex,
- "documentId": part_result.documentId,
- "processingTime": part_result.processingTime,
- "success": part_result.metadata.get("success", False)
- }
+ # Convert to ContentParts using unified helper
+ content_parts = self._convertToContentParts(partResults)
+
+ # Determine merge strategy based on input type
+ if isinstance(partResults[0], PartResult):
+ # Use strategy for extraction workflow (group by document, order by part index)
+ merge_strategy = MergeStrategy(
+ useIntelligentMerging=True,
+ groupBy="documentId", # Group by document
+ orderBy="partIndex", # Order by part index
+ mergeType="concatenate"
+ )
+ else:
+ # Default strategy for content parts workflow
+ merge_strategy = MergeStrategy(
+ useIntelligentMerging=True,
+ groupBy="typeGroup",
+ orderBy="id",
+ mergeType="concatenate"
)
- content_parts.append(content_part)
- # Use existing merging strategy from options
- merge_strategy = MergeStrategy(
- useIntelligentMerging=True,
- groupBy="documentId", # Group by document
- orderBy="partIndex", # Order by part index
- mergeType="concatenate"
- )
-
-
- # Apply existing merging logic using the sophisticated merging system
- from modules.interfaces.interfaceAiObjects import applyMerging
+ # Apply merging (keep the local import - avoids a circular dependency with interfaceAiObjects)
+ from modules.interfaces.interfaceAiObjects import applyMerging
merged_parts = applyMerging(content_parts, merge_strategy)
- # Convert merged parts back to final string
+ # Convert back to string
final_content = "\n\n".join([part.data for part in merged_parts])
- logger.info(f"Merged {len(partResults)} parts using existing sophisticated merging system")
+ logger.info(f"Merged {len(partResults)} parts using unified merging system")
return final_content.strip()
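Both input shapes now route through one public method, dispatching on the type of the first element; a usage sketch with illustrative variable names:

# Illustrative calls; the list's element type selects the merge strategy.
mergedDocs = extractionService.mergePartResults(partResults)  # List[PartResult] -> group by documentId
mergedAi = extractionService.mergePartResults(aiResponses)    # List[AiCallResponse] -> group by typeGroup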
+ async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
+ """Chunk a content part based on model capabilities, accounting for prompt, system message overhead, and maxTokens output.
+
+ Moved from interfaceAiObjects.py - model-aware chunking for AI processing.
+ Complementary to existing size-based chunking in extraction pipeline.
+ """
+ # Calculate model-specific chunk sizes
+ modelContextTokens = model.contextLength # Total context in tokens
+ modelMaxOutputTokens = model.maxTokens # Maximum output tokens
+
+ # Reserve tokens for:
+ # 1. Prompt (user message)
+ promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
+
+ # 2. System message wrapper ("Context from documents:\n")
+ systemMessageTokens = 10 # ~40 bytes = 10 tokens
+
+ # 3. Max output tokens (model will reserve space for completion)
+ outputTokens = modelMaxOutputTokens
+
+ # 4. JSON structure and message overhead (~100 tokens)
+ messageOverheadTokens = 100
+
+ # Total reserved tokens = input overhead + output reservation
+ totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens
+
+ # Available tokens for content = context length - reserved tokens
+ # Use 80% of available for safety margin
+ availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
+
+ # Ensure we have at least some space
+ if availableContentTokens < 100:
+ logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens")
+ availableContentTokens = max(100, int(modelContextTokens * 0.1)) # Fallback to 10% of context
+
+ # Convert tokens to bytes (1 token ≈ 4 bytes)
+ availableContentBytes = availableContentTokens * 4
+
+ logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)")
+
+ # Use 70% of available content bytes for text chunks (conservative)
+ textChunkSize = int(availableContentBytes * 0.7)
+ imageChunkSize = int(availableContentBytes * 0.8) # 80% for image chunks
+
+ # Build chunking options
+ chunkingOptions = {
+ "textChunkSize": textChunkSize,
+ "imageChunkSize": imageChunkSize,
+ "maxSize": availableContentBytes,
+ "chunkAllowed": True
+ }
+
+ # Get appropriate chunker (uses existing ChunkerRegistry ✅)
+ chunker = self._chunkerRegistry.resolve(contentPart.typeGroup)
+
+ if not chunker:
+ logger.warning(f"No chunker found for typeGroup: {contentPart.typeGroup}")
+ return []
+
+ # Chunk the content part
+ try:
+ chunks = chunker.chunk(contentPart, chunkingOptions)
+ logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part")
+ return chunks
+ except Exception as e:
+ logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
+ return []
+
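A worked instance of the budget above, under the 1 token ≈ 4 bytes heuristic (the model limits are illustrative assumptions):

# Worked example (illustrative model limits, not from the diff):
# contextLength = 128_000 tokens, maxTokens = 4_096, prompt = 2_000 bytes ≈ 500 tokens
# reserved  = 500 + 10 + 100 + 4_096       = 4_706 tokens
# available = int((128_000 - 4_706) * 0.8) = 98_635 tokens -> 394_540 bytes
# textChunkSize  = int(394_540 * 0.7)      = 276_178 bytes per text chunk
# imageChunkSize = int(394_540 * 0.8)      = 315_632 bytes per image chunk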
+ async def processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList, aiObjects, progressCallback=None) -> AiCallResponse:
+ """Process a single content part with model-aware chunking and fallback.
+
+ Moved from interfaceAiObjects.py - orchestrates chunking and merging.
+ Calls aiObjects._callWithModel() for actual AI calls.
+ """
+ lastError = None
+
+ # Check if this is an image - Vision models need special handling
+ isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))
+
+ # Determine the correct operation type based on content type
+ actualOperationType = options.operationType
+ if isImage:
+ actualOperationType = OperationTypeEnum.IMAGE_ANALYSE
+ # Get vision-capable models for images
+ availableModels = modelRegistry.getAvailableModels()
+ visionFailoverList = modelSelector.getFailoverModelList(prompt, "", AiCallOptions(operationType=actualOperationType), availableModels)
+ if visionFailoverList:
+ logger.debug(f"Using {len(visionFailoverList)} vision-capable models for image processing")
+ failoverModelList = visionFailoverList
+
+ for attempt, model in enumerate(failoverModelList):
+ try:
+ logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
+
+ # Special handling for images with Vision models
+ if isImage and hasattr(model, 'functionCall'):
+ try:
+ if not contentPart.data:
+ raise ValueError("Image content part has no data")
+
+ mimeType = contentPart.mimeType or "image/jpeg"
+ if not mimeType.startswith("image/"):
+ raise ValueError(f"Invalid mimeType for image: {mimeType}")
+
+ # Prepare base64 data
+ if isinstance(contentPart.data, str):
+ try:
+ base64.b64decode(contentPart.data, validate=True)
+ base64Data = contentPart.data
+ except Exception as e:
+ raise ValueError(f"Invalid base64 data in contentPart: {str(e)}")
+ elif isinstance(contentPart.data, bytes):
+ base64Data = base64.b64encode(contentPart.data).decode('utf-8')
+ else:
+ raise ValueError(f"Unsupported data type for image: {type(contentPart.data)}")
+
+ imageDataUrl = f"data:{mimeType};base64,{base64Data}"
+
+ modelCall = AiModelCall(
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": prompt or ""},
+ {
+ "type": "image_url",
+ "image_url": {"url": imageDataUrl}
+ }
+ ]
+ }
+ ],
+ model=model,
+ options=AiCallOptions(operationType=actualOperationType)
+ )
+
+ modelResponse = await model.functionCall(modelCall)
+
+ if not modelResponse.success:
+ raise ValueError(f"Model call failed: {modelResponse.error}")
+
+ logger.info(f"✅ Image content part processed successfully with model: {model.name}")
+
+ processingTime = getattr(modelResponse, 'processingTime', None) or 0.0
+
+ return AiCallResponse(
+ content=modelResponse.content,
+ modelName=model.name,
+ priceUsd=0.0,
+ processingTime=processingTime,
+ bytesSent=0,
+ bytesReceived=0,
+ errorCount=0
+ )
+ except Exception as e:
+ lastError = e
+ logger.warning(f"❌ Image processing failed with model {model.name}: {str(e)}")
+
+ if attempt < len(failoverModelList) - 1:
+ logger.info(f"🔄 Trying next fallback model for image processing...")
+ continue
+ else:
+ logger.error(f"💥 All {len(failoverModelList)} models failed for image processing")
+ raise
+
+ # For non-image parts, check if part fits in model context
+ partSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0
+
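+ # Mirror the token budgeting in chunkContentPartForAi to decide whether this part fits without chunking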
+ modelContextTokens = model.contextLength
+ modelMaxOutputTokens = model.maxTokens
+
+ promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
+ systemMessageTokens = 10
+ outputTokens = modelMaxOutputTokens
+ messageOverheadTokens = 100
+ totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens
+
+ availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
+ if availableContentTokens < 100:
+ availableContentTokens = max(100, int(modelContextTokens * 0.1))
+
+ availableContentBytes = availableContentTokens * 4
+
+ logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes")
+
+ if partSize <= availableContentBytes:
+ # Part fits - call AI directly via aiObjects interface
+ response = await aiObjects._callWithModel(model, prompt, contentPart.data, options)
+ logger.info(f"✅ Content part processed successfully with model: {model.name}")
+ return response
+ else:
+ # Part too large - chunk it
+ chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt)
+ if not chunks:
+ raise ValueError(f"Failed to chunk content part for model {model.name}")
+
+ logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}")
+
+ if progressCallback:
+ progressCallback(0.0, f"Starting to process {len(chunks)} chunks")
+
+ chunkResults = []
+ for idx, chunk in enumerate(chunks):
+ chunkNum = idx + 1
+ chunkData = chunk.get('data', '')
+ logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}")
+
+ if progressCallback:
+ progressCallback(chunkNum / len(chunks), f"Processing chunk {chunkNum}/{len(chunks)}")
+
+ try:
+ chunkResponse = await aiObjects._callWithModel(model, prompt, chunkData, options)
+ chunkResults.append(chunkResponse)
+ logger.info(f"✅ Chunk {chunkNum}/{len(chunks)} processed successfully")
+
+ if progressCallback:
+ progressCallback(chunkNum / len(chunks), f"Chunk {chunkNum}/{len(chunks)} processed")
+ except Exception as e:
+ logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}")
+ raise
+
+ # Merge chunk results
+ mergedContent = self.mergeChunkResults(chunkResults)
+
+ logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
+ return AiCallResponse(
+ content=mergedContent,
+ modelName=model.name,
+ priceUsd=sum(r.priceUsd for r in chunkResults),
+ processingTime=sum(r.processingTime for r in chunkResults),
+ bytesSent=sum(r.bytesSent for r in chunkResults),
+ bytesReceived=sum(r.bytesReceived for r in chunkResults),
+ errorCount=sum(r.errorCount for r in chunkResults)
+ )
+
+ except Exception as e:
+ lastError = e
+ errorMsg = str(e) or type(e).__name__
+ logger.warning(f"❌ Model {model.name} failed for content part: {errorMsg}", exc_info=True)
+
+ if attempt < len(failoverModelList) - 1:
+ logger.info(f"🔄 Trying next failover model...")
+ continue
+ else:
+ logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
+ break
+
+ # All models failed
+ return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)
+
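The image branch above assembles an OpenAI-style multimodal message from raw bytes. A self-contained sketch of that shape (helper name and defaults are mine, not the codebase's):

    import base64

    def buildImageMessage(prompt: str, data: bytes, mimeType: str = "image/png") -> dict:
        # Encode raw bytes and wrap them in a multimodal user message,
        # matching the structure built in processContentPartWithFallback above.
        base64Data = base64.b64encode(data).decode("utf-8")
        imageDataUrl = f"data:{mimeType};base64,{base64Data}"
        return {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": imageDataUrl}},
            ],
        }
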
+ def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
+ """Create an error response."""
+ return AiCallResponse(
+ content=errorMsg,
+ modelName="error",
+ priceUsd=0.0,
+ processingTime=0.0,
+ bytesSent=inputBytes,
+ bytesReceived=outputBytes,
+ errorCount=1
+ )
+
+ async def processContentPartsWithAi(
+ self,
+ request: AiCallRequest,
+ aiObjects, # Pass interface for AI calls
+ progressCallback=None
+ ) -> AiCallResponse:
+ """Process content parts with model-aware chunking and AI calls.
+
+ Moved from interfaceAiObjects.callWithContentParts() - entry point for content parts processing.
+ """
+ prompt = request.prompt
+ options = request.options
+ contentParts = request.contentParts
+
+ # Get failover models
+ availableModels = modelRegistry.getAvailableModels()
+ failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
+
+ if not failoverModelList:
+ return self._createErrorResponse("No suitable models found", 0, 0)
+
+ # Process each content part
+ allResults = []
+ for contentPart in contentParts:
+ partResult = await self.processContentPartWithFallback(
+ contentPart, prompt, options, failoverModelList, aiObjects, progressCallback
+ )
+ allResults.append(partResult)
+
+ # Merge all results using unified mergePartResults
+ mergedContent = self.mergePartResults(allResults)
+
+ return AiCallResponse(
+ content=mergedContent,
+ modelName="multiple",
+ priceUsd=sum(r.priceUsd for r in allResults),
+ processingTime=sum(r.processingTime for r in allResults),
+ bytesSent=sum(r.bytesSent for r in allResults),
+ bytesReceived=sum(r.bytesReceived for r in allResults),
+ errorCount=sum(r.errorCount for r in allResults)
+ )
+
+
+# Module-level function for use by subPipeline and ExtractionService
+def applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[ContentPart]:
+ """Apply merging strategy to parts with intelligent token-aware merging.
+
+ Moved from interfaceAiObjects.py to resolve dependency violations.
+ Can be used as module-level function or called from ExtractionService methods.
+ """
+ logger.debug(f"applyMerging called with {len(parts)} parts")
+
+ # Import merging dependencies locally to avoid circular imports
+ from .merging.mergerText import TextMerger
+ from .merging.mergerTable import TableMerger
+ from .merging.mergerDefault import DefaultMerger
+ from .subMerger import IntelligentTokenAwareMerger
+
+ # Check if intelligent merging is enabled
+ if strategy.useIntelligentMerging:
+ modelCapabilities = strategy.capabilities or {}
+ subMerger = IntelligentTokenAwareMerger(modelCapabilities)
+
+ # Use intelligent merging for all parts
+ merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")
+
+ # Calculate and log optimization stats
+ stats = subMerger.calculateOptimizationStats(parts, merged)
+ logger.info(f"🧠 Intelligent merging stats: {stats}")
+ logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")
+
+ return merged
+
+ # Fallback to traditional merging
+ textMerger = TextMerger()
+ tableMerger = TableMerger()
+ defaultMerger = DefaultMerger()
+
+ # Group by typeGroup
+ textParts = [p for p in parts if p.typeGroup == "text"]
+ tableParts = [p for p in parts if p.typeGroup == "table"]
+ structureParts = [p for p in parts if p.typeGroup == "structure"]
+ otherParts = [p for p in parts if p.typeGroup not in ("text", "table", "structure")]
+
+ logger.debug(f"Grouped - text: {len(textParts)}, table: {len(tableParts)}, structure: {len(structureParts)}, other: {len(otherParts)}")
+
+ merged: List[ContentPart] = []
+
+ if textParts:
+ textMerged = textMerger.merge(textParts, strategy)
+ logger.debug(f"TextMerger merged {len(textParts)} parts into {len(textMerged)} parts")
+ merged.extend(textMerged)
+ if tableParts:
+ tableMerged = tableMerger.merge(tableParts, strategy)
+ logger.debug(f"TableMerger merged {len(tableParts)} parts into {len(tableMerged)} parts")
+ merged.extend(tableMerged)
+ if structureParts:
+ # For now, treat structure like text
+ structureMerged = textMerger.merge(structureParts, strategy)
+ logger.debug(f"StructureMerger merged {len(structureParts)} parts into {len(structureMerged)} parts")
+ merged.extend(structureMerged)
+ if otherParts:
+ otherMerged = defaultMerger.merge(otherParts, strategy)
+ logger.debug(f"DefaultMerger merged {len(otherParts)} parts into {len(otherMerged)} parts")
+ merged.extend(otherMerged)
+
+ logger.debug(f"applyMerging returning {len(merged)} parts")
+ return merged
+
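The mergers imported above all appear to follow a merge(parts, strategy) contract. A hypothetical minimal merger under that assumption (the real TextMerger is more involved):

    class ConcatTextMerger:
        """Hypothetical merger: concatenate text parts into the first part."""
        def merge(self, parts, strategy):
            if not parts:
                return []
            first = parts[0]
            first.data = "\n\n".join(p.data for p in parts if p.data)
            return [first]
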
diff --git a/modules/services/serviceExtraction/subPipeline.py b/modules/services/serviceExtraction/subPipeline.py
index 510bcca8..a1ad6b04 100644
--- a/modules/services/serviceExtraction/subPipeline.py
+++ b/modules/services/serviceExtraction/subPipeline.py
@@ -34,7 +34,8 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
# Apply merging strategy if provided (preserve existing logic)
if options.mergeStrategy:
- from modules.interfaces.interfaceAiObjects import applyMerging
+ # Use module-level applyMerging function
+ from .mainServiceExtraction import applyMerging
parts = applyMerging(parts, options.mergeStrategy)
return ContentExtracted(id=makeId(), parts=parts)
diff --git a/modules/services/serviceExtraction/subPromptBuilderExtraction.py b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
index 7b91579a..9bd503bc 100644
--- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py
+++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
@@ -99,6 +99,18 @@ async def buildExtractionPrompt(
# Parse extraction intent if AI service is available
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
+ # Extract user language for document language instruction
+ userLanguage = 'en' # Default fallback
+ if services:
+ try:
+ # Prefer detected language if available
+ if hasattr(services, 'currentUserLanguage') and services.currentUserLanguage:
+ userLanguage = services.currentUserLanguage
+ elif hasattr(services, 'user') and services.user and hasattr(services.user, 'language'):
+ userLanguage = services.user.language
+ except Exception:
+ pass
+
# Build base prompt with clear user prompt markers
sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt
adaptive_prompt = f"""
@@ -114,6 +126,8 @@ You are a document processing assistant that extracts and structures content fro
TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.
+LANGUAGE REQUIREMENT: All extracted content must be in the language '{userLanguage}'. Extract and preserve content in this language.
+
{extraction_intent}
REQUIREMENTS:
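The same detected-language/profile/default fallback reappears in buildGenerationPrompt further down. A shared helper along these lines (hypothetical, not part of this patch) would keep the two call sites in sync:

    def resolveUserLanguage(services, default: str = "en") -> str:
        # Prefer the detected language, then the user profile, then the default.
        if not services:
            return default
        try:
            if getattr(services, "currentUserLanguage", None):
                return services.currentUserLanguage
            user = getattr(services, "user", None)
            if user is not None and getattr(user, "language", None):
                return user.language
        except Exception:
            pass
        return default
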
diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
index 069601bc..bea6887c 100644
--- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
+++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
@@ -362,7 +362,7 @@ class BaseRenderer(ABC):
self.logger.debug(f"AI Style Template Prompt:")
self.logger.debug(f"{styleTemplate}")
- response = await aiService.aiObjects.call(request)
+ response = await aiService.callAi(request)
# Save styling prompt and response to debug
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py
index ca51a73a..37b72c45 100644
--- a/modules/services/serviceGeneration/renderers/rendererImage.py
+++ b/modules/services/serviceGeneration/renderers/rendererImage.py
@@ -205,7 +205,7 @@ Return only the compressed prompt, no explanations.
)
)
- response = await aiService.aiObjects.call(request)
+ response = await aiService.callAi(request)
compressed = response.content.strip()
# Validate the compressed prompt
diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py
index 2c6ea293..053b72de 100644
--- a/modules/services/serviceGeneration/renderers/rendererPdf.py
+++ b/modules/services/serviceGeneration/renderers/rendererPdf.py
@@ -227,7 +227,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning("AI service not properly configured, using defaults")
return default_styles
- response = await ai_service.aiObjects.call(request)
+ response = await ai_service.callAi(request)
# Check if response is valid
if not response:
diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py
index 8f10a9a2..d1cd2090 100644
--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
@@ -424,7 +424,7 @@ JSON ONLY. NO OTHER TEXT."""
self.logger.warning("AI service not properly configured, using defaults")
return default_styles
- response = await aiService.aiObjects.call(request)
+ response = await aiService.callAi(request)
# Check if response is valid
if not response:
diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py
index b797aba3..37fd7470 100644
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@@ -346,7 +346,7 @@ class RendererXlsx(BaseRenderer):
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
- response = await aiService.aiObjects.call(request)
+ response = await aiService.callAi(request)
import json
import re
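All five renderers now call aiService.callAi instead of reaching into aiService.aiObjects. The facade itself is not in this patch; presumably it is a thin delegation, roughly (hypothetical sketch):

    async def callAi(self, request):
        # Hypothetical facade on AiService: delegate to the underlying
        # aiObjects interface so renderers no longer depend on its internals.
        return await self.aiObjects.call(request)
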
diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
index 91011539..d593a626 100644
--- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py
+++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
@@ -16,7 +16,8 @@ async def buildGenerationPrompt(
userPrompt: str,
title: str,
extracted_content: str = None,
- continuationContext: Dict[str, Any] = None
+ continuationContext: Dict[str, Any] = None,
+ services: Any = None
) -> str:
"""
Build the unified generation prompt using a single JSON template.
@@ -28,10 +29,23 @@ async def buildGenerationPrompt(
title: Title for the document
extracted_content: Optional extracted content from documents to prepend to prompt
continuationContext: Optional context from previous generation for continuation
+ services: Optional services instance for accessing user language
Returns:
Complete generation prompt string
"""
+ # Extract user language for document language instruction
+ userLanguage = 'en' # Default fallback
+ if services:
+ try:
+ # Prefer detected language if available
+ if hasattr(services, 'currentUserLanguage') and services.currentUserLanguage:
+ userLanguage = services.currentUserLanguage
+ elif hasattr(services, 'user') and services.user and hasattr(services.user, 'language'):
+ userLanguage = services.user.language
+ except Exception:
+ pass
+
# Create a template - let AI generate title if not provided
titleValue = title if title else "Generated Document"
jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)
@@ -82,6 +96,8 @@ END OF USER REQUEST / USER PROMPT
⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.
+LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language.
+
{continuationText}
JSON structure template:
@@ -92,6 +108,7 @@ Rules:
- Reference elements shown above are ALREADY DELIVERED - DO NOT repeat them.
- Generate ONLY the remaining content that comes AFTER the reference elements.
- DO NOT regenerate the entire JSON structure - start directly with what comes next.
+- All content must be in the language '{userLanguage}'.
- Output JSON only; no markdown fences or extra text.
Continue generating the remaining content now.
@@ -124,6 +141,8 @@ EXTRACTED CONTENT FROM DOCUMENTS:
END OF EXTRACTED CONTENT
{'='*80}
+LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language. If the extracted content is in a different language, translate it to '{userLanguage}' while preserving the structure and meaning.
+
Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
@@ -136,6 +155,7 @@ Instructions:
- Do NOT reuse example section IDs; create your own.
- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
+- All content must be in the language '{userLanguage}'.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
@@ -151,6 +171,8 @@ USER REQUEST / USER PROMPT:
END OF USER REQUEST / USER PROMPT
{'='*80}
+LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language.
+
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
JSON structure template:
@@ -160,6 +182,7 @@ Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
+- All content must be in the language '{userLanguage}'.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
diff --git a/modules/services/serviceSecurity/mainServiceSecurity.py b/modules/services/serviceSecurity/mainServiceSecurity.py
new file mode 100644
index 00000000..76bae54e
--- /dev/null
+++ b/modules/services/serviceSecurity/mainServiceSecurity.py
@@ -0,0 +1,128 @@
+"""
+Security service for token management operations.
+Provides centralized access to token refresh and management functionality.
+"""
+
+import logging
+from typing import Optional, Callable
+
+from modules.datamodels.datamodelSecurity import Token
+from modules.security.tokenManager import TokenManager
+
+logger = logging.getLogger(__name__)
+
+
+class SecurityService:
+ """Security service providing token management operations."""
+
+ def __init__(self, services):
+ """Initialize security service with service center access.
+
+ Args:
+ services: Service center instance providing access to interfaces
+ """
+ self.services = services
+ self._tokenManager = TokenManager()
+
+ def getFreshToken(self, connectionId: str, secondsBeforeExpiry: int = 30 * 60) -> Optional[Token]:
+ """Get a fresh token for a connection, refreshing when expiring soon.
+
+ Reads the latest stored token via interface layer, then
+ uses ensureFreshToken to refresh if needed and persists the refreshed
+ token via interface layer.
+
+ Args:
+ connectionId: ID of the connection to get token for
+ secondsBeforeExpiry: Threshold window to proactively refresh (default: 30 minutes)
+
+ Returns:
+ Token object or None if not found/expired
+ """
+ try:
+ # Use interface from services instead of getRootInterface()
+ interfaceDbApp = self.services.interfaceDbApp
+
+ token = interfaceDbApp.getConnectionToken(connectionId)
+ if not token:
+ return None
+
+ return self._tokenManager.ensureFreshToken(
+ token,
+ secondsBeforeExpiry=secondsBeforeExpiry,
+ saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t)
+ )
+ except Exception as e:
+ logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
+ return None
+
+ def refreshToken(self, oldToken: Token) -> Optional[Token]:
+ """Refresh an expired token using the appropriate OAuth service.
+
+ Args:
+ oldToken: Token object to refresh
+
+ Returns:
+ Refreshed Token object or None if refresh failed
+ """
+ try:
+ return self._tokenManager.refreshToken(oldToken)
+ except Exception as e:
+ logger.error(f"refreshToken: Error refreshing token: {e}")
+ return None
+
+ def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60,
+ saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
+ """Ensure a token is fresh; refresh if expiring within threshold.
+
+ Args:
+ token: Existing token to validate/refresh
+ secondsBeforeExpiry: Threshold window to proactively refresh (default: 30 minutes)
+ saveCallback: Optional function to persist a refreshed token
+
+ Returns:
+ A fresh token (refreshed or original) or None if refresh failed
+ """
+ try:
+ return self._tokenManager.ensureFreshToken(
+ token,
+ secondsBeforeExpiry=secondsBeforeExpiry,
+ saveCallback=saveCallback
+ )
+ except Exception as e:
+ logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}")
+ return None
+
+ def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
+ """Refresh Microsoft OAuth token using refresh token.
+
+ Args:
+ refreshToken: Microsoft refresh token
+ userId: User ID owning the token
+ oldToken: Previous token object to preserve connection ID
+
+ Returns:
+ New Token object or None if refresh failed
+ """
+ try:
+ return self._tokenManager.refreshMicrosoftToken(refreshToken, userId, oldToken)
+ except Exception as e:
+ logger.error(f"refreshMicrosoftToken: Error refreshing Microsoft token: {e}")
+ return None
+
+ def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
+ """Refresh Google OAuth token using refresh token.
+
+ Args:
+ refreshToken: Google refresh token
+ userId: User ID owning the token
+ oldToken: Previous token object to preserve connection ID
+
+ Returns:
+ New Token object or None if refresh failed
+ """
+ try:
+ return self._tokenManager.refreshGoogleToken(refreshToken, userId, oldToken)
+ except Exception as e:
+ logger.error(f"refreshGoogleToken: Error refreshing Google token: {e}")
+ return None
+
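A usage sketch for the new service (the connection ID and the token's field name are assumptions):

    # Hypothetical call site: refresh a token one hour before expiry.
    token = services.security.getFreshToken("conn-123", secondsBeforeExpiry=60 * 60)
    if token:
        headers = {"Authorization": f"Bearer {token.accessToken}"}  # field name assumed
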
diff --git a/modules/services/serviceSharepoint/mainServiceSharepoint.py b/modules/services/serviceSharepoint/mainServiceSharepoint.py
index 70fc52ff..e7f24648 100644
--- a/modules/services/serviceSharepoint/mainServiceSharepoint.py
+++ b/modules/services/serviceSharepoint/mainServiceSharepoint.py
@@ -47,9 +47,12 @@ class SharepointService:
logger.error("UserConnection must have an 'id' field")
return False
- # Get a fresh token for this specific connection
- from modules.security.tokenManager import TokenManager
- token = TokenManager().getFreshToken(connectionId)
+ # Get a fresh token for this specific connection via security service
+ if not self.services:
+ logger.error("Service center not available for token access")
+ return False
+
+ token = self.services.security.getFreshToken(connectionId)
if not token:
logger.error(f"No token found for connection {connectionId}")
return False
diff --git a/modules/services/serviceUtils/mainServiceUtils.py b/modules/services/serviceUtils/mainServiceUtils.py
index bbee6540..8e106233 100644
--- a/modules/services/serviceUtils/mainServiceUtils.py
+++ b/modules/services/serviceUtils/mainServiceUtils.py
@@ -155,11 +155,11 @@ class UtilsService:
def storeDebugMessageAndDocuments(self, message, currentUser):
"""
- Wrapper to store debug messages and documents via shared debugLogger.
- Mirrors storeDebugMessageAndDocuments() in modules.shared.debugLogger.
+ Wrapper to store debug messages and documents via interfaceDbChatObjects.
+ Mirrors storeDebugMessageAndDocuments() in modules.interfaces.interfaceDbChatObjects.
"""
try:
- from modules.shared.debugLogger import storeDebugMessageAndDocuments as _storeDebugMessageAndDocuments
+ from modules.interfaces.interfaceDbChatObjects import storeDebugMessageAndDocuments as _storeDebugMessageAndDocuments
_storeDebugMessageAndDocuments(message, currentUser)
except Exception:
# Silent fail to never break main flow
diff --git a/modules/services/serviceWeb/mainServiceWeb.py b/modules/services/serviceWeb/mainServiceWeb.py
index b771cb9d..7ce5a72f 100644
--- a/modules/services/serviceWeb/mainServiceWeb.py
+++ b/modules/services/serviceWeb/mainServiceWeb.py
@@ -114,16 +114,14 @@ class WebService:
self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
self.services.chat.progressLogUpdate(operationId, 0.6, f"Crawling {len(allUrls)} URLs")
- # Get parent log ID for URL-level operations
- parentLogId = None
- if operationId:
- parentLogId = self.services.chat.getOperationLogId(operationId)
+ # Use parent operation ID directly (parentId should be operationId, not log entry ID)
+ parentOperationId = operationId # Use the parent's operationId directly
crawlResult = await self._performWebCrawl(
instruction=instruction,
urls=allUrls,
maxDepth=maxDepth,
- parentLogId=parentLogId
+ parentOperationId=parentOperationId
)
if operationId:
@@ -131,18 +129,95 @@ class WebService:
self.services.chat.progressLogUpdate(operationId, 0.95, "Completed")
self.services.chat.progressLogFinish(operationId, True)
- # Return consolidated result
+ # Calculate statistics about crawl results
+ totalResults = len(crawlResult) if isinstance(crawlResult, list) else 1
+ totalContentLength = 0
+ urlsWithContent = 0
+
+ # Analyze crawl results to gather statistics
+ if isinstance(crawlResult, list):
+ for item in crawlResult:
+ if isinstance(item, dict):
+ if item.get("url"):
+ urlsWithContent += 1
+ content = item.get("content", "")
+ if isinstance(content, str):
+ totalContentLength += len(content)
+ elif isinstance(content, dict):
+ # Estimate size from dict
+ totalContentLength += len(str(content))
+ elif isinstance(crawlResult, dict):
+ if crawlResult.get("url"):
+ urlsWithContent = 1
+ content = crawlResult.get("content", "")
+ if isinstance(content, str):
+ totalContentLength = len(content)
+ elif isinstance(content, dict):
+ totalContentLength = len(str(content))
+
+ # Convert crawl results into sections format for generic validator
+ sections = []
+ if isinstance(crawlResult, list):
+ for idx, item in enumerate(crawlResult):
+ if isinstance(item, dict):
+ section = {
+ "id": f"result_{idx}",
+ "content_type": "paragraph",
+ "title": item.get("url", f"Result {idx + 1}"),
+ "order": idx
+ }
+ # Add content preview
+ content = item.get("content", "")
+ if isinstance(content, str) and content:
+ section["textPreview"] = content[:200] + ("..." if len(content) > 200 else "")
+ sections.append(section)
+ elif isinstance(crawlResult, dict):
+ section = {
+ "id": "result_0",
+ "content_type": "paragraph",
+ "title": crawlResult.get("url", "Research Result"),
+ "order": 0
+ }
+ content = crawlResult.get("content", "")
+ if isinstance(content, str) and content:
+ section["textPreview"] = content[:200] + ("..." if len(content) > 200 else "")
+ sections.append(section)
+
+ # Return consolidated result with metadata in format that generic validator understands
result = {
+ "metadata": {
+ "title": suggestedFilename or instruction[:100] if instruction else "Web Research Results",
+ "extraction_method": "web_crawl",
+ "research_depth": finalResearchDepth,
+ "max_depth": maxDepth,
+ "country": countryCode,
+ "language": languageCode,
+ "urls_crawled": allUrls[:20], # First 20 URLs for reference
+ "total_urls": len(allUrls),
+ "urls_with_content": urlsWithContent,
+ "total_content_length": totalContentLength,
+ "crawl_date": self.services.utils.timestampGetUtc() if hasattr(self.services, 'utils') else None
+ },
+ "sections": sections,
+ "statistics": {
+ "sectionCount": len(sections),
+ "total_urls": len(allUrls),
+ "results_count": totalResults,
+ "urls_with_content": urlsWithContent,
+ "total_content_length": totalContentLength
+ },
+ # Keep original structure for backward compatibility
"instruction": instruction,
"urls_crawled": allUrls,
"total_urls": len(allUrls),
"results": crawlResult,
- "total_results": len(crawlResult) if isinstance(crawlResult, list) else 1
+ "total_results": totalResults
}
# Add suggested filename if available
if suggestedFilename:
result["suggested_filename"] = suggestedFilename
+ result["metadata"]["suggested_filename"] = suggestedFilename
return result
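For a single crawled page, the sections list built above comes out roughly like this (illustrative values):

    section = {
        "id": "result_0",
        "content_type": "paragraph",
        "title": "https://example.com/pricing",    # falls back to "Result 1" without a url
        "order": 0,
        "textPreview": "Our pricing starts at ...",  # first 200 chars, "..." appended if truncated
    }
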
@@ -311,7 +386,7 @@ Return ONLY valid JSON, no additional text:
instruction: str,
urls: List[str],
maxDepth: int = 2,
- parentLogId: Optional[str] = None
+ parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""Perform web crawl on list of URLs - calls plugin for each URL individually."""
crawlResults = []
@@ -320,7 +395,7 @@ Return ONLY valid JSON, no additional text:
for urlIndex, url in enumerate(urls):
# Create separate operation for each URL with parent reference
urlOperationId = None
- if parentLogId:
+ if parentOperationId:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
urlOperationId = f"web_crawl_url_{workflowId}_{urlIndex}_{int(time.time())}"
self.services.chat.progressLogStart(
@@ -328,21 +403,23 @@ Return ONLY valid JSON, no additional text:
"Web Crawl",
f"URL {urlIndex + 1}",
url[:50] + "..." if len(url) > 50 else url,
- parentId=parentLogId
+ parentOperationId=parentOperationId
)
try:
- logger.info(f"Crawling URL: {url}")
+ logger.info(f"Crawling URL {urlIndex + 1}/{len(urls)}: {url}")
if urlOperationId:
- self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating")
+ displayUrl = url[:50] + "..." if len(url) > 50 else url
+ self.services.chat.progressLogUpdate(urlOperationId, 0.2, f"Crawling: {displayUrl}")
+ self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
# Build crawl prompt model for single URL
crawlPromptModel = AiCallPromptWebCrawl(
instruction=instruction,
url=url, # Single URL
maxDepth=maxDepth,
- maxWidth=50
+ maxWidth=5 # Default: 5 pages per level
)
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
@@ -356,16 +433,19 @@ Return ONLY valid JSON, no additional text:
resultFormat="json"
)
- # Use unified callAiContent method
+ if urlOperationId:
+ self.services.chat.progressLogUpdate(urlOperationId, 0.4, "Calling crawl connector")
+
+ # Use unified callAiContent method with parentOperationId for hierarchical logging
crawlResponse = await self.services.ai.callAiContent(
prompt=crawlPrompt,
options=crawlOptions,
- outputFormat="json"
+ outputFormat="json",
+ parentOperationId=urlOperationId # Pass URL operation ID as parent for sub-URL logging
)
if urlOperationId:
- self.services.chat.progressLogUpdate(urlOperationId, 0.8, "Completed")
- self.services.chat.progressLogFinish(urlOperationId, True)
+ self.services.chat.progressLogUpdate(urlOperationId, 0.7, "Processing crawl results")
# Extract content from AiResponse
crawlResult = crawlResponse.content
@@ -387,16 +467,30 @@ Return ONLY valid JSON, no additional text:
else:
crawlData = crawlResult
+ # Process crawl results and create hierarchical progress logging for sub-URLs
+ if urlOperationId:
+ self.services.chat.progressLogUpdate(urlOperationId, 0.8, "Processing crawl results")
+
+ # Recursively process crawl results to find nested URLs and create child operations
+ processedResults = self._processCrawlResultsWithHierarchy(crawlData, url, urlOperationId, maxDepth, 0)
+
+ # Count total URLs crawled (including sub-URLs) for progress message
+ totalUrlsCrawled = self._countUrlsInResults(processedResults)
+
# Ensure it's a list of results
- if isinstance(crawlData, list):
- crawlResults.extend(crawlData)
- elif isinstance(crawlData, dict):
- if "results" in crawlData:
- crawlResults.extend(crawlData["results"])
- else:
- crawlResults.append(crawlData)
+ if isinstance(processedResults, list):
+ crawlResults.extend(processedResults)
+ elif isinstance(processedResults, dict):
+ crawlResults.append(processedResults)
else:
- crawlResults.append({"url": url, "content": str(crawlData)})
+ crawlResults.append({"url": url, "content": str(processedResults)})
+
+ if urlOperationId:
+ if totalUrlsCrawled > 1:
+ self.services.chat.progressLogUpdate(urlOperationId, 0.9, f"Crawled {totalUrlsCrawled} URLs (including sub-URLs)")
+ else:
+ self.services.chat.progressLogUpdate(urlOperationId, 0.9, "Crawl completed")
+ self.services.chat.progressLogFinish(urlOperationId, True)
except Exception as e:
logger.error(f"Error crawling URL {url}: {str(e)}")
@@ -405,4 +499,145 @@ Return ONLY valid JSON, no additional text:
crawlResults.append({"url": url, "error": str(e)})
return crawlResults
+
+ def _processCrawlResultsWithHierarchy(
+ self,
+ crawlData: Any,
+ parentUrl: str,
+ parentOperationId: Optional[str],
+ maxDepth: int,
+ currentDepth: int
+ ) -> List[Dict[str, Any]]:
+ """
+ Recursively process crawl results to create hierarchical progress logging for sub-URLs.
+
+ Args:
+ crawlData: Crawl result data (dict, list, or other)
+ parentUrl: Parent URL being crawled
+ parentOperationId: Parent operation ID for hierarchical logging
+ maxDepth: Maximum crawl depth
+ currentDepth: Current depth in the crawl tree
+
+ Returns:
+ List of processed crawl results
+ """
+ results = []
+
+ # Handle list of results
+ if isinstance(crawlData, list):
+ for idx, item in enumerate(crawlData):
+ if isinstance(item, dict):
+ # Check if this item has sub-URLs or nested results
+ itemUrl = item.get("url") or item.get("source") or parentUrl
+
+ # Create child operation for sub-URL if we're not at max depth
+ if currentDepth < maxDepth and parentOperationId:
+ # Check if this item has nested results or children
+ hasNestedResults = "results" in item or "children" in item or "subUrls" in item
+
+ if hasNestedResults or (itemUrl != parentUrl and currentDepth > 0):
+ # This is a sub-URL - create child operation
+ subUrlOperationId = f"{parentOperationId}_sub_{idx}_{int(time.time())}"
+ self.services.chat.progressLogStart(
+ subUrlOperationId,
+ "Crawling Sub-URL",
+ f"Depth {currentDepth + 1}",
+ itemUrl[:50] + "..." if len(itemUrl) > 50 else itemUrl,
+ parentOperationId=parentOperationId
+ )
+
+ try:
+ # Process nested results recursively
+ if "results" in item:
+ nestedResults = self._processCrawlResultsWithHierarchy(
+ item["results"], itemUrl, subUrlOperationId, maxDepth, currentDepth + 1
+ )
+ item["results"] = nestedResults
+ elif "children" in item:
+ nestedResults = self._processCrawlResultsWithHierarchy(
+ item["children"], itemUrl, subUrlOperationId, maxDepth, currentDepth + 1
+ )
+ item["children"] = nestedResults
+ elif "subUrls" in item:
+ nestedResults = self._processCrawlResultsWithHierarchy(
+ item["subUrls"], itemUrl, subUrlOperationId, maxDepth, currentDepth + 1
+ )
+ item["subUrls"] = nestedResults
+
+ self.services.chat.progressLogUpdate(subUrlOperationId, 0.9, "Completed")
+ self.services.chat.progressLogFinish(subUrlOperationId, True)
+ except Exception as e:
+ logger.error(f"Error processing sub-URL {itemUrl}: {str(e)}")
+ if subUrlOperationId:
+ self.services.chat.progressLogFinish(subUrlOperationId, False)
+
+ results.append(item)
+ else:
+ results.append(item)
+
+ # Handle dict with results array
+ elif isinstance(crawlData, dict):
+ if "results" in crawlData:
+ # Process nested results
+ nestedResults = self._processCrawlResultsWithHierarchy(
+ crawlData["results"], parentUrl, parentOperationId, maxDepth, currentDepth
+ )
+ crawlData["results"] = nestedResults
+ results.append(crawlData)
+ elif "children" in crawlData:
+ # Process children
+ nestedResults = self._processCrawlResultsWithHierarchy(
+ crawlData["children"], parentUrl, parentOperationId, maxDepth, currentDepth
+ )
+ crawlData["children"] = nestedResults
+ results.append(crawlData)
+ elif "subUrls" in crawlData:
+ # Process sub-URLs
+ nestedResults = self._processCrawlResultsWithHierarchy(
+ crawlData["subUrls"], parentUrl, parentOperationId, maxDepth, currentDepth
+ )
+ crawlData["subUrls"] = nestedResults
+ results.append(crawlData)
+ else:
+ # Single result dict
+ results.append(crawlData)
+ else:
+ # Other types - wrap in dict
+ results.append({"url": parentUrl, "content": str(crawlData)})
+
+ return results
+
+ def _countUrlsInResults(self, results: Any) -> int:
+ """
+ Recursively count total URLs in crawl results (including nested sub-URLs).
+
+ Args:
+ results: Crawl results (dict, list, or other)
+
+ Returns:
+ Total count of URLs found
+ """
+ count = 0
+
+ if isinstance(results, list):
+ for item in results:
+ count += self._countUrlsInResults(item)
+ elif isinstance(results, dict):
+ # Count this URL if it has a url field
+ if "url" in results or "source" in results:
+ count += 1
+ # Recursively count nested results
+ if "results" in results:
+ count += self._countUrlsInResults(results["results"])
+ if "children" in results:
+ count += self._countUrlsInResults(results["children"])
+ if "subUrls" in results:
+ count += self._countUrlsInResults(results["subUrls"])
+ elif isinstance(results, str):
+ # Single URL string
+ count = 1
+
+ return count
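A quick check of the counting logic on a nested result (the instance name is assumed):

    # Illustrative nested crawl result: one parent URL plus two children = 3.
    sample = {
        "url": "https://example.com",
        "children": [
            {"url": "https://example.com/a", "content": "..."},
            {"url": "https://example.com/b", "content": "..."},
        ],
    }
    assert webService._countUrlsInResults(sample) == 3
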
diff --git a/modules/shared/callbackRegistry.py b/modules/shared/callbackRegistry.py
new file mode 100644
index 00000000..0529ff1d
--- /dev/null
+++ b/modules/shared/callbackRegistry.py
@@ -0,0 +1,70 @@
+"""
+Callback registry for decoupled event notifications.
+
+Allows interfaces to notify about changes without knowing about features.
+Features can register callbacks to be notified when automations change.
+"""
+
+import logging
+from typing import Callable, List, Dict, Any
+import asyncio
+
+logger = logging.getLogger(__name__)
+
+
+class CallbackRegistry:
+ """Registry for callbacks that can be triggered by interfaces without knowing about features."""
+
+ def __init__(self):
+ self._callbacks: Dict[str, List[Callable]] = {}
+
+ def register(self, event_type: str, callback: Callable):
+ """Register a callback for a specific event type.
+
+ Args:
+ event_type: Type of event (e.g., 'automation.changed')
+ callback: Async or sync callback function
+ """
+ if event_type not in self._callbacks:
+ self._callbacks[event_type] = []
+ self._callbacks[event_type].append(callback)
+ logger.debug(f"Registered callback for event type: {event_type}")
+
+ def unregister(self, event_type: str, callback: Callable):
+ """Unregister a callback for a specific event type."""
+ if event_type in self._callbacks:
+ try:
+ self._callbacks[event_type].remove(callback)
+ logger.debug(f"Unregistered callback for event type: {event_type}")
+ except ValueError:
+ logger.warning(f"Callback not found for event type: {event_type}")
+
+ async def trigger(self, event_type: str, *args, **kwargs):
+ """Trigger all callbacks registered for an event type.
+
+ Args:
+ event_type: Type of event to trigger
+ *args, **kwargs: Arguments to pass to callbacks
+ """
+ if event_type not in self._callbacks:
+ return
+
+ callbacks = self._callbacks[event_type].copy() # Copy to avoid modification during iteration
+
+ for callback in callbacks:
+ try:
+ if asyncio.iscoroutinefunction(callback):
+ await callback(*args, **kwargs)
+ else:
+ callback(*args, **kwargs)
+ except Exception as e:
+ logger.error(f"Error executing callback for {event_type}: {str(e)}", exc_info=True)
+
+ def has_callbacks(self, event_type: str) -> bool:
+ """Check if there are any callbacks registered for an event type."""
+ return event_type in self._callbacks and len(self._callbacks[event_type]) > 0
+
+
+# Global singleton instance
+callbackRegistry = CallbackRegistry()
+
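Usage is straightforward; the event name below matches the docstring's example:

    import asyncio
    from modules.shared.callbackRegistry import callbackRegistry

    async def onAutomationChanged(automationId: str):
        print(f"automation changed: {automationId}")

    callbackRegistry.register("automation.changed", onAutomationChanged)
    # An interface can now notify features without importing them:
    asyncio.run(callbackRegistry.trigger("automation.changed", "auto-42"))
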
diff --git a/modules/shared/debugLogger.py b/modules/shared/debugLogger.py
index 6ee78bc7..2af3f329 100644
--- a/modules/shared/debugLogger.py
+++ b/modules/shared/debugLogger.py
@@ -145,131 +145,3 @@ def debugLogToFile(message: str, context: str = "DEBUG") -> None:
# Don't log debug errors to avoid recursion
pass
-def storeDebugMessageAndDocuments(message, currentUser) -> None:
- """
- Store message and documents (metadata and file bytes) for debugging purposes.
- Structure: {log_dir}/debug/messages/m_round_task_action_timestamp/documentlist_label/
- - message.json, message_text.txt
- - document_###_metadata.json
- - document_###_ (actual file bytes)
-
- Args:
- message: ChatMessage object to store
- currentUser: Current user for component interface access
- """
- try:
- import json
-
- # Create base debug directory (use base debug dir, not prompts subdirectory)
- baseDebugDir = _getBaseDebugDir()
- debug_root = os.path.join(baseDebugDir, 'messages')
- _ensureDir(debug_root)
-
- # Generate timestamp
- timestamp = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
-
- # Create message folder name: m_round_task_action_timestamp
- # Use actual values from message, not defaults
- round_str = str(message.roundNumber) if message.roundNumber is not None else "0"
- task_str = str(message.taskNumber) if message.taskNumber is not None else "0"
- action_str = str(message.actionNumber) if message.actionNumber is not None else "0"
- message_folder = f"{timestamp}_m_{round_str}_{task_str}_{action_str}"
-
- message_path = os.path.join(debug_root, message_folder)
- os.makedirs(message_path, exist_ok=True)
-
- # Store message data - use dict() instead of model_dump() for compatibility
- message_file = os.path.join(message_path, "message.json")
- with open(message_file, "w", encoding="utf-8") as f:
- # Convert message to dict manually to avoid model_dump() issues
- message_dict = {
- "id": message.id,
- "workflowId": message.workflowId,
- "parentMessageId": message.parentMessageId,
- "message": message.message,
- "role": message.role,
- "status": message.status,
- "sequenceNr": message.sequenceNr,
- "publishedAt": message.publishedAt,
- "roundNumber": message.roundNumber,
- "taskNumber": message.taskNumber,
- "actionNumber": message.actionNumber,
- "documentsLabel": message.documentsLabel,
- "actionId": message.actionId,
- "actionMethod": message.actionMethod,
- "actionName": message.actionName,
- "success": message.success,
- "documents": []
- }
- json.dump(message_dict, f, indent=2, ensure_ascii=False, default=str)
-
- # Store message content as text
- if message.message:
- message_text_file = os.path.join(message_path, "message_text.txt")
- with open(message_text_file, "w", encoding="utf-8") as f:
- f.write(str(message.message))
-
- # Store documents if provided
- if message.documents and len(message.documents) > 0:
- # Group documents by documentsLabel
- documents_by_label = {}
- for doc in message.documents:
- label = message.documentsLabel or 'default'
- if label not in documents_by_label:
- documents_by_label[label] = []
- documents_by_label[label].append(doc)
-
- # Create subfolder for each document label
- for label, docs in documents_by_label.items():
- # Sanitize label for filesystem
- safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
- safe_label = safe_label.replace(' ', '_')
- if not safe_label:
- safe_label = "default"
-
- label_folder = os.path.join(message_path, safe_label)
- _ensureDir(label_folder)
-
- # Store each document
- for i, doc in enumerate(docs):
- # Create document metadata file
- doc_meta = {
- "id": doc.id,
- "messageId": doc.messageId,
- "fileId": doc.fileId,
- "fileName": doc.fileName,
- "fileSize": doc.fileSize,
- "mimeType": doc.mimeType,
- "roundNumber": doc.roundNumber,
- "taskNumber": doc.taskNumber,
- "actionNumber": doc.actionNumber,
- "actionId": doc.actionId
- }
-
- doc_meta_file = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
- with open(doc_meta_file, "w", encoding="utf-8") as f:
- json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)
-
- # Also store the actual file bytes next to metadata for debugging
- try:
- # Lazy import to avoid circular deps at module load
- from modules.interfaces import interfaceDbComponentObjects as comp
- componentInterface = comp.getInterface(currentUser)
- file_bytes = componentInterface.getFileData(doc.fileId)
- if file_bytes:
- # Build a safe filename preserving original name
- safe_name = doc.fileName or f"document_{i+1:03d}"
- # Avoid path traversal
- safe_name = os.path.basename(safe_name)
- doc_file_path = os.path.join(label_folder, f"document_{i+1:03d}_" + safe_name)
- with open(doc_file_path, "wb") as df:
- df.write(file_bytes)
- else:
- pass
- except Exception as e:
- pass
-
- except Exception as e:
- # Silent fail - don't break main flow
- pass
-
diff --git a/modules/shared/progressLogger.py b/modules/shared/progressLogger.py
index bbc000ae..51207d62 100644
--- a/modules/shared/progressLogger.py
+++ b/modules/shared/progressLogger.py
@@ -24,7 +24,7 @@ class ProgressLogger:
self.finishedOperations = set() # Track finished operations to avoid repeated warnings
self.operationLogIds = {} # Map operationId to the log entry ID for parent reference
- def startOperation(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentId: Optional[str] = None):
+ def startOperation(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None):
"""Start a new long-running operation.
Args:
@@ -32,7 +32,8 @@ class ProgressLogger:
serviceName: Name of the service (e.g., "Extract", "AI", "Generate")
actionName: Name of the action being performed
context: Additional context information
- parentId: Optional parent log entry ID for hierarchical display
+ parentOperationId: Optional parent operation ID (operationId of parent operation) for hierarchical display
+ The parentId in ChatLog will be set to this parentOperationId
"""
# Remove from finished operations if it was there (for restart scenarios)
self.finishedOperations.discard(operationId)
@@ -42,9 +43,10 @@ class ProgressLogger:
'action': actionName,
'context': context,
'startTime': time.time(),
- 'parentId': parentId
+ 'parentOperationId': parentOperationId # Store parent's operationId, not log entry ID
}
- logId = self._logProgress(operationId, 0.0, f"Starting {actionName}", parentId=parentId)
+ # Use parentOperationId as parentId in ChatLog (parentId should be the operationId of parent)
+ logId = self._logProgress(operationId, 0.0, f"Starting {actionName}", parentOperationId=parentOperationId)
if logId:
self.operationLogIds[operationId] = logId
logger.debug(f"Started operation {operationId}: {serviceName} - {actionName}")
@@ -70,9 +72,9 @@ class ProgressLogger:
op = self.activeOperations[operationId]
context = f"{op['context']} {statusUpdate}".strip()
- # Use the same parentId as the start operation - all logs (start/update/finish) share the same parent
- parentId = op.get('parentId')
- self._logProgress(operationId, progress, context, parentId=parentId)
+ # Use the same parentOperationId as the start operation - all logs (start/update/finish) share the same parent
+ parentOperationId = op.get('parentOperationId')
+ self._logProgress(operationId, progress, context, parentOperationId=parentOperationId)
logger.debug(f"Updated operation {operationId}: {progress:.2f} - {context}")
def finishOperation(self, operationId: str, success: bool = True):
@@ -93,11 +95,11 @@ class ProgressLogger:
finalProgress = 1.0 if success else 0.0
status = "Done" if success else "Failed"
- # Use the same parentId as the start operation - all logs (start/update/finish) share the same parent
- parentId = op.get('parentId')
+ # Use the same parentOperationId as the start operation - all logs (start/update/finish) share the same parent
+ parentOperationId = op.get('parentOperationId')
# Create completion log BEFORE removing from activeOperations
- self._logProgress(operationId, finalProgress, status, parentId=parentId)
+ self._logProgress(operationId, finalProgress, status, parentOperationId=parentOperationId)
# Log completion time
duration = time.time() - op['startTime']
@@ -111,14 +113,15 @@ class ProgressLogger:
# Mark as finished to prevent repeated warnings from updateOperation calls
self.finishedOperations.add(operationId)
- def _logProgress(self, operationId: str, progress: float, status: str, parentId: Optional[str] = None) -> Optional[str]:
+ def _logProgress(self, operationId: str, progress: float, status: str, parentOperationId: Optional[str] = None) -> Optional[str]:
"""Create standardized ChatLog entry.
Args:
operationId: Unique identifier for the operation
progress: Progress value between 0.0 and 1.0
status: Status information for the log entry
- parentId: Optional parent log entry ID for hierarchical display
+ parentOperationId: Optional parent operation ID (operationId of parent operation) for hierarchical display
+ This will be set as parentId in ChatLog (parentId = operationId of parent)
Returns:
The created log entry ID, or None if creation failed
@@ -134,6 +137,7 @@ class ProgressLogger:
logger.warning(f"Cannot log progress: no workflow available")
return None
+ # parentId in ChatLog should be the operationId of the parent operation, not the log entry ID
logData = {
"workflowId": workflow.id,
"message": message,
@@ -141,7 +145,7 @@ class ProgressLogger:
"status": status,
"progress": progress,
"operationId": operationId,
- "parentId": parentId
+ "parentId": parentOperationId # Set to parent's operationId, not log entry ID
}
try:
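With the rename, a parent/child pair looks like this (operation IDs are made up, mirroring the WebService pattern above):

    progressLogger.startOperation("web_crawl_wf1_1700000000", "Web Crawl", "Crawl URLs")
    progressLogger.startOperation(
        "web_crawl_url_wf1_0_1700000001", "Web Crawl", "URL 1",
        parentOperationId="web_crawl_wf1_1700000000",  # becomes parentId in ChatLog
    )
    progressLogger.finishOperation("web_crawl_url_wf1_0_1700000001", success=True)
    progressLogger.finishOperation("web_crawl_wf1_1700000000", success=True)
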
diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py
index 1e837f62..eee848f7 100644
--- a/modules/workflows/methods/methodAi.py
+++ b/modules/workflows/methods/methodAi.py
@@ -130,8 +130,9 @@ class MethodAi(MethodBase):
processDocumentsIndividually=True
)
- # Extract content using extraction service
- extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
+ # Extract content using extraction service with hierarchical progress logging
+ # Pass operationId for per-document progress tracking
+ extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
# Combine all ContentParts from all extracted results
contentParts = []
@@ -172,11 +173,19 @@ class MethodAi(MethodBase):
if aiResponse.documents and len(aiResponse.documents) > 0:
action_documents = []
for doc in aiResponse.documents:
+ validationMetadata = {
+ "actionType": "ai.process",
+ "resultType": normalized_result_type,
+ "outputFormat": output_format,
+ "hasDocuments": True,
+ "documentCount": len(aiResponse.documents)
+ }
action_documents.append(ActionDocument(
documentName=doc.documentName,
documentData=doc.documentData,
mimeType=doc.mimeType or output_mime_type,
- sourceJson=getattr(doc, 'sourceJson', None) # Preserve source JSON for structure validation
+ sourceJson=getattr(doc, 'sourceJson', None), # Preserve source JSON for structure validation
+ validationMetadata=validationMetadata
))
final_documents = action_documents
@@ -188,10 +197,18 @@ class MethodAi(MethodBase):
extension=extension,
action_name="result"
)
+ validationMetadata = {
+ "actionType": "ai.process",
+ "resultType": normalized_result_type,
+ "outputFormat": output_format,
+ "hasDocuments": False,
+ "contentType": "text"
+ }
action_document = ActionDocument(
documentName=meaningful_name,
documentData=aiResponse.content,
- mimeType=output_mime_type
+ mimeType=output_mime_type,
+ validationMetadata=validationMetadata
)
final_documents = [action_document]
@@ -214,138 +231,6 @@ class MethodAi(MethodBase):
)
- @action
- async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Extract content from documents (separate from AI calls).
-
- This action performs pure content extraction without AI processing.
- The extracted ContentParts can then be used by subsequent AI processing actions.
-
- Parameters:
- - documentList (list, required): Document reference(s) to extract content from.
- - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
-
- Returns:
- - ActionResult with ActionDocument containing ContentExtracted objects
- - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
- """
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"ai_extract_{workflowId}_{int(time.time())}"
-
- # Extract documentList from parameters dict
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- documentListParam = parameters.get("documentList")
- if not documentListParam:
- return ActionResult.isFailure(error="documentList is required")
-
- # Convert to DocumentReferenceList if needed
- if isinstance(documentListParam, DocumentReferenceList):
- documentList = documentListParam
- elif isinstance(documentListParam, str):
- documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
- documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
- return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
-
- # Start progress tracking
- self.services.chat.progressLogStart(
- operationId,
- "Extracting content from documents",
- "Content Extraction",
- f"Documents: {len(documentList.references)}"
- )
-
- # Get ChatDocuments from documentList
- self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
-
- if not chatDocuments:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No documents found in documentList")
-
- logger.info(f"Extracting content from {len(chatDocuments)} documents")
-
- # Prepare extraction options
- self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
- extractionOptionsParam = parameters.get("extractionOptions")
-
- # Convert dict to ExtractionOptions object if needed, or create defaults
- if extractionOptionsParam:
- if isinstance(extractionOptionsParam, dict):
- # Convert dict to ExtractionOptions object
- extractionOptions = ExtractionOptions(**extractionOptionsParam)
- elif isinstance(extractionOptionsParam, ExtractionOptions):
- extractionOptions = extractionOptionsParam
- else:
- # Invalid type, use defaults
- extractionOptions = None
- else:
- extractionOptions = None
-
- # If extractionOptions not provided, create defaults
- if not extractionOptions:
- # Default extraction options for pure content extraction (no AI processing)
- extractionOptions = ExtractionOptions(
- prompt="Extract all content from the document",
- mergeStrategy=MergeStrategy(
- mergeType="concatenate",
- groupBy="typeGroup",
- orderBy="id"
- ),
- processDocumentsIndividually=True
- )
-
- # Get parent log ID for document-level operations
- parentLogId = self.services.chat.getOperationLogId(operationId)
-
- # Call extraction service
- self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
- extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
-
- # Build ActionDocuments from ContentExtracted results
- self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
- actionDocuments = []
- # Map extracted results back to original documents by index (results are in same order)
- for i, extracted in enumerate(extractedResults):
- # Get original document name if available
- originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
- if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
- # Use original filename with "extracted_" prefix
- baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
- documentName = f"{baseName}_extracted_{extracted.id}.json"
- else:
- # Fallback to generic name with index
- documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
-
- # Store ContentExtracted object in ActionDocument.documentData
- actionDoc = ActionDocument(
- documentName=documentName,
- documentData=extracted, # ContentExtracted object
- mimeType="application/json"
- )
- actionDocuments.append(actionDoc)
-
- self.services.chat.progressLogFinish(operationId, True)
-
- return ActionResult.isSuccess(documents=actionDocuments)
-
- except Exception as e:
- logger.error(f"Error in content extraction: {str(e)}")
-
- # Complete progress tracking with failure
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass # Don't fail on progress logging errors
-
- return ActionResult.isFailure(error=str(e))
-
-
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
"""
@@ -420,10 +305,20 @@ class MethodAi(MethodBase):
)
from modules.datamodels.datamodelChat import ActionDocument
+ validationMetadata = {
+ "actionType": "ai.webResearch",
+ "prompt": prompt,
+ "urlList": parameters.get("urlList", []),
+ "country": parameters.get("country"),
+ "language": parameters.get("language"),
+ "researchDepth": parameters.get("researchDepth", "general"),
+ "resultFormat": "json"
+ }
actionDocument = ActionDocument(
documentName=meaningfulName,
documentData=result,
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)
return ActionResult.isSuccess(documents=[actionDocument])
@@ -622,11 +517,19 @@ class MethodAi(MethodBase):
rendered_content = self._applyCsvOptions(rendered_content, renderOptions)
from modules.datamodels.datamodelChat import ActionDocument
+ validationMetadata = {
+ "actionType": "ai.convert",
+ "inputFormat": normalizedInputFormat,
+ "outputFormat": normalizedOutputFormat,
+ "hasSourceJson": True,
+ "conversionType": "direct_rendering"
+ }
actionDoc = ActionDocument(
documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
documentData=rendered_content,
mimeType=mime_type,
- sourceJson=jsonData # Preserve source JSON for structure validation
+ sourceJson=jsonData, # Preserve source JSON for structure validation
+ validationMetadata=validationMetadata
)
return ActionResult.isSuccess(documents=[actionDoc])
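
A minimal sketch of the pattern this hunk standardizes: every ActionDocument now carries validationMetadata, and convert additionally keeps sourceJson for structure validation. The ActionDocument below is a stand-in dataclass; the real model lives in modules.datamodels.datamodelChat and may carry more fields.

```python
from dataclasses import dataclass, field
from typing import Any, Dict, Optional

@dataclass
class ActionDocument:
    """Stand-in for modules.datamodels.datamodelChat.ActionDocument."""
    documentName: str
    documentData: Any
    mimeType: str
    sourceJson: Optional[dict] = None                 # original JSON, kept for structure validation
    validationMetadata: Dict[str, Any] = field(default_factory=dict)

doc = ActionDocument(
    documentName="report.csv",
    documentData="a,b\n1,2\n",
    mimeType="text/csv",
    sourceJson={"documents": []},                     # illustrative
    validationMetadata={
        "actionType": "ai.convert",                   # moduleName.actionName
        "inputFormat": "json",
        "outputFormat": "csv",
        "hasSourceJson": True,
        "conversionType": "direct_rendering",
    },
)
```
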
@@ -707,171 +610,6 @@ class MethodAi(MethodBase):
return output.getvalue()
- @action
- async def reformat(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Reformat/transform documents with specific transformation rules (e.g., extract arrays, reshape data, apply custom formatting).
- - Input requirements: documentList (required); inputFormat and outputFormat (required); transformationRules (optional).
- - Output format: Document in target format with applied transformation rules.
- - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system with transformation rules.
-
- Parameters:
- - documentList (list, required): Document reference(s) to reformat.
- - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
- - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
- - transformationRules (str, optional): Specific transformation instructions (e.g., "Extract prime numbers array and format as CSV with 10 columns per row").
- - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
- - totalRows (int, optional): For CSV output, total number of rows to create. Default: auto-detect.
- - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
- - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
- - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- inputFormat = parameters.get("inputFormat")
- outputFormat = parameters.get("outputFormat")
- if not inputFormat or not outputFormat:
- return ActionResult.isFailure(error="inputFormat and outputFormat are required")
-
- transformationRules = parameters.get("transformationRules")
- columnsPerRow = parameters.get("columnsPerRow")
- totalRows = parameters.get("totalRows")
- delimiter = parameters.get("delimiter", ",")
- includeHeader = parameters.get("includeHeader", True)
- language = parameters.get("language", "en")
-
- # Normalize formats (remove leading dot if present)
- normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
- normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()
-
- # Get documents
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- else:
- docRefList = DocumentReferenceList.from_string_list([documentList])
-
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
- if not chatDocuments:
- return ActionResult.isFailure(error="No documents found in documentList")
-
- # Check if input is standardized JSON format - if so, use direct rendering with transformation
- if normalizedInputFormat == "json" and len(chatDocuments) == 1:
- try:
- import json
- doc = chatDocuments[0]
- # ChatDocument doesn't have documentData - need to load file content using fileId
- docBytes = self.services.chat.getFileData(doc.fileId)
- if not docBytes:
- raise ValueError(f"No file data found for fileId={doc.fileId}")
-
- # Decode bytes to string
- docData = docBytes.decode('utf-8')
-
- # Try to parse as JSON
- if isinstance(docData, str):
- jsonData = json.loads(docData)
- elif isinstance(docData, dict):
- jsonData = docData
- else:
- jsonData = None
-
- # Check if it's standardized JSON format (has "documents" or "sections")
- if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)):
- # Apply transformation rules if provided
- if transformationRules:
- # Use AI to apply transformation rules to JSON
- aiPrompt = f"Apply the following transformation rules to the JSON document: {transformationRules}"
- if normalizedOutputFormat == "csv":
- aiPrompt += f" Output format: CSV with delimiter '{delimiter}'"
- if columnsPerRow:
- aiPrompt += f", {columnsPerRow} columns per row"
- if totalRows:
- aiPrompt += f", {totalRows} total rows"
- if not includeHeader:
- aiPrompt += ", no header row"
-
- # Use process to apply transformation
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": normalizedOutputFormat
- })
- else:
- # No transformation rules - use direct rendering
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generationService = GenerationService(self.services)
-
- # Ensure format is "documents" array
- if "documents" not in jsonData:
- jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}
-
- # Get title
- title = jsonData.get("metadata", {}).get("title", doc.documentName or "Reformatted Document")
-
- # Render with options
- renderOptions = {}
- if normalizedOutputFormat == "csv":
- renderOptions["delimiter"] = delimiter
- renderOptions["columnsPerRow"] = columnsPerRow
- renderOptions["includeHeader"] = includeHeader
-
- rendered_content, mime_type = await generationService.renderReport(
- jsonData, normalizedOutputFormat, title, None, None
- )
-
- # Apply CSV options if needed
- if normalizedOutputFormat == "csv" and renderOptions:
- rendered_content = self._applyCsvOptions(rendered_content, renderOptions)
-
- from modules.datamodels.datamodelChat import ActionDocument
- actionDoc = ActionDocument(
- documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
- documentData=rendered_content,
- mimeType=mime_type,
- sourceJson=jsonData # Preserve source JSON for structure validation
- )
-
- return ActionResult.isSuccess(documents=[actionDoc])
-
- except Exception as e:
- logger.warning(f"Direct rendering failed, falling back to AI reformatting: {str(e)}")
- # Fall through to AI-based reformatting
-
- # Fallback: Use AI for reformatting with transformation rules
- aiPrompt = f"Reformat the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."
-
- if transformationRules:
- aiPrompt += f" Apply the following transformation rules: {transformationRules}"
-
- if normalizedOutputFormat == "csv":
- aiPrompt += f" Use '{delimiter}' as the delimiter character."
- if columnsPerRow:
- aiPrompt += f" Format the output with {columnsPerRow} columns per row."
- if totalRows:
- aiPrompt += f" Create exactly {totalRows} rows total."
- if not includeHeader:
- aiPrompt += " Do not include a header row."
- else:
- aiPrompt += " Include a header row with column names."
-
- if language and language != "en":
- aiPrompt += f" Use language: {language}."
-
- aiPrompt += " Preserve all data and ensure accurate transformation. Maintain data integrity."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": normalizedOutputFormat
- })
-
-
@action
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
@@ -955,160 +693,10 @@ class MethodAi(MethodBase):
})
- @action
- async def extractTables(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Extract tables from documents, preserving structure and data.
- - Input requirements: documentList (required); optional tableFormat.
- - Output format: JSON by default (structured table data), or CSV/XLSX if specified.
-
- Parameters:
- - documentList (list, required): Document reference(s) to extract tables from.
- - tableFormat (str, optional): Output format for tables - json, csv, or xlsx. Default: json.
- - includeHeaders (bool, optional): Include table headers. Default: True.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- tableFormat = parameters.get("tableFormat", "json")
- includeHeaders = parameters.get("includeHeaders", True)
-
- # Map tableFormat to resultType
- formatMap = {
- "json": "json",
- "csv": "csv",
- "xlsx": "xlsx",
- "xls": "xlsx"
- }
- resultType = formatMap.get(tableFormat.lower(), "json")
-
- aiPrompt = "Extract all tables from the provided document(s)."
- if includeHeaders:
- aiPrompt += " Include table headers and preserve the table structure."
- else:
- aiPrompt += " Extract table data without headers."
- aiPrompt += " Maintain accurate data types (numbers as numbers, dates as dates, etc.) and preserve all table relationships."
-
- if resultType == "json":
- aiPrompt += " Structure each table as a JSON object with headers and rows as arrays."
- elif resultType == "csv":
- aiPrompt += " Output each table as CSV format with proper comma separation."
- elif resultType == "xlsx":
- aiPrompt += " Structure the output as an Excel spreadsheet with tables properly formatted."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
-
-
# ============================================================================
- # Content Generation Wrappers
+ # Content Generation Wrapper
# ============================================================================
- @action
- async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate comprehensive reports from input documents/data with analysis and insights.
- - Input requirements: documentList (optional, can generate from scratch); optional reportType, sections.
- - Output format: Document in specified format (default: docx).
-
- Parameters:
- - documentList (list, optional): Input documents/data to base the report on.
- - reportType (str, optional): Type of report - summary, analysis, executive, detailed. Default: analysis.
- - sections (list, optional): Specific sections to include (e.g., ["introduction", "findings", "recommendations"]).
- - title (str, optional): Report title.
- - resultType (str, optional): Output format (docx, pdf, md, etc.). Default: docx.
- """
- documentList = parameters.get("documentList", [])
- reportType = parameters.get("reportType", "analysis")
- sections = parameters.get("sections", [])
- title = parameters.get("title")
- resultType = parameters.get("resultType", "docx")
-
- reportTypeInstructions = {
- "summary": "Create a summary report with key highlights and main points.",
- "analysis": "Create an analytical report with insights, findings, and detailed examination.",
- "executive": "Create an executive summary report suitable for senior management with key insights and recommendations.",
- "detailed": "Create a comprehensive detailed report covering all aspects with in-depth analysis."
- }
-
- aiPrompt = f"Generate a {reportType} report."
- if title:
- aiPrompt += f" Title: {title}."
- aiPrompt += f" {reportTypeInstructions.get(reportType.lower(), reportTypeInstructions['analysis'])}"
-
- if sections:
- sectionsStr = ", ".join(sections)
- aiPrompt += f" Include the following sections: {sectionsStr}."
- else:
- aiPrompt += " Include standard report sections such as introduction, main content, analysis, findings, and conclusions."
-
- if documentList:
- aiPrompt += " Base the report on the provided input documents, analyzing and synthesizing the information."
- else:
- aiPrompt += " Create a professional, well-structured report."
-
- processParams = {
- "aiPrompt": aiPrompt,
- "resultType": resultType
- }
- if documentList:
- processParams["documentList"] = documentList
-
- return await self.process(processParams)
-
-
- @action
- async def generateChart(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate charts/graphs from data in documents or structured data.
- - Input requirements: documentList (required); optional chartType, title, labels.
- - Output format: Image (png or jpg).
-
- Parameters:
- - documentList (list, required): Documents containing data to visualize (CSV, Excel, JSON, etc.).
- - chartType (str, optional): Type of chart - bar, line, pie, scatter, area, etc. Default: bar.
- - title (str, optional): Chart title.
- - xAxisLabel (str, optional): X-axis label.
- - yAxisLabel (str, optional): Y-axis label.
- - resultType (str, optional): Image format (png or jpg). Default: png.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- chartType = parameters.get("chartType", "bar")
- title = parameters.get("title")
- xAxisLabel = parameters.get("xAxisLabel")
- yAxisLabel = parameters.get("yAxisLabel")
- resultType = parameters.get("resultType", "png")
-
- # Ensure resultType is an image format
- if resultType.lower() not in ["png", "jpg", "jpeg"]:
- resultType = "png"
-
- aiPrompt = f"Generate a {chartType} chart from the provided data."
- if title:
- aiPrompt += f" Chart title: {title}."
- if xAxisLabel:
- aiPrompt += f" X-axis label: {xAxisLabel}."
- if yAxisLabel:
- aiPrompt += f" Y-axis label: {yAxisLabel}."
- aiPrompt += " Create a clear, professional chart with appropriate labels, legends, and formatting. Ensure the chart is visually appealing and easy to read."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
-
-
@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
@@ -1146,137 +734,3 @@ class MethodAi(MethodBase):
processParams["documentList"] = documentList
return await self.process(processParams)
-
-
- # ============================================================================
- # Analysis & Comparison Wrappers
- # ============================================================================
-
- @action
- async def analyzeDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Analyze documents and find insights, patterns, trends, and key information.
- - Input requirements: documentList (required); optional analysisType, focus.
- - Output format: Analysis report in specified format (default: txt).
-
- Parameters:
- - documentList (list, required): Document(s) to analyze.
- - analysisType (str, optional): Type of analysis - general, financial, technical, sentiment, etc. Default: general.
- - focus (str, optional): Specific aspect to focus on (e.g., "trends", "risks", "opportunities").
- - resultType (str, optional): Output format (txt, md, docx, json, etc.). Default: txt.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- analysisType = parameters.get("analysisType", "general")
- focus = parameters.get("focus")
- resultType = parameters.get("resultType", "txt")
-
- aiPrompt = f"Analyze the provided document(s) and find insights, patterns, and key information."
- aiPrompt += f" Perform a {analysisType} analysis."
- if focus:
- aiPrompt += f" Focus specifically on: {focus}."
- aiPrompt += " Identify trends, important findings, relationships, and provide actionable insights. Present the analysis in a clear, structured format."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
-
-
- @action
- async def compareDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Compare multiple documents and identify differences, similarities, and changes.
- - Input requirements: documentList (required, should contain 2+ documents); optional comparisonType, focus.
- - Output format: Comparison report in specified format (default: txt).
-
- Parameters:
- - documentList (list, required): Two or more documents to compare.
- - comparisonType (str, optional): Type of comparison - differences, similarities, changes, full. Default: full.
- - focus (str, optional): Specific aspect to focus on (e.g., "content", "structure", "data", "formatting").
- - resultType (str, optional): Output format (txt, md, docx, json, etc.). Default: txt.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- if isinstance(documentList, str):
- documentList = [documentList]
-
- if len(documentList) < 2:
- return ActionResult.isFailure(error="At least 2 documents are required for comparison")
-
- comparisonType = parameters.get("comparisonType", "full")
- focus = parameters.get("focus")
- resultType = parameters.get("resultType", "txt")
-
- comparisonInstructions = {
- "differences": "Focus on identifying and highlighting all differences between the documents.",
- "similarities": "Focus on identifying commonalities, shared content, and similarities.",
- "changes": "Identify what has changed between versions, what was added, removed, or modified.",
- "full": "Provide a comprehensive comparison including both differences and similarities."
- }
-
- aiPrompt = f"Compare the provided documents."
- aiPrompt += f" {comparisonInstructions.get(comparisonType.lower(), comparisonInstructions['full'])}"
- if focus:
- aiPrompt += f" Focus specifically on: {focus}."
- aiPrompt += " Present the comparison in a clear, structured format that makes differences and similarities easy to understand."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
-
-
- @action
- async def validateData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Validate data quality, structure, completeness, and correctness in documents/data files.
- - Input requirements: documentList (required); optional validationRules, schema.
- - Output format: Validation report in JSON or text format (default: json).
-
- Parameters:
- - documentList (list, required): Documents/data files to validate.
- - validationRules (list, optional): Specific validation rules to check (e.g., ["required_fields", "data_types", "ranges"]).
- - schema (dict, optional): Expected data schema/structure to validate against.
- - resultType (str, optional): Output format (json, txt, md, etc.). Default: json.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- validationRules = parameters.get("validationRules", [])
- schema = parameters.get("schema")
- resultType = parameters.get("resultType", "json")
-
- aiPrompt = "Validate the data quality, structure, completeness, and correctness in the provided documents."
-
- if validationRules:
- rulesStr = ", ".join(validationRules)
- aiPrompt += f" Apply the following validation rules: {rulesStr}."
- else:
- aiPrompt += " Check for data completeness, correct data types, required fields, data consistency, and any anomalies or errors."
-
- if schema:
- import json
- schemaStr = json.dumps(schema, indent=2)
- aiPrompt += f" Validate against the following expected schema: {schemaStr}."
-
- if resultType == "json":
- aiPrompt += " Provide the validation results as structured JSON with validation status, errors, warnings, and details for each check."
- else:
- aiPrompt += " Provide a detailed validation report listing all findings, errors, warnings, and pass/fail status for each validation check."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
diff --git a/modules/workflows/methods/methodBase.py b/modules/workflows/methods/methodBase.py
index 3d6742aa..5bbe76c0 100644
--- a/modules/workflows/methods/methodBase.py
+++ b/modules/workflows/methods/methodBase.py
@@ -18,6 +18,19 @@ def action(func):
- success: bool
- documents: List[ActionDocument]
- error: str (if success=False)
+
+ REQUIRED: All ActionDocument instances MUST include validationMetadata for content validation
+ and refinement. Without validationMetadata, results cannot be approved.
+
+ Example validationMetadata structure:
+ validationMetadata = {
+ "actionType": "moduleName.actionName",
+ "param1": value1,
+ "param2": value2,
+ # ... other relevant parameters for validation
+ }
+
+ See MethodBase._createValidationMetadata() for a helper method to create standard metadata.
"""
@wraps(func)
async def wrapper(self, parameters: Dict[str, Any], *args, **kwargs):
@@ -26,7 +39,14 @@ def action(func):
return wrapper
class MethodBase:
- """Base class for all methods"""
+ """Base class for all methods
+
+ IMPORTANT: All actions that return ActionDocument instances MUST include validationMetadata.
+ This metadata is required for content validation and refinement. Without it, results cannot
+ be approved by the validation system.
+
+ Use _createValidationMetadata() helper method to create standardized metadata structures.
+ """
def __init__(self, services: Any):
"""Initialize method with services object"""
@@ -168,6 +188,44 @@ class MethodBase:
else:
return str(type_annotation)
+ def _createValidationMetadata(self, actionName: str, **kwargs) -> Dict[str, Any]:
+ """
+ Helper method to create standardized validationMetadata for ActionDocument instances.
+
+ This method ensures all actions include the required validationMetadata structure
+ for content validation and refinement. Without metadata, results cannot be approved.
+
+ Args:
+ actionName: Name of the action (e.g., "readEmails", "uploadDocument")
+ **kwargs: Additional action-specific metadata fields
+
+ Returns:
+ Dictionary with validationMetadata structure including:
+ - actionType: Full action identifier (moduleName.actionName)
+ - All provided kwargs as additional metadata fields
+
+ Example:
+ validationMetadata = self._createValidationMetadata(
+ "readEmails",
+ connectionReference=connectionReference,
+ folder=folder,
+ limit=limit,
+ emailCount=len(emails)
+ )
+
+ ActionDocument(
+ documentName="emails.json",
+ documentData=json.dumps(data),
+ mimeType="application/json",
+ validationMetadata=validationMetadata # REQUIRED
+ )
+ """
+ metadata = {
+ "actionType": f"{self.name}.{actionName}"
+ }
+ metadata.update(kwargs)
+ return metadata
+
def _generateMeaningfulFileName(self, base_name: str, extension: str, workflow_context: Dict[str, Any] = None, action_name: str = None) -> str:
"""
Generate a meaningful file name with round/task/action information.
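
A runnable sketch of how an action would use the new helper; the MethodBase stub mirrors only the pieces visible in this diff, and MethodDemo is hypothetical:

```python
from typing import Any, Dict

class MethodBase:
    """Stub of the real MethodBase; only the helper from this diff is shown."""
    def __init__(self, services: Any):
        self.services = services
        self.name = "base"

    def _createValidationMetadata(self, actionName: str, **kwargs) -> Dict[str, Any]:
        # actionType is always moduleName.actionName; extra kwargs become metadata fields
        metadata = {"actionType": f"{self.name}.{actionName}"}
        metadata.update(kwargs)
        return metadata

class MethodDemo(MethodBase):
    """Hypothetical method module, used only for illustration."""
    def __init__(self, services: Any):
        super().__init__(services)
        self.name = "demo"

    def buildMetadata(self, limit: int, itemCount: int) -> Dict[str, Any]:
        return self._createValidationMetadata("readItems", limit=limit, itemCount=itemCount)

meta = MethodDemo(services=None).buildMetadata(limit=10, itemCount=3)
assert meta == {"actionType": "demo.readItems", "limit": 10, "itemCount": 3}
```
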
diff --git a/modules/workflows/methods/methodContext.py b/modules/workflows/methods/methodContext.py
new file mode 100644
index 00000000..8bd16f9b
--- /dev/null
+++ b/modules/workflows/methods/methodContext.py
@@ -0,0 +1,351 @@
+"""
+Context and workflow information method module.
+Handles workflow context queries and document indexing.
+"""
+
+import time
+import json
+import logging
+from typing import Dict, Any, List
+from datetime import datetime, UTC
+
+from modules.workflows.methods.methodBase import MethodBase, action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
+
+logger = logging.getLogger(__name__)
+
+class MethodContext(MethodBase):
+ """Context and workflow information methods."""
+
+ def __init__(self, services):
+ super().__init__(services)
+ self.name = "context"
+ self.description = "Context and workflow information methods"
+
+ @action
+ async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
+ - Input requirements: No input documents required. Optional resultType parameter.
+ - Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
+
+ Parameters:
+ - resultType (str, optional): Output format (json, txt, md). Default: json.
+ """
+ try:
+ workflow = self.services.workflow
+ if not workflow:
+ return ActionResult.isFailure(
+ error="No workflow available"
+ )
+
+ resultType = parameters.get("resultType", "json").lower().strip().lstrip('.')
+
+ # Get available documents index from chat service
+ documentsIndex = self.services.chat.getAvailableDocuments(workflow)
+
+ if not documentsIndex or documentsIndex in ("No documents available", "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."):
+ # Return empty index structure
+ if resultType == "json":
+ indexData = {
+ "workflowId": getattr(workflow, 'id', 'unknown'),
+ "totalDocuments": 0,
+ "rounds": [],
+ "documentReferences": []
+ }
+ indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
+ else:
+ indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
+ else:
+ # Parse the document index string to extract structured information
+ indexData = self._parseDocumentIndex(documentsIndex, workflow)
+
+ if resultType == "json":
+ indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
+ elif resultType == "md":
+ indexContent = self._formatAsMarkdown(indexData)
+ else: # txt
+ indexContent = self._formatAsText(indexData, documentsIndex)
+
+ # Generate meaningful filename
+ workflowContext = self.services.chat.getWorkflowContext()
+ filename = self._generateMeaningfulFileName(
+ "document_index",
+ resultType if resultType in ["json", "txt", "md"] else "json",
+ workflowContext,
+ "getDocumentIndex"
+ )
+
+ validationMetadata = {
+ "actionType": "context.getDocumentIndex",
+ "resultType": resultType,
+ "workflowId": getattr(workflow, 'id', 'unknown'),
+ "totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
+ }
+
+ # Create ActionDocument
+ document = ActionDocument(
+ documentName=filename,
+ documentData=indexContent,
+ mimeType="application/json" if resultType == "json" else "text/plain",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ logger.error(f"Error generating document index: {str(e)}")
+ return ActionResult.isFailure(
+ error=f"Failed to generate document index: {str(e)}"
+ )
+
+ def _parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]:
+ """Parse the document index string into structured data."""
+ try:
+ indexData = {
+ "workflowId": getattr(workflow, 'id', 'unknown'),
+ "generatedAt": datetime.now(UTC).isoformat(),
+ "totalDocuments": 0,
+ "rounds": [],
+ "documentReferences": []
+ }
+
+ # Extract document references from the index string
+ lines = documentsIndex.split('\n')
+ currentRound = None
+ currentDocList = None
+
+ for line in lines:
+ line = line.strip()
+ if not line:
+ continue
+
+ # Check for round headers
+ if "Current round documents:" in line:
+ currentRound = "current"
+ continue
+ elif "Past rounds documents:" in line:
+ currentRound = "past"
+ continue
+
+ # Check for document list references (docList:...)
+ if line.startswith("- docList:"):
+ docListRef = line.replace("- docList:", "").strip()
+ currentDocList = {
+ "reference": docListRef,
+ "round": currentRound,
+ "documents": []
+ }
+ indexData["rounds"].append(currentDocList)
+ continue
+
+ # Check for individual document references (docItem:...)
+ # line was stripped above, so only the unindented prefix can match
+ if line.startswith("- docItem:"):
+ docItemRef = line.replace("- docItem:", "", 1).strip()
+ indexData["documentReferences"].append({
+ "reference": docItemRef,
+ "round": currentRound,
+ "docList": currentDocList["reference"] if currentDocList else None
+ })
+ indexData["totalDocuments"] += 1
+ if currentDocList:
+ currentDocList["documents"].append(docItemRef)
+
+ return indexData
+
+ except Exception as e:
+ logger.error(f"Error parsing document index: {str(e)}")
+ return {
+ "workflowId": getattr(workflow, 'id', 'unknown'),
+ "error": f"Failed to parse document index: {str(e)}",
+ "rawIndex": documentsIndex
+ }
+
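
For reference, a sample of the index layout _parseDocumentIndex expects, inferred from the parser itself (round headers plus "- docList:" and "- docItem:" lines); the real string comes from chat.getAvailableDocuments and may differ:

```python
sampleIndex = (
    "Current round documents:\n"
    "- docList: docList_round3\n"
    "- docItem: docItem_a\n"
    "- docItem: docItem_b\n"
    "Past rounds documents:\n"
    "- docList: docList_round2\n"
    "- docItem: docItem_c\n"
)

# Shape the parser produces for this input (workflowId/generatedAt omitted):
expected = {
    "totalDocuments": 3,
    "rounds": [
        {"reference": "docList_round3", "round": "current", "documents": ["docItem_a", "docItem_b"]},
        {"reference": "docList_round2", "round": "past", "documents": ["docItem_c"]},
    ],
    "documentReferences": [
        {"reference": "docItem_a", "round": "current", "docList": "docList_round3"},
        {"reference": "docItem_b", "round": "current", "docList": "docList_round3"},
        {"reference": "docItem_c", "round": "past", "docList": "docList_round2"},
    ],
}
```
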
+ def _formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
+ """Format document index as Markdown."""
+ try:
+ md = f"# Document Index\n\n"
+ md += f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n"
+ md += f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n"
+ md += f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n"
+
+ if indexData.get('rounds'):
+ md += "## Documents by Round\n\n"
+ for roundInfo in indexData['rounds']:
+ roundLabel = (roundInfo.get('round') or 'unknown').title()  # round may be None
+ md += f"### {roundLabel} Round\n\n"
+ md += f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n"
+ if roundInfo.get('documents'):
+ md += "**Documents:**\n\n"
+ for docRef in roundInfo['documents']:
+ md += f"- `{docRef}`\n"
+ md += "\n"
+
+ if indexData.get('documentReferences'):
+ md += "## All Document References\n\n"
+ for docRef in indexData['documentReferences']:
+ md += f"- `{docRef.get('reference', 'unknown')}`\n"
+
+ return md
+
+ except Exception as e:
+ logger.error(f"Error formatting as Markdown: {str(e)}")
+ return f"# Document Index\n\nError formatting index: {str(e)}\n"
+
+ def _formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
+ """Format document index as plain text."""
+ try:
+ text = "Document Index\n"
+ text += "=" * 50 + "\n\n"
+ text += f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n"
+ text += f"Generated At: {indexData.get('generatedAt', 'unknown')}\n"
+ text += f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n"
+
+ # Include the raw formatted index for readability
+ text += rawIndex
+
+ return text
+
+ except Exception as e:
+ logger.error(f"Error formatting as text: {str(e)}")
+ return f"Document Index\n\nError formatting index: {str(e)}\n\nRaw index:\n{rawIndex}\n"
+
+ @action
+ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Extract content from documents (separate from AI calls).
+
+ This action performs pure content extraction without AI processing.
+ The extracted ContentParts can then be used by subsequent AI processing actions.
+
+ Parameters:
+ - documentList (list, required): Document reference(s) to extract content from.
+ - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
+
+ Returns:
+ - ActionResult with ActionDocument containing ContentExtracted objects
+ - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
+ """
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"context_extract_{workflowId}_{int(time.time())}"
+
+ # Extract documentList from parameters dict
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ documentListParam = parameters.get("documentList")
+ if not documentListParam:
+ return ActionResult.isFailure(error="documentList is required")
+
+ # Convert to DocumentReferenceList if needed
+ if isinstance(documentListParam, DocumentReferenceList):
+ documentList = documentListParam
+ elif isinstance(documentListParam, str):
+ documentList = DocumentReferenceList.from_string_list([documentListParam])
+ elif isinstance(documentListParam, list):
+ documentList = DocumentReferenceList.from_string_list(documentListParam)
+ else:
+ return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ operationId,
+ "Extracting content from documents",
+ "Content Extraction",
+ f"Documents: {len(documentList.references)}"
+ )
+
+ # Get ChatDocuments from documentList
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
+
+ if not chatDocuments:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No documents found in documentList")
+
+ logger.info(f"Extracting content from {len(chatDocuments)} documents")
+
+ # Prepare extraction options
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
+ extractionOptionsParam = parameters.get("extractionOptions")
+
+ # Convert dict to ExtractionOptions object if needed, or create defaults
+ if extractionOptionsParam:
+ if isinstance(extractionOptionsParam, dict):
+ # Convert dict to ExtractionOptions object
+ extractionOptions = ExtractionOptions(**extractionOptionsParam)
+ elif isinstance(extractionOptionsParam, ExtractionOptions):
+ extractionOptions = extractionOptionsParam
+ else:
+ # Invalid type, use defaults
+ extractionOptions = None
+ else:
+ extractionOptions = None
+
+ # If extractionOptions not provided, create defaults
+ if not extractionOptions:
+ # Default extraction options for pure content extraction (no AI processing)
+ extractionOptions = ExtractionOptions(
+ prompt="Extract all content from the document",
+ mergeStrategy=MergeStrategy(
+ mergeType="concatenate",
+ groupBy="typeGroup",
+ orderBy="id"
+ ),
+ processDocumentsIndividually=True
+ )
+
+ # Call extraction service with hierarchical progress logging
+ self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
+ # Pass operationId for hierarchical per-document progress logging
+ extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
+
+ # Build ActionDocuments from ContentExtracted results
+ self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
+ actionDocuments = []
+ # Map extracted results back to original documents by index (results are in same order)
+ for i, extracted in enumerate(extractedResults):
+ # Get original document name if available
+ originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
+ if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
+ # Use original filename with "extracted_" prefix
+ baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
+ documentName = f"{baseName}_extracted_{extracted.id}.json"
+ else:
+ # Fallback to generic name with index
+ documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
+
+ # Store ContentExtracted object in ActionDocument.documentData
+ validationMetadata = {
+ "actionType": "context.extractContent",
+ "documentIndex": i,
+ "extractedId": extracted.id,
+ "partCount": len(extracted.parts) if extracted.parts else 0,
+ "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
+ }
+ actionDoc = ActionDocument(
+ documentName=documentName,
+ documentData=extracted, # ContentExtracted object
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+ actionDocuments.append(actionDoc)
+
+ self.services.chat.progressLogFinish(operationId, True)
+
+ return ActionResult.isSuccess(documents=actionDocuments)
+
+ except Exception as e:
+ logger.error(f"Error in content extraction: {str(e)}")
+
+ # Complete progress tracking with failure
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except Exception:
+ pass # Don't fail on progress logging errors
+
+ return ActionResult.isFailure(error=str(e))
+
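
A parameters sketch for context.extractContent; the extractionOptions keys mirror the defaults built above, and whether a nested mergeStrategy dict is coerced by ExtractionOptions(**options) depends on the datamodel, so that part is an assumption:

```python
# Illustrative document references; real ones come from the workflow's document index.
parameters = {
    "documentList": ["docItem_a", "docItem_b"],
    "extractionOptions": {
        "prompt": "Extract all content from the document",
        "mergeStrategy": {                    # assumed to coerce into MergeStrategy
            "mergeType": "concatenate",
            "groupBy": "typeGroup",
            "orderBy": "id",
        },
        "processDocumentsIndividually": True,
    },
}
# result = await methodContext.extractContent(parameters)
```
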
diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py
index fa7b4e47..033b5283 100644
--- a/modules/workflows/methods/methodOutlook.py
+++ b/modules/workflows/methods/methodOutlook.py
@@ -326,7 +326,21 @@ class MethodOutlook(MethodBase):
- filter (str, optional): Sender, query operators, or subject text.
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
"""
+ import time
+ operationId = None
try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"outlook_read_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ operationId,
+ "Read Emails",
+ "Outlook Email Reading",
+ f"Folder: {parameters.get('folder', 'Inbox')}"
+ )
+
connectionReference = parameters.get("connectionReference")
folder = parameters.get("folder", "Inbox")
limit = parameters.get("limit", 10)
@@ -334,8 +348,12 @@ class MethodOutlook(MethodBase):
outputMimeType = parameters.get("outputMimeType", "application/json")
if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Connection reference is required")
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Validating parameters")
+
# Validate limit parameter
if limit <= 0:
limit = 1000
@@ -351,11 +369,14 @@ class MethodOutlook(MethodBase):
# Get Microsoft connection
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
connection = self._getMicrosoftConnection(connectionReference)
if not connection:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
# Read emails using Microsoft Graph API
+ self.services.chat.progressLogUpdate(operationId, 0.4, "Reading emails from Microsoft Graph API")
try:
# Microsoft Graph API endpoint for messages
graph_url = "https://graph.microsoft.com/v1.0"
@@ -387,6 +408,11 @@ class MethodOutlook(MethodBase):
# If using $search, remove $orderby as they can't be combined
if "$search" in params:
params.pop("$orderby", None)
+
+ # If using $filter with contains(), remove $orderby as they can't be combined
+ # Microsoft Graph API doesn't support contains() with orderby
+ if "$filter" in params and "contains(" in params["$filter"].lower():
+ params.pop("$orderby", None)
# Filter applied
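
The rule added above, distilled into a standalone sketch: Microsoft Graph rejects $orderby combined with $search, and likewise with $filter expressions that use contains(), so the incompatible $orderby is dropped before the request:

```python
params = {
    "$top": 10,
    "$orderby": "receivedDateTime desc",
    "$filter": "contains(subject, 'invoice')",
}

# $search and $orderby cannot be combined
if "$search" in params:
    params.pop("$orderby", None)

# contains() inside $filter is also incompatible with $orderby
if "$filter" in params and "contains(" in params["$filter"].lower():
    params.pop("$orderby", None)

assert "$orderby" not in params
```
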
@@ -403,6 +429,7 @@ class MethodOutlook(MethodBase):
response.raise_for_status()
+ self.services.chat.progressLogUpdate(operationId, 0.7, "Processing email data")
emails_data = response.json()
email_data = {
"emails": emails_data.get("value", []),
@@ -420,22 +447,34 @@ class MethodOutlook(MethodBase):
except ImportError:
logger.error("requests module not available")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="requests module not available")
except requests.exceptions.HTTPError as e:
if e.response.status_code == 400:
logger.error(f"Bad Request (400) - Invalid filter or parameter: {e.response.text}")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error=f"Invalid filter syntax. Please check your filter parameter. Error: {e.response.text}")
elif e.response.status_code == 401:
logger.error("Unauthorized (401) - Access token may be expired or invalid")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Authentication failed. Please check your connection and try again.")
elif e.response.status_code == 403:
logger.error("Forbidden (403) - Insufficient permissions to access emails")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Insufficient permissions to read emails from this folder.")
else:
logger.error(f"HTTP Error {e.response.status_code}: {e.response.text}")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error=f"HTTP Error {e.response.status_code}: {e.response.text}")
except Exception as e:
logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}")
# Determine output format based on MIME type
@@ -465,16 +504,35 @@ class MethodOutlook(MethodBase):
"timestamp": self.services.utils.timestampGetUtc()
}
+ validationMetadata = {
+ "actionType": "outlook.readEmails",
+ "connectionReference": connectionReference,
+ "folder": folder,
+ "limit": limit,
+ "filter": filter,
+ "emailCount": email_data.get("count", 0),
+ "outputMimeType": outputMimeType
+ }
+
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {email_data.get('count', 0)} emails")
+ self.services.chat.progressLogFinish(operationId, True)
+
return ActionResult.isSuccess(
documents=[ActionDocument(
documentName=f"outlook_emails_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(result_data, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
except Exception as e:
logger.error(f"Error reading emails: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except Exception:
+ pass # Don't fail on progress logging errors
return ActionResult.isFailure(
error=str(e)
)
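
The progress-logging lifecycle threaded through these hunks, distilled; ChatStub is a stand-in, and the progressLogStart/Update/Finish signatures are taken from the calls in this diff:

```python
import time

class ChatStub:
    """Stand-in for services.chat; prints instead of persisting progress."""
    def progressLogStart(self, opId: str, title: str, category: str, detail: str) -> None:
        print("start ", opId, title)
    def progressLogUpdate(self, opId: str, fraction: float, message: str) -> None:
        print(f"{fraction:>4.0%}", message)
    def progressLogFinish(self, opId: str, success: bool) -> None:
        print("finish", opId, "ok" if success else "failed")

chat = ChatStub()
operationId = None
try:
    operationId = f"outlook_read_wf1_{int(time.time())}"
    chat.progressLogStart(operationId, "Read Emails", "Outlook Email Reading", "Folder: Inbox")
    chat.progressLogUpdate(operationId, 0.4, "Reading emails from Microsoft Graph API")
    chat.progressLogUpdate(operationId, 0.9, "Found 12 emails")
    chat.progressLogFinish(operationId, True)
except Exception:
    if operationId:                  # operationId may be unset if setup itself failed
        chat.progressLogFinish(operationId, False)
```
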
@@ -695,12 +753,23 @@ class MethodOutlook(MethodBase):
"timestamp": self.services.utils.timestampGetUtc()
}
+ validationMetadata = {
+ "actionType": "outlook.searchEmails",
+ "connectionReference": connectionReference,
+ "query": query,
+ "folder": folder,
+ "limit": limit,
+ "resultCount": search_result.get("count", 0),
+ "outputMimeType": outputMimeType
+ }
+
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"outlook_email_search_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(result_data, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
@@ -818,12 +887,22 @@ class MethodOutlook(MethodBase):
"timestamp": self.services.utils.timestampGetUtc()
}
+ validationMetadata = {
+ "actionType": "outlook.listDrafts",
+ "connectionReference": connectionReference,
+ "folder": folder,
+ "limit": limit,
+ "draftCount": drafts_result.get("count", 0),
+ "outputMimeType": outputMimeType
+ }
+
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"outlook_drafts_list_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(result_data, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
@@ -928,12 +1007,21 @@ class MethodOutlook(MethodBase):
"timestamp": self.services.utils.timestampGetUtc()
}
+ validationMetadata = {
+ "actionType": "outlook.findDrafts",
+ "connectionReference": connectionReference,
+ "limit": limit,
+ "totalDrafts": drafts_result.get("totalDrafts", 0),
+ "outputMimeType": outputMimeType
+ }
+
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"outlook_drafts_found_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(result_data, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
@@ -1069,12 +1157,22 @@ class MethodOutlook(MethodBase):
"timestamp": self.services.utils.timestampGetUtc()
}
+ validationMetadata = {
+ "actionType": "outlook.checkDraftsFolder",
+ "connectionReference": connectionReference,
+ "limit": limit,
+ "totalDrafts": drafts_result.get("totalDrafts", 0),
+ "draftsFolderId": drafts_result.get("draftsFolderId"),
+ "outputMimeType": outputMimeType
+ }
+
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"outlook_drafts_folder_check_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(result_data, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
@@ -1440,14 +1538,32 @@ Return JSON:
- connectionReference (str, required): Microsoft connection label.
- documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function).
"""
+ import time
+ operationId = None
try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"outlook_send_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ operationId,
+ "Send Draft Email",
+ "Outlook Email Sending",
+ f"Processing {len(parameters.get('documentList', []))} draft(s)"
+ )
+
connectionReference = parameters.get("connectionReference")
documentList = parameters.get("documentList", [])
if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Connection reference is required")
if not documentList:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="documentList is required and cannot be empty")
# Convert single value to list if needed
@@ -1455,16 +1571,21 @@ Return JSON:
documentList = [documentList]
# Get Microsoft connection
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
connection = self._getMicrosoftConnection(connectionReference)
if not connection:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
# Check permissions
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Checking permissions")
permissions_ok = await self._checkPermissions(connection)
if not permissions_ok:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")
# Read draft email JSON documents from documentList
+ self.services.chat.progressLogUpdate(operationId, 0.4, "Reading draft email documents")
draftEmails = []
for docRef in documentList:
try:
@@ -1535,8 +1656,11 @@ Return JSON:
continue
if not draftEmails:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No valid draft email JSON documents found in documentList")
+ self.services.chat.progressLogUpdate(operationId, 0.6, f"Found {len(draftEmails)} draft email(s) to send")
+
# Send all draft emails
graph_url = "https://graph.microsoft.com/v1.0"
headers = {
@@ -1547,7 +1671,8 @@ Return JSON:
sentResults = []
failedResults = []
- for draftEmail in draftEmails:
+ self.services.chat.progressLogUpdate(operationId, 0.7, "Sending emails")
+ for idx, draftEmail in enumerate(draftEmails):
draftEmailJson = draftEmail["draftEmailJson"]
draftId = draftEmail["draftId"]
sourceDocument = draftEmail["sourceDocument"]
@@ -1577,6 +1702,7 @@ Return JSON:
"sourceDocument": sourceDocument
})
logger.info(f"Email sent successfully. Draft ID: {draftId}, Subject: {subject}")
+ self.services.chat.progressLogUpdate(operationId, 0.7 + (idx + 1) * 0.2 / len(draftEmails), f"Sent {idx + 1}/{len(draftEmails)}: {subject}")
else:
errorResult = {
"status": "error",
@@ -1623,35 +1749,66 @@ Return JSON:
}
# Determine overall success status
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Sent {successfulEmails}/{totalEmails} email(s)")
if successfulEmails == 0:
+ self.services.chat.progressLogFinish(operationId, False)
+ validationMetadata = {
+ "actionType": "outlook.sendDraftEmail",
+ "connectionReference": connectionReference,
+ "totalEmails": totalEmails,
+ "successfulEmails": successfulEmails,
+ "failedEmails": failedEmails,
+ "status": "all_failed"
+ }
return ActionResult.isFailure(
error=f"Failed to send all {totalEmails} email(s)",
documents=[ActionDocument(
documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(resultData, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
elif failedEmails > 0:
# Partial success
logger.warning(f"Sent {successfulEmails} out of {totalEmails} emails. {failedEmails} failed.")
+ validationMetadata = {
+ "actionType": "outlook.sendDraftEmail",
+ "connectionReference": connectionReference,
+ "totalEmails": totalEmails,
+ "successfulEmails": successfulEmails,
+ "failedEmails": failedEmails,
+ "status": "partial_success"
+ }
+ self.services.chat.progressLogFinish(operationId, True)
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(resultData, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
else:
# All successful
logger.info(f"Successfully sent all {totalEmails} email(s)")
+ validationMetadata = {
+ "actionType": "outlook.sendDraftEmail",
+ "connectionReference": connectionReference,
+ "totalEmails": totalEmails,
+ "successfulEmails": successfulEmails,
+ "failedEmails": failedEmails,
+ "status": "all_successful"
+ }
+ self.services.chat.progressLogFinish(operationId, True)
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(resultData, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
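
The three branches above build near-identical metadata that differs only in "status"; a possible consolidation (not part of this diff, the helper is hypothetical) computes the status once:

```python
from typing import Any, Dict

def buildSendMetadata(connectionReference: str, totalEmails: int,
                      successfulEmails: int, failedEmails: int) -> Dict[str, Any]:
    # Derive the status label used by outlook.sendDraftEmail's three branches
    if successfulEmails == 0:
        status = "all_failed"
    elif failedEmails > 0:
        status = "partial_success"
    else:
        status = "all_successful"
    return {
        "actionType": "outlook.sendDraftEmail",
        "connectionReference": connectionReference,
        "totalEmails": totalEmails,
        "successfulEmails": successfulEmails,
        "failedEmails": failedEmails,
        "status": status,
    }

assert buildSendMetadata("ms-main", 3, 2, 1)["status"] == "partial_success"
assert buildSendMetadata("ms-main", 3, 0, 3)["status"] == "all_failed"
```
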
@@ -1693,12 +1850,19 @@ Return JSON:
"status": "ready"
}
+ validationMetadata = {
+ "actionType": "outlook.checkPermissions",
+ "connectionReference": connectionReference,
+ "permissionsStatus": "ready",
+ "hasPermissions": True
+ }
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"outlook_permissions_check_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(result_data, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
else:
@@ -1711,12 +1875,19 @@ Return JSON:
"message": "Please re-authenticate your Microsoft connection to get updated permissions."
}
+ validationMetadata = {
+ "actionType": "outlook.checkPermissions",
+ "connectionReference": connectionReference,
+ "permissionsStatus": "needs_reauthentication",
+ "hasPermissions": False
+ }
return ActionResult(
success=False,
documents=[ActionDocument(
documentName=f"outlook_permissions_check_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(result_data, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)],
error="Connection lacks necessary permissions for Outlook operations"
)
diff --git a/modules/workflows/methods/methodSharepoint.py b/modules/workflows/methods/methodSharepoint.py
index 92d77e8e..da3db26b 100644
--- a/modules/workflows/methods/methodSharepoint.py
+++ b/modules/workflows/methods/methodSharepoint.py
@@ -1072,6 +1072,13 @@ class MethodSharepoint(MethodBase):
outputExtension = ".json" # Default
outputMimeType = "application/json" # Default
+ validationMetadata = {
+ "actionType": "sharepoint.findDocumentPath",
+ "searchQuery": searchQuery,
+ "maxResults": maxResults,
+ "totalResults": len(foundDocuments),
+ "hasResults": len(foundDocuments) > 0
+ }
return ActionResult(
success=True,
@@ -1079,7 +1086,8 @@ class MethodSharepoint(MethodBase):
ActionDocument(
documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{outputExtension}",
documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType
+ mimeType=outputMimeType,
+ validationMetadata=validationMetadata
)
]
)
@@ -1112,7 +1120,21 @@ class MethodSharepoint(MethodBase):
- documentData: Base64-encoded content (binary files) or plain text (text files)
- mimeType: MIME type (e.g., application/pdf, text/plain)
"""
+ import time
+ operationId = None
try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_read_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ operationId,
+ "Read Documents",
+ "SharePoint Document Reading",
+ f"Path: {parameters.get('pathQuery', parameters.get('pathObject', '*'))}"
+ )
+
documentList = parameters.get("documentList")
if isinstance(documentList, str):
documentList = [documentList]
@@ -1123,11 +1145,16 @@ class MethodSharepoint(MethodBase):
# Validate connection reference
if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Connection reference is required")
# Get connection first - needed for both pathObject and documentList approaches
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
connection = self._getMicrosoftConnection(connectionReference)
if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
# If pathObject is provided, extract SharePoint file IDs and read them directly
@@ -1142,6 +1169,8 @@ class MethodSharepoint(MethodBase):
from modules.datamodels.datamodelDocref import DocumentReferenceList
pathObjectDocuments = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([pathObject]))
if not pathObjectDocuments or len(pathObjectDocuments) == 0:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
# Get the first document's content (which should be the JSON from findDocumentPath)
@@ -1259,8 +1288,10 @@ class MethodSharepoint(MethodBase):
readResults = []
siteId = sites[0]['id']
- for fileId in sharePointFileIds:
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Reading {len(sharePointFileIds)} file(s) from SharePoint")
+ for idx, fileId in enumerate(sharePointFileIds):
try:
+ self.services.chat.progressLogUpdate(operationId, 0.5 + (idx * 0.3 / len(sharePointFileIds)), f"Reading file {idx + 1}/{len(sharePointFileIds)}")
# Get file info from SharePoint
endpoint = f"sites/{siteId}/drive/items/{fileId}"
fileInfo = await self._makeGraphApiCall(endpoint)
@@ -1306,11 +1337,13 @@ class MethodSharepoint(MethodBase):
continue
if not readResults:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No files could be read from pathObject")
# Convert read results to ActionDocument objects
# IMPORTANT: For binary files (PDFs), store Base64-encoded content directly in documentData
# The system will create FileData and ChatDocument automatically
+ self.services.chat.progressLogUpdate(operationId, 0.8, f"Processing {len(readResults)} document(s)")
from modules.datamodels.datamodelChat import ActionDocument
import base64
@@ -1336,19 +1369,40 @@ class MethodSharepoint(MethodBase):
if fileContent and isinstance(fileContent, bytes):
# Encode binary content as Base64 string
base64Content = base64.b64encode(fileContent).decode('utf-8')
+ validationMetadata = {
+ "actionType": "sharepoint.readDocuments",
+ "fileName": fileName,
+ "sharepointFileId": resultItem.get("sharepointFileId"),
+ "siteName": resultItem.get("siteName"),
+ "mimeType": mimeType,
+ "contentType": "binary",
+ "size": len(fileContent),
+ "includeMetadata": includeMetadata
+ }
actionDoc = ActionDocument(
documentName=fileName,
documentData=base64Content, # Base64 string for binary files
- mimeType=mimeType
+ mimeType=mimeType,
+ validationMetadata=validationMetadata
)
actionDocuments.append(actionDoc)
logger.info(f"Stored binary file {fileName} ({len(fileContent)} bytes) as Base64 in ActionDocument")
elif fileContent:
# Text content - store directly in documentData
+ validationMetadata = {
+ "actionType": "sharepoint.readDocuments",
+ "fileName": fileName,
+ "sharepointFileId": resultItem.get("sharepointFileId"),
+ "siteName": resultItem.get("siteName"),
+ "mimeType": mimeType,
+ "contentType": "text",
+ "includeMetadata": includeMetadata
+ }
actionDoc = ActionDocument(
documentName=fileName,
documentData=fileContent if isinstance(fileContent, str) else str(fileContent),
- mimeType=mimeType
+ mimeType=mimeType,
+ validationMetadata=validationMetadata
)
actionDocuments.append(actionDoc)
else:
@@ -1366,14 +1420,26 @@ class MethodSharepoint(MethodBase):
if resultItem.get("metadata"):
docData["metadata"] = resultItem["metadata"]
+ validationMetadata = {
+ "actionType": "sharepoint.readDocuments",
+ "fileName": fileName,
+ "sharepointFileId": resultItem.get("sharepointFileId"),
+ "siteName": resultItem.get("siteName"),
+ "mimeType": mimeType,
+ "contentType": "metadata_only",
+ "includeMetadata": includeMetadata
+ }
actionDoc = ActionDocument(
documentName=fileName,
documentData=json.dumps(docData, indent=2),
- mimeType=mimeType
+ mimeType=mimeType,
+ validationMetadata=validationMetadata
)
actionDocuments.append(actionDoc)
# Return success with action documents
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Read {len(actionDocuments)} document(s)")
+ self.services.chat.progressLogFinish(operationId, True)
return ActionResult.isSuccess(documents=actionDocuments)
# Fallback: Use documentList parameter (for backward compatibility)
@@ -1583,6 +1649,13 @@ class MethodSharepoint(MethodBase):
outputExtension = ".json" # Default
outputMimeType = "application/json" # Default
+ validationMetadata = {
+ "actionType": "sharepoint.readDocuments",
+ "connectionReference": connectionReference,
+ "documentCount": len(readResults),
+ "includeMetadata": includeMetadata,
+ "sitesSearched": len(sites)
+ }
return ActionResult(
success=True,
@@ -1590,12 +1663,18 @@ class MethodSharepoint(MethodBase):
ActionDocument(
documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{outputExtension}",
documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType
+ mimeType=outputMimeType,
+ validationMetadata=validationMetadata
)
]
)
except Exception as e:
logger.error(f"Error reading SharePoint documents: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except Exception:
+ pass # Don't fail on progress logging errors
return ActionResult(
success=False,
error=str(e)
@@ -1998,6 +2077,15 @@ class MethodSharepoint(MethodBase):
outputExtension = ".json" # Default
outputMimeType = "application/json" # Default
+ validationMetadata = {
+ "actionType": "sharepoint.uploadDocument",
+ "connectionReference": connectionReference,
+ "uploadPath": uploadPath,
+ "fileNames": fileNames,
+ "uploadCount": len(uploadResults),
+ "successfulUploads": len([r for r in uploadResults if r.get("uploadStatus") == "success"]),
+ "failedUploads": len([r for r in uploadResults if r.get("uploadStatus") == "failed"])
+ }
return ActionResult(
success=True,
@@ -2005,7 +2093,8 @@ class MethodSharepoint(MethodBase):
ActionDocument(
documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{outputExtension}",
documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType
+ mimeType=outputMimeType,
+ validationMetadata=validationMetadata
)
]
)
@@ -2459,6 +2548,14 @@ class MethodSharepoint(MethodBase):
outputExtension = ".json" # Default
outputMimeType = "application/json" # Default
+ validationMetadata = {
+ "actionType": "sharepoint.listDocuments",
+ "pathQuery": listQuery,
+ "includeSubfolders": includeSubfolders,
+ "sitesSearched": len(sites),
+ "folderCount": len(listResults),
+ "totalItems": sum(len(result.get("siteResults", [])) for result in listResults)
+ }
return ActionResult(
success=True,
@@ -2466,7 +2563,8 @@ class MethodSharepoint(MethodBase):
ActionDocument(
documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{outputExtension}",
documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType
+ mimeType=outputMimeType,
+ validationMetadata=validationMetadata
)
]
)
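
The readDocuments changes wrap the operation in a start/update/finish progress sequence. A minimal sketch of that lifecycle, with the progressLogStart/Update/Finish signatures inferred from the hunks and a stub standing in for self.services.chat:

    import time

    class ChatServiceStub:
        """Illustrative stand-in for self.services.chat (signatures inferred from the diff)."""
        def progressLogStart(self, opId, action, title, detail): print(f"[{opId}] start: {title} ({detail})")
        def progressLogUpdate(self, opId, fraction, message): print(f"[{opId}] {fraction:.0%}: {message}")
        def progressLogFinish(self, opId, success): print(f"[{opId}] done, success={success}")

    chat = ChatServiceStub()
    operationId = f"sharepoint_read_demo_{int(time.time())}"
    chat.progressLogStart(operationId, "Read Documents", "SharePoint Document Reading", "Path: *")
    fileIds = ["a", "b", "c"]
    for idx, fileId in enumerate(fileIds):
        # Same interpolation as the hunk: progress spreads from 0.5 to 0.8 across the files read
        chat.progressLogUpdate(operationId, 0.5 + (idx * 0.3 / len(fileIds)), f"Reading file {idx + 1}/{len(fileIds)}")
    chat.progressLogFinish(operationId, True)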
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index b065b912..ebb433da 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -139,14 +139,11 @@ class ContentValidator:
"statistics": {}
}
- # Extract metadata
+ # Extract metadata - include ALL metadata fields (generic for all action types)
metadata = jsonData.get("metadata", {})
- if metadata:
- summary["metadata"] = {
- "title": metadata.get("title"),
- "split_strategy": metadata.get("split_strategy"),
- "extraction_method": metadata.get("extraction_method")
- }
+ if metadata and isinstance(metadata, dict):
+ # Include all metadata fields, not just specific ones
+ summary["metadata"] = dict(metadata)
# Extract documents array (if present)
documents = jsonData.get("documents", [])
@@ -195,6 +192,17 @@ class ContentValidator:
text = textElement.get("text", "")
if text:
sectionSummary["textPreview"] = text[:100] + ("..." if len(text) > 100 else "")
+ # Also check for textPreview directly in section (for web crawl results)
+ if section.get("textPreview"):
+ sectionSummary["textPreview"] = section.get("textPreview")
+
+ # Include any additional fields from section (generic approach)
+ # This ensures all action-specific fields are preserved
+ for key, value in section.items():
+ if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately
+                            # Include simple types (str, int, float, bool) and short lists (<= 10 items)
+ if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10):
+ sectionSummary[key] = value
summary["sections"].append(sectionSummary)
else:
@@ -206,7 +214,8 @@ class ContentValidator:
sectionSummary = {
"id": section.get("id"),
"content_type": section.get("content_type"),
- "title": section.get("title")
+ "title": section.get("title"),
+ "order": section.get("order")
}
if section.get("content_type") == "table":
@@ -220,8 +229,21 @@ class ContentValidator:
sectionSummary["rowCount"] = len(rows)
sectionSummary["headers"] = headers
+ # Include any additional fields from section (generic approach)
+ for key, value in section.items():
+ if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately
+                        # Include simple types (str, int, float, bool) and short lists (<= 10 items)
+ if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10):
+ sectionSummary[key] = value
+
summary["sections"].append(sectionSummary)
+ # Extract statistics from root level (generic - include all statistics fields)
+ rootStatistics = jsonData.get("statistics", {})
+ if rootStatistics and isinstance(rootStatistics, dict):
+ # Merge root statistics into summary statistics
+ summary["statistics"].update(rootStatistics)
+
return summary
except Exception as e:
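
The validator now copies every simple-typed section field instead of a fixed allowlist. A self-contained sketch of that filter (the data shape is illustrative):

    section = {
        "id": 1, "content_type": "text", "title": "Intro", "order": 0,
        "textPreview": "First 100 chars...",
        "elements": [{"type": "text", "text": "..."}],  # skipped: processed separately
        "tags": ["web", "crawl"],                        # short list: kept
    }
    sectionSummary = {"id": section.get("id"), "content_type": section.get("content_type"), "title": section.get("title")}
    for key, value in section.items():
        if key not in sectionSummary and key != "elements":
            # Keep simple types and short lists (<= 10 items), as in the hunk
            if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10):
                sectionSummary[key] = value
    print(sectionSummary)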
diff --git a/modules/workflows/processing/core/messageCreator.py b/modules/workflows/processing/core/messageCreator.py
index 55222ece..79ec66a6 100644
--- a/modules/workflows/processing/core/messageCreator.py
+++ b/modules/workflows/processing/core/messageCreator.py
@@ -210,8 +210,14 @@ class MessageCreator:
taskProgress = str(taskIndex)
# Enhanced completion message with criteria details
- if reviewResult and hasattr(reviewResult, 'reason'):
- completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ {reviewResult.reason or 'Task completed successfully'}"
+        # Prefer userMessage (user-friendly, in the user's language); fall back to reason
+ if reviewResult:
+ if hasattr(reviewResult, 'userMessage') and reviewResult.userMessage:
+ completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ {reviewResult.userMessage}"
+ elif hasattr(reviewResult, 'reason') and reviewResult.reason:
+ completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ {reviewResult.reason}"
+ else:
+ completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ Task completed successfully"
else:
completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ Task completed successfully"
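
The same fallback chain can be expressed more compactly with getattr, which also covers the reviewResult-is-None case. A sketch; the ReviewResult attribute names are taken from the hunk:

    def buildCompletionMessage(taskProgress, reviewResult=None):
        # Prefer the localized userMessage, then reason, then a generic fallback
        text = (getattr(reviewResult, "userMessage", None)
                or getattr(reviewResult, "reason", None)
                or "Task completed successfully")
        return f"🎯 **Task {taskProgress}**\n\n✅ {text}"

    print(buildCompletionMessage("2/5"))  # generic fallback when reviewResult is None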
diff --git a/modules/workflows/processing/core/taskPlanner.py b/modules/workflows/processing/core/taskPlanner.py
index e6e3b580..597e4096 100644
--- a/modules/workflows/processing/core/taskPlanner.py
+++ b/modules/workflows/processing/core/taskPlanner.py
@@ -28,11 +28,21 @@ class TaskPlanner:
logger.info(f"=== STARTING TASK PLAN GENERATION ===")
logger.info(f"Workflow ID: {workflow.id}")
- logger.info(f"User Input: {userInput}")
+ # Log normalized request instead of raw user input for security
+ normalizedPrompt = getattr(self.services, 'currentUserPromptNormalized', None) if self.services else None
+ if normalizedPrompt:
+ logger.info(f"Normalized Request: {normalizedPrompt}")
+ else:
+ logger.info(f"Normalized Request: {userInput}")
- # Use stored user prompt if available, otherwise use the input
- actualUserPrompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') and self.services.currentUserPrompt else userInput
- logger.info(f"Actual User Prompt: {actualUserPrompt}")
+ # Use normalized request if available, otherwise fallback to currentUserPrompt, then userInput
+ actualUserPrompt = None
+ if self.services and hasattr(self.services, 'currentUserPromptNormalized') and self.services.currentUserPromptNormalized:
+ actualUserPrompt = self.services.currentUserPromptNormalized
+ elif self.services and hasattr(self.services, 'currentUserPrompt') and self.services.currentUserPrompt:
+ actualUserPrompt = self.services.currentUserPrompt
+ else:
+ actualUserPrompt = userInput
# Check workflow status before calling AI service
checkWorkflowStopped(self.services)
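
The three-way prompt fallback in this hunk collapses to a single expression. A sketch, assuming the services object may lack either attribute:

    class Services:  # illustrative container for the attributes the hunk probes
        currentUserPromptNormalized = None
        currentUserPrompt = "summarize the attached file"

    services = Services()
    userInput = "raw input"
    # Normalized prompt first, then the stored prompt, then the raw input
    actualUserPrompt = (getattr(services, "currentUserPromptNormalized", None)
                        or getattr(services, "currentUserPrompt", None)
                        or userInput)
    print(actualUserPrompt)  # -> "summarize the attached file"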
diff --git a/modules/workflows/processing/modes/modeDynamic.py b/modules/workflows/processing/modes/modeDynamic.py
index 5cc8b866..592b5235 100644
--- a/modules/workflows/processing/modes/modeDynamic.py
+++ b/modules/workflows/processing/modes/modeDynamic.py
@@ -96,6 +96,10 @@ class DynamicMode(BaseMode):
# NEW: Reset progress tracking for new task
self.progressTracker.reset()
+ # Initialize executed actions tracking for this task
+ if not hasattr(context, 'executedActions') or context.executedActions is None:
+ context.executedActions = []
+
# Update workflow object before executing task
self._updateWorkflowBeforeExecutingTask(taskIndex)
@@ -104,7 +108,8 @@ class DynamicMode(BaseMode):
state = TaskExecutionState(taskStep)
# Dynamic mode uses max_steps instead of max_retries
- state.max_steps = max(1, int(getattr(workflow, 'maxSteps', 10)))
+        # maxSteps is set in workflowManager.py when the workflow is created
+ state.max_steps = int(getattr(workflow, 'maxSteps', 1))
logger.info(f"Using Dynamic mode execution with max_steps: {state.max_steps}")
step = 1
@@ -128,6 +133,19 @@ class DynamicMode(BaseMode):
observation = self._observeBuild(result)
# Note: resultLabel is already set correctly in _observeBuild from actionResult.resultLabel
+ # Store executed action in context for action history
+ if not hasattr(context, 'executedActions') or context.executedActions is None:
+ context.executedActions = []
+ actionName = selection.get('action', 'unknown')
+ actionParameters = selection.get('parameters', {}) or {}
+                    # Filter out bulky parameters (documentList, connections) for clarity in history
+ relevantParams = {k: v for k, v in actionParameters.items() if k not in ['documentList', 'connections']}
+ context.executedActions.append({
+ 'action': actionName,
+ 'parameters': relevantParams,
+ 'step': step
+ })
+
# Content validation (against original cleaned user prompt / workflow intent)
if getattr(self, 'workflowIntent', None) and result.documents:
# Pass ALL documents to validator - validator decides what to validate (generic approach)
@@ -883,9 +901,20 @@ class DynamicMode(BaseMode):
elif progressState['nextActionsSuggested']:
enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"
- # NEW: Add action history to review content
+ # NEW: Add action history to review content - use all executed actions
+ actionHistory = []
+
+ # First, add all executed actions from the current task
+ if hasattr(context, 'executedActions') and context.executedActions:
+ for executedAction in context.executedActions:
+ action = executedAction.get('action', 'unknown')
+ params = executedAction.get('parameters', {}) or {}
+ paramsStr = json.dumps(params, ensure_ascii=False) if params else "{}"
+ step = executedAction.get('step', 0)
+ actionHistory.append(f"Step {step}: {action} {paramsStr}")
+
+ # Also include refinement decisions for completeness (these show what was planned)
if hasattr(context, 'previousReviewResult') and context.previousReviewResult:
- actionHistory = []
for i, prevDecision in enumerate(context.previousReviewResult, 1):
if prevDecision and hasattr(prevDecision, 'nextAction') and prevDecision.nextAction:
action = prevDecision.nextAction
@@ -895,21 +924,27 @@ class DynamicMode(BaseMode):
paramsStr = json.dumps(relevantParams, ensure_ascii=False) if relevantParams else "{}"
quality = getattr(prevDecision, 'qualityScore', None)
qualityStr = f" (quality: {quality:.2f})" if quality is not None else ""
- actionHistory.append(f"Round {i}: {action} {paramsStr}{qualityStr}")
-
- if actionHistory:
- enhancedReviewContent += f"\nACTION HISTORY:\n"
- enhancedReviewContent += "\n".join(f"- {entry}" for entry in actionHistory)
- # Detect repeated actions
- actionCounts = {}
- for entry in actionHistory:
- # Extract action name (before first space or {)
- actionName = entry.split()[1] if len(entry.split()) > 1 else "unknown"
+                    # Only add if this action/params pair isn't already covered by executedActions
+                    # (prefixes differ between "Step" and "Refinement", so compare the action portion)
+                    actionEntry = f"Refinement {i}: {action} {paramsStr}{qualityStr}"
+                    if not any(f"{action} {paramsStr}" in existing for existing in actionHistory):
+ actionHistory.append(actionEntry)
+
+ if actionHistory:
+ enhancedReviewContent += f"\nACTION HISTORY:\n"
+ enhancedReviewContent += "\n".join(f"- {entry}" for entry in actionHistory)
+ # Detect repeated actions
+ actionCounts = {}
+ for entry in actionHistory:
+ # Extract action name (after first space, before next space or {)
+ parts = entry.split()
+ if len(parts) > 1:
+ # Skip "Step", "Refinement" prefixes and get the action name
+ actionName = parts[1] if parts[0] in ['Step', 'Refinement'] else parts[0]
actionCounts[actionName] = actionCounts.get(actionName, 0) + 1
-
- repeatedActions = [action for action, count in actionCounts.items() if count >= 2]
- if repeatedActions:
- enhancedReviewContent += f"\nWARNING: Repeated actions detected: {', '.join(repeatedActions)}. Consider a fundamentally different approach.\n"
+
+ repeatedActions = [action for action, count in actionCounts.items() if count >= 2]
+ if repeatedActions:
+ enhancedReviewContent += f"\nWARNING: Repeated actions detected: {', '.join(repeatedActions)}. Consider a fundamentally different approach.\n"
# Update placeholders with enhanced review content
placeholders["REVIEW_CONTENT"] = enhancedReviewContent
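
With the parts[2] fix above, repeated-action detection counts action names rather than the "N:" tokens. A sketch of the counting over mixed history entries:

    actionHistory = [
        "Step 1: sharepoint.readDocuments {}",
        "Step 2: sharepoint.readDocuments {}",
        "Refinement 1: outlook.sendMail {} (quality: 0.80)",
    ]
    actionCounts = {}
    for entry in actionHistory:
        parts = entry.split()
        if len(parts) > 1:
            # Skip the "Step N:" / "Refinement N:" prefix to reach the action name
            actionName = parts[2] if parts[0] in ["Step", "Refinement"] and len(parts) > 2 else parts[0]
            actionCounts[actionName] = actionCounts.get(actionName, 0) + 1
    repeated = [a for a, c in actionCounts.items() if c >= 2]
    print(repeated)  # -> ['sharepoint.readDocuments']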
diff --git a/modules/workflows/processing/shared/executionState.py b/modules/workflows/processing/shared/executionState.py
index fd1299cf..2db6a3f3 100644
--- a/modules/workflows/processing/shared/executionState.py
+++ b/modules/workflows/processing/shared/executionState.py
@@ -19,7 +19,7 @@ class TaskExecutionState:
self.max_retries = 3
# Iterative loop (dynamic mode)
self.current_step = 0
- self.max_steps = 5
+ self.max_steps = 0 # Will be overridden by workflow.maxSteps from workflowManager.py
def addSuccessfulAction(self, action_result: ActionResult):
"""Add a successful action to the state"""
@@ -56,7 +56,7 @@ class TaskExecutionState:
patterns.append("permission_issues")
return list(set(patterns))
-def shouldContinue(observation: Optional[Observation], review=None, current_step: int = 0, max_steps: int = 5) -> bool:
+def shouldContinue(observation: Optional[Observation], review=None, current_step: int = 0, max_steps: int = 1) -> bool:
"""Helper to decide if the iterative loop should continue
Args:
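
With max_steps now sourced from workflow.maxSteps, the iterative loop degenerates to a single pass whenever the override is missing. A sketch of that contract; shouldContinue is simplified here, as the real helper also inspects observation/review:

    class Workflow:
        pass  # illustrative: workflowManager.py normally sets maxSteps on this object

    def shouldContinue(observation=None, review=None, current_step=0, max_steps=1):
        # Simplified stop condition: halt once the step budget is exhausted
        return current_step < max_steps

    workflow = Workflow()
    max_steps = int(getattr(workflow, "maxSteps", 1))  # new fallback is 1 (previously floored at 10)
    step = 0
    while shouldContinue(current_step=step, max_steps=max_steps):
        step += 1
    print(step)  # -> 1: a single pass unless workflow.maxSteps overrides it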
diff --git a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
index a58467fb..266c2e4f 100644
--- a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
+++ b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
@@ -350,34 +350,36 @@ Return ONLY JSON (no markdown, no explanations). The decision MUST:
- Match parameter names exactly as defined in AVAILABLE_METHODS
{{
- "status": "continue",
- "reason": "Brief reason explaining why continuing",
- "nextAction": "Selected_action_from_ACTIONS",
+ "status": "continue" | "success",
+ "reason": "Brief reason explaining why continuing or why task is complete",
+ "userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining the task status (1 sentence, first person, friendly tone)",
+ "nextAction": "Selected_action_from_ACTIONS" | null,
"nextActionParameters": {{
"documentList": ["docItem::", "docList: