commit 1017818ef3
47 changed files with 3089 additions and 2156 deletions

app.py (11 changed lines)

@@ -16,6 +16,7 @@ from datetime import datetime
from modules.shared.configuration import APP_CONFIG
from modules.shared.eventManagement import eventManager
from modules.features import featuresLifecycle as featuresLifecycle
+from modules.interfaces.interfaceDbAppObjects import getRootInterface


class DailyRotatingFileHandler(RotatingFileHandler):
    """

@@ -275,15 +276,21 @@ instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
async def lifespan(app: FastAPI):
    logger.info("Application is starting up")

+    # Get event user for feature lifecycle (system-level user for background operations)
+    rootInterface = getRootInterface()
+    eventUser = rootInterface.getUserByUsername("event")
+    if not eventUser:
+        logger.error("Could not get event user - some features may not start properly")
+
    # --- Init Managers ---
-    await featuresLifecycle.start()
+    await featuresLifecycle.start(eventUser)
    eventManager.start()

    yield

    # --- Stop Managers ---
    eventManager.stop()
-    await featuresLifecycle.stop()
+    await featuresLifecycle.stop(eventUser)
    logger.info("Application has been shut down")
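
For orientation, a minimal sketch of how a lifespan handler like this is typically attached to the app. The decorator and the constructor call sit outside this hunk, so the wiring shown here is an assumption based on the standard FastAPI lifespan API:

```python
# Hypothetical wiring (not part of this diff), using FastAPI's standard lifespan API.
from contextlib import asynccontextmanager
from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    # startup: resolve the system-level "event" user, start background managers
    yield
    # shutdown: stop background managers

app = FastAPI(lifespan=lifespan)
```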

how --stat HEAD (new file, 49 lines)

@@ -0,0 +1,49 @@
M app.py
A modules/.$DEPENDENCY_DIAGRAM.drawio.bkp
A modules/AUTOMATION_FEATURE_ANALYSIS.md
A modules/BIDIRECTIONAL_IMPORTS.md
A modules/DEPENDENCY_DIAGRAM.drawio
A modules/FEATURES_TO_INTERFACES_IMPORTS.md
M modules/connectors/connectorVoiceGoogle.py
M modules/datamodels/datamodelChat.py
M modules/datamodels/datamodelPagination.py
A modules/features/automation/__init__.py
A modules/features/automation/mainAutomation.py
A modules/features/automation/subAutomationUtils.py
D modules/features/chatAlthaus/COMPONENT_DIAGRAM.md
M modules/features/featuresLifecycle.py
M modules/interfaces/interfaceAiObjects.py
M modules/interfaces/interfaceDbAppObjects.py
M modules/interfaces/interfaceDbChatObjects.py
M modules/interfaces/interfaceDbComponentObjects.py
M modules/interfaces/interfaceVoiceObjects.py
M modules/routes/routeAdminAutomationEvents.py
M modules/routes/routeVoiceGoogle.py
M modules/services/__init__.py
M modules/services/serviceAi/mainServiceAi.py
M modules/services/serviceAi/subJsonResponseHandling.py
M modules/services/serviceChat/mainServiceChat.py
M modules/services/serviceExtraction/mainServiceExtraction.py
M modules/services/serviceExtraction/subPipeline.py
M modules/services/serviceExtraction/subPromptBuilderExtraction.py
M modules/services/serviceGeneration/renderers/rendererXlsx.py
M modules/services/serviceGeneration/subPromptBuilderGeneration.py
A modules/services/serviceSecurity/mainServiceSecurity.py
M modules/services/serviceSharepoint/mainServiceSharepoint.py
M modules/services/serviceUtils/mainServiceUtils.py
A modules/shared/callbackRegistry.py
M modules/shared/debugLogger.py
M modules/shared/jsonUtils.py
M modules/workflows/methods/methodAi.py
M modules/workflows/methods/methodBase.py
A modules/workflows/methods/methodContext.py
M modules/workflows/methods/methodOutlook.py
M modules/workflows/methods/methodSharepoint.py
M modules/workflows/processing/adaptive/contentValidator.py
M modules/workflows/processing/core/messageCreator.py
M modules/workflows/processing/modes/modeAutomation.py
M modules/workflows/processing/modes/modeDynamic.py
M modules/workflows/processing/shared/promptGenerationActionsDynamic.py
M modules/workflows/processing/shared/promptGenerationTaskplan.py
M modules/workflows/processing/workflowProcessor.py
M modules/workflows/workflowManager.py

modules/connectors/connectorVoiceGoogle.py

@@ -403,6 +403,61 @@ class ConnectorGoogleSpeech:
                "error": str(e)
            }

    async def detectLanguage(self, text: str) -> Dict:
        """
        Detect the language of text using Google Cloud Translation API.

        Args:
            text: Text to detect language for

        Returns:
            Dict containing detected language code and confidence
        """
        try:
            if not text.strip():
                logger.warning("⚠️ Empty text provided for language detection")
                return {
                    "success": False,
                    "language": "",
                    "error": "Empty text provided"
                }

            # Use a sample of the text (middle 1000 bytes or full text if smaller)
            textBytes = text.encode('utf-8')
            if len(textBytes) > 1000:
                # Take 1000 bytes from the middle
                startPos = (len(textBytes) - 1000) // 2
                textSample = textBytes[startPos:startPos + 1000].decode('utf-8', errors='ignore')
            else:
                textSample = text

            logger.info(f"🔍 Detecting language for text sample: '{textSample[:100]}...'")

            # Use translation API with auto-detection (source_language=None)
            result = self.translate_client.translate(
                textSample,
                source_language=None,  # Auto-detect
                target_language='en'  # Dummy target, we only need detection
            )

            detectedLanguage = result.get('detectedSourceLanguage', '')

            logger.info(f"✅ Language detected: {detectedLanguage}")

            return {
                "success": True,
                "language": detectedLanguage,
                "confidence": 1.0  # Google Translation API doesn't provide confidence, assume high
            }

        except Exception as e:
            logger.error(f"❌ Google Cloud Language Detection error: {e}")
            return {
                "success": False,
                "language": "",
                "error": str(e)
            }

    async def speechToTranslatedText(self, audioContent: bytes,
                                     fromLanguage: str = "de-DE",
                                     toLanguage: str = "en") -> Dict:
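
Sampling the middle 1000 bytes (rather than the head of the text) helps keep headers and boilerplate from skewing detection. A hedged usage sketch of the new method; the connector construction and auth details are assumptions, while the return shape follows the dicts above:

```python
# Hypothetical caller; assumes an initialized ConnectorGoogleSpeech with a
# configured translate_client.
import asyncio

async def main():
    connector = ConnectorGoogleSpeech()  # construction/auth details are assumptions
    result = await connector.detectLanguage("Guten Tag, wie geht es Ihnen?")
    if result["success"]:
        print(result["language"])  # e.g. "de"
    else:
        print("Detection failed:", result["error"])

asyncio.run(main())
```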

modules/datamodels/datamodelChat.py

@@ -62,7 +62,7 @@ class ChatLog(BaseModel):
        None, description="Performance metrics"
    )
    parentId: Optional[str] = Field(
-        None, description="Parent log entry ID for hierarchical display"
+        None, description="Parent operation ID (operationId of parent operation) for hierarchical display"
    )
    operationId: Optional[str] = Field(
        None, description="Operation ID to group related log entries"

@@ -828,6 +828,7 @@ class TaskContext(BaseModel):
    failurePatterns: Optional[list[str]] = Field(default_factory=list)
    failedActions: Optional[list] = Field(default_factory=list)
    successfulActions: Optional[list] = Field(default_factory=list)
+    executedActions: Optional[list] = Field(default_factory=list, description="List of executed actions with action name, parameters, and step number")
    criteriaProgress: Optional[dict] = None

    # Stage 2 context fields (NEW)
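
Per its description, the new executedActions field holds one entry per executed action. A plausible entry shape, with the caveat that the exact keys are assumptions inferred from the description:

```python
# Hypothetical TaskContext.executedActions entry; key names are assumed.
executedAction = {
    "action": "sharepointUpload",        # action name
    "parameters": {"path": "/reports"},  # parameters the action was called with
    "step": 3,                           # step number within the task plan
}
```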

modules/features/automation/__init__.py (new file, 12 lines)

@@ -0,0 +1,12 @@
"""
Automation feature - handles automated workflow execution and scheduling.

Moved from interfaces/interfaceDbChatObjects.py to follow proper architectural separation:
- Interface layer: Data access only (getAutomationDefinition, etc.)
- Feature layer: Business logic and orchestration (executeAutomation, syncAutomationEvents)
"""

from .mainAutomation import executeAutomation, syncAutomationEvents, createAutomationEventHandler

__all__ = ['executeAutomation', 'syncAutomationEvents', 'createAutomationEventHandler']

modules/features/automation/mainAutomation.py (new file, 287 lines)

@@ -0,0 +1,287 @@
"""
Main automation service - handles automation workflow execution and scheduling.

Moved from interfaces/interfaceDbChatObjects.py to follow proper architectural separation.
"""

import logging
import json
from typing import Dict, Any

from modules.datamodels.datamodelChat import ChatWorkflow, UserInputRequest, WorkflowModeEnum, AutomationDefinition
from modules.shared.timeUtils import getUtcTimestamp
from modules.shared.eventManagement import eventManager
from modules.services import getInterface as getServices
from modules.features.chatPlayground.mainChatPlayground import chatStart
from .subAutomationUtils import parseScheduleToCron, planToPrompt, replacePlaceholders

logger = logging.getLogger(__name__)


async def executeAutomation(automationId: str, chatInterface) -> ChatWorkflow:
    """Execute automation workflow immediately (test mode) with placeholder replacement.

    Args:
        automationId: ID of automation to execute
        chatInterface: ChatObjects interface instance for data access

    Returns:
        ChatWorkflow instance created by automation execution
    """
    executionStartTime = getUtcTimestamp()
    executionLog = {
        "timestamp": executionStartTime,
        "workflowId": None,
        "status": "running",
        "messages": []
    }

    try:
        # 1. Load automation definition
        automation = chatInterface.getAutomationDefinition(automationId)
        if not automation:
            raise ValueError(f"Automation {automationId} not found")

        executionLog["messages"].append(f"Started execution at {executionStartTime}")

        # 2. Replace placeholders in template to generate plan
        template = automation.get("template", "")
        placeholders = automation.get("placeholders", {})
        planJson = replacePlaceholders(template, placeholders)
        try:
            plan = json.loads(planJson)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse plan JSON after placeholder replacement: {str(e)}")
            logger.error(f"Template: {template[:500]}...")
            logger.error(f"Placeholders: {placeholders}")
            logger.error(f"Generated planJson (first 1000 chars): {planJson[:1000]}")
            logger.error(f"Error position: line {e.lineno}, column {e.colno}, char {e.pos}")
            if e.pos:
                start = max(0, e.pos - 100)
                end = min(len(planJson), e.pos + 100)
                logger.error(f"Context around error: ...{planJson[start:end]}...")
            raise ValueError(f"Invalid JSON after placeholder replacement: {str(e)}")
        executionLog["messages"].append("Template placeholders replaced successfully")

        # 3. Get user who created automation
        creatorUserId = automation.get("_createdBy")

        # CRITICAL: Automation MUST run as creator user only, or fail
        if not creatorUserId:
            errorMsg = f"Automation {automationId} has no creator user (_createdBy field missing). Cannot execute automation."
            logger.error(errorMsg)
            executionLog["messages"].append(errorMsg)
            raise ValueError(errorMsg)

        # Get user from database using services
        services = getServices(chatInterface.currentUser, None)
        creatorUser = services.interfaceDbApp.getUser(creatorUserId)
        if not creatorUser:
            raise ValueError(f"Creator user {creatorUserId} not found")

        executionLog["messages"].append(f"Using creator user: {creatorUserId}")

        # 4. Create UserInputRequest from plan
        # Embed plan JSON in prompt for TemplateMode to extract
        promptText = planToPrompt(plan)
        planJsonStr = json.dumps(plan)
        # Embed plan as JSON comment so TemplateMode can extract it
        promptWithPlan = f"{promptText}\n\n<!--TEMPLATE_PLAN_START-->\n{planJsonStr}\n<!--TEMPLATE_PLAN_END-->"

        userInput = UserInputRequest(
            prompt=promptWithPlan,
            listFileId=[],
            userLanguage=creatorUser.language or "en"
        )

        executionLog["messages"].append("Starting workflow execution")

        # 5. Start workflow using chatStart
        workflow = await chatStart(
            currentUser=creatorUser,
            userInput=userInput,
            workflowMode=WorkflowModeEnum.WORKFLOW_AUTOMATION,
            workflowId=None
        )

        executionLog["workflowId"] = workflow.id
        executionLog["status"] = "completed"
        executionLog["messages"].append(f"Workflow {workflow.id} started successfully")
        logger.info(f"Started workflow {workflow.id} with plan containing {len(plan.get('tasks', []))} tasks (plan embedded in userInput)")

        # Set workflow name with "automated" prefix
        automationLabel = automation.get("label", "Unknown Automation")
        workflowName = f"automated: {automationLabel}"
        workflow = chatInterface.updateWorkflow(workflow.id, {"name": workflowName})
        logger.info(f"Set workflow {workflow.id} name to: {workflowName}")

        # Update automation with execution log
        executionLogs = automation.get("executionLogs", [])
        executionLogs.append(executionLog)
        # Keep only last 50 executions
        if len(executionLogs) > 50:
            executionLogs = executionLogs[-50:]

        chatInterface.db.recordModify(
            AutomationDefinition,
            automationId,
            {"executionLogs": executionLogs}
        )

        return workflow
    except Exception as e:
        # Log error to execution log
        executionLog["status"] = "error"
        executionLog["messages"].append(f"Error: {str(e)}")

        # Update automation with execution log even on error
        try:
            automation = chatInterface.getAutomationDefinition(automationId)
            if automation:
                executionLogs = automation.get("executionLogs", [])
                executionLogs.append(executionLog)
                if len(executionLogs) > 50:
                    executionLogs = executionLogs[-50:]
                chatInterface.db.recordModify(
                    AutomationDefinition,
                    automationId,
                    {"executionLogs": executionLogs}
                )
        except Exception as logError:
            logger.error(f"Error saving execution log: {str(logError)}")

        raise


async def syncAutomationEvents(chatInterface, eventUser) -> Dict[str, Any]:
    """Automation event handler - syncs scheduler with all active automations.

    Args:
        chatInterface: ChatObjects interface instance for data access
        eventUser: System-level event user for accessing automations

    Returns:
        Dictionary with sync results (synced count and event IDs)
    """
    # Get all automation definitions (for current mandate)
    allAutomations = chatInterface.db.getRecordset(AutomationDefinition)
    filtered = chatInterface._uam(AutomationDefinition, allAutomations)

    registeredEvents = {}

    for automation in filtered:
        automationId = automation.get("id")
        isActive = automation.get("active", False)
        currentEventId = automation.get("eventId")
        schedule = automation.get("schedule")

        if not schedule:
            logger.warning(f"Automation {automationId} has no schedule, skipping")
            continue

        try:
            # Parse schedule to cron kwargs
            cronKwargs = parseScheduleToCron(schedule)

            if isActive:
                # Remove existing event if present (handles schedule changes)
                if currentEventId:
                    try:
                        eventManager.remove(currentEventId)
                    except Exception as e:
                        logger.warning(f"Error removing old event {currentEventId}: {str(e)}")

                # Register new event
                newEventId = f"automation.{automationId}"

                # Create event handler function
                handler = createAutomationEventHandler(automationId, eventUser)

                # Register cron job
                eventManager.registerCron(
                    jobId=newEventId,
                    func=handler,
                    cronKwargs=cronKwargs,
                    replaceExisting=True
                )

                # Update automation with new eventId
                if currentEventId != newEventId:
                    chatInterface.db.recordModify(
                        AutomationDefinition,
                        automationId,
                        {"eventId": newEventId}
                    )

                registeredEvents[automationId] = newEventId
            else:
                # Remove event if exists
                if currentEventId:
                    try:
                        eventManager.remove(currentEventId)
                        chatInterface.db.recordModify(
                            AutomationDefinition,
                            automationId,
                            {"eventId": None}
                        )
                    except Exception as e:
                        logger.warning(f"Error removing event {currentEventId}: {str(e)}")
        except Exception as e:
            logger.error(f"Error syncing automation {automationId}: {str(e)}")

    return {
        "synced": len(registeredEvents),
        "events": registeredEvents
    }


def createAutomationEventHandler(automationId: str, eventUser):
    """Create event handler function for a specific automation.

    Args:
        automationId: ID of automation to create handler for
        eventUser: System-level event user for accessing automations (captured in closure)

    Returns:
        Async handler function for scheduled automation execution
    """
    async def handler():
        try:
            if not eventUser:
                logger.error("Event user not available for automation execution")
                return

            # Get services for event user (provides access to interfaces)
            eventServices = getServices(eventUser, None)

            # Load automation using event user context
            automation = eventServices.interfaceDbChat.getAutomationDefinition(automationId)
            if not automation or not automation.get("active"):
                logger.warning(f"Automation {automationId} not found or not active, skipping execution")
                return

            # Get creator user
            creatorUserId = automation.get("_createdBy")
            if not creatorUserId:
                logger.error(f"Automation {automationId} has no creator user")
                return

            # Get creator user from database using services
            eventServices = getServices(eventUser, None)
            creatorUser = eventServices.interfaceDbApp.getUser(creatorUserId)
            if not creatorUser:
                logger.error(f"Creator user {creatorUserId} not found for automation {automationId}")
                return

            # Get services for creator user (provides access to interfaces)
            creatorServices = getServices(creatorUser, None)

            # Execute automation with creator user's context
            # executeAutomation is in same module, so we can call it directly
            await executeAutomation(automationId, creatorServices.interfaceDbChat)
            logger.info(f"Successfully executed automation {automationId} as user {creatorUserId}")
        except Exception as e:
            logger.error(f"Error executing automation {automationId}: {str(e)}")

    return handler
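
Putting the pieces together, a minimal sketch of scheduling one automation by hand. The registerCron keyword arguments mirror the syncAutomationEvents code above, while the IDs and the eventUser resolution are placeholders:

```python
# Hypothetical manual wiring, mirroring syncAutomationEvents above.
from modules.shared.eventManagement import eventManager
from modules.features.automation import createAutomationEventHandler
from modules.features.automation.subAutomationUtils import parseScheduleToCron

automationId = "auto-123"  # placeholder ID
eventUser = ...            # the system-level "event" user resolved at startup

handler = createAutomationEventHandler(automationId, eventUser)
eventManager.registerCron(
    jobId=f"automation.{automationId}",
    func=handler,
    cronKwargs=parseScheduleToCron("0 6 * * 1-5"),  # 06:00 UTC on weekdays
    replaceExisting=True,
)
```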

modules/features/automation/subAutomationUtils.py (new file, 108 lines)

@@ -0,0 +1,108 @@
"""
Utility functions for automation feature.

Moved from interfaces/interfaceDbChatObjects.py.
"""

import json
from typing import Dict, Any


def parseScheduleToCron(schedule: str) -> Dict[str, Any]:
    """Parse schedule string to cron kwargs for APScheduler"""
    parts = schedule.split()
    if len(parts) != 5:
        raise ValueError(f"Invalid schedule format: {schedule}")

    return {
        "minute": parts[0],
        "hour": parts[1],
        "day": parts[2],
        "month": parts[3],
        "day_of_week": parts[4]
    }


def planToPrompt(plan: Dict) -> str:
    """Convert plan structure to prompt string for workflow execution"""
    return plan.get("userMessage", plan.get("overview", "Execute automation workflow"))


def replacePlaceholders(template: str, placeholders: Dict[str, str]) -> str:
    """Replace placeholders in template with actual values. Placeholder format: {{KEY:PLACEHOLDER_NAME}}"""
    result = template
    for placeholderName, value in placeholders.items():
        pattern = f"{{{{KEY:{placeholderName}}}}}"

        # Check if placeholder is in an array context like ["{{KEY:...}}"]
        # If value is a JSON array/dict, we should replace the entire ["{{KEY:...}}"] with the array
        arrayPattern = f'["{pattern}"]'
        if arrayPattern in result:
            # Check if value is a JSON array/dict
            isArrayValue = False
            arrayValue = None

            if isinstance(value, (list, dict)):
                isArrayValue = True
                arrayValue = json.dumps(value)
            elif isinstance(value, str):
                try:
                    parsed = json.loads(value)
                    if isinstance(parsed, (list, dict)):
                        isArrayValue = True
                        arrayValue = value  # Already valid JSON string
                except (json.JSONDecodeError, ValueError):
                    pass

            if isArrayValue:
                # Replace ["{{KEY:...}}"] with the array value
                result = result.replace(arrayPattern, arrayValue)
                continue  # Skip the regular replacement below

        # Regular replacement - check if in quoted context
        patternStart = result.find(pattern)
        isQuoted = False
        if patternStart > 0:
            charBefore = result[patternStart - 1] if patternStart > 0 else None
            patternEnd = patternStart + len(pattern)
            charAfter = result[patternEnd] if patternEnd < len(result) else None
            if charBefore == '"' and charAfter == '"':
                isQuoted = True

        # Handle different value types
        if isinstance(value, (list, dict)):
            # Python list/dict - convert to JSON
            replacement = json.dumps(value)
        elif isinstance(value, str):
            # String value - check if it's a JSON string representing list/dict
            try:
                parsed = json.loads(value)
                if isinstance(parsed, (list, dict)):
                    # It's a JSON string of a list/dict
                    if isQuoted:
                        # In quoted context, escape the JSON string
                        escaped = json.dumps(value)
                        replacement = escaped[1:-1]  # Remove outer quotes
                    else:
                        # In unquoted context, use JSON directly
                        replacement = value
                else:
                    # It's a JSON string of a primitive
                    if isQuoted:
                        escaped = json.dumps(value)
                        replacement = escaped[1:-1]
                    else:
                        replacement = value
            except (json.JSONDecodeError, ValueError):
                # Not valid JSON - treat as plain string
                if isQuoted:
                    escaped = json.dumps(value)
                    replacement = escaped[1:-1]
                else:
                    replacement = value
        else:
            # Numbers, booleans, None - convert to string
            replacement = str(value)
        result = result.replace(pattern, replacement)
    return result
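
Both helpers are pure functions, so their behavior is easy to pin down with a short, self-contained check; the placeholder names and values below are made up for illustration:

```python
import json
from modules.features.automation.subAutomationUtils import parseScheduleToCron, replacePlaceholders

# A standard 5-field cron string maps 1:1 onto APScheduler cron kwargs.
assert parseScheduleToCron("0 1 * * *") == {
    "minute": "0", "hour": "1", "day": "*", "month": "*", "day_of_week": "*",
}

# A JSON-array value replaces the whole ["{{KEY:...}}"] wrapper instead of
# being stuffed into the quoted string; plain strings are escaped in place.
template = '{"recipients": ["{{KEY:RECIPIENTS}}"], "subject": "{{KEY:SUBJECT}}"}'
out = replacePlaceholders(template, {
    "RECIPIENTS": ["a@example.com", "b@example.com"],  # made-up values
    "SUBJECT": "Weekly report",
})
assert json.loads(out) == {
    "recipients": ["a@example.com", "b@example.com"],
    "subject": "Weekly report",
}
```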

modules/features/chatAlthaus/COMPONENT_DIAGRAM.md (deleted)

@@ -1,211 +0,0 @@
# Component Diagram: Althaus Customer Chatbot

## Overview

This diagram shows the high-level architecture of the Althaus chatbot application, including all participating components, data flows, and communication paths.

## Component Diagram

```mermaid
graph TB
    subgraph "PowerOn Chat UI"
        ChatUI[Chat Interface]
    end

    subgraph "PowerOn Platform"
        Gateway[Gateway Backend<br/>Event Scheduler & Data Query API]
        GatewayDB[(PostgreSQL)]
        AIServices[Dynamic AI, Tavily]
    end

    subgraph "Tenant althaus-ag.ch"
        subgraph "PowerOn PreProcessing"
            PreProcessing[Pre-Processing Service]
            PreProcessingDB[(PostgreSQL<br/>Memory DB)]
        end

        subgraph "MSFT Services"
            PowerBI[Power BI]
            TenantServices[Azure DC, DNA Center]
        end
    end

    %% Main communication
    ChatUI -->|"Data Queries<br/>User/Password Auth"| Gateway
    Gateway -->|"SQL Queries<br/>X-PP-API-Key"| PreProcessing
    Gateway -->|"Config Update<br/>Daily 01:00 UTC"| PreProcessing

    %% Data flow
    PowerBI -->|"Raw data"| PreProcessing
    PreProcessing --> PreProcessingDB
    PreProcessingDB -->|"Query Results"| Gateway
    Gateway --> ChatUI
    Gateway --> GatewayDB

    %% Styling
    classDef platform fill:#e1f5ff,stroke:#01579b,stroke-width:2px
    classDef frontend fill:#f3e5f5,stroke:#4a148c,stroke-width:2px
    classDef preprocessing fill:#fff3e0,stroke:#e65100,stroke-width:2px
    classDef customer fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px
    classDef database fill:#fce4ec,stroke:#880e4f,stroke-width:2px

    class Gateway,AIServices platform
    class ChatUI frontend
    class PreProcessing preprocessing
    class PowerBI,TenantServices customer
    class GatewayDB,PreProcessingDB database
```

## Component Descriptions

### 1. Gateway Backend (gateway.poweron-center.net)

**Main components:**
- **FastAPI Application**: Central backend application of the PowerOn Platform
- **Event Scheduler (chatAlthaus)**:
  - Daily scheduler at 01:00 UTC
  - Sends configuration updates to the Pre-Processing Service
  - Uses the `X-PP-API-Key` header for authentication
- **Configuration Management**:
  - Manages secrets and environment variables
  - Encrypts/decrypts secrets
  - Supports multiple environments (dev, int, prod)
- **Data Query API**:
  - `POST /api/v1/dataquery/query` - execute a SQL query
  - `GET /api/v1/dataquery/schema` - fetch the database schema
  - `GET /api/v1/dataquery/schema/{table_name}` - fetch a table schema
- **PostgreSQL Database**: Central database for gateway data

**Technology:**
- Python/FastAPI
- PostgreSQL
- APScheduler for event management

**External AI services:**
- **Dynamic AI**: LLM service for AI requests
- **Tavily**: Web search service for web research

### 2. PowerOn Chat UI (althaus-chat.poweron-center.net)

**Main components:**
- **React Application**: Frontend interface for the chatbot
- **Authentication**: User/password-based authentication with JWT tokens

**Communication:**
- Uses the 3 Data Query endpoints of the Gateway
- Authenticates against the Gateway with user/password
- Receives responses via the Gateway API

**Technology:**
- React
- REST API calls

### 3. Tenant althaus-ag.ch

#### 3.1 PowerOn PreProcessing

**Main components:**
- **FastAPI Application**: Pre-Processing service in the customer's Azure tenant
- **Pre-Processing API**:
  - `POST /api/v1/dataprocessor/update-db-with-config` - update the database with the configuration
  - Authentication: `X-PP-API-Key` header
- **PostgreSQL Memory Database**:
  - Stores processed data
  - Used by the chat for queries

**Data flow:**
- Receives raw data from the Power BI semantic model
- Processes data according to configured steps (keep, fillna, to_numeric, dropna, etc.)
- Stores processed data in the memory database
- Answers SQL queries from the Gateway

**Technology:**
- Python/FastAPI
- PostgreSQL
- Azure App Service (in the customer tenant althaus-ag.ch)

#### 3.2 MSFT Services

**Power BI semantic model:**
- Source of raw data
- Read by the Pre-Processing Service

**Azure domain controller:**
- Authentication service
- Used by the Gateway for authentication

**DNA Center:**
- Network management service
- Used by the Gateway

## Data Flow

### 1. Data refresh (scheduled)
```
Power BI semantic model (tenant althaus-ag.ch)
  → PowerOn PreProcessing (processes the data)
  → PostgreSQL Memory DB (stores the processed data)

Gateway Event Scheduler (daily at 01:00 UTC)
  → POST /api/v1/dataprocessor/update-db-with-config
  → PowerOn PreProcessing (updates the configuration)
```

### 2. Chat interaction (user request)
```
PowerOn Chat UI
  → POST /api/v1/dataquery/query (with user/password auth)
  → Gateway Data Query API
  → POST /api/v1/dataquery/query (with X-PP-API-Key)
  → PowerOn PreProcessing
  → PostgreSQL Memory DB (executes the query)
  → PowerOn PreProcessing (returns the results)
  → Gateway Data Query API
  → PowerOn Chat UI (displays the answer)
```

### 3. AI integration
```
PowerOn Chat UI
  → Gateway (brokers AI requests)
  → Dynamic AI & Tavily (in the PowerOn Platform)
  → Gateway (combines the results)
  → PowerOn Chat UI (displays the answer)
```

## Authentication

### Gateway → PowerOn PreProcessing
- **Header**: `X-PP-API-Key`
- **Value**: from the Gateway config (`PREPROCESS_ALTHAUS_CHAT_SECRET`)
- **Used by**: Event Scheduler and Data Query API

### PowerOn Chat UI → Gateway
- **Method**: user/password
- **Token**: JWT token (issued after successful authentication)
- **Used by**: all API calls from the chat frontend

### Additional authentication
- The Gateway uses the Azure domain controller for additional authentication
- Different API endpoints may use different authentication mechanisms

## Deployment

- **PowerOn Platform**: gateway.poweron-center.net
- **PowerOn Chat UI**: althaus-chat.poweron-center.net
- **PowerOn PreProcessing**: Azure App Service in the customer tenant (althaus-ag.ch)
  - URL: `poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net`
- **Tenant althaus-ag.ch**: Contains PowerOn PreProcessing and the MSFT services (Power BI, Azure DC, DNA Center) in the Althaus AG Azure tenant

## Configuration

### Gateway Config Keys
- `PREPROCESS_ALTHAUS_CHAT_SECRET`: API key for the Pre-Processing Service
- `APP_ENV_TYPE`: environment (dev, int, prod)
- Additional gateway-specific settings

### Pre-Processing Config
- Defined as JSON in the Gateway code
- Sent to the Pre-Processing Service daily at 01:00 UTC
- Defines tables, columns, and processing steps

modules/features/featuresLifecycle.py

@@ -1,24 +1,37 @@
import logging
-from modules.interfaces.interfaceDbAppObjects import getRootInterface
from modules.services import getInterface as getServices

logger = logging.getLogger(__name__)

-async def start() -> None:
-    """ Start feature triggers and background managers """
-
-    # Provide Event User
-    rootInterface = getRootInterface()
-    eventUser = rootInterface.getUserByUsername("event")
+async def start(eventUser) -> None:
+    """ Start feature triggers and background managers
+
+    Args:
+        eventUser: System-level event user for background operations (provided by app.py)
+    """

    # Feature Automation Events
    if eventUser:
        try:
-            from modules.interfaces.interfaceDbChatObjects import getInterface as getChatInterface
-            chatInterface = getChatInterface(eventUser)
-            await chatInterface.syncAutomationEvents()
+            from modules.features.automation import syncAutomationEvents
+            from modules.shared.callbackRegistry import callbackRegistry
+
+            # Get services for event user (provides access to interfaces)
+            services = getServices(eventUser, None)
+
+            # Register callback for automation changes
+            async def onAutomationChanged(chatInterface):
+                """Callback triggered when automations are created/updated/deleted."""
+                await syncAutomationEvents(chatInterface, eventUser)
+
+            callbackRegistry.register('automation.changed', onAutomationChanged)
+            logger.info("Registered automation change callback")
+
+            # Initial sync on startup - use interface from services
+            await syncAutomationEvents(services.interfaceDbChat, eventUser)
+            logger.info("Automation events synced on startup")
        except Exception as e:
-            logger.error(f"Error syncing automation events on startup: {str(e)}")
+            logger.error(f"Error setting up automation events on startup: {str(e)}")
            # Don't fail startup if automation sync fails

    # Feature SyncDelta

@@ -36,8 +49,21 @@ async def start() -> None:



-async def stop() -> None:
-    """ Stop feature triggers and background managers """
+async def stop(eventUser) -> None:
+    """ Stop feature triggers and background managers
+
+    Args:
+        eventUser: System-level event user (provided by app.py)
+    """
+
+    # Unregister automation callback
+    try:
+        from modules.shared.callbackRegistry import callbackRegistry
+        # Note: We'd need to store the callback reference to unregister it properly
+        # For now, callbacks will remain registered (acceptable for shutdown)
+        logger.info("Automation callbacks remain registered (will be cleaned up on process exit)")
+    except Exception as e:
+        logger.warning(f"Error during automation callback cleanup: {str(e)}")

    # Feature ...
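
The callbackRegistry used above lives in modules/shared/callbackRegistry.py, which is added by this commit but not shown in the diff; only callbackRegistry.register(event, callback) is observable here. A minimal sketch of the contract the lifecycle code appears to rely on, offered purely as an assumption:

```python
# Hypothetical implementation of modules/shared/callbackRegistry.py;
# only register() is observable in this diff, the rest is assumed.
from collections import defaultdict
from typing import Any, Awaitable, Callable

class CallbackRegistry:
    def __init__(self) -> None:
        # event name -> list of async callbacks
        self._callbacks: dict[str, list[Callable[..., Awaitable[Any]]]] = defaultdict(list)

    def register(self, event: str, callback: Callable[..., Awaitable[Any]]) -> None:
        """Register an async callback for a named event."""
        self._callbacks[event].append(callback)

    async def trigger(self, event: str, *args: Any, **kwargs: Any) -> None:
        """Invoke all callbacks registered for the event."""
        for callback in self._callbacks[event]:
            await callback(*args, **kwargs)

# module-level singleton, matching the import style used above
callbackRegistry = CallbackRegistry()
```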

modules/interfaces/interfaceAiObjects.py

@@ -75,15 +75,7 @@ class AiObjects:

    # AI for Extraction, Processing, Generation
-    async def call(self, request: AiCallRequest, progressCallback=None) -> AiCallResponse:
-        """Call AI model for text generation with model-aware chunking."""
-        # Handle content parts (unified path)
-        if hasattr(request, 'contentParts') and request.contentParts:
-            return await self._callWithContentParts(request, progressCallback)
-        # Handle traditional text/context calls
-        return await self._callWithTextContext(request)
-
-    async def _callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
+    async def callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
        """Call AI model for traditional text/context calls with fallback mechanism."""
        prompt = request.prompt
        context = request.context or ""

@@ -148,412 +140,6 @@ class AiObjects:
            errorCount=1
        )

    async def _callWithContentParts(self, request: AiCallRequest, progressCallback=None) -> AiCallResponse:
        """Process content parts with model-aware chunking (unified for single and multiple parts)."""
        prompt = request.prompt
        options = request.options
        contentParts = request.contentParts

        # Get failover models
        availableModels = modelRegistry.getAvailableModels()
        failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)

        if not failoverModelList:
            return self._createErrorResponse("No suitable models found", 0, 0)

        # Process each content part
        allResults = []
        for contentPart in contentParts:
            partResult = await self._processContentPartWithFallback(contentPart, prompt, options, failoverModelList, progressCallback)
            allResults.append(partResult)

        # Merge all results
        mergedContent = self._mergePartResults(allResults)

        return AiCallResponse(
            content=mergedContent,
            modelName="multiple",
            priceUsd=sum(r.priceUsd for r in allResults),
            processingTime=sum(r.processingTime for r in allResults),
            bytesSent=sum(r.bytesSent for r in allResults),
            bytesReceived=sum(r.bytesReceived for r in allResults),
            errorCount=sum(r.errorCount for r in allResults)
        )

    async def _processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList, progressCallback=None) -> AiCallResponse:
        """Process a single content part with model-aware chunking and fallback."""
        lastError = None

        # Check if this is an image - Vision models need special handling
        isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))

        # Determine the correct operation type based on content type
        # Images should use IMAGE_ANALYSE, not the generic operation type
        actualOperationType = options.operationType
        if isImage:
            actualOperationType = OperationTypeEnum.IMAGE_ANALYSE
            # Get vision-capable models for images
            availableModels = modelRegistry.getAvailableModels()
            visionFailoverList = modelSelector.getFailoverModelList(prompt, "", AiCallOptions(operationType=actualOperationType), availableModels)
            if visionFailoverList:
                logger.debug(f"Using {len(visionFailoverList)} vision-capable models for image processing")
                failoverModelList = visionFailoverList

        for attempt, model in enumerate(failoverModelList):
            try:
                logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")

                # Special handling for images with Vision models
                if isImage and hasattr(model, 'functionCall'):
                    # Call model's functionCall directly (for Vision models this is callAiImage)
                    from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions as AiCallOpts

                    try:
                        # Validate and prepare image data
                        if not contentPart.data:
                            raise ValueError("Image content part has no data")

                        # Ensure mimeType is valid
                        mimeType = contentPart.mimeType or "image/jpeg"
                        if not mimeType.startswith("image/"):
                            raise ValueError(f"Invalid mimeType for image: {mimeType}")

                        # Prepare base64 data
                        if isinstance(contentPart.data, str):
                            # Already base64 encoded - validate it
                            try:
                                base64.b64decode(contentPart.data, validate=True)
                                base64Data = contentPart.data
                            except Exception as e:
                                raise ValueError(f"Invalid base64 data in contentPart: {str(e)}")
                        elif isinstance(contentPart.data, bytes):
                            # Binary data - encode to base64
                            base64Data = base64.b64encode(contentPart.data).decode('utf-8')
                        else:
                            raise ValueError(f"Unsupported data type for image: {type(contentPart.data)}")

                        # Create data URL
                        imageDataUrl = f"data:{mimeType};base64,{base64Data}"

                        modelCall = AiModelCall(
                            messages=[
                                {
                                    "role": "user",
                                    "content": [
                                        {"type": "text", "text": prompt or ""},
                                        {
                                            "type": "image_url",
                                            "image_url": {
                                                "url": imageDataUrl
                                            }
                                        }
                                    ]
                                }
                            ],
                            model=model,
                            options=AiCallOpts(operationType=actualOperationType)
                        )

                        modelResponse = await model.functionCall(modelCall)

                        if not modelResponse.success:
                            raise ValueError(f"Model call failed: {modelResponse.error}")

                        logger.info(f"✅ Image content part processed successfully with model: {model.name}")

                        # Convert to AiCallResponse format
                        # Note: AiModelResponse doesn't have priceUsd, and processingTime can be None
                        # Calculate processing time if not provided (fallback to 0.0)
                        processingTime = getattr(modelResponse, 'processingTime', None)
                        if processingTime is None:
                            processingTime = 0.0

                        return AiCallResponse(
                            content=modelResponse.content,
                            modelName=model.name,
                            priceUsd=0.0,  # Price will be calculated elsewhere if needed
                            processingTime=processingTime,
                            bytesSent=0,  # Will be calculated elsewhere
                            bytesReceived=0,  # Will be calculated elsewhere
                            errorCount=0
                        )
                    except Exception as e:
                        # Image processing failed with this model
                        lastError = e
                        logger.warning(f"❌ Image processing failed with model {model.name}: {str(e)}")

                        # If this is not the last model, try the next one
                        if attempt < len(failoverModelList) - 1:
                            logger.info(f"🔄 Trying next fallback model for image processing...")
                            continue
                        else:
                            # All models failed
                            logger.error(f"💥 All {len(failoverModelList)} models failed for image processing")
                            raise

                # For non-image parts, check if part fits in model context
                # Calculate available space accounting for prompt, system message, and output reservation
                partSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0

                # Use same calculation as _chunkContentPart to determine actual available space
                modelContextTokens = model.contextLength
                modelMaxOutputTokens = model.maxTokens

                # Reserve tokens for prompt, system message, output, and message overhead
                promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
                systemMessageTokens = 10  # ~40 bytes = 10 tokens
                outputTokens = modelMaxOutputTokens
                messageOverheadTokens = 100
                totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens

                # Available tokens for content (with 80% safety margin)
                availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
                if availableContentTokens < 100:
                    availableContentTokens = max(100, int(modelContextTokens * 0.1))

                # Convert to bytes (1 token ≈ 4 bytes)
                availableContentBytes = availableContentTokens * 4

                logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes (contextLength={modelContextTokens} tokens, reserved={totalReservedTokens:.0f} tokens)")

                if partSize <= availableContentBytes:
                    # Part fits - call AI directly
                    response = await self._callWithModel(model, prompt, contentPart.data, options)
                    logger.info(f"✅ Content part processed successfully with model: {model.name}")
                    return response
                else:
                    # Part too large - chunk it (pass prompt to account for it in chunk size calculation)
                    chunks = await self._chunkContentPart(contentPart, model, options, prompt)
                    if not chunks:
                        raise ValueError(f"Failed to chunk content part for model {model.name}")

                    logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}")

                    # Log progress if callback provided
                    if progressCallback:
                        progressCallback(0.0, f"Starting to process {len(chunks)} chunks")

                    # Process each chunk
                    chunkResults = []
                    for idx, chunk in enumerate(chunks):
                        chunkNum = idx + 1
                        chunkData = chunk.get('data', '')
                        chunkSize = len(chunkData.encode('utf-8')) if chunkData else 0
                        logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}, chunk size: {chunkSize} bytes")

                        # Calculate and log progress
                        if progressCallback:
                            progress = chunkNum / len(chunks)
                            progressCallback(progress, f"Processing chunk {chunkNum}/{len(chunks)}")

                        try:
                            chunkResponse = await self._callWithModel(model, prompt, chunkData, options)
                            chunkResults.append(chunkResponse)
                            logger.info(f"✅ Chunk {chunkNum}/{len(chunks)} processed successfully")

                            # Log completion progress
                            if progressCallback:
                                progressCallback(chunkNum / len(chunks), f"Chunk {chunkNum}/{len(chunks)} processed")
                        except Exception as e:
                            logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}")
                            raise

                    # Merge chunk results
                    mergedContent = self._mergeChunkResults(chunkResults)
                    totalPrice = sum(r.priceUsd for r in chunkResults)
                    totalTime = sum(r.processingTime for r in chunkResults)
                    totalBytesSent = sum(r.bytesSent for r in chunkResults)
                    totalBytesReceived = sum(r.bytesReceived for r in chunkResults)
                    totalErrors = sum(r.errorCount for r in chunkResults)

                    logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
                    return AiCallResponse(
                        content=mergedContent,
                        modelName=model.name,
                        priceUsd=totalPrice,
                        processingTime=totalTime,
                        bytesSent=totalBytesSent,
                        bytesReceived=totalBytesReceived,
                        errorCount=totalErrors
                    )

            except Exception as e:
                lastError = e
                error_msg = str(e) if str(e) else f"{type(e).__name__}"
                error_detail = f"❌ Model {model.name} failed for content part: {error_msg}"
                if hasattr(e, 'detail') and e.detail:
                    error_detail += f" | Detail: {e.detail}"
                if hasattr(e, 'status_code'):
                    error_detail += f" | Status: {e.status_code}"
                logger.warning(error_detail, exc_info=True)

                if attempt < len(failoverModelList) - 1:
                    logger.info(f"🔄 Trying next failover model...")
                    continue
                else:
                    logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
                    break

        # All models failed
        return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)

    async def _chunkContentPart(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
        """Chunk a content part based on model capabilities, accounting for prompt, system message overhead, and maxTokens output."""
        # Calculate model-specific chunk sizes
        modelContextTokens = model.contextLength  # Total context in tokens
        modelMaxOutputTokens = model.maxTokens  # Maximum output tokens

        # Reserve tokens for:
        # 1. Prompt (user message)
        promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0

        # 2. System message wrapper ("Context from documents:\n")
        systemMessageTokens = 10  # ~40 bytes = 10 tokens

        # 3. Max output tokens (model will reserve space for completion)
        outputTokens = modelMaxOutputTokens

        # 4. JSON structure and message overhead (~100 tokens)
        messageOverheadTokens = 100

        # Total reserved tokens = input overhead + output reservation
        totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens

        # Available tokens for content = context length - reserved tokens
        # Use 80% of available for safety margin
        availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)

        # Ensure we have at least some space
        if availableContentTokens < 100:
            logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens")
            availableContentTokens = max(100, int(modelContextTokens * 0.1))  # Fallback to 10% of context

        # Convert tokens to bytes (1 token ≈ 4 bytes)
        availableContentBytes = availableContentTokens * 4

        logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)")

        # Use 70% of available content bytes for text chunks (conservative)
        textChunkSize = int(availableContentBytes * 0.7)
        imageChunkSize = int(availableContentBytes * 0.8)  # 80% for image chunks

        # Build chunking options
        chunkingOptions = {
            "textChunkSize": textChunkSize,
            "imageChunkSize": imageChunkSize,
            "maxSize": availableContentBytes,
            "chunkAllowed": True
        }

        # Get appropriate chunker
        from modules.services.serviceExtraction.subRegistry import ChunkerRegistry
        chunkerRegistry = ChunkerRegistry()
        chunker = chunkerRegistry.resolve(contentPart.typeGroup)

        if not chunker:
            logger.warning(f"No chunker found for typeGroup: {contentPart.typeGroup}")
            return []

        # Chunk the content part
        try:
            chunks = chunker.chunk(contentPart, chunkingOptions)
            logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part")
            return chunks
        except Exception as e:
            logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
            return []

    def _mergePartResults(self, partResults: List[AiCallResponse]) -> str:
        """Merge part results using the existing sophisticated merging system."""
        if not partResults:
            return ""

        # Convert AiCallResponse results to ContentParts for merging
        from modules.datamodels.datamodelExtraction import ContentPart
        from modules.services.serviceExtraction.subUtils import makeId

        content_parts = []
        for i, result in enumerate(partResults):
            if result.content:
                content_part = ContentPart(
                    id=str(uuid.uuid4()),
                    parentId=None,
                    label=f"ai_result_{i}",
                    typeGroup="text",  # Default to text for AI results
                    mimeType="text/plain",
                    data=result.content,
                    metadata={
                        "aiResult": True,
                        "modelName": result.modelName,
                        "priceUsd": result.priceUsd,
                        "processingTime": result.processingTime,
                        "bytesSent": result.bytesSent,
                        "bytesReceived": result.bytesReceived
                    }
                )
                content_parts.append(content_part)

        # Use existing merging system
        merge_strategy = MergeStrategy(
            useIntelligentMerging=True,
            groupBy="typeGroup",
            orderBy="id",
            mergeType="concatenate"
        )

        merged_parts = applyMerging(content_parts, merge_strategy)

        # Convert merged parts back to final string
        final_content = "\n\n".join([part.data for part in merged_parts])

        logger.info(f"Merged {len(partResults)} AI results using existing merging system")
        return final_content.strip()

    def _mergeChunkResults(self, chunkResults: List[AiCallResponse]) -> str:
        """Merge chunk results using the existing sophisticated merging system."""
        if not chunkResults:
            return ""

        # Convert AiCallResponse results to ContentParts for merging

        content_parts = []
        for i, result in enumerate(chunkResults):
            if result.content:
                content_part = ContentPart(
                    id=str(uuid.uuid4()),
                    parentId=None,
                    label=f"chunk_result_{i}",
                    typeGroup="text",  # Default to text for AI results
                    mimeType="text/plain",
                    data=result.content,
                    metadata={
                        "aiResult": True,
                        "chunk": True,
                        "modelName": result.modelName,
                        "priceUsd": result.priceUsd,
                        "processingTime": result.processingTime,
                        "bytesSent": result.bytesSent,
                        "bytesReceived": result.bytesReceived
                    }
                )
                content_parts.append(content_part)

        # Use existing merging system
        merge_strategy = MergeStrategy(
            useIntelligentMerging=True,
            groupBy="typeGroup",
            orderBy="id",
            mergeType="concatenate"
        )

        merged_parts = applyMerging(content_parts, merge_strategy)

        # Convert merged parts back to final string
        final_content = "\n\n".join([part.data for part in merged_parts])

        logger.info(f"Merged {len(chunkResults)} chunk results using existing merging system")
        return final_content.strip()

    def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
        """Create an error response."""
        return AiCallResponse(
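
To make the token-reservation arithmetic in the removed chunking code concrete, a worked example with round numbers; the model figures are illustrative, not taken from the diff:

```python
# Illustrative numbers only: a model with a 128k-token context window
# and 4096 max output tokens, called with a 2000-byte prompt.
modelContextTokens = 128_000
modelMaxOutputTokens = 4_096
promptTokens = 2_000 / 4          # 1 token ~= 4 bytes -> 500 tokens
systemMessageTokens = 10
messageOverheadTokens = 100

totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + modelMaxOutputTokens
# 500 + 10 + 100 + 4096 = 4706 tokens reserved

availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
# int((128000 - 4706) * 0.8) = 98635 tokens
availableContentBytes = availableContentTokens * 4
# 394540 bytes of content fit per call before chunking kicks in

print(availableContentTokens, availableContentBytes)  # 98635 394540
```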

@@ -659,64 +245,4 @@
        return [model.displayName for model in models]


def applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[ContentPart]:
    """Apply merging strategy to parts with intelligent token-aware merging."""
    logger.debug(f"applyMerging called with {len(parts)} parts")

    # Import merging dependencies
    from modules.services.serviceExtraction.merging.mergerText import TextMerger
    from modules.services.serviceExtraction.merging.mergerTable import TableMerger
    from modules.services.serviceExtraction.merging.mergerDefault import DefaultMerger
    from modules.services.serviceExtraction.subMerger import IntelligentTokenAwareMerger

    # Check if intelligent merging is enabled
    if strategy.useIntelligentMerging:
        modelCapabilities = strategy.capabilities or {}
        subMerger = IntelligentTokenAwareMerger(modelCapabilities)

        # Use intelligent merging for all parts
        merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")

        # Calculate and log optimization stats
        stats = subMerger.calculateOptimizationStats(parts, merged)
        logger.info(f"🧠 Intelligent merging stats: {stats}")
        logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")

        return merged

    # Fallback to traditional merging
    textMerger = TextMerger()
    tableMerger = TableMerger()
    defaultMerger = DefaultMerger()

    # Group by typeGroup
    textParts = [p for p in parts if p.typeGroup == "text"]
    tableParts = [p for p in parts if p.typeGroup == "table"]
    structureParts = [p for p in parts if p.typeGroup == "structure"]
    otherParts = [p for p in parts if p.typeGroup not in ("text", "table", "structure")]

    logger.debug(f"Grouped - text: {len(textParts)}, table: {len(tableParts)}, structure: {len(structureParts)}, other: {len(otherParts)}")

    merged: List[ContentPart] = []

    if textParts:
        textMerged = textMerger.merge(textParts, strategy)
        logger.debug(f"TextMerger merged {len(textParts)} parts into {len(textMerged)} parts")
        merged.extend(textMerged)
    if tableParts:
        tableMerged = tableMerger.merge(tableParts, strategy)
        logger.debug(f"TableMerger merged {len(tableParts)} parts into {len(tableMerged)} parts")
        merged.extend(tableMerged)
    if structureParts:
        # For now, treat structure like text
        structureMerged = textMerger.merge(structureParts, strategy)
        logger.debug(f"StructureMerger merged {len(structureParts)} parts into {len(structureMerged)} parts")
        merged.extend(structureMerged)
    if otherParts:
        otherMerged = defaultMerger.merge(otherParts, strategy)
        logger.debug(f"DefaultMerger merged {len(otherParts)} parts into {len(otherMerged)} parts")
        merged.extend(otherMerged)

    logger.debug(f"applyMerging returning {len(merged)} parts")
    return merged
@@ -37,6 +37,136 @@ logger = logging.getLogger(__name__)

# Singleton factory for Chat instances
_chatInterfaces = {}


def storeDebugMessageAndDocuments(message, currentUser) -> None:
    """
    Store message and documents (metadata and file bytes) for debugging purposes.
    Structure: {log_dir}/debug/messages/m_round_task_action_timestamp/documentlist_label/
      - message.json, message_text.txt
      - document_###_metadata.json
      - document_###_<original_filename> (actual file bytes)

    Args:
        message: ChatMessage object to store
        currentUser: Current user for component interface access
    """
    try:
        import os
        from datetime import datetime, UTC
        from modules.shared.debugLogger import _getBaseDebugDir, _ensureDir
        from modules.interfaces.interfaceDbComponentObjects import getInterface

        # Create base debug directory (use base debug dir, not prompts subdirectory)
        baseDebugDir = _getBaseDebugDir()
        debug_root = os.path.join(baseDebugDir, 'messages')
        _ensureDir(debug_root)

        # Generate timestamp
        timestamp = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]

        # Create message folder name: m_round_task_action_timestamp
        # Use actual values from message, not defaults
        round_str = str(message.roundNumber) if message.roundNumber is not None else "0"
        task_str = str(message.taskNumber) if message.taskNumber is not None else "0"
        action_str = str(message.actionNumber) if message.actionNumber is not None else "0"
        message_folder = f"{timestamp}_m_{round_str}_{task_str}_{action_str}"

        message_path = os.path.join(debug_root, message_folder)
        os.makedirs(message_path, exist_ok=True)

        # Store message data - use dict() instead of model_dump() for compatibility
        message_file = os.path.join(message_path, "message.json")
        with open(message_file, "w", encoding="utf-8") as f:
            # Convert message to dict manually to avoid model_dump() issues
            message_dict = {
                "id": message.id,
                "workflowId": message.workflowId,
                "parentMessageId": message.parentMessageId,
                "message": message.message,
                "role": message.role,
                "status": message.status,
                "sequenceNr": message.sequenceNr,
                "publishedAt": message.publishedAt,
                "roundNumber": message.roundNumber,
                "taskNumber": message.taskNumber,
                "actionNumber": message.actionNumber,
                "documentsLabel": message.documentsLabel,
                "actionId": message.actionId,
                "actionMethod": message.actionMethod,
                "actionName": message.actionName,
                "success": message.success,
                "documents": []
            }
            json.dump(message_dict, f, indent=2, ensure_ascii=False, default=str)

        # Store message content as text
        if message.message:
            message_text_file = os.path.join(message_path, "message_text.txt")
            with open(message_text_file, "w", encoding="utf-8") as f:
                f.write(str(message.message))

        # Store documents if provided
        if message.documents and len(message.documents) > 0:
            # Group documents by documentsLabel
            documents_by_label = {}
            for doc in message.documents:
                label = message.documentsLabel or 'default'
                if label not in documents_by_label:
                    documents_by_label[label] = []
                documents_by_label[label].append(doc)

            # Create subfolder for each document label
            for label, docs in documents_by_label.items():
                # Sanitize label for filesystem
                safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
                safe_label = safe_label.replace(' ', '_')
                if not safe_label:
                    safe_label = "default"

                label_folder = os.path.join(message_path, safe_label)
                _ensureDir(label_folder)

                # Store each document
                for i, doc in enumerate(docs):
                    # Create document metadata file
                    doc_meta = {
                        "id": doc.id,
                        "messageId": doc.messageId,
                        "fileId": doc.fileId,
                        "fileName": doc.fileName,
                        "fileSize": doc.fileSize,
                        "mimeType": doc.mimeType,
                        "roundNumber": doc.roundNumber,
                        "taskNumber": doc.taskNumber,
                        "actionNumber": doc.actionNumber,
                        "actionId": doc.actionId
                    }

                    doc_meta_file = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
                    with open(doc_meta_file, "w", encoding="utf-8") as f:
                        json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)

                    # Also store the actual file bytes next to metadata for debugging
                    try:
                        componentInterface = getInterface(currentUser)
                        file_bytes = componentInterface.getFileData(doc.fileId)
                        if file_bytes:
                            # Build a safe filename preserving original name
                            safe_name = doc.fileName or f"document_{i+1:03d}"
                            # Avoid path traversal
                            safe_name = os.path.basename(safe_name)
                            doc_file_path = os.path.join(label_folder, f"document_{i+1:03d}_" + safe_name)
                            with open(doc_file_path, "wb") as df:
                                df.write(file_bytes)
                        else:
                            pass
                    except Exception as e:
                        pass

    except Exception as e:
        # Silent fail - don't break main flow
        pass
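
For orientation, a run with one attached document would produce a layout like this on disk (a sketch with illustrative names and a placeholder base directory; the real base comes from _getBaseDebugDir()):

    logs/debug/messages/
      20250101-093000-123_m_1_2_0/
        message.json
        message_text.txt
        quarterly_report/
          document_001_metadata.json
          document_001_report.pdf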

class ChatObjects:
    """
    Interface to Chat database and AI Connectors.
@@ -440,7 +570,7 @@ class ChatObjects:
        allWorkflows = self.db.getRecordset(ChatWorkflow)
        filteredWorkflows = self._uam(ChatWorkflow, allWorkflows)

        # If no pagination requested, return all items
        # If no pagination requested, return all items (no sorting - frontend handles it)
        if pagination is None:
            return filteredWorkflows
@@ -448,7 +578,7 @@ class ChatObjects:
        if pagination.filters:
            filteredWorkflows = self._applyFilters(filteredWorkflows, pagination.filters)

        # Apply sorting (in order of sortFields)
        # Apply sorting (in order of sortFields) - only if provided by frontend
        if pagination.sort:
            filteredWorkflows = self._applySorting(filteredWorkflows, pagination.sort)
@@ -893,7 +1023,6 @@ class ChatObjects:
        )

        # Debug: Store message and documents for debugging - only if debug enabled
        from modules.shared.debugLogger import storeDebugMessageAndDocuments
        storeDebugMessageAndDocuments(chat_message, self.currentUser)

        return chat_message
@@ -1550,8 +1679,8 @@ class ChatObjects:
            if createdAutomation.get("executionLogs") is None:
                createdAutomation["executionLogs"] = []

            # Trigger sync (async, don't wait)
            asyncio.create_task(self.syncAutomationEvents())
            # Trigger automation change callback (async, don't wait)
            asyncio.create_task(self._notifyAutomationChanged())

            return createdAutomation
        except Exception as e:
@@ -1581,8 +1710,8 @@ class ChatObjects:
            if updatedAutomation.get("executionLogs") is None:
                updatedAutomation["executionLogs"] = []

            # Trigger sync (async, don't wait)
            asyncio.create_task(self.syncAutomationEvents())
            # Trigger automation change callback (async, don't wait)
            asyncio.create_task(self._notifyAutomationChanged())

            return updatedAutomation
        except Exception as e:
@@ -1611,374 +1740,22 @@ class ChatObjects:
            # Delete automation from database
            self.db.recordDelete(AutomationDefinition, automationId)

            # Trigger sync (async, don't wait)
            asyncio.create_task(self.syncAutomationEvents())
            # Trigger automation change callback (async, don't wait)
            asyncio.create_task(self._notifyAutomationChanged())

            return True
        except Exception as e:
            logger.error(f"Error deleting automation definition: {str(e)}")
            raise
    def _replacePlaceholders(self, template: str, placeholders: Dict[str, str]) -> str:
        """Replace placeholders in template with actual values. Placeholder format: {{KEY:PLACEHOLDER_NAME}}"""
        result = template
        for placeholderName, value in placeholders.items():
            pattern = f"{{{{KEY:{placeholderName}}}}}"

            # Check if placeholder is in an array context like ["{{KEY:...}}"]
            # If value is a JSON array/dict, we should replace the entire ["{{KEY:...}}"] with the array
            arrayPattern = f'["{pattern}"]'
            if arrayPattern in result:
                # Check if value is a JSON array/dict
                isArrayValue = False
                arrayValue = None

                if isinstance(value, (list, dict)):
                    isArrayValue = True
                    arrayValue = json.dumps(value)
                elif isinstance(value, str):
                    try:
                        parsed = json.loads(value)
                        if isinstance(parsed, (list, dict)):
                            isArrayValue = True
                            arrayValue = value  # Already valid JSON string
                    except (json.JSONDecodeError, ValueError):
                        pass

                if isArrayValue:
                    # Replace ["{{KEY:...}}"] with the array value
                    result = result.replace(arrayPattern, arrayValue)
                    continue  # Skip the regular replacement below

            # Regular replacement - check if in quoted context
            patternStart = result.find(pattern)
            isQuoted = False
            if patternStart > 0:
                charBefore = result[patternStart - 1] if patternStart > 0 else None
                patternEnd = patternStart + len(pattern)
                charAfter = result[patternEnd] if patternEnd < len(result) else None
                if charBefore == '"' and charAfter == '"':
                    isQuoted = True

            # Handle different value types
            if isinstance(value, (list, dict)):
                # Python list/dict - convert to JSON
                replacement = json.dumps(value)
            elif isinstance(value, str):
                # String value - check if it's a JSON string representing list/dict
                try:
                    parsed = json.loads(value)
                    if isinstance(parsed, (list, dict)):
                        # It's a JSON string of a list/dict
                        if isQuoted:
                            # In quoted context, escape the JSON string
                            escaped = json.dumps(value)
                            replacement = escaped[1:-1]  # Remove outer quotes
                        else:
                            # In unquoted context, use JSON directly
                            replacement = value
                    else:
                        # It's a JSON string of a primitive
                        if isQuoted:
                            escaped = json.dumps(value)
                            replacement = escaped[1:-1]
                        else:
                            replacement = value
                except (json.JSONDecodeError, ValueError):
                    # Not valid JSON - treat as plain string
                    if isQuoted:
                        escaped = json.dumps(value)
                        replacement = escaped[1:-1]
                    else:
                        replacement = value
            else:
                # Numbers, booleans, None - convert to string
                replacement = str(value)
            result = result.replace(pattern, replacement)
        return result

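A minimal sketch of how the substitution behaves, assuming a ChatObjects instance named chat (the variable names and values are illustrative, not part of the diff):

    # Hypothetical usage of _replacePlaceholders
    template = '{"recipients": ["{{KEY:TO}}"], "subject": "{{KEY:SUBJECT}}"}'
    placeholders = {"TO": '["a@example.com", "b@example.com"]', "SUBJECT": "Weekly report"}
    planJson = chat._replacePlaceholders(template, placeholders)
    # ["{{KEY:TO}}"] is replaced by the whole JSON array, while {{KEY:SUBJECT}}
    # stays inside its quotes as an escaped string:
    # {"recipients": ["a@example.com", "b@example.com"], "subject": "Weekly report"}
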
    def _parseScheduleToCron(self, schedule: str) -> Dict[str, Any]:
        """Parse schedule string to cron kwargs for APScheduler"""
        parts = schedule.split()
        if len(parts) != 5:
            raise ValueError(f"Invalid schedule format: {schedule}")

        return {
            "minute": parts[0],
            "hour": parts[1],
            "day": parts[2],
            "month": parts[3],
            "day_of_week": parts[4]
        }

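The five whitespace-separated fields map positionally onto APScheduler's cron kwargs; an illustrative round-trip (chat is an assumed instance):

    # "Every weekday at 09:00" in standard cron order: minute hour day month day_of_week
    chat._parseScheduleToCron("0 9 * * mon-fri")
    # -> {"minute": "0", "hour": "9", "day": "*", "month": "*", "day_of_week": "mon-fri"}
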
    async def executeAutomation(self, automationId: str) -> ChatWorkflow:
        """Execute automation workflow immediately (test mode) with placeholder replacement"""
        executionStartTime = getUtcTimestamp()
        executionLog = {
            "timestamp": executionStartTime,
            "workflowId": None,
            "status": "running",
            "messages": []
        }

    async def _notifyAutomationChanged(self):
        """Notify registered callbacks about automation changes (decoupled from features)."""
        try:
            # 1. Load automation definition
            automation = self.getAutomationDefinition(automationId)
            if not automation:
                raise ValueError(f"Automation {automationId} not found")

            executionLog["messages"].append(f"Started execution at {executionStartTime}")

            # 2. Replace placeholders in template to generate plan
            template = automation.get("template", "")
            placeholders = automation.get("placeholders", {})
            planJson = self._replacePlaceholders(template, placeholders)
            try:
                plan = json.loads(planJson)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse plan JSON after placeholder replacement: {str(e)}")
                logger.error(f"Template: {template[:500]}...")
                logger.error(f"Placeholders: {placeholders}")
                logger.error(f"Generated planJson (first 1000 chars): {planJson[:1000]}")
                logger.error(f"Error position: line {e.lineno}, column {e.colno}, char {e.pos}")
                if e.pos:
                    start = max(0, e.pos - 100)
                    end = min(len(planJson), e.pos + 100)
                    logger.error(f"Context around error: ...{planJson[start:end]}...")
                raise ValueError(f"Invalid JSON after placeholder replacement: {str(e)}")
            executionLog["messages"].append("Template placeholders replaced successfully")

            # 3. Get user who created automation
            creator_user_id = automation.get("_createdBy")

            # If _createdBy is missing, try to fix it by setting it to current user
            # This handles automations created before _createdBy was required
            if not creator_user_id:
                logger.warning(f"Automation {automationId} has no creator user, setting to current user {self.userId}")
                try:
                    # Update the automation to set _createdBy
                    self.db.recordModify(
                        AutomationDefinition,
                        automationId,
                        {"_createdBy": self.userId}
                    )
                    creator_user_id = self.userId
                    automation["_createdBy"] = self.userId
                    logger.info(f"Fixed automation {automationId} by setting _createdBy to {self.userId}")
                    executionLog["messages"].append(f"Fixed missing _createdBy field, set to user {self.userId}")
                except Exception as e:
                    logger.error(f"Error fixing automation {automationId}: {str(e)}")
                    raise ValueError(f"Automation {automationId} has no creator user and could not be fixed")

            # Get user from database
            from modules.interfaces.interfaceDbAppObjects import getInterface as getAppInterface
            appInterface = getAppInterface(self.currentUser)
            creator_user = appInterface.getUser(creator_user_id)
            if not creator_user:
                raise ValueError(f"Creator user {creator_user_id} not found")

            executionLog["messages"].append(f"Using creator user: {creator_user_id}")

            # 4. Create UserInputRequest from plan
            # Embed plan JSON in prompt for TemplateMode to extract
            promptText = self._planToPrompt(plan)
            planJson = json.dumps(plan)
            # Embed plan as JSON comment so TemplateMode can extract it
            promptWithPlan = f"{promptText}\n\n<!--TEMPLATE_PLAN_START-->\n{planJson}\n<!--TEMPLATE_PLAN_END-->"

            userInput = UserInputRequest(
                prompt=promptWithPlan,
                listFileId=[],
                userLanguage=creator_user.language or "en"
            )

            executionLog["messages"].append("Starting workflow execution")

            # 5. Start workflow using chatStart
            from modules.features.chatPlayground.mainChatPlayground import chatStart

            workflow = await chatStart(
                currentUser=creator_user,
                userInput=userInput,
                workflowMode=WorkflowModeEnum.WORKFLOW_AUTOMATION,
                workflowId=None
            )

            executionLog["workflowId"] = workflow.id
            executionLog["status"] = "completed"
            executionLog["messages"].append(f"Workflow {workflow.id} started successfully")
            logger.info(f"Started workflow {workflow.id} with plan containing {len(plan.get('tasks', []))} tasks (plan embedded in userInput)")

            # Set workflow name with "automated" prefix
            automationLabel = automation.get("label", "Unknown Automation")
            workflowName = f"automated: {automationLabel}"
            workflow = self.updateWorkflow(workflow.id, {"name": workflowName})
            logger.info(f"Set workflow {workflow.id} name to: {workflowName}")

            # Update automation with execution log
            executionLogs = automation.get("executionLogs", [])
            executionLogs.append(executionLog)
            # Keep only last 50 executions
            if len(executionLogs) > 50:
                executionLogs = executionLogs[-50:]

            self.db.recordModify(
                AutomationDefinition,
                automationId,
                {"executionLogs": executionLogs}
            )

            return workflow
            from modules.shared.callbackRegistry import callbackRegistry
            # Trigger callbacks without knowing which features are listening
            await callbackRegistry.trigger('automation.changed', self)
        except Exception as e:
            # Log error to execution log
            executionLog["status"] = "error"
            executionLog["messages"].append(f"Error: {str(e)}")

            # Update automation with execution log even on error
            try:
                automation = self.getAutomationDefinition(automationId)
                if automation:
                    executionLogs = automation.get("executionLogs", [])
                    executionLogs.append(executionLog)
                    if len(executionLogs) > 50:
                        executionLogs = executionLogs[-50:]
                    self.db.recordModify(
                        AutomationDefinition,
                        automationId,
                        {"executionLogs": executionLogs}
                    )
            except Exception as logError:
                logger.error(f"Error saving execution log: {str(logError)}")

            raise

    def _planToPrompt(self, plan: Dict) -> str:
        """Convert plan structure to prompt string for workflow execution"""
        return plan.get("userMessage", plan.get("overview", "Execute automation workflow"))
    async def syncAutomationEvents(self) -> Dict[str, Any]:
        """Automation event handler - syncs scheduler with all active automations."""
        from modules.shared.eventManagement import eventManager

        # Get all automation definitions (for current mandate)
        allAutomations = self.db.getRecordset(AutomationDefinition)
        filtered = self._uam(AutomationDefinition, allAutomations)

        registered_events = {}

        for automation in filtered:
            automation_id = automation.get("id")
            is_active = automation.get("active", False)
            current_event_id = automation.get("eventId")
            schedule = automation.get("schedule")

            if not schedule:
                logger.warning(f"Automation {automation_id} has no schedule, skipping")
                continue

            try:
                # Parse schedule to cron kwargs
                cron_kwargs = self._parseScheduleToCron(schedule)

                if is_active:
                    # Remove existing event if present (handles schedule changes)
                    if current_event_id:
                        try:
                            eventManager.remove(current_event_id)
                        except Exception as e:
                            logger.warning(f"Error removing old event {current_event_id}: {str(e)}")

                    # Register new event
                    new_event_id = f"automation.{automation_id}"

                    # Create event handler function
                    handler = self._createAutomationEventHandler(automation_id)

                    # Register cron job
                    eventManager.registerCron(
                        jobId=new_event_id,
                        func=handler,
                        cronKwargs=cron_kwargs,
                        replaceExisting=True
                    )

                    # Update automation with new eventId
                    if current_event_id != new_event_id:
                        self.db.recordModify(
                            AutomationDefinition,
                            automation_id,
                            {"eventId": new_event_id}
                        )

                    registered_events[automation_id] = new_event_id
                else:
                    # Remove event if exists
                    if current_event_id:
                        try:
                            eventManager.remove(current_event_id)
                            self.db.recordModify(
                                AutomationDefinition,
                                automation_id,
                                {"eventId": None}
                            )
                        except Exception as e:
                            logger.warning(f"Error removing event {current_event_id}: {str(e)}")
            except Exception as e:
                logger.error(f"Error syncing automation {automation_id}: {str(e)}")

        return {
            "synced": len(registered_events),
            "events": registered_events
        }

    def _createAutomationEventHandler(self, automationId: str):
        """Create event handler function for a specific automation"""
        async def handler():
            try:
                # Get event user to access automation (event user can access all automations)
                from modules.interfaces.interfaceDbAppObjects import getRootInterface
                from modules.interfaces.interfaceDbAppObjects import getInterface as getAppInterface
                from modules.interfaces.interfaceDbChatObjects import getInterface as getChatInterface

                rootInterface = getRootInterface()
                eventUser = rootInterface.getUserByUsername("event")

                if not eventUser:
                    logger.error("Could not get event user for automation execution")
                    return

                # Create ChatObjects interface for event user (to access automation)
                eventInterface = getChatInterface(eventUser)

                # Load automation using event user context
                automation = eventInterface.getAutomationDefinition(automationId)
                if not automation or not automation.get("active"):
                    logger.warning(f"Automation {automationId} not found or not active, skipping execution")
                    return

                # Get creator user
                creator_user_id = automation.get("_createdBy")
                if not creator_user_id:
                    logger.error(f"Automation {automationId} has no creator user")
                    return

                # Get creator user from database
                appInterface = getAppInterface(eventUser)
                creator_user = appInterface.getUser(creator_user_id)
                if not creator_user:
                    logger.error(f"Creator user {creator_user_id} not found for automation {automationId}")
                    return

                # Create ChatObjects interface for creator user
                creatorInterface = getChatInterface(creator_user)

                # Execute automation with creator user's context
                await creatorInterface.executeAutomation(automationId)
                logger.info(f"Successfully executed automation {automationId} as user {creator_user_id}")
            except Exception as e:
                logger.error(f"Error executing automation {automationId}: {str(e)}")

        return handler
            logger.error(f"Error notifying automation change: {str(e)}")


def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects':

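The callbackRegistry.trigger('automation.changed', ...) call above implies a matching subscription side. A hedged sketch of what a subscriber might look like; only trigger() appears in this diff, so the register() name and signature are assumptions:

    # Hypothetical subscriber in the automation feature module
    from modules.shared.callbackRegistry import callbackRegistry
    from modules.features.automation import syncAutomationEvents

    async def onAutomationChanged(chatInterface):
        # Re-sync scheduled events whenever an automation is created/updated/deleted
        await syncAutomationEvents(chatInterface, eventUser)  # eventUser resolved elsewhere

    callbackRegistry.register('automation.changed', onAutomationChanged)
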
@@ -99,6 +99,44 @@ class VoiceObjects:

    # Translation Operations

    async def detectLanguage(self, text: str) -> Dict[str, Any]:
        """
        Detect the language of text using Google Cloud Translation API.

        Args:
            text: Text to detect language for

        Returns:
            Dict containing detected language code and confidence
        """
        try:
            logger.info(f"🔍 Language detection request: '{text[:100]}...'")

            if not text.strip():
                return {
                    "success": False,
                    "language": "",
                    "error": "Empty text provided"
                }

            connector = self._getGoogleSpeechConnector()
            result = await connector.detectLanguage(text)

            if result["success"]:
                logger.info(f"✅ Language detected: {result['language']}")
            else:
                logger.warning(f"⚠️ Language detection failed: {result.get('error', 'Unknown error')}")

            return result

        except Exception as e:
            logger.error(f"❌ Language detection error: {e}")
            return {
                "success": False,
                "language": "",
                "error": str(e)
            }
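
A short usage sketch for the new interface method (assumes an initialized VoiceObjects instance named voice; values are illustrative):

    result = await voice.detectLanguage("Guten Morgen zusammen")
    if result["success"]:
        print(result["language"])   # e.g. "de"
    else:
        print(result["error"])      # e.g. "Empty text provided"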

    async def translateText(self, text: str, sourceLanguage: str = "de",
                            targetLanguage: str = "en") -> Dict[str, Any]:
        """
@@ -86,15 +86,21 @@ async def sync_all_automation_events(
    requireSysadmin(currentUser)

    try:
        chatInterface = interfaceDbChatObjects.getInterface(currentUser)
        from modules.interfaces.interfaceDbChatObjects import getInterface as getChatInterface
        from modules.interfaces.interfaceDbAppObjects import getRootInterface
        from modules.features.automation import syncAutomationEvents

        if not hasattr(chatInterface, 'syncAutomationEvents'):
        chatInterface = getChatInterface(currentUser)
        # Get event user for sync operation (routes can import from interfaces)
        rootInterface = getRootInterface()
        eventUser = rootInterface.getUserByUsername("event")
        if not eventUser:
            raise HTTPException(
                status_code=501,
                detail="Automation methods not available"
                status_code=500,
                detail="Event user not available"
            )

        result = await chatInterface.syncAutomationEvents()
        result = await syncAutomationEvents(chatInterface, eventUser)
        return {
            "success": True,
            "synced": result.get("synced", 0),
@@ -115,6 +115,48 @@ async def speech_to_text(
            detail=f"Speech-to-text processing failed: {str(e)}"
        )

@router.post("/detect-language")
async def detect_language(
    text: str = Form(...),
    currentUser: User = Depends(getCurrentUser)
):
    """Detect the language of text using Google Cloud Translation API."""
    try:
        logger.info(f"🔍 Language detection request: '{text[:100]}...'")

        if not text.strip():
            raise HTTPException(
                status_code=400,
                detail="Empty text provided for language detection"
            )

        # Get voice interface
        voiceInterface = _getVoiceInterface(currentUser)

        # Perform language detection
        result = await voiceInterface.detectLanguage(text)

        if result["success"]:
            return {
                "success": True,
                "language": result["language"],
                "confidence": result.get("confidence", 1.0)
            }
        else:
            raise HTTPException(
                status_code=400,
                detail=f"Language detection failed: {result.get('error', 'Unknown error')}"
            )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Language detection error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Language detection processing failed: {str(e)}"
        )

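A quick local smoke test of the new endpoint; the router's mount prefix is not shown in this hunk, so the /voice prefix, host, and port below are assumptions:

    import requests  # hypothetical ad-hoc test, not part of the diff

    r = requests.post(
        "http://localhost:8000/voice/detect-language",
        headers={"Authorization": f"Bearer {token}"},
        data={"text": "Bonjour tout le monde"},
    )
    print(r.json())  # e.g. {"success": True, "language": "fr", "confidence": 1.0}
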
@router.post("/translate")
|
||||
async def translate_text(
|
||||
text: str = Form(...),
|
||||
|
|
|
|||
|
|
@@ -84,6 +84,9 @@ class Services:
        from .serviceWeb.mainServiceWeb import WebService
        self.web = PublicService(WebService(self))

        from .serviceSecurity.mainServiceSecurity import SecurityService
        self.security = PublicService(SecurityService(self))


def getInterface(user: User, workflow: ChatWorkflow) -> Services:
    return Services(user, workflow)
@@ -48,6 +48,18 @@ class AiService:
        logger.info("Initializing ExtractionService...")
        self.extractionService = ExtractionService(self.services)

    async def callAi(self, request: AiCallRequest, progressCallback=None):
        """Router: handles content parts via extractionService, text context via interface.

        Replaces direct calls to self.aiObjects.call() to route content parts processing
        through serviceExtraction layer.
        """
        if hasattr(request, 'contentParts') and request.contentParts:
            return await self.extractionService.processContentPartsWithAi(
                request, self.aiObjects, progressCallback
            )
        return await self.aiObjects.callWithTextContext(request)

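The routing rule in one place, as a sketch (AiCallRequest construction details beyond contentParts are assumptions; aiService is an assumed instance):

    docRequest = AiCallRequest(prompt="Summarize the attachments", contentParts=parts)
    textRequest = AiCallRequest(prompt="Summarize our discussion")
    r1 = await aiService.callAi(docRequest)   # -> extractionService.processContentPartsWithAi(...)
    r2 = await aiService.callAi(textRequest)  # -> aiObjects.callWithTextContext(...)
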
    async def ensureAiObjectsInitialized(self):
        """Ensure aiObjects is initialized and submodules are ready."""
        if self.aiObjects is None:
@@ -141,7 +153,7 @@ Respond with ONLY a JSON object in this exact format:
            )
        )

        response = await self.aiObjects.call(request)
        response = await self.callAi(request)

        # Parse AI response using structured parsing with AiCallOptions model
        try:
@@ -193,10 +205,8 @@ Respond with ONLY a JSON object in this exact format:
        documentMetadata = None  # Store document metadata (title, filename) from first iteration
        accumulationState = None  # Track accumulation state for string accumulation

        # Get parent log ID for iteration operations
        parentLogId = None
        if operationId:
            parentLogId = self.services.chat.getOperationLogId(operationId)
        # Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID)
        parentOperationId = operationId  # Use the parent's operationId directly

        while iteration < maxIterations:
            iteration += 1
@@ -210,7 +220,7 @@ Respond with ONLY a JSON object in this exact format:
                    "AI Call",
                    f"Iteration {iteration}",
                    "",
                    parentId=parentLogId
                    parentOperationId=parentOperationId
                )

            # Build iteration prompt
@@ -223,11 +233,14 @@ Respond with ONLY a JSON object in this exact format:
                    logger.warning(f"Iteration {iteration}: No previous response available for continuation!")

                # Filter promptArgs to only include parameters that buildGenerationPrompt accepts
                # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext
                # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services
                filteredPromptArgs = {
                    k: v for k, v in promptArgs.items()
                    if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content']
                    if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services']
                }
                # Always include services if available
                if not filteredPromptArgs.get('services') and hasattr(self, 'services'):
                    filteredPromptArgs['services'] = self.services

                # Rebuild prompt with continuation context using the provided prompt builder
                iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext)
@@ -251,12 +264,23 @@ Respond with ONLY a JSON object in this exact format:
            else:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")

            response = await self.aiObjects.call(request)
            response = await self.callAi(request)
            result = response.content

            # Update progress after AI call
            # Track bytes for progress reporting
            bytesReceived = len(result.encode('utf-8')) if result else 0
            totalBytesSoFar = sum(len(section.get('content', '').encode('utf-8')) if isinstance(section.get('content'), str) else 0 for section in allSections) + bytesReceived

            # Update progress after AI call with byte information
            if iterationOperationId:
                self.services.chat.progressLogUpdate(iterationOperationId, 0.6, "AI response received")
                # Format bytes for display (kB or MB)
                if totalBytesSoFar < 1024:
                    bytesDisplay = f"{totalBytesSoFar}B"
                elif totalBytesSoFar < 1024 * 1024:
                    bytesDisplay = f"{totalBytesSoFar / 1024:.1f}kB"
                else:
                    bytesDisplay = f"{totalBytesSoFar / (1024 * 1024):.1f}MB"
                self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})")

            # Write raw AI response to debug file
            if iteration == 1:
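The same B/kB/MB ladder recurs in several hunks below; the repeated inline logic is equivalent to a small helper like this (hypothetical name, not present in the diff):

    def formatBytes(n: int) -> str:
        # Mirrors the inline formatting used in the progress messages
        if n < 1024:
            return f"{n}B"
        if n < 1024 * 1024:
            return f"{n / 1024:.1f}kB"
        return f"{n / (1024 * 1024):.1f}MB"

    assert formatBytes(512) == "512B" and formatBytes(2048) == "2.0kB"
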
@@ -457,8 +481,24 @@ Respond with ONLY a JSON object in this exact format:
            # The break can occur anywhere - in any section, at any depth
            allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration)

            # Log merged sections for debugging
            # Calculate total bytes in merged content for progress display
            merged_json_str = json.dumps(allSections, indent=2, ensure_ascii=False)
            totalBytesGenerated = len(merged_json_str.encode('utf-8'))

            # Update main operation with byte progress
            if operationId:
                # Format bytes for display
                if totalBytesGenerated < 1024:
                    bytesDisplay = f"{totalBytesGenerated}B"
                elif totalBytesGenerated < 1024 * 1024:
                    bytesDisplay = f"{totalBytesGenerated / 1024:.1f}kB"
                else:
                    bytesDisplay = f"{totalBytesGenerated / (1024 * 1024):.1f}MB"
                # Estimate progress based on iterations (rough estimate)
                estimatedProgress = min(0.9, 0.4 + (iteration * 0.1))
                self.services.chat.progressLogUpdate(operationId, estimatedProgress, f"Pipeline: {bytesDisplay} (iteration {iteration})")

            # Log merged sections for debugging
            self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}")

            # Check if we should continue (completion detection)
@@ -473,14 +513,40 @@ Respond with ONLY a JSON object in this exact format:
            if shouldContinue:
                # Finish iteration operation (will continue with next iteration)
                if iterationOperationId:
                    # Show byte progress in iteration completion
                    iterBytes = len(result.encode('utf-8')) if result else 0
                    if iterBytes < 1024:
                        iterBytesDisplay = f"{iterBytes}B"
                    elif iterBytes < 1024 * 1024:
                        iterBytesDisplay = f"{iterBytes / 1024:.1f}kB"
                    else:
                        iterBytesDisplay = f"{iterBytes / (1024 * 1024):.1f}MB"
                    self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Completed ({iterBytesDisplay})")
                    self.services.chat.progressLogFinish(iterationOperationId, True)
                continue
            else:
                # Done - finish iteration and update main operation
                if iterationOperationId:
                    # Show final byte count
                    finalBytes = len(merged_json_str.encode('utf-8'))
                    if finalBytes < 1024:
                        finalBytesDisplay = f"{finalBytes}B"
                    elif finalBytes < 1024 * 1024:
                        finalBytesDisplay = f"{finalBytes / 1024:.1f}kB"
                    else:
                        finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB"
                    self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Complete ({finalBytesDisplay})")
                    self.services.chat.progressLogFinish(iterationOperationId, True)
                if operationId:
                    self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
                    # Show final size in main operation
                    finalBytes = len(merged_json_str.encode('utf-8'))
                    if finalBytes < 1024:
                        finalBytesDisplay = f"{finalBytes}B"
                    elif finalBytes < 1024 * 1024:
                        finalBytesDisplay = f"{finalBytes / 1024:.1f}kB"
                    else:
                        finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB"
                    self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete: {finalBytesDisplay} ({iteration} iterations, {len(allSections)} sections)")
                logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections")
                break
@@ -582,7 +648,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
        # Write KPI definition prompt to debug file
        self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt")

        response = await self.aiObjects.call(request)
        response = await self.callAi(request)

        # Write KPI definition response to debug file
        self.services.utils.writeDebugFile(response.content, f"{debugPrefix}_kpi_definition_response")
@@ -895,7 +961,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
        # Debug: persist prompt/response for analysis with context-specific naming
        debugPrefix = debugType if debugType else "plan"
        self.services.utils.writeDebugFile(fullPrompt, f"{debugPrefix}_prompt")
        response = await self.aiObjects.call(request)
        response = await self.aiObjects.callWithTextContext(request)
        result = response.content or ""
        self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
        return result
@@ -929,10 +995,8 @@ If no trackable items can be identified, return: {{"kpis": []}}
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        aiOperationId = f"ai_content_{workflowId}_{int(time.time())}"

        # Get parent log ID if parent operation exists
        parentLogId = None
        if parentOperationId:
            parentLogId = self.services.chat.getOperationLogId(parentOperationId)
        # Use parent operation ID directly (parentId should be operationId, not log entry ID)
        # parentOperationId is already the operationId of the parent

        # Start progress tracking with parent reference
        self.services.chat.progressLogStart(
@@ -940,7 +1004,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
            "AI content processing",
            "Content Processing",
            f"Format: {outputFormat or 'text'}",
            parentId=parentLogId
            parentOperationId=parentOperationId
        )

        try:
@@ -966,7 +1030,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
                options=options
            )

            response = await self.aiObjects.call(request)
            response = await self.callAi(request)

            if response.content:
                # Build document data for image
@@ -1011,7 +1075,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
                options=options
            )

            response = await self.aiObjects.call(request)
            response = await self.callAi(request)

            if response.content:
                metadata = AiResponseMetadata(
@@ -1046,7 +1110,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
            options.compressContext = False

        # Process contentParts for generation prompt (if provided)
        # Use generic _callWithContentParts() which handles all content types (images, text, etc.)
        # Use generic callWithContentParts() which handles all content types (images, text, etc.)
        # This automatically processes images with vision models and merges all results
        if contentParts:
            # Filter out binary/other parts that shouldn't be processed
@@ -1110,7 +1174,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
        self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")

        # Call generic content parts processor - handles images, text, chunking, merging
        extractionResponse = await self.aiObjects.call(extractionRequest)
        extractionResponse = await self.callAi(extractionRequest)

        # Write debug file for extraction response
        if extractionResponse.content:
@@ -1141,14 +1205,15 @@ If no trackable items can be identified, return: {{"kpis": []}}
        from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt

        generation_prompt = await buildGenerationPrompt(
            outputFormat, prompt, title, content_for_generation, None
            outputFormat, prompt, title, content_for_generation, None, self.services
        )

        promptArgs = {
            "outputFormat": outputFormat,
            "userPrompt": prompt,
            "title": title,
            "extracted_content": content_for_generation
            "extracted_content": content_for_generation,
            "services": self.services
        }

        self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
@@ -1157,6 +1222,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
        if promptArgs:
            userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")

        # Track generation progress - the looping function will update with byte progress
        generated_json = await self._callAiWithLooping(
            generation_prompt,
            options,
@@ -1167,7 +1233,16 @@ If no trackable items can be identified, return: {{"kpis": []}}
            userPrompt=userPrompt
        )

        self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
        # Calculate final size for completion message
        finalSize = len(generated_json.encode('utf-8')) if generated_json else 0
        if finalSize < 1024:
            finalSizeDisplay = f"{finalSize}B"
        elif finalSize < 1024 * 1024:
            finalSizeDisplay = f"{finalSize / 1024:.1f}kB"
        else:
            finalSizeDisplay = f"{finalSize / (1024 * 1024):.1f}MB"

        self.services.chat.progressLogUpdate(aiOperationId, 0.7, f"Parsing generated JSON ({finalSizeDisplay})")
        try:
            extracted_json = self.services.utils.jsonExtractString(generated_json)
            generated_data = json.loads(extracted_json)
@@ -1198,13 +1273,13 @@ If no trackable items can be identified, return: {{"kpis": []}}

        # Create separate operation for content rendering
        renderOperationId = f"{aiOperationId}_render"
        renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
        # Use aiOperationId directly as parentOperationId (operationId, not log entry ID)
        self.services.chat.progressLogStart(
            renderOperationId,
            "Content Rendering",
            "Rendering",
            f"Format: {outputFormat}",
            parentId=renderParentLogId
            parentOperationId=aiOperationId
        )

        try:
@@ -3,7 +3,6 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelUam import User, UserConnection
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage, ChatStat, ChatLog
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.security.tokenManager import TokenManager
from modules.shared.progressLogger import ProgressLogger

logger = logging.getLogger(__name__)
@@ -306,9 +305,9 @@ class ChatService:
        token = None
        token_status = "unknown"
        try:
            # Get a fresh token via TokenManager convenience method
            # Get a fresh token via security service
            logger.debug(f"Getting fresh token for connection {connection.id}")
            token = TokenManager().getFreshToken(connection.id)
            token = self.services.security.getFreshToken(connection.id)
            if token:
                if hasattr(token, 'expiresAt') and token.expiresAt:
                    current_time = self.services.utils.timestampGetUtc()
@@ -389,7 +388,7 @@ class ChatService:
            Token object or None if not found/expired
        """
        try:
            return TokenManager().getFreshToken(connectionId)
            return self.services.security.getFreshToken(connectionId)
        except Exception as e:
            logger.error(f"Error getting fresh token for connection {connectionId}: {str(e)}")
            return None
@@ -1016,10 +1015,19 @@ class ChatService:
    def createProgressLogger(self) -> ProgressLogger:
        return ProgressLogger(self.services)

    def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentId: Optional[str] = None):
        """Wrapper for ProgressLogger.startOperation"""
    def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None):
        """Wrapper for ProgressLogger.startOperation

        Args:
            operationId: Unique identifier for the operation
            serviceName: Name of the service
            actionName: Name of the action
            context: Additional context information
            parentOperationId: Optional parent operation ID (operationId of parent operation)
                The parentId in ChatLog will be set to this parentOperationId
        """
        progressLogger = self._getProgressLogger()
        return progressLogger.startOperation(operationId, serviceName, actionName, context, parentId)
        return progressLogger.startOperation(operationId, serviceName, actionName, context, parentOperationId)

    def progressLogUpdate(self, operationId: str, progress: float, statusUpdate: str = ""):
        """Wrapper for ProgressLogger.updateOperation"""

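A sketch of the parent/child hierarchy these wrappers produce (operation IDs are illustrative; chat is an assumed ChatService instance):

    # Parent operation, then a child linked via parentOperationId (an operationId, not a log entry ID)
    chat.progressLogStart("extract_123", "Extraction", "Extract", "3 documents")
    chat.progressLogStart("extract_123_doc_0", "Extracting Document", "Document 1/3",
                          "report.pdf", parentOperationId="extract_123")
    chat.progressLogUpdate("extract_123_doc_0", 0.5, "Running extraction pipeline")
    chat.progressLogFinish("extract_123_doc_0", True)
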
@@ -3,13 +3,15 @@ import uuid
import logging
import time
import asyncio
import base64

from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions
from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import modelSelector


logger = logging.getLogger(__name__)
@@ -32,13 +34,21 @@ class ExtractionService:
        if model is None or model.calculatePriceUsd is None:
            raise RuntimeError(f"FATAL: Required internal model '{modelDisplayName}' is not available. Check connector registration.")

    def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
    def extractContent(
        self,
        documents: List[ChatDocument],
        options: ExtractionOptions,
        operationId: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> List[ContentExtracted]:
        """
        Extract content from a list of ChatDocument objects.

        Args:
            documents: List of ChatDocument objects to extract content from
            options: Extraction options including maxSize, chunkAllowed, mergeStrategy, etc.
            operationId: Optional operation ID for progress logging (parent operation)
            parentOperationId: Optional parent operation ID for hierarchical logging

        Returns:
            List of ContentExtracted objects, one per input document
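An illustrative call matching the new signature (ExtractionOptions field values and the instance name are assumptions beyond what the docstring states):

    options = ExtractionOptions(maxSize=100_000, chunkAllowed=True)
    extracted = service.extractContent(documents, options, operationId="extract_123")
    # One ContentExtracted per input document; documents that fail are skipped
    # rather than aborting the batch (see the per-document except/continue below)
    for ec in extracted:
        logger.info(f"{len(ec.parts)} parts")
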
@ -50,125 +60,172 @@ class ExtractionService:
|
|||
from modules.interfaces.interfaceDbComponentObjects import getInterface
|
||||
dbInterface = getInterface()
|
||||
|
||||
totalDocs = len(documents)
|
||||
|
||||
for i, doc in enumerate(documents):
|
||||
logger.info(f"=== DOCUMENT {i}: {doc.fileName} ===")
|
||||
logger.info(f"=== DOCUMENT {i + 1}/{totalDocs}: {doc.fileName} ===")
|
||||
logger.info(f"Initial MIME type: {doc.mimeType}")
|
||||
|
||||
# Create child operation for this document if parent operationId is provided
|
||||
docOperationId = None
|
||||
if operationId:
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
docOperationId = f"{operationId}_doc_{i}"
|
||||
self.services.chat.progressLogStart(
|
||||
docOperationId,
|
||||
"Extracting Document",
|
||||
f"Document {i + 1}/{totalDocs}",
|
||||
doc.fileName[:50] + "..." if len(doc.fileName) > 50 else doc.fileName,
|
||||
parentOperationId=operationId # Use operationId as parent (not parentOperationId)
|
||||
)
|
||||
|
||||
# Start timing for this document
|
||||
startTime = time.time()
|
||||
|
||||
# Resolve raw bytes for this document using interface
|
||||
documentBytes = dbInterface.getFileData(doc.fileId)
|
||||
if not documentBytes:
|
||||
raise ValueError(f"No file data found for fileId={doc.fileId}")
|
||||
|
||||
# Convert ChatDocument to the format expected by runExtraction
|
||||
documentData = {
|
||||
"id": doc.id,
|
||||
"bytes": documentBytes,
|
||||
"fileName": doc.fileName,
|
||||
"mimeType": doc.mimeType
|
||||
}
|
||||
|
||||
ec = runExtraction(
|
||||
extractorRegistry=self._extractorRegistry,
|
||||
chunkerRegistry=self._chunkerRegistry,
|
||||
documentBytes=documentData["bytes"],
|
||||
fileName=documentData["fileName"],
|
||||
mimeType=documentData["mimeType"],
|
||||
options=options
|
||||
)
|
||||
|
||||
# Log content parts metadata
|
||||
logger.debug(f"Content parts: {len(ec.parts)}")
|
||||
for j, part in enumerate(ec.parts):
|
||||
logger.debug(f" Part {j}: {part.typeGroup} ({part.mimeType}) - {len(part.data) if part.data else 0} chars")
|
||||
if part.metadata:
|
||||
logger.debug(f" Metadata: {part.metadata}")
|
||||
|
||||
# Attach document id and MIME type to parts if missing
|
||||
for p in ec.parts:
|
||||
if "documentId" not in p.metadata:
|
||||
p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
|
||||
if "documentMimeType" not in p.metadata:
|
||||
p.metadata["documentMimeType"] = documentData["mimeType"]
|
||||
|
||||
# Log chunking information
|
||||
chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
|
||||
if chunkedParts:
|
||||
logger.debug(f"=== CHUNKING RESULTS ===")
|
||||
logger.debug(f"Total parts: {len(ec.parts)}")
|
||||
logger.debug(f"Chunked parts: {len(chunkedParts)}")
|
||||
for chunk in chunkedParts:
|
||||
logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
|
||||
else:
|
||||
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
|
||||
|
||||
# Calculate timing and emit stats
|
||||
endTime = time.time()
|
||||
processingTime = endTime - startTime
|
||||
bytesSent = len(documentBytes)
|
||||
bytesReceived = sum(len(part.data) if part.data else 0 for part in ec.parts)
|
||||
|
||||
# Emit stats for extraction operation
|
||||
|
||||
# Use internal extraction model for pricing
|
||||
modelDisplayName = "Internal Document Extractor"
|
||||
model = modelRegistry.getModel(modelDisplayName)
|
||||
# Hard fail if model is missing; caller must ensure connectors are registered
|
||||
if model is None or model.calculatePriceUsd is None:
|
||||
raise RuntimeError(f"Pricing model not available: {modelDisplayName}")
|
||||
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
|
||||
|
||||
# Create AiCallResponse with real calculation
|
||||
# Use model.name for the response (API identifier), not displayName
|
||||
aiResponse = AiCallResponse(
|
||||
content="", # No content for extraction stats needed
|
||||
modelName=model.name,
|
||||
priceUsd=priceUsd,
|
||||
processingTime=processingTime,
|
||||
bytesSent=bytesSent,
|
||||
bytesReceived=bytesReceived,
|
||||
errorCount=0
|
||||
)
|
||||
|
||||
self.services.chat.storeWorkflowStat(
|
||||
self.services.workflow,
|
||||
aiResponse,
|
||||
f"extraction.process.{doc.mimeType}"
|
||||
)
|
||||
|
||||
# Write extraction results to debug file
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
import json
|
||||
# Create summary of extraction results for debug
|
||||
extractionSummary = {
|
||||
"documentName": doc.fileName,
|
||||
"documentMimeType": doc.mimeType,
|
||||
"partsCount": len(ec.parts),
|
||||
"parts": []
|
||||
}
|
||||
for part in ec.parts:
|
||||
partSummary = {
|
||||
"typeGroup": part.typeGroup,
|
||||
"mimeType": part.mimeType,
|
||||
"label": part.label,
|
||||
"dataLength": len(part.data) if part.data else 0,
|
||||
"metadata": part.metadata
|
||||
}
|
||||
# Include data preview for small parts (first 500 chars)
|
||||
if part.data and len(part.data) <= 500:
|
||||
partSummary["dataPreview"] = part.data[:500]
|
||||
elif part.data:
|
||||
partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
|
||||
extractionSummary["parts"].append(partSummary)
|
||||
if docOperationId:
|
||||
self.services.chat.progressLogUpdate(docOperationId, 0.1, "Loading document data")
|
||||
|
||||
writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to write extraction debug file: {str(e)}")
|
||||
# Resolve raw bytes for this document using interface
|
||||
documentBytes = dbInterface.getFileData(doc.fileId)
|
||||
if not documentBytes:
|
||||
if docOperationId:
|
||||
self.services.chat.progressLogFinish(docOperationId, False)
|
||||
raise ValueError(f"No file data found for fileId={doc.fileId}")
|
||||
|
||||
if docOperationId:
|
||||
self.services.chat.progressLogUpdate(docOperationId, 0.2, "Running extraction pipeline")
|
||||
|
||||
# Convert ChatDocument to the format expected by runExtraction
|
||||
documentData = {
|
||||
"id": doc.id,
|
||||
"bytes": documentBytes,
|
||||
"fileName": doc.fileName,
|
||||
"mimeType": doc.mimeType
|
||||
}
|
||||
|
||||
ec = runExtraction(
|
||||
extractorRegistry=self._extractorRegistry,
|
||||
chunkerRegistry=self._chunkerRegistry,
|
||||
documentBytes=documentData["bytes"],
|
||||
fileName=documentData["fileName"],
|
||||
mimeType=documentData["mimeType"],
|
||||
options=options
|
||||
)
|
||||
|
||||
if docOperationId:
|
||||
self.services.chat.progressLogUpdate(docOperationId, 0.7, f"Extracted {len(ec.parts)} parts")
|
||||
|
||||
# Log content parts metadata
|
||||
logger.debug(f"Content parts: {len(ec.parts)}")
|
||||
for j, part in enumerate(ec.parts):
|
||||
logger.debug(f" Part {j + 1}/{len(ec.parts)}: {part.typeGroup} ({part.mimeType}) - {len(part.data) if part.data else 0} chars")
|
||||
if part.metadata:
|
||||
logger.debug(f" Metadata: {part.metadata}")
|
||||
|
||||
results.append(ec)
|
||||
# Attach document id and MIME type to parts if missing
|
||||
for p in ec.parts:
|
||||
if "documentId" not in p.metadata:
|
||||
p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
|
||||
if "documentMimeType" not in p.metadata:
|
||||
p.metadata["documentMimeType"] = documentData["mimeType"]
|
||||
|
||||
# Log chunking information
|
||||
chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
|
||||
if chunkedParts:
|
||||
logger.debug(f"=== CHUNKING RESULTS ===")
|
||||
logger.debug(f"Total parts: {len(ec.parts)}")
|
||||
logger.debug(f"Chunked parts: {len(chunkedParts)}")
|
||||
for chunk in chunkedParts:
|
||||
logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
|
||||
else:
|
||||
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
|
||||
|
||||
if docOperationId:
|
||||
self.services.chat.progressLogUpdate(docOperationId, 0.9, f"Processing complete: {len(ec.parts)} parts extracted")
|
||||
|
||||
# Calculate timing and emit stats
|
||||
endTime = time.time()
|
||||
processingTime = endTime - startTime
|
||||
bytesSent = len(documentBytes)
|
||||
bytesReceived = sum(len(part.data) if part.data else 0 for part in ec.parts)
|
||||
|
||||
# Emit stats for extraction operation
|
||||
|
||||
# Use internal extraction model for pricing
|
||||
modelDisplayName = "Internal Document Extractor"
|
||||
model = modelRegistry.getModel(modelDisplayName)
|
||||
# Hard fail if model is missing; caller must ensure connectors are registered
|
||||
if model is None or model.calculatePriceUsd is None:
|
||||
if docOperationId:
|
||||
self.services.chat.progressLogFinish(docOperationId, False)
|
||||
raise RuntimeError(f"Pricing model not available: {modelDisplayName}")
|
||||
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
|
||||
|
||||
# Create AiCallResponse with real calculation
|
||||
# Use model.name for the response (API identifier), not displayName
|
||||
aiResponse = AiCallResponse(
|
||||
content="", # No content for extraction stats needed
|
||||
modelName=model.name,
|
||||
priceUsd=priceUsd,
|
||||
processingTime=processingTime,
|
||||
bytesSent=bytesSent,
|
||||
bytesReceived=bytesReceived,
|
||||
errorCount=0
|
||||
)
|
||||
|
||||
self.services.chat.storeWorkflowStat(
|
||||
self.services.workflow,
|
||||
aiResponse,
|
||||
f"extraction.process.{doc.mimeType}"
|
||||
)
|
||||
|
||||
# Write extraction results to debug file
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
import json
|
||||
# Create summary of extraction results for debug
|
||||
extractionSummary = {
|
||||
"documentName": doc.fileName,
|
||||
"documentMimeType": doc.mimeType,
|
||||
"partsCount": len(ec.parts),
|
||||
"parts": []
|
||||
}
|
||||
for part in ec.parts:
|
||||
partSummary = {
|
||||
"typeGroup": part.typeGroup,
|
||||
"mimeType": part.mimeType,
|
||||
"label": part.label,
|
||||
"dataLength": len(part.data) if part.data else 0,
|
||||
"metadata": part.metadata
|
||||
}
|
||||
# Include data preview for small parts (first 500 chars)
|
||||
if part.data and len(part.data) <= 500:
|
||||
partSummary["dataPreview"] = part.data[:500]
|
||||
elif part.data:
|
||||
partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
|
||||
extractionSummary["parts"].append(partSummary)
|
||||
|
||||
writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to write extraction debug file: {str(e)}")
|
||||
|
||||
results.append(ec)
|
||||
|
||||
# Finish document operation successfully
|
||||
if docOperationId:
|
||||
self.services.chat.progressLogFinish(docOperationId, True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting content from document {i + 1}/{totalDocs} ({doc.fileName}): {str(e)}")
|
||||
if docOperationId:
|
||||
try:
|
||||
self.services.chat.progressLogFinish(docOperationId, False)
|
||||
except:
|
||||
pass # Don't fail on progress logging errors
|
||||
# Continue with next document instead of failing completely
|
||||
# This allows parallel processing to continue even if one document fails
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
|
|
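The stats emission above leans entirely on the registered model's `calculatePriceUsd` callable. A minimal sketch of what such a callable might look like, assuming purely hypothetical per-second and per-megabyte rates (the real rates live where the "Internal Document Extractor" connector is registered, which is not part of this diff):

# Hypothetical sketch - the actual pricing callable is defined at connector
# registration time; the signature matches the call site shown above.
def calculatePriceUsd(processingTime: float, bytesSent: int, bytesReceived: int) -> float:
    timeCost = processingTime * 0.001                          # assumed $0.001 per second
    transferCost = (bytesSent + bytesReceived) / 1_000_000 * 0.01  # assumed $0.01 per MB
    return round(timeCost + transferCost, 6)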
@@ -479,7 +536,8 @@ class ExtractionService:

        # Extract content WITHOUT chunking
        if operationId:
            self.services.chat.progressLogUpdate(operationId, 0.1, f"Extracting content from {len(documents)} documents")
        extractionResult = self.extractContent(documents, extractionOptions)
        # Pass operationId as parentOperationId for hierarchical logging
        extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=parentOperationId)

        if not isinstance(extractionResult, list):
            if operationId:
@@ -489,16 +547,14 @@ class ExtractionService:

        # Process parts (not chunks) with model-aware AI calls
        if operationId:
            self.services.chat.progressLogUpdate(operationId, 0.3, f"Processing {len(extractionResult)} extracted content parts")
        # Get parent log ID for part operations
        parentLogId = None
        if operationId:
            parentLogId = self.services.chat.getOperationLogId(operationId)
        partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentLogId)
        # Use parent operation ID directly (parentId should be operationId, not log entry ID)
        parentOperationId = operationId  # Use the parent's operationId directly
        partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentOperationId)

        # Merge results using existing merging system
        if operationId:
            self.services.chat.progressLogUpdate(operationId, 0.9, f"Merging {len(partResults)} part results")
        mergedContent = self._mergePartResults(partResults, options)
        mergedContent = self.mergePartResults(partResults, options)

        # Save merged extraction content to debug
        self.services.utils.writeDebugFile(mergedContent or '', "extraction_merged_text")
@@ -520,7 +576,7 @@ class ExtractionService:
        aiObjects: Any,
        options: Optional[AiCallOptions] = None,
        operationId: Optional[str] = None,
        parentLogId: Optional[str] = None
        parentOperationId: Optional[str] = None
    ) -> List[PartResult]:
        """Process content parts with model-aware chunking and proper mapping."""
@@ -567,7 +623,7 @@ class ExtractionService:
                "Content Processing",
                f"Part {part_index + 1}",
                f"Type: {part.typeGroup}",
                parentId=parentLogId
                parentOperationId=parentOperationId
            )

            try:
@@ -660,54 +716,473 @@ class ExtractionService:
        logger.info(f"Completed processing {len(processedResults)} parts")
        return processedResults

    def _mergePartResults(
    def _convertToContentParts(
        self, partResults: Union[List[PartResult], List[AiCallResponse]]
    ) -> List[ContentPart]:
        """Convert part results to ContentParts (internal helper for consolidation).

        Handles both PartResult (from the extraction workflow) and AiCallResponse (from content parts processing).
        """
        content_parts = []

        if not partResults:
            return content_parts

        # Detect input type and convert accordingly
        if isinstance(partResults[0], PartResult):
            # Existing logic for PartResult (from processDocumentsPerChunk)
            for part_result in partResults:
                content_part = ContentPart(
                    id=part_result.originalPart.id,
                    parentId=part_result.originalPart.parentId,
                    label=part_result.originalPart.label,
                    typeGroup=part_result.originalPart.typeGroup,  # Use original typeGroup
                    mimeType=part_result.originalPart.mimeType,
                    data=part_result.aiResult,  # Use AI result as data
                    metadata={
                        **part_result.originalPart.metadata,
                        "aiResult": True,
                        "partIndex": part_result.partIndex,
                        "documentId": part_result.documentId,
                        "processingTime": part_result.processingTime,
                        "success": part_result.metadata.get("success", False)
                    }
                )
                content_parts.append(content_part)
        elif isinstance(partResults[0], AiCallResponse):
            # Logic from interfaceAiObjects (from content parts processing)
            for i, result in enumerate(partResults):
                if result.content:
                    content_part = ContentPart(
                        id=str(uuid.uuid4()),
                        parentId=None,
                        label=f"ai_result_{i}",
                        typeGroup="text",  # Default to text for AI results
                        mimeType="text/plain",
                        data=result.content,
                        metadata={
                            "aiResult": True,
                            "modelName": result.modelName,
                            "priceUsd": result.priceUsd,
                            "processingTime": result.processingTime,
                            "bytesSent": result.bytesSent,
                            "bytesReceived": result.bytesReceived
                        }
                    )
                    content_parts.append(content_part)

        return content_parts

    def mergePartResults(
        self,
        partResults: List[PartResult],
        partResults: Union[List[PartResult], List[AiCallResponse]],
        options: Optional[AiCallOptions] = None
    ) -> str:
        """Merge part results using existing sophisticated merging system."""
    ) -> str:
        """Unified merge for both PartResult and AiCallResponse.

        Consolidated from both interfaceAiObjects.py and the existing serviceExtraction method.
        """
        if not partResults:
            return ""

        # Convert PartResults back to ContentParts for existing merger system
        content_parts = []
        for part_result in partResults:
            # Create ContentPart from PartResult with proper typeGroup
            content_part = ContentPart(
                id=part_result.originalPart.id,
                parentId=part_result.originalPart.parentId,
                label=part_result.originalPart.label,
                typeGroup=part_result.originalPart.typeGroup,  # Use original typeGroup
                mimeType=part_result.originalPart.mimeType,
                data=part_result.aiResult,  # Use AI result as data
                metadata={
                    **part_result.originalPart.metadata,
                    "aiResult": True,
                    "partIndex": part_result.partIndex,
                    "documentId": part_result.documentId,
                    "processingTime": part_result.processingTime,
                    "success": part_result.metadata.get("success", False)
                }
        # Convert to ContentParts using unified helper
        content_parts = self._convertToContentParts(partResults)

        # Determine merge strategy based on input type
        if isinstance(partResults[0], PartResult):
            # Use strategy for extraction workflow (group by document, order by part index)
            merge_strategy = MergeStrategy(
                useIntelligentMerging=True,
                groupBy="documentId",  # Group by document
                orderBy="partIndex",  # Order by part index
                mergeType="concatenate"
            )
        else:
            # Default strategy for content parts workflow
            merge_strategy = MergeStrategy(
                useIntelligentMerging=True,
                groupBy="typeGroup",
                orderBy="id",
                mergeType="concatenate"
            )
            content_parts.append(content_part)

        # Use existing merging strategy from options
        merge_strategy = MergeStrategy(
            useIntelligentMerging=True,
            groupBy="documentId",  # Group by document
            orderBy="partIndex",  # Order by part index
            mergeType="concatenate"
        )

        # Apply existing merging logic using the sophisticated merging system
        from modules.interfaces.interfaceAiObjects import applyMerging
        # Apply merging
        merged_parts = applyMerging(content_parts, merge_strategy)

        # Convert merged parts back to final string
        # Convert back to string
        final_content = "\n\n".join([part.data for part in merged_parts])

        logger.info(f"Merged {len(partResults)} parts using existing sophisticated merging system")
        logger.info(f"Merged {len(partResults)} parts using unified merging system")
        return final_content.strip()
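Since `mergePartResults` now accepts both input types, callers no longer need to know which workflow produced the results. A hedged usage sketch - the `extractionService`, `partResults`, and `aiResponses` names are illustrative stand-ins, not taken from this diff:

# Illustrative only - assumes an ExtractionService instance and results
# produced elsewhere in the pipeline.
mergedFromExtraction = extractionService.mergePartResults(partResults)  # List[PartResult]
mergedFromAiCalls = extractionService.mergePartResults(aiResponses)     # List[AiCallResponse]
# Both return a single "\n\n"-joined string, grouped per the detected strategy.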
    async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
        """Chunk a content part based on model capabilities, accounting for prompt, system message overhead, and maxTokens output.

        Moved from interfaceAiObjects.py - model-aware chunking for AI processing.
        Complementary to existing size-based chunking in extraction pipeline.
        """
        # Calculate model-specific chunk sizes
        modelContextTokens = model.contextLength  # Total context in tokens
        modelMaxOutputTokens = model.maxTokens  # Maximum output tokens

        # Reserve tokens for:
        # 1. Prompt (user message)
        promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0

        # 2. System message wrapper ("Context from documents:\n")
        systemMessageTokens = 10  # ~40 bytes = 10 tokens

        # 3. Max output tokens (model will reserve space for completion)
        outputTokens = modelMaxOutputTokens

        # 4. JSON structure and message overhead (~100 tokens)
        messageOverheadTokens = 100

        # Total reserved tokens = input overhead + output reservation
        totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens

        # Available tokens for content = context length - reserved tokens
        # Use 80% of available for safety margin
        availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)

        # Ensure we have at least some space
        if availableContentTokens < 100:
            logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens")
            availableContentTokens = max(100, int(modelContextTokens * 0.1))  # Fallback to 10% of context

        # Convert tokens to bytes (1 token ≈ 4 bytes)
        availableContentBytes = availableContentTokens * 4

        logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)")

        # Use 70% of available content bytes for text chunks (conservative)
        textChunkSize = int(availableContentBytes * 0.7)
        imageChunkSize = int(availableContentBytes * 0.8)  # 80% for image chunks

        # Build chunking options
        chunkingOptions = {
            "textChunkSize": textChunkSize,
            "imageChunkSize": imageChunkSize,
            "maxSize": availableContentBytes,
            "chunkAllowed": True
        }

        # Get appropriate chunker (uses existing ChunkerRegistry ✅)
        chunker = self._chunkerRegistry.resolve(contentPart.typeGroup)

        if not chunker:
            logger.warning(f"No chunker found for typeGroup: {contentPart.typeGroup}")
            return []

        # Chunk the content part
        try:
            chunks = chunker.chunk(contentPart, chunkingOptions)
            logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part")
            return chunks
        except Exception as e:
            logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
            return []
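To make the reservation arithmetic concrete, here is a worked example using the formulas above with assumed model numbers (contextLength=128000, maxTokens=4096, a 2000-byte prompt); the values are illustrative, not taken from the codebase:

# Worked example of the token budget above (assumed model parameters).
modelContextTokens = 128000          # assumed context window
modelMaxOutputTokens = 4096          # assumed max output tokens
promptTokens = 2000 / 4              # 2000-byte prompt ≈ 500 tokens

totalReservedTokens = promptTokens + 10 + 100 + modelMaxOutputTokens            # = 4706.0
availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)  # = 98635
availableContentBytes = availableContentTokens * 4                              # = 394540
textChunkSize = int(availableContentBytes * 0.7)                                # = 276178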
    async def processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList, aiObjects, progressCallback=None) -> AiCallResponse:
        """Process a single content part with model-aware chunking and fallback.

        Moved from interfaceAiObjects.py - orchestrates chunking and merging.
        Calls aiObjects._callWithModel() for actual AI calls.
        """
        lastError = None

        # Check if this is an image - Vision models need special handling
        isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))

        # Determine the correct operation type based on content type
        actualOperationType = options.operationType
        if isImage:
            actualOperationType = OperationTypeEnum.IMAGE_ANALYSE
            # Get vision-capable models for images
            availableModels = modelRegistry.getAvailableModels()
            visionFailoverList = modelSelector.getFailoverModelList(prompt, "", AiCallOptions(operationType=actualOperationType), availableModels)
            if visionFailoverList:
                logger.debug(f"Using {len(visionFailoverList)} vision-capable models for image processing")
                failoverModelList = visionFailoverList

        for attempt, model in enumerate(failoverModelList):
            try:
                logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")

                # Special handling for images with Vision models
                if isImage and hasattr(model, 'functionCall'):
                    try:
                        if not contentPart.data:
                            raise ValueError("Image content part has no data")

                        mimeType = contentPart.mimeType or "image/jpeg"
                        if not mimeType.startswith("image/"):
                            raise ValueError(f"Invalid mimeType for image: {mimeType}")

                        # Prepare base64 data
                        if isinstance(contentPart.data, str):
                            try:
                                base64.b64decode(contentPart.data, validate=True)
                                base64Data = contentPart.data
                            except Exception as e:
                                raise ValueError(f"Invalid base64 data in contentPart: {str(e)}")
                        elif isinstance(contentPart.data, bytes):
                            base64Data = base64.b64encode(contentPart.data).decode('utf-8')
                        else:
                            raise ValueError(f"Unsupported data type for image: {type(contentPart.data)}")

                        imageDataUrl = f"data:{mimeType};base64,{base64Data}"

                        modelCall = AiModelCall(
                            messages=[
                                {
                                    "role": "user",
                                    "content": [
                                        {"type": "text", "text": prompt or ""},
                                        {
                                            "type": "image_url",
                                            "image_url": {"url": imageDataUrl}
                                        }
                                    ]
                                }
                            ],
                            model=model,
                            options=AiCallOptions(operationType=actualOperationType)
                        )

                        modelResponse = await model.functionCall(modelCall)

                        if not modelResponse.success:
                            raise ValueError(f"Model call failed: {modelResponse.error}")

                        logger.info(f"✅ Image content part processed successfully with model: {model.name}")

                        processingTime = getattr(modelResponse, 'processingTime', None) or 0.0

                        return AiCallResponse(
                            content=modelResponse.content,
                            modelName=model.name,
                            priceUsd=0.0,
                            processingTime=processingTime,
                            bytesSent=0,
                            bytesReceived=0,
                            errorCount=0
                        )
                    except Exception as e:
                        lastError = e
                        logger.warning(f"❌ Image processing failed with model {model.name}: {str(e)}")

                        if attempt < len(failoverModelList) - 1:
                            logger.info(f"🔄 Trying next fallback model for image processing...")
                            continue
                        else:
                            logger.error(f"💥 All {len(failoverModelList)} models failed for image processing")
                            raise

                # For non-image parts, check if part fits in model context
                partSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0

                modelContextTokens = model.contextLength
                modelMaxOutputTokens = model.maxTokens

                promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
                systemMessageTokens = 10
                outputTokens = modelMaxOutputTokens
                messageOverheadTokens = 100
                totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens

                availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
                if availableContentTokens < 100:
                    availableContentTokens = max(100, int(modelContextTokens * 0.1))

                availableContentBytes = availableContentTokens * 4

                logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes")

                if partSize <= availableContentBytes:
                    # Part fits - call AI directly via aiObjects interface
                    response = await aiObjects._callWithModel(model, prompt, contentPart.data, options)
                    logger.info(f"✅ Content part processed successfully with model: {model.name}")
                    return response
                else:
                    # Part too large - chunk it
                    chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt)
                    if not chunks:
                        raise ValueError(f"Failed to chunk content part for model {model.name}")

                    logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}")

                    if progressCallback:
                        progressCallback(0.0, f"Starting to process {len(chunks)} chunks")

                    chunkResults = []
                    for idx, chunk in enumerate(chunks):
                        chunkNum = idx + 1
                        chunkData = chunk.get('data', '')
                        logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}")

                        if progressCallback:
                            progressCallback(chunkNum / len(chunks), f"Processing chunk {chunkNum}/{len(chunks)}")

                        try:
                            chunkResponse = await aiObjects._callWithModel(model, prompt, chunkData, options)
                            chunkResults.append(chunkResponse)
                            logger.info(f"✅ Chunk {chunkNum}/{len(chunks)} processed successfully")

                            if progressCallback:
                                progressCallback(chunkNum / len(chunks), f"Chunk {chunkNum}/{len(chunks)} processed")
                        except Exception as e:
                            logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}")
                            raise

                    # Merge chunk results
                    mergedContent = self.mergeChunkResults(chunkResults)

                    logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
                    return AiCallResponse(
                        content=mergedContent,
                        modelName=model.name,
                        priceUsd=sum(r.priceUsd for r in chunkResults),
                        processingTime=sum(r.processingTime for r in chunkResults),
                        bytesSent=sum(r.bytesSent for r in chunkResults),
                        bytesReceived=sum(r.bytesReceived for r in chunkResults),
                        errorCount=sum(r.errorCount for r in chunkResults)
                    )

            except Exception as e:
                lastError = e
                error_msg = str(e) if str(e) else f"{type(e).__name__}"
                logger.warning(f"❌ Model {model.name} failed for content part: {error_msg}", exc_info=True)

                if attempt < len(failoverModelList) - 1:
                    logger.info(f"🔄 Trying next failover model...")
                    continue
                else:
                    logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
                    break

        # All models failed
        return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)

    def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
        """Create an error response."""
        return AiCallResponse(
            content=errorMsg,
            modelName="error",
            priceUsd=0.0,
            processingTime=0.0,
            bytesSent=inputBytes,
            bytesReceived=outputBytes,
            errorCount=1
        )
    async def processContentPartsWithAi(
        self,
        request: AiCallRequest,
        aiObjects,  # Pass interface for AI calls
        progressCallback=None
    ) -> AiCallResponse:
        """Process content parts with model-aware chunking and AI calls.

        Moved from interfaceAiObjects.callWithContentParts() - entry point for content parts processing.
        """
        prompt = request.prompt
        options = request.options
        contentParts = request.contentParts

        # Get failover models
        availableModels = modelRegistry.getAvailableModels()
        failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)

        if not failoverModelList:
            return self._createErrorResponse("No suitable models found", 0, 0)

        # Process each content part
        allResults = []
        for contentPart in contentParts:
            partResult = await self.processContentPartWithFallback(
                contentPart, prompt, options, failoverModelList, aiObjects, progressCallback
            )
            allResults.append(partResult)

        # Merge all results using unified mergePartResults
        mergedContent = self.mergePartResults(allResults)

        return AiCallResponse(
            content=mergedContent,
            modelName="multiple",
            priceUsd=sum(r.priceUsd for r in allResults),
            processingTime=sum(r.processingTime for r in allResults),
            bytesSent=sum(r.bytesSent for r in allResults),
            bytesReceived=sum(r.bytesReceived for r in allResults),
            errorCount=sum(r.errorCount for r in allResults)
        )
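A hedged usage sketch of this entry point. The method only reads `request.prompt`, `request.options`, and `request.contentParts`, so the constructor keywords below follow that shape; the exact `AiCallRequest` signature and the `TEXT_ANALYSE` enum member are assumptions, not confirmed by this diff:

# Illustrative only - field names mirror what the method reads above.
request = AiCallRequest(
    prompt="Summarize each part",
    options=AiCallOptions(operationType=OperationTypeEnum.TEXT_ANALYSE),  # assumed member
    contentParts=[somePart]
)
response = await extractionService.processContentPartsWithAi(request, aiObjects)
print(response.modelName, response.priceUsd)  # "multiple", summed cost across parts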
# Module-level function for use by subPipeline and ExtractionService
def applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[ContentPart]:
    """Apply merging strategy to parts with intelligent token-aware merging.

    Moved from interfaceAiObjects.py to resolve dependency violations.
    Can be used as module-level function or called from ExtractionService methods.
    """
    logger.debug(f"applyMerging called with {len(parts)} parts")

    # Import merging dependencies (now local imports ✅)
    from .merging.mergerText import TextMerger
    from .merging.mergerTable import TableMerger
    from .merging.mergerDefault import DefaultMerger
    from .subMerger import IntelligentTokenAwareMerger

    # Check if intelligent merging is enabled
    if strategy.useIntelligentMerging:
        modelCapabilities = strategy.capabilities or {}
        subMerger = IntelligentTokenAwareMerger(modelCapabilities)

        # Use intelligent merging for all parts
        merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")

        # Calculate and log optimization stats
        stats = subMerger.calculateOptimizationStats(parts, merged)
        logger.info(f"🧠 Intelligent merging stats: {stats}")
        logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")

        return merged

    # Fallback to traditional merging
    textMerger = TextMerger()
    tableMerger = TableMerger()
    defaultMerger = DefaultMerger()

    # Group by typeGroup
    textParts = [p for p in parts if p.typeGroup == "text"]
    tableParts = [p for p in parts if p.typeGroup == "table"]
    structureParts = [p for p in parts if p.typeGroup == "structure"]
    otherParts = [p for p in parts if p.typeGroup not in ("text", "table", "structure")]

    logger.debug(f"Grouped - text: {len(textParts)}, table: {len(tableParts)}, structure: {len(structureParts)}, other: {len(otherParts)}")

    merged: List[ContentPart] = []

    if textParts:
        textMerged = textMerger.merge(textParts, strategy)
        logger.debug(f"TextMerger merged {len(textParts)} parts into {len(textMerged)} parts")
        merged.extend(textMerged)
    if tableParts:
        tableMerged = tableMerger.merge(tableParts, strategy)
        logger.debug(f"TableMerger merged {len(tableParts)} parts into {len(tableMerged)} parts")
        merged.extend(tableMerged)
    if structureParts:
        # For now, treat structure like text
        structureMerged = textMerger.merge(structureParts, strategy)
        logger.debug(f"StructureMerger merged {len(structureParts)} parts into {len(structureMerged)} parts")
        merged.extend(structureMerged)
    if otherParts:
        otherMerged = defaultMerger.merge(otherParts, strategy)
        logger.debug(f"DefaultMerger merged {len(otherParts)} parts into {len(otherMerged)} parts")
        merged.extend(otherMerged)

    logger.debug(f"applyMerging returning {len(merged)} parts")
    return merged
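When `useIntelligentMerging` is off, parts are simply grouped by `typeGroup` and delegated to the per-type mergers. A small sketch under the assumption that `MergeStrategy` accepts the keyword fields used in the constructions above:

# Illustrative only - forces the traditional per-typeGroup path.
from modules.services.serviceExtraction.mainServiceExtraction import applyMerging

strategy = MergeStrategy(
    useIntelligentMerging=False,
    groupBy="typeGroup",
    orderBy="id",
    mergeType="concatenate"
)
merged = applyMerging(parts, strategy)  # text/table/structure/other merged separately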
@@ -34,7 +34,8 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker

    # Apply merging strategy if provided (preserve existing logic)
    if options.mergeStrategy:
        from modules.interfaces.interfaceAiObjects import applyMerging
        # Use module-level applyMerging function
        from .mainServiceExtraction import applyMerging
        parts = applyMerging(parts, options.mergeStrategy)

    return ContentExtracted(id=makeId(), parts=parts)
@@ -99,6 +99,18 @@ async def buildExtractionPrompt(
    # Parse extraction intent if AI service is available
    extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt

    # Extract user language for document language instruction
    userLanguage = 'en'  # Default fallback
    if services:
        try:
            # Prefer detected language if available
            if hasattr(services, 'currentUserLanguage') and services.currentUserLanguage:
                userLanguage = services.currentUserLanguage
            elif hasattr(services, 'user') and services.user and hasattr(services.user, 'language'):
                userLanguage = services.user.language
        except Exception:
            pass

    # Build base prompt with clear user prompt markers
    sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt
    adaptive_prompt = f"""
@@ -114,6 +126,8 @@ You are a document processing assistant that extracts and structures content fro

TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.

LANGUAGE REQUIREMENT: All extracted content must be in the language '{userLanguage}'. Extract and preserve content in this language.

{extraction_intent}

REQUIREMENTS:
@@ -362,7 +362,7 @@ class BaseRenderer(ABC):
        self.logger.debug(f"AI Style Template Prompt:")
        self.logger.debug(f"{styleTemplate}")

        response = await aiService.aiObjects.call(request)
        response = await aiService.callAi(request)

        # Save styling prompt and response to debug
        self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
@@ -205,7 +205,7 @@ Return only the compressed prompt, no explanations.
            )
        )

        response = await aiService.aiObjects.call(request)
        response = await aiService.callAi(request)
        compressed = response.content.strip()

        # Validate the compressed prompt
@@ -227,7 +227,7 @@ class RendererPdf(BaseRenderer):
            self.logger.warning("AI service not properly configured, using defaults")
            return default_styles

        response = await ai_service.aiObjects.call(request)
        response = await ai_service.callAi(request)

        # Check if response is valid
        if not response:
@@ -424,7 +424,7 @@ JSON ONLY. NO OTHER TEXT."""
            self.logger.warning("AI service not properly configured, using defaults")
            return default_styles

        response = await aiService.aiObjects.call(request)
        response = await aiService.callAi(request)

        # Check if response is valid
        if not response:
@@ -346,7 +346,7 @@ class RendererXlsx(BaseRenderer):
        requestOptions.operationType = OperationTypeEnum.DATA_GENERATE

        request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
        response = await aiService.aiObjects.call(request)
        response = await aiService.callAi(request)

        import json
        import re
@@ -16,7 +16,8 @@ async def buildGenerationPrompt(
    userPrompt: str,
    title: str,
    extracted_content: str = None,
    continuationContext: Dict[str, Any] = None
    continuationContext: Dict[str, Any] = None,
    services: Any = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.
@@ -28,10 +29,23 @@ async def buildGenerationPrompt(
        title: Title for the document
        extracted_content: Optional extracted content from documents to prepend to prompt
        continuationContext: Optional context from previous generation for continuation
        services: Optional services instance for accessing user language

    Returns:
        Complete generation prompt string
    """
    # Extract user language for document language instruction
    userLanguage = 'en'  # Default fallback
    if services:
        try:
            # Prefer detected language if available
            if hasattr(services, 'currentUserLanguage') and services.currentUserLanguage:
                userLanguage = services.currentUserLanguage
            elif hasattr(services, 'user') and services.user and hasattr(services.user, 'language'):
                userLanguage = services.user.language
        except Exception:
            pass

    # Create a template - let AI generate title if not provided
    titleValue = title if title else "Generated Document"
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)
@@ -82,6 +96,8 @@ END OF USER REQUEST / USER PROMPT

⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.

LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language.

{continuationText}

JSON structure template:
@@ -92,6 +108,7 @@ Rules:
- Reference elements shown above are ALREADY DELIVERED - DO NOT repeat them.
- Generate ONLY the remaining content that comes AFTER the reference elements.
- DO NOT regenerate the entire JSON structure - start directly with what comes next.
- All content must be in the language '{userLanguage}'.
- Output JSON only; no markdown fences or extra text.

Continue generating the remaining content now.
@@ -124,6 +141,8 @@ EXTRACTED CONTENT FROM DOCUMENTS:
END OF EXTRACTED CONTENT
{'='*80}

LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language. If the extracted content is in a different language, translate it to '{userLanguage}' while preserving the structure and meaning.

Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
@@ -136,6 +155,7 @@ Instructions:
- Do NOT reuse example section IDs; create your own.
- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
- All content must be in the language '{userLanguage}'.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
@@ -151,6 +171,8 @@ USER REQUEST / USER PROMPT:
END OF USER REQUEST / USER PROMPT
{'='*80}

LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language.

Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.

JSON structure template:
@@ -160,6 +182,7 @@ Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- All content must be in the language '{userLanguage}'.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
128 modules/services/serviceSecurity/mainServiceSecurity.py (new file)
@@ -0,0 +1,128 @@
"""
Security service for token management operations.
Provides centralized access to token refresh and management functionality.
"""

import logging
from typing import Optional, Callable

from modules.datamodels.datamodelSecurity import Token
from modules.security.tokenManager import TokenManager

logger = logging.getLogger(__name__)


class SecurityService:
    """Security service providing token management operations."""

    def __init__(self, services):
        """Initialize security service with service center access.

        Args:
            services: Service center instance providing access to interfaces
        """
        self.services = services
        self._tokenManager = TokenManager()

    def getFreshToken(self, connectionId: str, secondsBeforeExpiry: int = 30 * 60) -> Optional[Token]:
        """Get a fresh token for a connection, refreshing when it expires soon.

        Reads the latest stored token via the interface layer, then uses
        ensureFreshToken to refresh if needed and persists the refreshed
        token via the interface layer.

        Args:
            connectionId: ID of the connection to get the token for
            secondsBeforeExpiry: Threshold window to proactively refresh (default: 30 minutes)

        Returns:
            Token object or None if not found/expired
        """
        try:
            # Use interface from services instead of getRootInterface()
            interfaceDbApp = self.services.interfaceDbApp

            token = interfaceDbApp.getConnectionToken(connectionId)
            if not token:
                return None

            return self._tokenManager.ensureFreshToken(
                token,
                secondsBeforeExpiry=secondsBeforeExpiry,
                saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t)
            )
        except Exception as e:
            logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
            return None

    def refreshToken(self, oldToken: Token) -> Optional[Token]:
        """Refresh an expired token using the appropriate OAuth service.

        Args:
            oldToken: Token object to refresh

        Returns:
            Refreshed Token object or None if refresh failed
        """
        try:
            return self._tokenManager.refreshToken(oldToken)
        except Exception as e:
            logger.error(f"refreshToken: Error refreshing token: {e}")
            return None

    def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60,
                         saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
        """Ensure a token is fresh; refresh if expiring within the threshold.

        Args:
            token: Existing token to validate/refresh
            secondsBeforeExpiry: Threshold window to proactively refresh (default: 30 minutes)
            saveCallback: Optional function to persist a refreshed token

        Returns:
            A fresh token (refreshed or original) or None if refresh failed
        """
        try:
            return self._tokenManager.ensureFreshToken(
                token,
                secondsBeforeExpiry=secondsBeforeExpiry,
                saveCallback=saveCallback
            )
        except Exception as e:
            logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}")
            return None

    def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
        """Refresh Microsoft OAuth token using a refresh token.

        Args:
            refreshToken: Microsoft refresh token
            userId: User ID owning the token
            oldToken: Previous token object to preserve connection ID

        Returns:
            New Token object or None if refresh failed
        """
        try:
            return self._tokenManager.refreshMicrosoftToken(refreshToken, userId, oldToken)
        except Exception as e:
            logger.error(f"refreshMicrosoftToken: Error refreshing Microsoft token: {e}")
            return None

    def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
        """Refresh Google OAuth token using a refresh token.

        Args:
            refreshToken: Google refresh token
            userId: User ID owning the token
            oldToken: Previous token object to preserve connection ID

        Returns:
            New Token object or None if refresh failed
        """
        try:
            return self._tokenManager.refreshGoogleToken(refreshToken, userId, oldToken)
        except Exception as e:
            logger.error(f"refreshGoogleToken: Error refreshing Google token: {e}")
            return None
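A hedged usage sketch of the proactive-refresh flow: the service reads the stored token, and `ensureFreshToken` refreshes and persists it only when it is inside the expiry window. The connection ID and wired service center are assumptions for illustration:

# Illustrative only - assumes a wired service center; "conn-123" is hypothetical.
security = services.security
token = security.getFreshToken("conn-123")                                   # refreshes if < 30 min to expiry
tokenSoon = security.getFreshToken("conn-123", secondsBeforeExpiry=5 * 60)   # tighter refresh window
if token is None:
    logger.warning("Connection has no usable token")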
@@ -47,9 +47,12 @@ class SharepointService:
            logger.error("UserConnection must have an 'id' field")
            return False

        # Get a fresh token for this specific connection
        from modules.security.tokenManager import TokenManager
        token = TokenManager().getFreshToken(connectionId)
        # Get a fresh token for this specific connection via security service
        if not self.services:
            logger.error("Service center not available for token access")
            return False

        token = self.services.security.getFreshToken(connectionId)
        if not token:
            logger.error(f"No token found for connection {connectionId}")
            return False
@@ -155,11 +155,11 @@ class UtilsService:

    def storeDebugMessageAndDocuments(self, message, currentUser):
        """
        Wrapper to store debug messages and documents via shared debugLogger.
        Mirrors storeDebugMessageAndDocuments() in modules.shared.debugLogger.
        Wrapper to store debug messages and documents via interfaceDbChatObjects.
        Mirrors storeDebugMessageAndDocuments() in modules.interfaces.interfaceDbChatObjects.
        """
        try:
            from modules.shared.debugLogger import storeDebugMessageAndDocuments as _storeDebugMessageAndDocuments
            from modules.interfaces.interfaceDbChatObjects import storeDebugMessageAndDocuments as _storeDebugMessageAndDocuments
            _storeDebugMessageAndDocuments(message, currentUser)
        except Exception:
            # Silent fail to never break main flow
@@ -114,16 +114,14 @@ class WebService:
            self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
            self.services.chat.progressLogUpdate(operationId, 0.6, f"Crawling {len(allUrls)} URLs")

        # Get parent log ID for URL-level operations
        parentLogId = None
        if operationId:
            parentLogId = self.services.chat.getOperationLogId(operationId)
        # Use parent operation ID directly (parentId should be operationId, not log entry ID)
        parentOperationId = operationId  # Use the parent's operationId directly

        crawlResult = await self._performWebCrawl(
            instruction=instruction,
            urls=allUrls,
            maxDepth=maxDepth,
            parentLogId=parentLogId
            parentOperationId=parentOperationId
        )

        if operationId:
@@ -131,18 +129,95 @@ class WebService:
            self.services.chat.progressLogUpdate(operationId, 0.95, "Completed")
            self.services.chat.progressLogFinish(operationId, True)

        # Return consolidated result
        # Calculate statistics about crawl results
        totalResults = len(crawlResult) if isinstance(crawlResult, list) else 1
        totalContentLength = 0
        urlsWithContent = 0

        # Analyze crawl results to gather statistics
        if isinstance(crawlResult, list):
            for item in crawlResult:
                if isinstance(item, dict):
                    if item.get("url"):
                        urlsWithContent += 1
                    content = item.get("content", "")
                    if isinstance(content, str):
                        totalContentLength += len(content)
                    elif isinstance(content, dict):
                        # Estimate size from dict
                        totalContentLength += len(str(content))
        elif isinstance(crawlResult, dict):
            if crawlResult.get("url"):
                urlsWithContent = 1
            content = crawlResult.get("content", "")
            if isinstance(content, str):
                totalContentLength = len(content)
            elif isinstance(content, dict):
                totalContentLength = len(str(content))

        # Convert crawl results into sections format for generic validator
        sections = []
        if isinstance(crawlResult, list):
            for idx, item in enumerate(crawlResult):
                if isinstance(item, dict):
                    section = {
                        "id": f"result_{idx}",
                        "content_type": "paragraph",
                        "title": item.get("url", f"Result {idx + 1}"),
                        "order": idx
                    }
                    # Add content preview
                    content = item.get("content", "")
                    if isinstance(content, str) and content:
                        section["textPreview"] = content[:200] + ("..." if len(content) > 200 else "")
                    sections.append(section)
        elif isinstance(crawlResult, dict):
            section = {
                "id": "result_0",
                "content_type": "paragraph",
                "title": crawlResult.get("url", "Research Result"),
                "order": 0
            }
            content = crawlResult.get("content", "")
            if isinstance(content, str) and content:
                section["textPreview"] = content[:200] + ("..." if len(content) > 200 else "")
            sections.append(section)

        # Return consolidated result with metadata in format that generic validator understands
        result = {
            "metadata": {
                "title": suggestedFilename or instruction[:100] if instruction else "Web Research Results",
                "extraction_method": "web_crawl",
                "research_depth": finalResearchDepth,
                "max_depth": maxDepth,
                "country": countryCode,
                "language": languageCode,
                "urls_crawled": allUrls[:20],  # First 20 URLs for reference
                "total_urls": len(allUrls),
                "urls_with_content": urlsWithContent,
                "total_content_length": totalContentLength,
                "crawl_date": self.services.utils.timestampGetUtc() if hasattr(self.services, 'utils') else None
            },
            "sections": sections,
            "statistics": {
                "sectionCount": len(sections),
                "total_urls": len(allUrls),
                "results_count": totalResults,
                "urls_with_content": urlsWithContent,
                "total_content_length": totalContentLength
            },
            # Keep original structure for backward compatibility
            "instruction": instruction,
            "urls_crawled": allUrls,
            "total_urls": len(allUrls),
            "results": crawlResult,
            "total_results": len(crawlResult) if isinstance(crawlResult, list) else 1
            "total_results": totalResults
        }

        # Add suggested filename if available
        if suggestedFilename:
            result["suggested_filename"] = suggestedFilename
            result["metadata"]["suggested_filename"] = suggestedFilename

        return result
@@ -311,7 +386,7 @@ Return ONLY valid JSON, no additional text:
        instruction: str,
        urls: List[str],
        maxDepth: int = 2,
        parentLogId: Optional[str] = None
        parentOperationId: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Perform web crawl on list of URLs - calls plugin for each URL individually."""
        crawlResults = []
@@ -320,7 +395,7 @@ Return ONLY valid JSON, no additional text:
        for urlIndex, url in enumerate(urls):
            # Create separate operation for each URL with parent reference
            urlOperationId = None
            if parentLogId:
            if parentOperationId:
                workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
                urlOperationId = f"web_crawl_url_{workflowId}_{urlIndex}_{int(time.time())}"
                self.services.chat.progressLogStart(
@@ -328,21 +403,23 @@ Return ONLY valid JSON, no additional text:
                    "Web Crawl",
                    f"URL {urlIndex + 1}",
                    url[:50] + "..." if len(url) > 50 else url,
                    parentId=parentLogId
                    parentOperationId=parentOperationId
                )

            try:
                logger.info(f"Crawling URL: {url}")
                logger.info(f"Crawling URL {urlIndex + 1}/{len(urls)}: {url}")

                if urlOperationId:
                    self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating")
                    displayUrl = url[:50] + "..." if len(url) > 50 else url
                    self.services.chat.progressLogUpdate(urlOperationId, 0.2, f"Crawling: {displayUrl}")
                    self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")

                # Build crawl prompt model for single URL
                crawlPromptModel = AiCallPromptWebCrawl(
                    instruction=instruction,
                    url=url,  # Single URL
                    maxDepth=maxDepth,
                    maxWidth=50
                    maxWidth=5  # Default: 5 pages per level
                )
                crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
@@ -356,16 +433,19 @@ Return ONLY valid JSON, no additional text:
                    resultFormat="json"
                )

                # Use unified callAiContent method
                if urlOperationId:
                    self.services.chat.progressLogUpdate(urlOperationId, 0.4, "Calling crawl connector")

                # Use unified callAiContent method with parentOperationId for hierarchical logging
                crawlResponse = await self.services.ai.callAiContent(
                    prompt=crawlPrompt,
                    options=crawlOptions,
                    outputFormat="json"
                    outputFormat="json",
                    parentOperationId=urlOperationId  # Pass URL operation ID as parent for sub-URL logging
                )

                if urlOperationId:
                    self.services.chat.progressLogUpdate(urlOperationId, 0.8, "Completed")
                    self.services.chat.progressLogFinish(urlOperationId, True)
                    self.services.chat.progressLogUpdate(urlOperationId, 0.7, "Processing crawl results")

                # Extract content from AiResponse
                crawlResult = crawlResponse.content
@@ -387,16 +467,30 @@ Return ONLY valid JSON, no additional text:
                else:
                    crawlData = crawlResult

                # Process crawl results and create hierarchical progress logging for sub-URLs
                if urlOperationId:
                    self.services.chat.progressLogUpdate(urlOperationId, 0.8, "Processing crawl results")

                # Recursively process crawl results to find nested URLs and create child operations
                processedResults = self._processCrawlResultsWithHierarchy(crawlData, url, urlOperationId, maxDepth, 0)

                # Count total URLs crawled (including sub-URLs) for progress message
                totalUrlsCrawled = self._countUrlsInResults(processedResults)

                # Ensure it's a list of results
                if isinstance(crawlData, list):
                    crawlResults.extend(crawlData)
                elif isinstance(crawlData, dict):
                    if "results" in crawlData:
                        crawlResults.extend(crawlData["results"])
                    else:
                        crawlResults.append(crawlData)
                if isinstance(processedResults, list):
                    crawlResults.extend(processedResults)
                elif isinstance(processedResults, dict):
                    crawlResults.append(processedResults)
                else:
                    crawlResults.append({"url": url, "content": str(crawlData)})
                    crawlResults.append({"url": url, "content": str(processedResults)})

                if urlOperationId:
                    if totalUrlsCrawled > 1:
                        self.services.chat.progressLogUpdate(urlOperationId, 0.9, f"Crawled {totalUrlsCrawled} URLs (including sub-URLs)")
                    else:
                        self.services.chat.progressLogUpdate(urlOperationId, 0.9, "Crawl completed")
                    self.services.chat.progressLogFinish(urlOperationId, True)

            except Exception as e:
                logger.error(f"Error crawling URL {url}: {str(e)}")
@@ -405,4 +499,145 @@ Return ONLY valid JSON, no additional text:
                crawlResults.append({"url": url, "error": str(e)})

        return crawlResults

    def _processCrawlResultsWithHierarchy(
        self,
        crawlData: Any,
        parentUrl: str,
        parentOperationId: Optional[str],
        maxDepth: int,
        currentDepth: int
    ) -> List[Dict[str, Any]]:
        """
        Recursively process crawl results to create hierarchical progress logging for sub-URLs.

        Args:
            crawlData: Crawl result data (dict, list, or other)
            parentUrl: Parent URL being crawled
            parentOperationId: Parent operation ID for hierarchical logging
            maxDepth: Maximum crawl depth
            currentDepth: Current depth in the crawl tree

        Returns:
            List of processed crawl results
        """
        import time
        results = []

        # Handle list of results
        if isinstance(crawlData, list):
            for idx, item in enumerate(crawlData):
                if isinstance(item, dict):
                    # Check if this item has sub-URLs or nested results
                    itemUrl = item.get("url") or item.get("source") or parentUrl

                    # Create child operation for sub-URL if we're not at max depth
                    if currentDepth < maxDepth and parentOperationId:
                        # Check if this item has nested results or children
                        hasNestedResults = "results" in item or "children" in item or "subUrls" in item

                        if hasNestedResults or (itemUrl != parentUrl and currentDepth > 0):
                            # This is a sub-URL - create child operation
                            workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
                            subUrlOperationId = f"{parentOperationId}_sub_{idx}_{int(time.time())}"
                            self.services.chat.progressLogStart(
                                subUrlOperationId,
                                "Crawling Sub-URL",
                                f"Depth {currentDepth + 1}",
                                itemUrl[:50] + "..." if len(itemUrl) > 50 else itemUrl,
                                parentOperationId=parentOperationId
                            )

                            try:
                                # Process nested results recursively
                                if "results" in item:
                                    nestedResults = self._processCrawlResultsWithHierarchy(
                                        item["results"], itemUrl, subUrlOperationId, maxDepth, currentDepth + 1
                                    )
                                    item["results"] = nestedResults
                                elif "children" in item:
                                    nestedResults = self._processCrawlResultsWithHierarchy(
                                        item["children"], itemUrl, subUrlOperationId, maxDepth, currentDepth + 1
                                    )
                                    item["children"] = nestedResults
                                elif "subUrls" in item:
                                    nestedResults = self._processCrawlResultsWithHierarchy(
                                        item["subUrls"], itemUrl, subUrlOperationId, maxDepth, currentDepth + 1
                                    )
                                    item["subUrls"] = nestedResults

                                self.services.chat.progressLogUpdate(subUrlOperationId, 0.9, "Completed")
                                self.services.chat.progressLogFinish(subUrlOperationId, True)
                            except Exception as e:
                                logger.error(f"Error processing sub-URL {itemUrl}: {str(e)}")
                                if subUrlOperationId:
                                    self.services.chat.progressLogFinish(subUrlOperationId, False)

                    results.append(item)
                else:
                    results.append(item)

        # Handle dict with results array
        elif isinstance(crawlData, dict):
            if "results" in crawlData:
                # Process nested results
                nestedResults = self._processCrawlResultsWithHierarchy(
                    crawlData["results"], parentUrl, parentOperationId, maxDepth, currentDepth
                )
                crawlData["results"] = nestedResults
                results.append(crawlData)
            elif "children" in crawlData:
                # Process children
                nestedResults = self._processCrawlResultsWithHierarchy(
                    crawlData["children"], parentUrl, parentOperationId, maxDepth, currentDepth
                )
                crawlData["children"] = nestedResults
                results.append(crawlData)
            elif "subUrls" in crawlData:
                # Process sub-URLs
                nestedResults = self._processCrawlResultsWithHierarchy(
                    crawlData["subUrls"], parentUrl, parentOperationId, maxDepth, currentDepth
                )
                crawlData["subUrls"] = nestedResults
                results.append(crawlData)
            else:
                # Single result dict
                results.append(crawlData)
        else:
            # Other types - wrap in dict
            results.append({"url": parentUrl, "content": str(crawlData)})

        return results

    def _countUrlsInResults(self, results: Any) -> int:
        """
        Recursively count total URLs in crawl results (including nested sub-URLs).

        Args:
            results: Crawl results (dict, list, or other)

        Returns:
            Total count of URLs found
        """
        count = 0

        if isinstance(results, list):
            for item in results:
                count += self._countUrlsInResults(item)
        elif isinstance(results, dict):
            # Count this URL if it has a url field
            if "url" in results or "source" in results:
                count += 1
            # Recursively count nested results
            if "results" in results:
                count += self._countUrlsInResults(results["results"])
            if "children" in results:
                count += self._countUrlsInResults(results["children"])
            if "subUrls" in results:
                count += self._countUrlsInResults(results["subUrls"])
        elif isinstance(results, str):
            # Single URL string
            count = 1

        return count
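A small worked example of the counter on an assumed nested result shape (the keys mirror exactly what `_countUrlsInResults` inspects; the URLs and the `webService` instance name are illustrative):

# Illustrative nested result - parent counts once, plus a, b, and b/1.
nested = {
    "url": "https://example.com",
    "results": [
        {"url": "https://example.com/a"},
        {"source": "https://example.com/b", "children": [{"url": "https://example.com/b/1"}]},
    ],
}
assert webService._countUrlsInResults(nested) == 4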
70 modules/shared/callbackRegistry.py (new file)
@ -0,0 +1,70 @@
|
|||
"""
|
||||
Callback registry for decoupled event notifications.
|
||||
|
||||
Allows interfaces to notify about changes without knowing about features.
|
||||
Features can register callbacks to be notified when automations change.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Callable, List, Dict, Any
|
||||
import asyncio
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CallbackRegistry:
|
||||
"""Registry for callbacks that can be triggered by interfaces without knowing about features."""
|
||||
|
||||
def __init__(self):
|
||||
self._callbacks: Dict[str, List[Callable]] = {}
|
||||
|
||||
def register(self, event_type: str, callback: Callable):
|
||||
"""Register a callback for a specific event type.
|
||||
|
||||
Args:
|
||||
event_type: Type of event (e.g., 'automation.changed')
|
||||
callback: Async or sync callback function
|
||||
"""
|
||||
if event_type not in self._callbacks:
|
||||
self._callbacks[event_type] = []
|
||||
self._callbacks[event_type].append(callback)
|
||||
logger.debug(f"Registered callback for event type: {event_type}")
|
||||
|
||||
def unregister(self, event_type: str, callback: Callable):
|
||||
"""Unregister a callback for a specific event type."""
|
||||
if event_type in self._callbacks:
|
||||
try:
|
||||
self._callbacks[event_type].remove(callback)
|
||||
logger.debug(f"Unregistered callback for event type: {event_type}")
|
||||
except ValueError:
|
||||
logger.warning(f"Callback not found for event type: {event_type}")
|
||||
|
||||
async def trigger(self, event_type: str, *args, **kwargs):
|
||||
"""Trigger all callbacks registered for an event type.
|
||||
|
||||
Args:
|
||||
event_type: Type of event to trigger
|
||||
*args, **kwargs: Arguments to pass to callbacks
|
||||
"""
|
||||
if event_type not in self._callbacks:
|
||||
return
|
||||
|
||||
callbacks = self._callbacks[event_type].copy() # Copy to avoid modification during iteration
|
||||
|
||||
for callback in callbacks:
|
||||
try:
|
||||
if asyncio.iscoroutinefunction(callback):
|
||||
await callback(*args, **kwargs)
|
||||
else:
|
||||
callback(*args, **kwargs)
|
||||
except Exception as e:
|
||||
logger.error(f"Error executing callback for {event_type}: {str(e)}", exc_info=True)
|
||||
|
||||
def has_callbacks(self, event_type: str) -> bool:
|
||||
"""Check if there are any callbacks registered for an event type."""
|
||||
return event_type in self._callbacks and len(self._callbacks[event_type]) > 0
|
||||
|
||||
|
||||
# Global singleton instance
|
||||
callbackRegistry = CallbackRegistry()
|
||||
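A minimal usage sketch of the registry (the 'automation.changed' event name comes from the docstring above; the handler and the ID passed to it are hypothetical):

import asyncio
from modules.shared.callbackRegistry import callbackRegistry

async def onAutomationChanged(automationId: str):
    print(f"Automation changed: {automationId}")

callbackRegistry.register("automation.changed", onAutomationChanged)
# trigger() awaits coroutine callbacks and calls sync ones directly
asyncio.run(callbackRegistry.trigger("automation.changed", "auto-123"))
callbackRegistry.unregister("automation.changed", onAutomationChanged)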
|
||||
|
|
@ -145,131 +145,3 @@ def debugLogToFile(message: str, context: str = "DEBUG") -> None:
|
|||
# Don't log debug errors to avoid recursion
|
||||
pass
|
||||
|
||||
def storeDebugMessageAndDocuments(message, currentUser) -> None:
|
||||
"""
|
||||
Store message and documents (metadata and file bytes) for debugging purposes.
|
||||
Structure: {log_dir}/debug/messages/{timestamp}_m_{round}_{task}_{action}/documentlist_label/
|
||||
- message.json, message_text.txt
|
||||
- document_###_metadata.json
|
||||
- document_###_<original_filename> (actual file bytes)
|
||||
|
||||
Args:
|
||||
message: ChatMessage object to store
|
||||
currentUser: Current user for component interface access
|
||||
"""
|
||||
try:
|
||||
import json
|
||||
|
||||
# Create base debug directory (use base debug dir, not prompts subdirectory)
|
||||
baseDebugDir = _getBaseDebugDir()
|
||||
debug_root = os.path.join(baseDebugDir, 'messages')
|
||||
_ensureDir(debug_root)
|
||||
|
||||
# Generate timestamp
|
||||
timestamp = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
|
||||
|
||||
# Create message folder name: {timestamp}_m_{round}_{task}_{action}
|
||||
# Use actual values from message, not defaults
|
||||
round_str = str(message.roundNumber) if message.roundNumber is not None else "0"
|
||||
task_str = str(message.taskNumber) if message.taskNumber is not None else "0"
|
||||
action_str = str(message.actionNumber) if message.actionNumber is not None else "0"
|
||||
message_folder = f"{timestamp}_m_{round_str}_{task_str}_{action_str}"
|
||||
|
||||
message_path = os.path.join(debug_root, message_folder)
|
||||
os.makedirs(message_path, exist_ok=True)
|
||||
|
||||
# Store message data - build the dict manually instead of model_dump() for compatibility
|
||||
message_file = os.path.join(message_path, "message.json")
|
||||
with open(message_file, "w", encoding="utf-8") as f:
|
||||
# Convert message to dict manually to avoid model_dump() issues
|
||||
message_dict = {
|
||||
"id": message.id,
|
||||
"workflowId": message.workflowId,
|
||||
"parentMessageId": message.parentMessageId,
|
||||
"message": message.message,
|
||||
"role": message.role,
|
||||
"status": message.status,
|
||||
"sequenceNr": message.sequenceNr,
|
||||
"publishedAt": message.publishedAt,
|
||||
"roundNumber": message.roundNumber,
|
||||
"taskNumber": message.taskNumber,
|
||||
"actionNumber": message.actionNumber,
|
||||
"documentsLabel": message.documentsLabel,
|
||||
"actionId": message.actionId,
|
||||
"actionMethod": message.actionMethod,
|
||||
"actionName": message.actionName,
|
||||
"success": message.success,
|
||||
"documents": []
|
||||
}
|
||||
json.dump(message_dict, f, indent=2, ensure_ascii=False, default=str)
|
||||
|
||||
# Store message content as text
|
||||
if message.message:
|
||||
message_text_file = os.path.join(message_path, "message_text.txt")
|
||||
with open(message_text_file, "w", encoding="utf-8") as f:
|
||||
f.write(str(message.message))
|
||||
|
||||
# Store documents if provided
|
||||
if message.documents and len(message.documents) > 0:
|
||||
# Group documents by documentsLabel
|
||||
documents_by_label = {}
|
||||
for doc in message.documents:
|
||||
label = message.documentsLabel or 'default'
|
||||
if label not in documents_by_label:
|
||||
documents_by_label[label] = []
|
||||
documents_by_label[label].append(doc)
|
||||
|
||||
# Create subfolder for each document label
|
||||
for label, docs in documents_by_label.items():
|
||||
# Sanitize label for filesystem
|
||||
safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
|
||||
safe_label = safe_label.replace(' ', '_')
|
||||
if not safe_label:
|
||||
safe_label = "default"
|
||||
|
||||
label_folder = os.path.join(message_path, safe_label)
|
||||
_ensureDir(label_folder)
|
||||
|
||||
# Store each document
|
||||
for i, doc in enumerate(docs):
|
||||
# Create document metadata file
|
||||
doc_meta = {
|
||||
"id": doc.id,
|
||||
"messageId": doc.messageId,
|
||||
"fileId": doc.fileId,
|
||||
"fileName": doc.fileName,
|
||||
"fileSize": doc.fileSize,
|
||||
"mimeType": doc.mimeType,
|
||||
"roundNumber": doc.roundNumber,
|
||||
"taskNumber": doc.taskNumber,
|
||||
"actionNumber": doc.actionNumber,
|
||||
"actionId": doc.actionId
|
||||
}
|
||||
|
||||
doc_meta_file = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
|
||||
with open(doc_meta_file, "w", encoding="utf-8") as f:
|
||||
json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)
|
||||
|
||||
# Also store the actual file bytes next to metadata for debugging
|
||||
try:
|
||||
# Lazy import to avoid circular deps at module load
|
||||
from modules.interfaces import interfaceDbComponentObjects as comp
|
||||
componentInterface = comp.getInterface(currentUser)
|
||||
file_bytes = componentInterface.getFileData(doc.fileId)
|
||||
if file_bytes:
|
||||
# Build a safe filename preserving original name
|
||||
safe_name = doc.fileName or f"document_{i+1:03d}"
|
||||
# Avoid path traversal
|
||||
safe_name = os.path.basename(safe_name)
|
||||
doc_file_path = os.path.join(label_folder, f"document_{i+1:03d}_" + safe_name)
|
||||
with open(doc_file_path, "wb") as df:
|
||||
df.write(file_bytes)
|
||||
except Exception:
# Best-effort file dump - never break the main flow
pass
|
||||
|
||||
except Exception:
|
||||
# Silent fail - don't break main flow
|
||||
pass
|
||||
|
||||
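For orientation, the layout this produces for, say, roundNumber=2, taskNumber=1, actionNumber=3 and a documentsLabel of "Inputs" would look roughly like this (timestamp illustrative):

# {log_dir}/debug/messages/20250101-120000-000_m_2_1_3/
#     message.json
#     message_text.txt
#     Inputs/
#         document_001_metadata.json
#         document_001_report.pdf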
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class ProgressLogger:
|
|||
self.finishedOperations = set() # Track finished operations to avoid repeated warnings
|
||||
self.operationLogIds = {} # Map operationId to the log entry ID for parent reference
|
||||
|
||||
def startOperation(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentId: Optional[str] = None):
|
||||
def startOperation(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None):
|
||||
"""Start a new long-running operation.
|
||||
|
||||
Args:
|
||||
|
|
@ -32,7 +32,8 @@ class ProgressLogger:
|
|||
serviceName: Name of the service (e.g., "Extract", "AI", "Generate")
|
||||
actionName: Name of the action being performed
|
||||
context: Additional context information
|
||||
parentId: Optional parent log entry ID for hierarchical display
|
||||
parentOperationId: Optional parent operation ID (operationId of parent operation) for hierarchical display
|
||||
The parentId in ChatLog will be set to this parentOperationId
|
||||
"""
|
||||
# Remove from finished operations if it was there (for restart scenarios)
|
||||
self.finishedOperations.discard(operationId)
|
||||
|
|
@ -42,9 +43,10 @@ class ProgressLogger:
|
|||
'action': actionName,
|
||||
'context': context,
|
||||
'startTime': time.time(),
|
||||
'parentId': parentId
|
||||
'parentOperationId': parentOperationId # Store parent's operationId, not log entry ID
|
||||
}
|
||||
logId = self._logProgress(operationId, 0.0, f"Starting {actionName}", parentId=parentId)
|
||||
# Use parentOperationId as parentId in ChatLog (parentId should be the operationId of parent)
|
||||
logId = self._logProgress(operationId, 0.0, f"Starting {actionName}", parentOperationId=parentOperationId)
|
||||
if logId:
|
||||
self.operationLogIds[operationId] = logId
|
||||
logger.debug(f"Started operation {operationId}: {serviceName} - {actionName}")
|
||||
|
|
@ -70,9 +72,9 @@ class ProgressLogger:
|
|||
|
||||
op = self.activeOperations[operationId]
|
||||
context = f"{op['context']} {statusUpdate}".strip()
|
||||
# Use the same parentId as the start operation - all logs (start/update/finish) share the same parent
|
||||
parentId = op.get('parentId')
|
||||
self._logProgress(operationId, progress, context, parentId=parentId)
|
||||
# Use the same parentOperationId as the start operation - all logs (start/update/finish) share the same parent
|
||||
parentOperationId = op.get('parentOperationId')
|
||||
self._logProgress(operationId, progress, context, parentOperationId=parentOperationId)
|
||||
logger.debug(f"Updated operation {operationId}: {progress:.2f} - {context}")
|
||||
|
||||
def finishOperation(self, operationId: str, success: bool = True):
|
||||
|
|
@ -93,11 +95,11 @@ class ProgressLogger:
|
|||
finalProgress = 1.0 if success else 0.0
|
||||
status = "Done" if success else "Failed"
|
||||
|
||||
# Use the same parentId as the start operation - all logs (start/update/finish) share the same parent
|
||||
parentId = op.get('parentId')
|
||||
# Use the same parentOperationId as the start operation - all logs (start/update/finish) share the same parent
|
||||
parentOperationId = op.get('parentOperationId')
|
||||
|
||||
# Create completion log BEFORE removing from activeOperations
|
||||
self._logProgress(operationId, finalProgress, status, parentId=parentId)
|
||||
self._logProgress(operationId, finalProgress, status, parentOperationId=parentOperationId)
|
||||
|
||||
# Log completion time
|
||||
duration = time.time() - op['startTime']
|
||||
|
|
@ -111,14 +113,15 @@ class ProgressLogger:
|
|||
# Mark as finished to prevent repeated warnings from updateOperation calls
|
||||
self.finishedOperations.add(operationId)
|
||||
|
||||
def _logProgress(self, operationId: str, progress: float, status: str, parentId: Optional[str] = None) -> Optional[str]:
|
||||
def _logProgress(self, operationId: str, progress: float, status: str, parentOperationId: Optional[str] = None) -> Optional[str]:
|
||||
"""Create standardized ChatLog entry.
|
||||
|
||||
Args:
|
||||
operationId: Unique identifier for the operation
|
||||
progress: Progress value between 0.0 and 1.0
|
||||
status: Status information for the log entry
|
||||
parentId: Optional parent log entry ID for hierarchical display
|
||||
parentOperationId: Optional parent operation ID (operationId of parent operation) for hierarchical display
|
||||
This will be set as parentId in ChatLog (parentId = operationId of parent)
|
||||
|
||||
Returns:
|
||||
The created log entry ID, or None if creation failed
|
||||
|
|
@ -134,6 +137,7 @@ class ProgressLogger:
|
|||
logger.warning(f"Cannot log progress: no workflow available")
|
||||
return None
|
||||
|
||||
# parentId in ChatLog should be the operationId of the parent operation, not the log entry ID
|
||||
logData = {
|
||||
"workflowId": workflow.id,
|
||||
"message": message,
|
||||
|
|
@ -141,7 +145,7 @@ class ProgressLogger:
|
|||
"status": status,
|
||||
"progress": progress,
|
||||
"operationId": operationId,
|
||||
"parentId": parentId
|
||||
"parentId": parentOperationId # Set to parent's operationId, not log entry ID
|
||||
}
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
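To make the parentId-to-parentOperationId rename concrete, a hedged sketch of the intended hierarchy (the no-arg constructor and the updateOperation signature are inferred from the hunks above; in the application the logger is wired to a workflow):

progress = ProgressLogger()
progress.startOperation("extract-1", "Extract", "Extract documents")
# Children reference the parent's operationId, not its ChatLog entry ID
progress.startOperation("extract-1-doc-1", "Extract", "Document 1",
                        parentOperationId="extract-1")
progress.updateOperation("extract-1-doc-1", 0.5, "Halfway")
progress.finishOperation("extract-1-doc-1", success=True)
progress.finishOperation("extract-1", success=True)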
|
@ -130,8 +130,9 @@ class MethodAi(MethodBase):
|
|||
processDocumentsIndividually=True
|
||||
)
|
||||
|
||||
# Extract content using extraction service
|
||||
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
|
||||
# Extract content using extraction service with hierarchical progress logging
|
||||
# Pass operationId for per-document progress tracking
|
||||
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
|
||||
|
||||
# Combine all ContentParts from all extracted results
|
||||
contentParts = []
|
||||
|
|
@ -172,11 +173,19 @@ class MethodAi(MethodBase):
|
|||
if aiResponse.documents and len(aiResponse.documents) > 0:
|
||||
action_documents = []
|
||||
for doc in aiResponse.documents:
|
||||
validationMetadata = {
|
||||
"actionType": "ai.process",
|
||||
"resultType": normalized_result_type,
|
||||
"outputFormat": output_format,
|
||||
"hasDocuments": True,
|
||||
"documentCount": len(aiResponse.documents)
|
||||
}
|
||||
action_documents.append(ActionDocument(
|
||||
documentName=doc.documentName,
|
||||
documentData=doc.documentData,
|
||||
mimeType=doc.mimeType or output_mime_type,
|
||||
sourceJson=getattr(doc, 'sourceJson', None) # Preserve source JSON for structure validation
|
||||
sourceJson=getattr(doc, 'sourceJson', None), # Preserve source JSON for structure validation
|
||||
validationMetadata=validationMetadata
|
||||
))
|
||||
|
||||
final_documents = action_documents
|
||||
|
|
@ -188,10 +197,18 @@ class MethodAi(MethodBase):
|
|||
extension=extension,
|
||||
action_name="result"
|
||||
)
|
||||
validationMetadata = {
|
||||
"actionType": "ai.process",
|
||||
"resultType": normalized_result_type,
|
||||
"outputFormat": output_format,
|
||||
"hasDocuments": False,
|
||||
"contentType": "text"
|
||||
}
|
||||
action_document = ActionDocument(
|
||||
documentName=meaningful_name,
|
||||
documentData=aiResponse.content,
|
||||
mimeType=output_mime_type
|
||||
mimeType=output_mime_type,
|
||||
validationMetadata=validationMetadata
|
||||
)
|
||||
final_documents = [action_document]
|
||||
|
||||
|
|
@ -214,138 +231,6 @@ class MethodAi(MethodBase):
|
|||
)
|
||||
|
||||
|
||||
@action
|
||||
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Extract content from documents (separate from AI calls).
|
||||
|
||||
This action performs pure content extraction without AI processing.
|
||||
The extracted ContentParts can then be used by subsequent AI processing actions.
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to extract content from.
|
||||
- extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing ContentExtracted objects
|
||||
- ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
|
||||
"""
|
||||
try:
|
||||
# Init progress logger
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
operationId = f"ai_extract_{workflowId}_{int(time.time())}"
|
||||
|
||||
# Extract documentList from parameters dict
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
documentListParam = parameters.get("documentList")
|
||||
if not documentListParam:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
# Convert to DocumentReferenceList if needed
|
||||
if isinstance(documentListParam, DocumentReferenceList):
|
||||
documentList = documentListParam
|
||||
elif isinstance(documentListParam, str):
|
||||
documentList = DocumentReferenceList.from_string_list([documentListParam])
|
||||
elif isinstance(documentListParam, list):
|
||||
documentList = DocumentReferenceList.from_string_list(documentListParam)
|
||||
else:
|
||||
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
|
||||
|
||||
# Start progress tracking
|
||||
self.services.chat.progressLogStart(
|
||||
operationId,
|
||||
"Extracting content from documents",
|
||||
"Content Extraction",
|
||||
f"Documents: {len(documentList.references)}"
|
||||
)
|
||||
|
||||
# Get ChatDocuments from documentList
|
||||
self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
|
||||
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
|
||||
if not chatDocuments:
|
||||
self.services.chat.progressLogFinish(operationId, False)
|
||||
return ActionResult.isFailure(error="No documents found in documentList")
|
||||
|
||||
logger.info(f"Extracting content from {len(chatDocuments)} documents")
|
||||
|
||||
# Prepare extraction options
|
||||
self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
|
||||
extractionOptionsParam = parameters.get("extractionOptions")
|
||||
|
||||
# Convert dict to ExtractionOptions object if needed, or create defaults
|
||||
if extractionOptionsParam:
|
||||
if isinstance(extractionOptionsParam, dict):
|
||||
# Convert dict to ExtractionOptions object
|
||||
extractionOptions = ExtractionOptions(**extractionOptionsParam)
|
||||
elif isinstance(extractionOptionsParam, ExtractionOptions):
|
||||
extractionOptions = extractionOptionsParam
|
||||
else:
|
||||
# Invalid type, use defaults
|
||||
extractionOptions = None
|
||||
else:
|
||||
extractionOptions = None
|
||||
|
||||
# If extractionOptions not provided, create defaults
|
||||
if not extractionOptions:
|
||||
# Default extraction options for pure content extraction (no AI processing)
|
||||
extractionOptions = ExtractionOptions(
|
||||
prompt="Extract all content from the document",
|
||||
mergeStrategy=MergeStrategy(
|
||||
mergeType="concatenate",
|
||||
groupBy="typeGroup",
|
||||
orderBy="id"
|
||||
),
|
||||
processDocumentsIndividually=True
|
||||
)
|
||||
|
||||
# Get parent log ID for document-level operations
|
||||
parentLogId = self.services.chat.getOperationLogId(operationId)
|
||||
|
||||
# Call extraction service
|
||||
self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
|
||||
self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
|
||||
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
|
||||
|
||||
# Build ActionDocuments from ContentExtracted results
|
||||
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
|
||||
actionDocuments = []
|
||||
# Map extracted results back to original documents by index (results are in same order)
|
||||
for i, extracted in enumerate(extractedResults):
|
||||
# Get original document name if available
|
||||
originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
|
||||
if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
|
||||
# Use original filename with "extracted_" prefix
|
||||
baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
|
||||
documentName = f"{baseName}_extracted_{extracted.id}.json"
|
||||
else:
|
||||
# Fallback to generic name with index
|
||||
documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
|
||||
|
||||
# Store ContentExtracted object in ActionDocument.documentData
|
||||
actionDoc = ActionDocument(
|
||||
documentName=documentName,
|
||||
documentData=extracted, # ContentExtracted object
|
||||
mimeType="application/json"
|
||||
)
|
||||
actionDocuments.append(actionDoc)
|
||||
|
||||
self.services.chat.progressLogFinish(operationId, True)
|
||||
|
||||
return ActionResult.isSuccess(documents=actionDocuments)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in content extraction: {str(e)}")
|
||||
|
||||
# Complete progress tracking with failure
|
||||
try:
|
||||
self.services.chat.progressLogFinish(operationId, False)
|
||||
except:
|
||||
pass # Don't fail on progress logging errors
|
||||
|
||||
return ActionResult.isFailure(error=str(e))
|
||||
|
||||
|
||||
@action
|
||||
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
|
|
@ -420,10 +305,20 @@ class MethodAi(MethodBase):
|
|||
)
|
||||
|
||||
from modules.datamodels.datamodelChat import ActionDocument
|
||||
validationMetadata = {
|
||||
"actionType": "ai.webResearch",
|
||||
"prompt": prompt,
|
||||
"urlList": parameters.get("urlList", []),
|
||||
"country": parameters.get("country"),
|
||||
"language": parameters.get("language"),
|
||||
"researchDepth": parameters.get("researchDepth", "general"),
|
||||
"resultFormat": "json"
|
||||
}
|
||||
actionDocument = ActionDocument(
|
||||
documentName=meaningfulName,
|
||||
documentData=result,
|
||||
mimeType="application/json"
|
||||
mimeType="application/json",
|
||||
validationMetadata=validationMetadata
|
||||
)
|
||||
|
||||
return ActionResult.isSuccess(documents=[actionDocument])
|
||||
|
|
@ -622,11 +517,19 @@ class MethodAi(MethodBase):
|
|||
rendered_content = self._applyCsvOptions(rendered_content, renderOptions)
|
||||
|
||||
from modules.datamodels.datamodelChat import ActionDocument
|
||||
validationMetadata = {
|
||||
"actionType": "ai.convert",
|
||||
"inputFormat": normalizedInputFormat,
|
||||
"outputFormat": normalizedOutputFormat,
|
||||
"hasSourceJson": True,
|
||||
"conversionType": "direct_rendering"
|
||||
}
|
||||
actionDoc = ActionDocument(
|
||||
documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
|
||||
documentData=rendered_content,
|
||||
mimeType=mime_type,
|
||||
sourceJson=jsonData # Preserve source JSON for structure validation
|
||||
sourceJson=jsonData, # Preserve source JSON for structure validation
|
||||
validationMetadata=validationMetadata
|
||||
)
|
||||
|
||||
return ActionResult.isSuccess(documents=[actionDoc])
|
||||
|
|
@ -707,171 +610,6 @@ class MethodAi(MethodBase):
|
|||
return output.getvalue()
|
||||
|
||||
|
||||
@action
|
||||
async def reformat(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Reformat/transform documents with specific transformation rules (e.g., extract arrays, reshape data, apply custom formatting).
|
||||
- Input requirements: documentList (required); inputFormat and outputFormat (required); transformationRules (optional).
|
||||
- Output format: Document in target format with applied transformation rules.
|
||||
- CRITICAL: If input is already in standardized JSON format, uses automatic rendering system with transformation rules.
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to reformat.
|
||||
- inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
|
||||
- outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
|
||||
- transformationRules (str, optional): Specific transformation instructions (e.g., "Extract prime numbers array and format as CSV with 10 columns per row").
|
||||
- columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
|
||||
- totalRows (int, optional): For CSV output, total number of rows to create. Default: auto-detect.
|
||||
- delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
|
||||
- includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
|
||||
- language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
inputFormat = parameters.get("inputFormat")
|
||||
outputFormat = parameters.get("outputFormat")
|
||||
if not inputFormat or not outputFormat:
|
||||
return ActionResult.isFailure(error="inputFormat and outputFormat are required")
|
||||
|
||||
transformationRules = parameters.get("transformationRules")
|
||||
columnsPerRow = parameters.get("columnsPerRow")
|
||||
totalRows = parameters.get("totalRows")
|
||||
delimiter = parameters.get("delimiter", ",")
|
||||
includeHeader = parameters.get("includeHeader", True)
|
||||
language = parameters.get("language", "en")
|
||||
|
||||
# Normalize formats (remove leading dot if present)
|
||||
normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
|
||||
normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()
|
||||
|
||||
# Get documents
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
if isinstance(documentList, DocumentReferenceList):
|
||||
docRefList = documentList
|
||||
elif isinstance(documentList, list):
|
||||
docRefList = DocumentReferenceList.from_string_list(documentList)
|
||||
else:
|
||||
docRefList = DocumentReferenceList.from_string_list([documentList])
|
||||
|
||||
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
|
||||
if not chatDocuments:
|
||||
return ActionResult.isFailure(error="No documents found in documentList")
|
||||
|
||||
# Check if input is standardized JSON format - if so, use direct rendering with transformation
|
||||
if normalizedInputFormat == "json" and len(chatDocuments) == 1:
|
||||
try:
|
||||
import json
|
||||
doc = chatDocuments[0]
|
||||
# ChatDocument doesn't have documentData - need to load file content using fileId
|
||||
docBytes = self.services.chat.getFileData(doc.fileId)
|
||||
if not docBytes:
|
||||
raise ValueError(f"No file data found for fileId={doc.fileId}")
|
||||
|
||||
# Decode bytes to string
|
||||
docData = docBytes.decode('utf-8')
|
||||
|
||||
# Try to parse as JSON
|
||||
if isinstance(docData, str):
|
||||
jsonData = json.loads(docData)
|
||||
elif isinstance(docData, dict):
|
||||
jsonData = docData
|
||||
else:
|
||||
jsonData = None
|
||||
|
||||
# Check if it's standardized JSON format (has "documents" or "sections")
|
||||
if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)):
|
||||
# Apply transformation rules if provided
|
||||
if transformationRules:
|
||||
# Use AI to apply transformation rules to JSON
|
||||
aiPrompt = f"Apply the following transformation rules to the JSON document: {transformationRules}"
|
||||
if normalizedOutputFormat == "csv":
|
||||
aiPrompt += f" Output format: CSV with delimiter '{delimiter}'"
|
||||
if columnsPerRow:
|
||||
aiPrompt += f", {columnsPerRow} columns per row"
|
||||
if totalRows:
|
||||
aiPrompt += f", {totalRows} total rows"
|
||||
if not includeHeader:
|
||||
aiPrompt += ", no header row"
|
||||
|
||||
# Use process to apply transformation
|
||||
return await self.process({
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList,
|
||||
"resultType": normalizedOutputFormat
|
||||
})
|
||||
else:
|
||||
# No transformation rules - use direct rendering
|
||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||
generationService = GenerationService(self.services)
|
||||
|
||||
# Ensure format is "documents" array
|
||||
if "documents" not in jsonData:
|
||||
jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}
|
||||
|
||||
# Get title
|
||||
title = jsonData.get("metadata", {}).get("title", doc.documentName or "Reformatted Document")
|
||||
|
||||
# Render with options
|
||||
renderOptions = {}
|
||||
if normalizedOutputFormat == "csv":
|
||||
renderOptions["delimiter"] = delimiter
|
||||
renderOptions["columnsPerRow"] = columnsPerRow
|
||||
renderOptions["includeHeader"] = includeHeader
|
||||
|
||||
rendered_content, mime_type = await generationService.renderReport(
|
||||
jsonData, normalizedOutputFormat, title, None, None
|
||||
)
|
||||
|
||||
# Apply CSV options if needed
|
||||
if normalizedOutputFormat == "csv" and renderOptions:
|
||||
rendered_content = self._applyCsvOptions(rendered_content, renderOptions)
|
||||
|
||||
from modules.datamodels.datamodelChat import ActionDocument
|
||||
actionDoc = ActionDocument(
|
||||
documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
|
||||
documentData=rendered_content,
|
||||
mimeType=mime_type,
|
||||
sourceJson=jsonData # Preserve source JSON for structure validation
|
||||
)
|
||||
|
||||
return ActionResult.isSuccess(documents=[actionDoc])
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Direct rendering failed, falling back to AI reformatting: {str(e)}")
|
||||
# Fall through to AI-based reformatting
|
||||
|
||||
# Fallback: Use AI for reformatting with transformation rules
|
||||
aiPrompt = f"Reformat the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."
|
||||
|
||||
if transformationRules:
|
||||
aiPrompt += f" Apply the following transformation rules: {transformationRules}"
|
||||
|
||||
if normalizedOutputFormat == "csv":
|
||||
aiPrompt += f" Use '{delimiter}' as the delimiter character."
|
||||
if columnsPerRow:
|
||||
aiPrompt += f" Format the output with {columnsPerRow} columns per row."
|
||||
if totalRows:
|
||||
aiPrompt += f" Create exactly {totalRows} rows total."
|
||||
if not includeHeader:
|
||||
aiPrompt += " Do not include a header row."
|
||||
else:
|
||||
aiPrompt += " Include a header row with column names."
|
||||
|
||||
if language and language != "en":
|
||||
aiPrompt += f" Use language: {language}."
|
||||
|
||||
aiPrompt += " Preserve all data and ensure accurate transformation. Maintain data integrity."
|
||||
|
||||
return await self.process({
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList,
|
||||
"resultType": normalizedOutputFormat
|
||||
})
|
||||
|
||||
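A hypothetical invocation of the reformat action from an async caller (the document reference string and parameter values are illustrative):

result = await methodAi.reformat({
    "documentList": ["docItem:round1/primes.json"],
    "inputFormat": "json",
    "outputFormat": "csv",
    "transformationRules": "Extract the prime numbers array",
    "columnsPerRow": 10,
    "delimiter": ";",
    "includeHeader": False,
})
if result.success:
    print(result.documents[0].documentName)  # e.g. primes.csv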
|
||||
@action
|
||||
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
|
|
@ -955,160 +693,10 @@ class MethodAi(MethodBase):
|
|||
})
|
||||
|
||||
|
||||
@action
|
||||
async def extractTables(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Extract tables from documents, preserving structure and data.
|
||||
- Input requirements: documentList (required); optional tableFormat.
|
||||
- Output format: JSON by default (structured table data), or CSV/XLSX if specified.
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to extract tables from.
|
||||
- tableFormat (str, optional): Output format for tables - json, csv, or xlsx. Default: json.
|
||||
- includeHeaders (bool, optional): Include table headers. Default: True.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
tableFormat = parameters.get("tableFormat", "json")
|
||||
includeHeaders = parameters.get("includeHeaders", True)
|
||||
|
||||
# Map tableFormat to resultType
|
||||
formatMap = {
|
||||
"json": "json",
|
||||
"csv": "csv",
|
||||
"xlsx": "xlsx",
|
||||
"xls": "xlsx"
|
||||
}
|
||||
resultType = formatMap.get(tableFormat.lower(), "json")
|
||||
|
||||
aiPrompt = "Extract all tables from the provided document(s)."
|
||||
if includeHeaders:
|
||||
aiPrompt += " Include table headers and preserve the table structure."
|
||||
else:
|
||||
aiPrompt += " Extract table data without headers."
|
||||
aiPrompt += " Maintain accurate data types (numbers as numbers, dates as dates, etc.) and preserve all table relationships."
|
||||
|
||||
if resultType == "json":
|
||||
aiPrompt += " Structure each table as a JSON object with headers and rows as arrays."
|
||||
elif resultType == "csv":
|
||||
aiPrompt += " Output each table as CSV format with proper comma separation."
|
||||
elif resultType == "xlsx":
|
||||
aiPrompt += " Structure the output as an Excel spreadsheet with tables properly formatted."
|
||||
|
||||
return await self.process({
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList,
|
||||
"resultType": resultType
|
||||
})
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Content Generation Wrappers
|
||||
# Content Generation Wrapper
|
||||
# ============================================================================
|
||||
|
||||
@action
|
||||
async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Generate comprehensive reports from input documents/data with analysis and insights.
|
||||
- Input requirements: documentList (optional, can generate from scratch); optional reportType, sections.
|
||||
- Output format: Document in specified format (default: docx).
|
||||
|
||||
Parameters:
|
||||
- documentList (list, optional): Input documents/data to base the report on.
|
||||
- reportType (str, optional): Type of report - summary, analysis, executive, detailed. Default: analysis.
|
||||
- sections (list, optional): Specific sections to include (e.g., ["introduction", "findings", "recommendations"]).
|
||||
- title (str, optional): Report title.
|
||||
- resultType (str, optional): Output format (docx, pdf, md, etc.). Default: docx.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
reportType = parameters.get("reportType", "analysis")
|
||||
sections = parameters.get("sections", [])
|
||||
title = parameters.get("title")
|
||||
resultType = parameters.get("resultType", "docx")
|
||||
|
||||
reportTypeInstructions = {
|
||||
"summary": "Create a summary report with key highlights and main points.",
|
||||
"analysis": "Create an analytical report with insights, findings, and detailed examination.",
|
||||
"executive": "Create an executive summary report suitable for senior management with key insights and recommendations.",
|
||||
"detailed": "Create a comprehensive detailed report covering all aspects with in-depth analysis."
|
||||
}
|
||||
|
||||
aiPrompt = f"Generate a {reportType} report."
|
||||
if title:
|
||||
aiPrompt += f" Title: {title}."
|
||||
aiPrompt += f" {reportTypeInstructions.get(reportType.lower(), reportTypeInstructions['analysis'])}"
|
||||
|
||||
if sections:
|
||||
sectionsStr = ", ".join(sections)
|
||||
aiPrompt += f" Include the following sections: {sectionsStr}."
|
||||
else:
|
||||
aiPrompt += " Include standard report sections such as introduction, main content, analysis, findings, and conclusions."
|
||||
|
||||
if documentList:
|
||||
aiPrompt += " Base the report on the provided input documents, analyzing and synthesizing the information."
|
||||
else:
|
||||
aiPrompt += " Create a professional, well-structured report."
|
||||
|
||||
processParams = {
|
||||
"aiPrompt": aiPrompt,
|
||||
"resultType": resultType
|
||||
}
|
||||
if documentList:
|
||||
processParams["documentList"] = documentList
|
||||
|
||||
return await self.process(processParams)
|
||||
|
||||
|
||||
@action
|
||||
async def generateChart(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Generate charts/graphs from data in documents or structured data.
|
||||
- Input requirements: documentList (required); optional chartType, title, labels.
|
||||
- Output format: Image (png or jpg).
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Documents containing data to visualize (CSV, Excel, JSON, etc.).
|
||||
- chartType (str, optional): Type of chart - bar, line, pie, scatter, area, etc. Default: bar.
|
||||
- title (str, optional): Chart title.
|
||||
- xAxisLabel (str, optional): X-axis label.
|
||||
- yAxisLabel (str, optional): Y-axis label.
|
||||
- resultType (str, optional): Image format (png or jpg). Default: png.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
chartType = parameters.get("chartType", "bar")
|
||||
title = parameters.get("title")
|
||||
xAxisLabel = parameters.get("xAxisLabel")
|
||||
yAxisLabel = parameters.get("yAxisLabel")
|
||||
resultType = parameters.get("resultType", "png")
|
||||
|
||||
# Ensure resultType is an image format
|
||||
if resultType.lower() not in ["png", "jpg", "jpeg"]:
|
||||
resultType = "png"
|
||||
|
||||
aiPrompt = f"Generate a {chartType} chart from the provided data."
|
||||
if title:
|
||||
aiPrompt += f" Chart title: {title}."
|
||||
if xAxisLabel:
|
||||
aiPrompt += f" X-axis label: {xAxisLabel}."
|
||||
if yAxisLabel:
|
||||
aiPrompt += f" Y-axis label: {yAxisLabel}."
|
||||
aiPrompt += " Create a clear, professional chart with appropriate labels, legends, and formatting. Ensure the chart is visually appealing and easy to read."
|
||||
|
||||
return await self.process({
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList,
|
||||
"resultType": resultType
|
||||
})
|
||||
|
||||
|
||||
@action
|
||||
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
|
|
@ -1146,137 +734,3 @@ class MethodAi(MethodBase):
|
|||
processParams["documentList"] = documentList
|
||||
|
||||
return await self.process(processParams)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Analysis & Comparison Wrappers
|
||||
# ============================================================================
|
||||
|
||||
@action
|
||||
async def analyzeDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Analyze documents and find insights, patterns, trends, and key information.
|
||||
- Input requirements: documentList (required); optional analysisType, focus.
|
||||
- Output format: Analysis report in specified format (default: txt).
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document(s) to analyze.
|
||||
- analysisType (str, optional): Type of analysis - general, financial, technical, sentiment, etc. Default: general.
|
||||
- focus (str, optional): Specific aspect to focus on (e.g., "trends", "risks", "opportunities").
|
||||
- resultType (str, optional): Output format (txt, md, docx, json, etc.). Default: txt.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
analysisType = parameters.get("analysisType", "general")
|
||||
focus = parameters.get("focus")
|
||||
resultType = parameters.get("resultType", "txt")
|
||||
|
||||
aiPrompt = f"Analyze the provided document(s) and find insights, patterns, and key information."
|
||||
aiPrompt += f" Perform a {analysisType} analysis."
|
||||
if focus:
|
||||
aiPrompt += f" Focus specifically on: {focus}."
|
||||
aiPrompt += " Identify trends, important findings, relationships, and provide actionable insights. Present the analysis in a clear, structured format."
|
||||
|
||||
return await self.process({
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList,
|
||||
"resultType": resultType
|
||||
})
|
||||
|
||||
|
||||
@action
|
||||
async def compareDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Compare multiple documents and identify differences, similarities, and changes.
|
||||
- Input requirements: documentList (required, should contain 2+ documents); optional comparisonType, focus.
|
||||
- Output format: Comparison report in specified format (default: txt).
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Two or more documents to compare.
|
||||
- comparisonType (str, optional): Type of comparison - differences, similarities, changes, full. Default: full.
|
||||
- focus (str, optional): Specific aspect to focus on (e.g., "content", "structure", "data", "formatting").
|
||||
- resultType (str, optional): Output format (txt, md, docx, json, etc.). Default: txt.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
if isinstance(documentList, str):
|
||||
documentList = [documentList]
|
||||
|
||||
if len(documentList) < 2:
|
||||
return ActionResult.isFailure(error="At least 2 documents are required for comparison")
|
||||
|
||||
comparisonType = parameters.get("comparisonType", "full")
|
||||
focus = parameters.get("focus")
|
||||
resultType = parameters.get("resultType", "txt")
|
||||
|
||||
comparisonInstructions = {
|
||||
"differences": "Focus on identifying and highlighting all differences between the documents.",
|
||||
"similarities": "Focus on identifying commonalities, shared content, and similarities.",
|
||||
"changes": "Identify what has changed between versions, what was added, removed, or modified.",
|
||||
"full": "Provide a comprehensive comparison including both differences and similarities."
|
||||
}
|
||||
|
||||
aiPrompt = f"Compare the provided documents."
|
||||
aiPrompt += f" {comparisonInstructions.get(comparisonType.lower(), comparisonInstructions['full'])}"
|
||||
if focus:
|
||||
aiPrompt += f" Focus specifically on: {focus}."
|
||||
aiPrompt += " Present the comparison in a clear, structured format that makes differences and similarities easy to understand."
|
||||
|
||||
return await self.process({
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList,
|
||||
"resultType": resultType
|
||||
})
|
||||
|
||||
|
||||
@action
|
||||
async def validateData(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Validate data quality, structure, completeness, and correctness in documents/data files.
|
||||
- Input requirements: documentList (required); optional validationRules, schema.
|
||||
- Output format: Validation report in JSON or text format (default: json).
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Documents/data files to validate.
|
||||
- validationRules (list, optional): Specific validation rules to check (e.g., ["required_fields", "data_types", "ranges"]).
|
||||
- schema (dict, optional): Expected data schema/structure to validate against.
|
||||
- resultType (str, optional): Output format (json, txt, md, etc.). Default: json.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
validationRules = parameters.get("validationRules", [])
|
||||
schema = parameters.get("schema")
|
||||
resultType = parameters.get("resultType", "json")
|
||||
|
||||
aiPrompt = "Validate the data quality, structure, completeness, and correctness in the provided documents."
|
||||
|
||||
if validationRules:
|
||||
rulesStr = ", ".join(validationRules)
|
||||
aiPrompt += f" Apply the following validation rules: {rulesStr}."
|
||||
else:
|
||||
aiPrompt += " Check for data completeness, correct data types, required fields, data consistency, and any anomalies or errors."
|
||||
|
||||
if schema:
|
||||
import json
|
||||
schemaStr = json.dumps(schema, indent=2)
|
||||
aiPrompt += f" Validate against the following expected schema: {schemaStr}."
|
||||
|
||||
if resultType == "json":
|
||||
aiPrompt += " Provide the validation results as structured JSON with validation status, errors, warnings, and details for each check."
|
||||
else:
|
||||
aiPrompt += " Provide a detailed validation report listing all findings, errors, warnings, and pass/fail status for each validation check."
|
||||
|
||||
return await self.process({
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList,
|
||||
"resultType": resultType
|
||||
})
|
||||
|
|
|
|||
|
|
@ -18,6 +18,19 @@ def action(func):
|
|||
- success: bool
|
||||
- documents: List[ActionDocument]
|
||||
- error: str (if success=False)
|
||||
|
||||
REQUIRED: All ActionDocument instances MUST include validationMetadata for content validation
|
||||
and refinement. Without validationMetadata, results cannot be approved.
|
||||
|
||||
Example validationMetadata structure:
|
||||
validationMetadata = {
|
||||
"actionType": "moduleName.actionName",
|
||||
"param1": value1,
|
||||
"param2": value2,
|
||||
# ... other relevant parameters for validation
|
||||
}
|
||||
|
||||
See MethodBase._createValidationMetadata() for a helper method to create standard metadata.
|
||||
"""
|
||||
@wraps(func)
|
||||
async def wrapper(self, parameters: Dict[str, Any], *args, **kwargs):
|
||||
|
|
@ -26,7 +39,14 @@ def action(func):
|
|||
return wrapper
|
||||
|
||||
class MethodBase:
|
||||
"""Base class for all methods"""
|
||||
"""Base class for all methods
|
||||
|
||||
IMPORTANT: All actions that return ActionDocument instances MUST include validationMetadata.
|
||||
This metadata is required for content validation and refinement. Without it, results cannot
|
||||
be approved by the validation system.
|
||||
|
||||
Use _createValidationMetadata() helper method to create standardized metadata structures.
|
||||
"""
|
||||
|
||||
def __init__(self, services: Any):
|
||||
"""Initialize method with services object"""
|
||||
|
|
@ -168,6 +188,44 @@ class MethodBase:
|
|||
else:
|
||||
return str(type_annotation)
|
||||
|
||||
def _createValidationMetadata(self, actionName: str, **kwargs) -> Dict[str, Any]:
|
||||
"""
|
||||
Helper method to create standardized validationMetadata for ActionDocument instances.
|
||||
|
||||
This method ensures all actions include the required validationMetadata structure
|
||||
for content validation and refinement. Without metadata, results cannot be approved.
|
||||
|
||||
Args:
|
||||
actionName: Name of the action (e.g., "readEmails", "uploadDocument")
|
||||
**kwargs: Additional action-specific metadata fields
|
||||
|
||||
Returns:
|
||||
Dictionary with validationMetadata structure including:
|
||||
- actionType: Full action identifier (moduleName.actionName)
|
||||
- All provided kwargs as additional metadata fields
|
||||
|
||||
Example:
|
||||
validationMetadata = self._createValidationMetadata(
|
||||
"readEmails",
|
||||
connectionReference=connectionReference,
|
||||
folder=folder,
|
||||
limit=limit,
|
||||
emailCount=len(emails)
|
||||
)
|
||||
|
||||
ActionDocument(
|
||||
documentName="emails.json",
|
||||
documentData=json.dumps(data),
|
||||
mimeType="application/json",
|
||||
validationMetadata=validationMetadata # REQUIRED
|
||||
)
|
||||
"""
|
||||
metadata = {
|
||||
"actionType": f"{self.name}.{actionName}"
|
||||
}
|
||||
metadata.update(kwargs)
|
||||
return metadata
|
||||
|
||||
def _generateMeaningfulFileName(self, base_name: str, extension: str, workflow_context: Dict[str, Any] = None, action_name: str = None) -> str:
|
||||
"""
|
||||
Generate a meaningful file name with round/task/action information.
|
||||
|
|
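A minimal sketch of a method that satisfies the decorator's contract, assuming the imports already used in this module (MethodBase, action, ActionResult, ActionDocument); the class and action names are hypothetical:

class MethodDemo(MethodBase):
    def __init__(self, services):
        super().__init__(services)
        self.name = "demo"

    @action
    async def echo(self, parameters: Dict[str, Any]) -> ActionResult:
        text = parameters.get("text", "")
        doc = ActionDocument(
            documentName="echo.txt",
            documentData=text,
            mimeType="text/plain",
            # REQUIRED for the validation system to approve the result
            validationMetadata=self._createValidationMetadata(
                "echo", textLength=len(text)
            ),
        )
        return ActionResult.isSuccess(documents=[doc])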
|
|||
351
modules/workflows/methods/methodContext.py
Normal file
|
|
@ -0,0 +1,351 @@
|
|||
"""
|
||||
Context and workflow information method module.
|
||||
Handles workflow context queries and document indexing.
|
||||
"""
|
||||
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime, UTC
|
||||
|
||||
from modules.workflows.methods.methodBase import MethodBase, action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MethodContext(MethodBase):
|
||||
"""Context and workflow information methods."""
|
||||
|
||||
def __init__(self, services):
|
||||
super().__init__(services)
|
||||
self.name = "context"
|
||||
self.description = "Context and workflow information methods"
|
||||
|
||||
@action
|
||||
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
|
||||
- Input requirements: No input documents required. Optional resultType parameter.
|
||||
- Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
|
||||
|
||||
Parameters:
|
||||
- resultType (str, optional): Output format (json, txt, md). Default: json.
|
||||
"""
|
||||
try:
|
||||
workflow = self.services.workflow
|
||||
if not workflow:
|
||||
return ActionResult.isFailure(
|
||||
error="No workflow available"
|
||||
)
|
||||
|
||||
resultType = parameters.get("resultType", "json").lower().strip().lstrip('.')
|
||||
|
||||
# Get available documents index from chat service
|
||||
documentsIndex = self.services.chat.getAvailableDocuments(workflow)
|
||||
|
||||
if not documentsIndex or documentsIndex in ("No documents available", "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."):
|
||||
# Return empty index structure
|
||||
if resultType == "json":
|
||||
indexData = {
|
||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
||||
"totalDocuments": 0,
|
||||
"rounds": [],
|
||||
"documentReferences": []
|
||||
}
|
||||
indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
|
||||
else:
|
||||
indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
|
||||
else:
|
||||
# Parse the document index string to extract structured information
|
||||
indexData = self._parseDocumentIndex(documentsIndex, workflow)
|
||||
|
||||
if resultType == "json":
|
||||
indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
|
||||
elif resultType == "md":
|
||||
indexContent = self._formatAsMarkdown(indexData)
|
||||
else: # txt
|
||||
indexContent = self._formatAsText(indexData, documentsIndex)
|
||||
|
||||
# Generate meaningful filename
|
||||
workflowContext = self.services.chat.getWorkflowContext()
|
||||
filename = self._generateMeaningfulFileName(
|
||||
"document_index",
|
||||
resultType if resultType in ["json", "txt", "md"] else "json",
|
||||
workflowContext,
|
||||
"getDocumentIndex"
|
||||
)
|
||||
|
||||
validationMetadata = {
|
||||
"actionType": "context.getDocumentIndex",
|
||||
"resultType": resultType,
|
||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
||||
"totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
|
||||
}
|
||||
|
||||
# Create ActionDocument
|
||||
document = ActionDocument(
|
||||
documentName=filename,
|
||||
documentData=indexContent,
|
||||
mimeType="application/json" if resultType == "json" else "text/plain",
|
||||
validationMetadata=validationMetadata
|
||||
)
|
||||
|
||||
return ActionResult.isSuccess(documents=[document])
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating document index: {str(e)}")
|
||||
return ActionResult.isFailure(
|
||||
error=f"Failed to generate document index: {str(e)}"
|
||||
)
|
||||
|
||||
def _parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]:
|
||||
"""Parse the document index string into structured data."""
|
||||
try:
|
||||
indexData = {
|
||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
||||
"generatedAt": datetime.now(UTC).isoformat(),
|
||||
"totalDocuments": 0,
|
||||
"rounds": [],
|
||||
"documentReferences": []
|
||||
}
|
||||
|
||||
# Extract document references from the index string
|
||||
lines = documentsIndex.split('\n')
|
||||
currentRound = None
|
||||
currentDocList = None
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
# Check for round headers
|
||||
if "Current round documents:" in line:
|
||||
currentRound = "current"
|
||||
continue
|
||||
elif "Past rounds documents:" in line:
|
||||
currentRound = "past"
|
||||
continue
|
||||
|
||||
# Check for document list references (docList:...)
|
||||
if line.startswith("- docList:"):
|
||||
docListRef = line.replace("- docList:", "").strip()
|
||||
currentDocList = {
|
||||
"reference": docListRef,
|
||||
"round": currentRound,
|
||||
"documents": []
|
||||
}
|
||||
indexData["rounds"].append(currentDocList)
|
||||
continue
|
||||
|
||||
# Check for individual document references (docItem:...)
|
||||
if line.startswith(" - docItem:") or line.startswith("- docItem:"):
|
||||
docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip()
|
||||
indexData["documentReferences"].append({
|
||||
"reference": docItemRef,
|
||||
"round": currentRound,
|
||||
"docList": currentDocList["reference"] if currentDocList else None
|
||||
})
|
||||
indexData["totalDocuments"] += 1
|
||||
if currentDocList:
|
||||
currentDocList["documents"].append(docItemRef)
|
||||
|
||||
return indexData
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing document index: {str(e)}")
|
||||
return {
|
||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
||||
"error": f"Failed to parse document index: {str(e)}",
|
||||
"rawIndex": documentsIndex
|
||||
}
|
||||
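A hedged example of the index text this parser expects (the header and prefix strings come from the checks above; the references are hypothetical):

sampleIndex = (
    "Current round documents:\n"
    "- docList:round2/tasklist\n"
    "  - docItem:round2/report.docx\n"
    "Past rounds documents:\n"
    "- docList:round1/tasklist\n"
    "  - docItem:round1/input.csv\n"
)
# _parseDocumentIndex(sampleIndex, workflow) yields totalDocuments == 2,
# two entries in "rounds", and two "documentReferences" tagged with the
# rounds "current" and "past" respectively.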
|
||||
def _formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
|
||||
"""Format document index as Markdown."""
|
||||
try:
|
||||
md = f"# Document Index\n\n"
|
||||
md += f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n"
|
||||
md += f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n"
|
||||
md += f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n"
|
||||
|
||||
if indexData.get('rounds'):
|
||||
md += "## Documents by Round\n\n"
|
||||
for roundInfo in indexData['rounds']:
|
||||
roundLabel = (roundInfo.get('round') or 'unknown').title()
|
||||
md += f"### {roundLabel} Round\n\n"
|
||||
md += f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n"
|
||||
if roundInfo.get('documents'):
|
||||
md += "**Documents:**\n\n"
|
||||
for docRef in roundInfo['documents']:
|
||||
md += f"- `{docRef}`\n"
|
            md += "\n"

            if indexData.get('documentReferences'):
                md += "## All Document References\n\n"
                for docRef in indexData['documentReferences']:
                    md += f"- `{docRef.get('reference', 'unknown')}`\n"

            return md

        except Exception as e:
            logger.error(f"Error formatting as Markdown: {str(e)}")
            return f"# Document Index\n\nError formatting index: {str(e)}\n"

    def _formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
        """Format document index as plain text."""
        try:
            text = "Document Index\n"
            text += "=" * 50 + "\n\n"
            text += f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n"
            text += f"Generated At: {indexData.get('generatedAt', 'unknown')}\n"
            text += f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n"

            # Include the raw formatted index for readability
            text += rawIndex

            return text

        except Exception as e:
            logger.error(f"Error formatting as text: {str(e)}")
            return f"Document Index\n\nError formatting index: {str(e)}\n\nRaw index:\n{rawIndex}\n"

    @action
    async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Extract content from documents (separate from AI calls).

        This action performs pure content extraction without AI processing.
        The extracted ContentParts can then be used by subsequent AI processing actions.

        Parameters:
        - documentList (list, required): Document reference(s) to extract content from.
        - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).

        Returns:
        - ActionResult with ActionDocument containing ContentExtracted objects
        - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
        """
        try:
            # Init progress logger
            workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
            operationId = f"context_extract_{workflowId}_{int(time.time())}"

            # Extract documentList from parameters dict
            from modules.datamodels.datamodelDocref import DocumentReferenceList
            documentListParam = parameters.get("documentList")
            if not documentListParam:
                return ActionResult.isFailure(error="documentList is required")

            # Convert to DocumentReferenceList if needed
            if isinstance(documentListParam, DocumentReferenceList):
                documentList = documentListParam
            elif isinstance(documentListParam, str):
                documentList = DocumentReferenceList.from_string_list([documentListParam])
            elif isinstance(documentListParam, list):
                documentList = DocumentReferenceList.from_string_list(documentListParam)
            else:
                return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")

            # Start progress tracking
            self.services.chat.progressLogStart(
                operationId,
                "Extracting content from documents",
                "Content Extraction",
                f"Documents: {len(documentList.references)}"
            )

            # Get ChatDocuments from documentList
            self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)

            if not chatDocuments:
                self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="No documents found in documentList")

            logger.info(f"Extracting content from {len(chatDocuments)} documents")

            # Prepare extraction options
            self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
            extractionOptionsParam = parameters.get("extractionOptions")

            # Convert dict to ExtractionOptions object if needed, or create defaults
            if extractionOptionsParam:
                if isinstance(extractionOptionsParam, dict):
                    # Convert dict to ExtractionOptions object
                    extractionOptions = ExtractionOptions(**extractionOptionsParam)
                elif isinstance(extractionOptionsParam, ExtractionOptions):
                    extractionOptions = extractionOptionsParam
                else:
                    # Invalid type, use defaults
                    extractionOptions = None
            else:
                extractionOptions = None

            # If extractionOptions not provided, create defaults
            if not extractionOptions:
                # Default extraction options for pure content extraction (no AI processing)
                extractionOptions = ExtractionOptions(
                    prompt="Extract all content from the document",
                    mergeStrategy=MergeStrategy(
                        mergeType="concatenate",
                        groupBy="typeGroup",
                        orderBy="id"
                    ),
                    processDocumentsIndividually=True
                )

            # Call extraction service with hierarchical progress logging
            self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
            self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
            # Pass operationId for hierarchical per-document progress logging
            extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)

            # Build ActionDocuments from ContentExtracted results
            self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
            actionDocuments = []
            # Map extracted results back to original documents by index (results are in same order)
            for i, extracted in enumerate(extractedResults):
                # Get original document name if available
                originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
                if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
                    # Use original filename with an "_extracted_<id>" suffix
                    baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
                    documentName = f"{baseName}_extracted_{extracted.id}.json"
                else:
                    # Fallback to generic name with index
                    documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"

                # Store ContentExtracted object in ActionDocument.documentData
                validationMetadata = {
                    "actionType": "context.extractContent",
                    "documentIndex": i,
                    "extractedId": extracted.id,
                    "partCount": len(extracted.parts) if extracted.parts else 0,
                    "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
                }
                actionDoc = ActionDocument(
                    documentName=documentName,
                    documentData=extracted,  # ContentExtracted object
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
                actionDocuments.append(actionDoc)

            self.services.chat.progressLogFinish(operationId, True)

            return ActionResult.isSuccess(documents=actionDocuments)

        except Exception as e:
            logger.error(f"Error in content extraction: {str(e)}")

            # Complete progress tracking with failure
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except:
                pass  # Don't fail on progress logging errors

            return ActionResult.isFailure(error=str(e))
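For orientation, a minimal sketch of how a caller might invoke this action and inspect the per-document metadata; the `MethodContext` instance, the `docItem:` reference value, and the `ActionResult` attributes are assumptions taken from the surrounding diff, not confirmed API.

```python
# Minimal sketch (assumed wiring): invoking the action on an initialized
# MethodContext instance and reading the per-document validationMetadata.
async def runExtraction(contextMethod):
    result = await contextMethod.extractContent({
        "documentList": ["docItem:123:report.pdf"],  # hypothetical reference
    })
    if result.success:
        for doc in result.documents:
            meta = doc.validationMetadata or {}
            print(doc.documentName, meta.get("partCount"), meta.get("originalFileName"))
    else:
        print("extraction failed:", result.error)
```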
@@ -326,7 +326,21 @@ class MethodOutlook(MethodBase):
        - filter (str, optional): Sender, query operators, or subject text.
        - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv".
        """
        import time
+       operationId = None
        try:
+           # Init progress logger
+           workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+           operationId = f"outlook_read_{workflowId}_{int(time.time())}"
+
+           # Start progress tracking
+           self.services.chat.progressLogStart(
+               operationId,
+               "Read Emails",
+               "Outlook Email Reading",
+               f"Folder: {parameters.get('folder', 'Inbox')}"
+           )
+
            connectionReference = parameters.get("connectionReference")
            folder = parameters.get("folder", "Inbox")
            limit = parameters.get("limit", 10)

@@ -334,8 +348,12 @@ class MethodOutlook(MethodBase):
            outputMimeType = parameters.get("outputMimeType", "application/json")

            if not connectionReference:
+               if operationId:
+                   self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="Connection reference is required")

+           self.services.chat.progressLogUpdate(operationId, 0.2, "Validating parameters")
+
            # Validate limit parameter
            if limit <= 0:
                limit = 1000

@@ -351,11 +369,14 @@ class MethodOutlook(MethodBase):

            # Get Microsoft connection
+           self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
+               self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Read emails using Microsoft Graph API
+           self.services.chat.progressLogUpdate(operationId, 0.4, "Reading emails from Microsoft Graph API")
            try:
                # Microsoft Graph API endpoint for messages
                graph_url = "https://graph.microsoft.com/v1.0"

@@ -387,6 +408,11 @@ class MethodOutlook(MethodBase):
                # If using $search, remove $orderby as they can't be combined
                if "$search" in params:
                    params.pop("$orderby", None)

+               # If using $filter with contains(), remove $orderby as they can't be combined
+               # Microsoft Graph API doesn't support contains() with orderby
+               if "$filter" in params and "contains(" in params["$filter"].lower():
+                   params.pop("$orderby", None)
+
                # Filter applied
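The `$search`/`$orderby` exclusions above are easy to trip over, so here is a standalone sketch of the request shape; the endpoint path and placeholder token are illustrative, not the module's real connection handling.

```python
from typing import Optional

def buildMessageParams(limit: int, filterExpr: Optional[str] = None,
                       search: Optional[str] = None) -> dict:
    # Mirror of the rules above: Graph rejects $orderby combined with $search,
    # and with $filter expressions that use contains().
    params = {"$top": limit, "$orderby": "receivedDateTime desc"}
    if search:
        params["$search"] = f'"{search}"'
        params.pop("$orderby", None)
    if filterExpr:
        params["$filter"] = filterExpr
        if "contains(" in filterExpr.lower():
            params.pop("$orderby", None)
    return params

# Hypothetical usage with a placeholder bearer token:
# resp = requests.get(
#     "https://graph.microsoft.com/v1.0/me/mailFolders/Inbox/messages",
#     headers={"Authorization": "Bearer <token>"},
#     params=buildMessageParams(10, filterExpr="contains(subject, 'invoice')"),
# )
```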
@@ -403,6 +429,7 @@ class MethodOutlook(MethodBase):

                response.raise_for_status()

+               self.services.chat.progressLogUpdate(operationId, 0.7, "Processing email data")
                emails_data = response.json()
                email_data = {
                    "emails": emails_data.get("value", []),

@@ -420,22 +447,34 @@ class MethodOutlook(MethodBase):

            except ImportError:
                logger.error("requests module not available")
+               if operationId:
+                   self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="requests module not available")
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 400:
                    logger.error(f"Bad Request (400) - Invalid filter or parameter: {e.response.text}")
+                   if operationId:
+                       self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error=f"Invalid filter syntax. Please check your filter parameter. Error: {e.response.text}")
                elif e.response.status_code == 401:
                    logger.error("Unauthorized (401) - Access token may be expired or invalid")
+                   if operationId:
+                       self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error="Authentication failed. Please check your connection and try again.")
                elif e.response.status_code == 403:
                    logger.error("Forbidden (403) - Insufficient permissions to access emails")
+                   if operationId:
+                       self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error="Insufficient permissions to read emails from this folder.")
                else:
                    logger.error(f"HTTP Error {e.response.status_code}: {e.response.text}")
+                   if operationId:
+                       self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error=f"HTTP Error {e.response.status_code}: {e.response.text}")
            except Exception as e:
                logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
+               if operationId:
+                   self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}")

            # Determine output format based on MIME type

@@ -465,16 +504,35 @@ class MethodOutlook(MethodBase):
                "timestamp": self.services.utils.timestampGetUtc()
            }

+           validationMetadata = {
+               "actionType": "outlook.readEmails",
+               "connectionReference": connectionReference,
+               "folder": folder,
+               "limit": limit,
+               "filter": filter,
+               "emailCount": email_data.get("count", 0),
+               "outputMimeType": outputMimeType
+           }
+
+           self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {email_data.get('count', 0)} emails")
+           self.services.chat.progressLogFinish(operationId, True)
+
            return ActionResult.isSuccess(
                documents=[ActionDocument(
                    documentName=f"outlook_emails_{self._format_timestamp_for_filename()}.json",
                    documentData=json.dumps(result_data, indent=2),
-                   mimeType="application/json"
+                   mimeType="application/json",
+                   validationMetadata=validationMetadata
                )]
            )

        except Exception as e:
            logger.error(f"Error reading emails: {str(e)}")
+           if operationId:
+               try:
+                   self.services.chat.progressLogFinish(operationId, False)
+               except:
+                   pass  # Don't fail on progress logging errors
            return ActionResult.isFailure(
                error=str(e)
            )
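All of the hunks that follow repeat one pattern: attach a `validationMetadata` dict (always carrying an `actionType` plus action-specific fields) to each emitted `ActionDocument`. A sketch of that pattern condensed into a helper, using the `ActionDocument` import visible later in this diff:

```python
import json
from modules.datamodels.datamodelChat import ActionDocument  # import shown later in this diff

def buildResultDocument(actionType: str, resultData: dict, fileName: str, **metaFields) -> ActionDocument:
    # One helper capturing the recurring pattern: actionType plus action-specific fields.
    validationMetadata = {"actionType": actionType, **metaFields}
    return ActionDocument(
        documentName=fileName,
        documentData=json.dumps(resultData, indent=2),
        mimeType="application/json",
        validationMetadata=validationMetadata,
    )
```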
@@ -695,12 +753,23 @@ class MethodOutlook(MethodBase):
                "timestamp": self.services.utils.timestampGetUtc()
            }

+           validationMetadata = {
+               "actionType": "outlook.searchEmails",
+               "connectionReference": connectionReference,
+               "query": query,
+               "folder": folder,
+               "limit": limit,
+               "resultCount": search_result.get("count", 0),
+               "outputMimeType": outputMimeType
+           }
+
            return ActionResult(
                success=True,
                documents=[ActionDocument(
                    documentName=f"outlook_email_search_{self._format_timestamp_for_filename()}.json",
                    documentData=json.dumps(result_data, indent=2),
-                   mimeType="application/json"
+                   mimeType="application/json",
+                   validationMetadata=validationMetadata
                )]
            )

@@ -818,12 +887,22 @@ class MethodOutlook(MethodBase):
                "timestamp": self.services.utils.timestampGetUtc()
            }

+           validationMetadata = {
+               "actionType": "outlook.listDrafts",
+               "connectionReference": connectionReference,
+               "folder": folder,
+               "limit": limit,
+               "draftCount": drafts_result.get("count", 0),
+               "outputMimeType": outputMimeType
+           }
+
            return ActionResult(
                success=True,
                documents=[ActionDocument(
                    documentName=f"outlook_drafts_list_{self._format_timestamp_for_filename()}.json",
                    documentData=json.dumps(result_data, indent=2),
-                   mimeType="application/json"
+                   mimeType="application/json",
+                   validationMetadata=validationMetadata
                )]
            )

@@ -928,12 +1007,21 @@ class MethodOutlook(MethodBase):
                "timestamp": self.services.utils.timestampGetUtc()
            }

+           validationMetadata = {
+               "actionType": "outlook.findDrafts",
+               "connectionReference": connectionReference,
+               "limit": limit,
+               "totalDrafts": drafts_result.get("totalDrafts", 0),
+               "outputMimeType": outputMimeType
+           }
+
            return ActionResult(
                success=True,
                documents=[ActionDocument(
                    documentName=f"outlook_drafts_found_{self._format_timestamp_for_filename()}.json",
                    documentData=json.dumps(result_data, indent=2),
-                   mimeType="application/json"
+                   mimeType="application/json",
+                   validationMetadata=validationMetadata
                )]
            )

@@ -1069,12 +1157,22 @@ class MethodOutlook(MethodBase):
                "timestamp": self.services.utils.timestampGetUtc()
            }

+           validationMetadata = {
+               "actionType": "outlook.checkDraftsFolder",
+               "connectionReference": connectionReference,
+               "limit": limit,
+               "totalDrafts": drafts_result.get("totalDrafts", 0),
+               "draftsFolderId": drafts_result.get("draftsFolderId"),
+               "outputMimeType": outputMimeType
+           }
+
            return ActionResult(
                success=True,
                documents=[ActionDocument(
                    documentName=f"outlook_drafts_folder_check_{self._format_timestamp_for_filename()}.json",
                    documentData=json.dumps(result_data, indent=2),
-                   mimeType="application/json"
+                   mimeType="application/json",
+                   validationMetadata=validationMetadata
                )]
            )

@@ -1440,14 +1538,32 @@ Return JSON:
        - connectionReference (str, required): Microsoft connection label.
        - documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext).
        """
        import time
+       operationId = None
        try:
+           # Init progress logger
+           workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+           operationId = f"outlook_send_{workflowId}_{int(time.time())}"
+
+           # Start progress tracking
+           self.services.chat.progressLogStart(
+               operationId,
+               "Send Draft Email",
+               "Outlook Email Sending",
+               f"Processing {len(parameters.get('documentList', []))} draft(s)"
+           )
+
            connectionReference = parameters.get("connectionReference")
            documentList = parameters.get("documentList", [])

            if not connectionReference:
+               if operationId:
+                   self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="Connection reference is required")

            if not documentList:
+               if operationId:
+                   self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="documentList is required and cannot be empty")

            # Convert single value to list if needed

@@ -1455,16 +1571,21 @@ Return JSON:
                documentList = [documentList]

            # Get Microsoft connection
+           self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
+               self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Check permissions
+           self.services.chat.progressLogUpdate(operationId, 0.3, "Checking permissions")
            permissions_ok = await self._checkPermissions(connection)
            if not permissions_ok:
+               self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")

            # Read draft email JSON documents from documentList
+           self.services.chat.progressLogUpdate(operationId, 0.4, "Reading draft email documents")
            draftEmails = []
            for docRef in documentList:
                try:

@@ -1535,8 +1656,11 @@ Return JSON:
                    continue

            if not draftEmails:
+               self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="No valid draft email JSON documents found in documentList")

+           self.services.chat.progressLogUpdate(operationId, 0.6, f"Found {len(draftEmails)} draft email(s) to send")
+
            # Send all draft emails
            graph_url = "https://graph.microsoft.com/v1.0"
            headers = {
@@ -1547,7 +1671,8 @@ Return JSON:
            sentResults = []
            failedResults = []

-           for draftEmail in draftEmails:
+           self.services.chat.progressLogUpdate(operationId, 0.7, "Sending emails")
+           for idx, draftEmail in enumerate(draftEmails):
                draftEmailJson = draftEmail["draftEmailJson"]
                draftId = draftEmail["draftId"]
                sourceDocument = draftEmail["sourceDocument"]

@@ -1577,6 +1702,7 @@ Return JSON:
                        "sourceDocument": sourceDocument
                    })
                    logger.info(f"Email sent successfully. Draft ID: {draftId}, Subject: {subject}")
+                   self.services.chat.progressLogUpdate(operationId, 0.7 + (idx + 1) * 0.2 / len(draftEmails), f"Sent {idx + 1}/{len(draftEmails)}: {subject}")
                else:
                    errorResult = {
                        "status": "error",
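The module's actual send call is not visible in this hunk, so for orientation only: the standard Microsoft Graph route for sending an existing draft, with a placeholder token.

```python
import requests

def sendDraft(accessToken: str, draftId: str) -> bool:
    # Microsoft Graph sends an existing draft via POST /me/messages/{id}/send;
    # the call takes an empty body and answers 202 Accepted on success.
    resp = requests.post(
        f"https://graph.microsoft.com/v1.0/me/messages/{draftId}/send",
        headers={"Authorization": f"Bearer {accessToken}"},
        timeout=30,
    )
    return resp.status_code == 202
```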
@@ -1623,35 +1749,66 @@ Return JSON:
            }

            # Determine overall success status
+           self.services.chat.progressLogUpdate(operationId, 0.9, f"Sent {successfulEmails}/{totalEmails} email(s)")
            if successfulEmails == 0:
+               self.services.chat.progressLogFinish(operationId, False)
+               validationMetadata = {
+                   "actionType": "outlook.sendDraftEmail",
+                   "connectionReference": connectionReference,
+                   "totalEmails": totalEmails,
+                   "successfulEmails": successfulEmails,
+                   "failedEmails": failedEmails,
+                   "status": "all_failed"
+               }
                return ActionResult.isFailure(
                    error=f"Failed to send all {totalEmails} email(s)",
                    documents=[ActionDocument(
                        documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
                        documentData=json.dumps(resultData, indent=2),
-                       mimeType="application/json"
+                       mimeType="application/json",
+                       validationMetadata=validationMetadata
                    )]
                )
            elif failedEmails > 0:
                # Partial success
                logger.warning(f"Sent {successfulEmails} out of {totalEmails} emails. {failedEmails} failed.")
+               validationMetadata = {
+                   "actionType": "outlook.sendDraftEmail",
+                   "connectionReference": connectionReference,
+                   "totalEmails": totalEmails,
+                   "successfulEmails": successfulEmails,
+                   "failedEmails": failedEmails,
+                   "status": "partial_success"
+               }
+               self.services.chat.progressLogFinish(operationId, True)
                return ActionResult(
                    success=True,
                    documents=[ActionDocument(
                        documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
                        documentData=json.dumps(resultData, indent=2),
-                       mimeType="application/json"
+                       mimeType="application/json",
+                       validationMetadata=validationMetadata
                    )]
                )
            else:
                # All successful
                logger.info(f"Successfully sent all {totalEmails} email(s)")
+               validationMetadata = {
+                   "actionType": "outlook.sendDraftEmail",
+                   "connectionReference": connectionReference,
+                   "totalEmails": totalEmails,
+                   "successfulEmails": successfulEmails,
+                   "failedEmails": failedEmails,
+                   "status": "all_successful"
+               }
+               self.services.chat.progressLogFinish(operationId, True)
                return ActionResult(
                    success=True,
                    documents=[ActionDocument(
                        documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
                        documentData=json.dumps(resultData, indent=2),
-                       mimeType="application/json"
+                       mimeType="application/json",
+                       validationMetadata=validationMetadata
                    )]
                )
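The three branches differ only in the success flag and the status string; condensed, the status decision is:

```python
def overallSendStatus(successfulEmails: int, failedEmails: int) -> str:
    # Same three-way split as the branches above.
    if successfulEmails == 0:
        return "all_failed"
    if failedEmails > 0:
        return "partial_success"
    return "all_successful"
```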
@@ -1693,12 +1850,19 @@ Return JSON:
                    "status": "ready"
                }

+               validationMetadata = {
+                   "actionType": "outlook.checkPermissions",
+                   "connectionReference": connectionReference,
+                   "permissionsStatus": "ready",
+                   "hasPermissions": True
+               }
                return ActionResult(
                    success=True,
                    documents=[ActionDocument(
                        documentName=f"outlook_permissions_check_{self._format_timestamp_for_filename()}.json",
                        documentData=json.dumps(result_data, indent=2),
-                       mimeType="application/json"
+                       mimeType="application/json",
+                       validationMetadata=validationMetadata
                    )]
                )
            else:

@@ -1711,12 +1875,19 @@ Return JSON:
                    "message": "Please re-authenticate your Microsoft connection to get updated permissions."
                }

+               validationMetadata = {
+                   "actionType": "outlook.checkPermissions",
+                   "connectionReference": connectionReference,
+                   "permissionsStatus": "needs_reauthentication",
+                   "hasPermissions": False
+               }
                return ActionResult(
                    success=False,
                    documents=[ActionDocument(
                        documentName=f"outlook_permissions_check_{self._format_timestamp_for_filename()}.json",
                        documentData=json.dumps(result_data, indent=2),
-                       mimeType="application/json"
+                       mimeType="application/json",
+                       validationMetadata=validationMetadata
                    )],
                    error="Connection lacks necessary permissions for Outlook operations"
                )

@@ -1072,6 +1072,13 @@ class MethodSharepoint(MethodBase):
            outputExtension = ".json"  # Default
            outputMimeType = "application/json"  # Default

+           validationMetadata = {
+               "actionType": "sharepoint.findDocumentPath",
+               "searchQuery": searchQuery,
+               "maxResults": maxResults,
+               "totalResults": len(foundDocuments),
+               "hasResults": len(foundDocuments) > 0
+           }
+
            return ActionResult(
                success=True,

@@ -1079,7 +1086,8 @@ class MethodSharepoint(MethodBase):
                    ActionDocument(
                        documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{outputExtension}",
                        documentData=json.dumps(resultData, indent=2),
-                       mimeType=outputMimeType
+                       mimeType=outputMimeType,
+                       validationMetadata=validationMetadata
                    )
                ]
            )

@@ -1112,7 +1120,21 @@ class MethodSharepoint(MethodBase):
        - documentData: Base64-encoded content (binary files) or plain text (text files)
        - mimeType: MIME type (e.g., application/pdf, text/plain)
        """
        import time
+       operationId = None
        try:
+           # Init progress logger
+           workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+           operationId = f"sharepoint_read_{workflowId}_{int(time.time())}"
+
+           # Start progress tracking
+           self.services.chat.progressLogStart(
+               operationId,
+               "Read Documents",
+               "SharePoint Document Reading",
+               f"Path: {parameters.get('pathQuery', parameters.get('pathObject', '*'))}"
+           )
+
            documentList = parameters.get("documentList")
            if isinstance(documentList, str):
                documentList = [documentList]

@@ -1123,11 +1145,16 @@ class MethodSharepoint(MethodBase):

            # Validate connection reference
            if not connectionReference:
+               if operationId:
+                   self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="Connection reference is required")

            # Get connection first - needed for both pathObject and documentList approaches
+           self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
+               if operationId:
+                   self.services.chat.progressLogFinish(operationId, False)
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # If pathObject is provided, extract SharePoint file IDs and read them directly

@@ -1142,6 +1169,8 @@ class MethodSharepoint(MethodBase):
                from modules.datamodels.datamodelDocref import DocumentReferenceList
                pathObjectDocuments = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([pathObject]))
                if not pathObjectDocuments or len(pathObjectDocuments) == 0:
+                   if operationId:
+                       self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")

                # Get the first document's content (which should be the JSON from findDocumentPath)

@@ -1259,8 +1288,10 @@ class MethodSharepoint(MethodBase):
                readResults = []
                siteId = sites[0]['id']

-               for fileId in sharePointFileIds:
+               self.services.chat.progressLogUpdate(operationId, 0.5, f"Reading {len(sharePointFileIds)} file(s) from SharePoint")
+               for idx, fileId in enumerate(sharePointFileIds):
                    try:
+                       self.services.chat.progressLogUpdate(operationId, 0.5 + (idx * 0.3 / len(sharePointFileIds)), f"Reading file {idx + 1}/{len(sharePointFileIds)}")
                        # Get file info from SharePoint
                        endpoint = f"sites/{siteId}/drive/items/{fileId}"
                        fileInfo = await self._makeGraphApiCall(endpoint)

@@ -1306,11 +1337,13 @@ class MethodSharepoint(MethodBase):
                        continue

                if not readResults:
+                   self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error="No files could be read from pathObject")

                # Convert read results to ActionDocument objects
                # IMPORTANT: For binary files (PDFs), store Base64-encoded content directly in documentData
                # The system will create FileData and ChatDocument automatically
+               self.services.chat.progressLogUpdate(operationId, 0.8, f"Processing {len(readResults)} document(s)")
                from modules.datamodels.datamodelChat import ActionDocument
                import base64
@@ -1336,19 +1369,40 @@ class MethodSharepoint(MethodBase):
                    if fileContent and isinstance(fileContent, bytes):
                        # Encode binary content as Base64 string
                        base64Content = base64.b64encode(fileContent).decode('utf-8')
+                       validationMetadata = {
+                           "actionType": "sharepoint.readDocuments",
+                           "fileName": fileName,
+                           "sharepointFileId": resultItem.get("sharepointFileId"),
+                           "siteName": resultItem.get("siteName"),
+                           "mimeType": mimeType,
+                           "contentType": "binary",
+                           "size": len(fileContent),
+                           "includeMetadata": includeMetadata
+                       }
                        actionDoc = ActionDocument(
                            documentName=fileName,
                            documentData=base64Content,  # Base64 string for binary files
-                           mimeType=mimeType
+                           mimeType=mimeType,
+                           validationMetadata=validationMetadata
                        )
                        actionDocuments.append(actionDoc)
                        logger.info(f"Stored binary file {fileName} ({len(fileContent)} bytes) as Base64 in ActionDocument")
                    elif fileContent:
                        # Text content - store directly in documentData
+                       validationMetadata = {
+                           "actionType": "sharepoint.readDocuments",
+                           "fileName": fileName,
+                           "sharepointFileId": resultItem.get("sharepointFileId"),
+                           "siteName": resultItem.get("siteName"),
+                           "mimeType": mimeType,
+                           "contentType": "text",
+                           "includeMetadata": includeMetadata
+                       }
                        actionDoc = ActionDocument(
                            documentName=fileName,
                            documentData=fileContent if isinstance(fileContent, str) else str(fileContent),
-                           mimeType=mimeType
+                           mimeType=mimeType,
+                           validationMetadata=validationMetadata
                        )
                        actionDocuments.append(actionDoc)
                    else:
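The Base64 convention above is symmetric, so a downstream consumer recovers the original bytes with the standard library alone:

```python
import base64

# Round trip for binary documentData as stored above: bytes -> Base64 str -> bytes.
payload = b"%PDF-1.7 ..."  # stand-in for fileContent
encoded = base64.b64encode(payload).decode("utf-8")  # what goes into documentData
assert base64.b64decode(encoded) == payload          # what a downstream consumer recovers
```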
@@ -1366,14 +1420,26 @@ class MethodSharepoint(MethodBase):
                        if resultItem.get("metadata"):
                            docData["metadata"] = resultItem["metadata"]

+                       validationMetadata = {
+                           "actionType": "sharepoint.readDocuments",
+                           "fileName": fileName,
+                           "sharepointFileId": resultItem.get("sharepointFileId"),
+                           "siteName": resultItem.get("siteName"),
+                           "mimeType": mimeType,
+                           "contentType": "metadata_only",
+                           "includeMetadata": includeMetadata
+                       }
                        actionDoc = ActionDocument(
                            documentName=fileName,
                            documentData=json.dumps(docData, indent=2),
-                           mimeType=mimeType
+                           mimeType=mimeType,
+                           validationMetadata=validationMetadata
                        )
                        actionDocuments.append(actionDoc)

                # Return success with action documents
+               self.services.chat.progressLogUpdate(operationId, 0.9, f"Read {len(actionDocuments)} document(s)")
+               self.services.chat.progressLogFinish(operationId, True)
                return ActionResult.isSuccess(documents=actionDocuments)

            # Fallback: Use documentList parameter (for backward compatibility)

@@ -1583,6 +1649,13 @@ class MethodSharepoint(MethodBase):
            outputExtension = ".json"  # Default
            outputMimeType = "application/json"  # Default

+           validationMetadata = {
+               "actionType": "sharepoint.readDocuments",
+               "connectionReference": connectionReference,
+               "documentCount": len(readResults),
+               "includeMetadata": includeMetadata,
+               "sitesSearched": len(sites)
+           }
+
            return ActionResult(
                success=True,

@@ -1590,12 +1663,18 @@ class MethodSharepoint(MethodBase):
                    ActionDocument(
                        documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{outputExtension}",
                        documentData=json.dumps(resultData, indent=2),
-                       mimeType=outputMimeType
+                       mimeType=outputMimeType,
+                       validationMetadata=validationMetadata
                    )
                ]
            )
        except Exception as e:
            logger.error(f"Error reading SharePoint documents: {str(e)}")
+           if operationId:
+               try:
+                   self.services.chat.progressLogFinish(operationId, False)
+               except:
+                   pass  # Don't fail on progress logging errors
            return ActionResult(
                success=False,
                error=str(e)

@@ -1998,6 +2077,15 @@ class MethodSharepoint(MethodBase):
            outputExtension = ".json"  # Default
            outputMimeType = "application/json"  # Default

+           validationMetadata = {
+               "actionType": "sharepoint.uploadDocument",
+               "connectionReference": connectionReference,
+               "uploadPath": uploadPath,
+               "fileNames": fileNames,
+               "uploadCount": len(uploadResults),
+               "successfulUploads": len([r for r in uploadResults if r.get("uploadStatus") == "success"]),
+               "failedUploads": len([r for r in uploadResults if r.get("uploadStatus") == "failed"])
+           }
+
            return ActionResult(
                success=True,

@@ -2005,7 +2093,8 @@ class MethodSharepoint(MethodBase):
                    ActionDocument(
                        documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{outputExtension}",
                        documentData=json.dumps(resultData, indent=2),
-                       mimeType=outputMimeType
+                       mimeType=outputMimeType,
+                       validationMetadata=validationMetadata
                    )
                ]
            )

@@ -2459,6 +2548,14 @@ class MethodSharepoint(MethodBase):
            outputExtension = ".json"  # Default
            outputMimeType = "application/json"  # Default

+           validationMetadata = {
+               "actionType": "sharepoint.listDocuments",
+               "pathQuery": listQuery,
+               "includeSubfolders": includeSubfolders,
+               "sitesSearched": len(sites),
+               "folderCount": len(listResults),
+               "totalItems": sum(len(result.get("siteResults", [])) for result in listResults)
+           }
+
            return ActionResult(
                success=True,

@@ -2466,7 +2563,8 @@ class MethodSharepoint(MethodBase):
                    ActionDocument(
                        documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{outputExtension}",
                        documentData=json.dumps(resultData, indent=2),
-                       mimeType=outputMimeType
+                       mimeType=outputMimeType,
+                       validationMetadata=validationMetadata
                    )
                ]
            )
@@ -139,14 +139,11 @@ class ContentValidator:
                "statistics": {}
            }

-           # Extract metadata
+           # Extract metadata - include ALL metadata fields (generic for all action types)
            metadata = jsonData.get("metadata", {})
-           if metadata:
-               summary["metadata"] = {
-                   "title": metadata.get("title"),
-                   "split_strategy": metadata.get("split_strategy"),
-                   "extraction_method": metadata.get("extraction_method")
-               }
+           if metadata and isinstance(metadata, dict):
+               # Include all metadata fields, not just specific ones
+               summary["metadata"] = dict(metadata)

            # Extract documents array (if present)
            documents = jsonData.get("documents", [])

@@ -195,6 +192,17 @@ class ContentValidator:
                        text = textElement.get("text", "")
                        if text:
                            sectionSummary["textPreview"] = text[:100] + ("..." if len(text) > 100 else "")
+                   # Also check for textPreview directly in section (for web crawl results)
+                   if section.get("textPreview"):
+                       sectionSummary["textPreview"] = section.get("textPreview")
+
+                   # Include any additional fields from section (generic approach)
+                   # This ensures all action-specific fields are preserved
+                   for key, value in section.items():
+                       if key not in sectionSummary and key not in ["elements"]:  # Skip elements as they're processed separately
+                           # Include simple types (str, int, float, bool, list of primitives)
+                           if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10):
+                               sectionSummary[key] = value

                    summary["sections"].append(sectionSummary)
                else:
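To make the generic copy rule concrete, here is its behaviour on a hypothetical section dict (field names invented for illustration):

```python
# Behaviour of the generic copy rule above on a hypothetical section dict.
section = {
    "id": "s1",
    "content_type": "text",
    "elements": [{"text": "..."}],  # skipped: processed separately
    "pageNumber": 3,                # simple type: copied
    "tags": ["intro", "summary"],   # short list: copied
    "rawTree": {"deep": "object"},  # dict: not a simple type, dropped
}
sectionSummary = {"id": section["id"], "content_type": section["content_type"]}
for key, value in section.items():
    if key not in sectionSummary and key not in ["elements"]:
        if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10):
            sectionSummary[key] = value
assert "pageNumber" in sectionSummary and "rawTree" not in sectionSummary
```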
@@ -206,7 +214,8 @@ class ContentValidator:
                    sectionSummary = {
                        "id": section.get("id"),
                        "content_type": section.get("content_type"),
-                       "title": section.get("title")
+                       "title": section.get("title"),
+                       "order": section.get("order")
                    }

                    if section.get("content_type") == "table":

@@ -220,8 +229,21 @@ class ContentValidator:
                            sectionSummary["rowCount"] = len(rows)
                            sectionSummary["headers"] = headers

+                   # Include any additional fields from section (generic approach)
+                   for key, value in section.items():
+                       if key not in sectionSummary and key not in ["elements"]:  # Skip elements as they're processed separately
+                           # Include simple types (str, int, float, bool, list of primitives)
+                           if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10):
+                               sectionSummary[key] = value
+
                    summary["sections"].append(sectionSummary)

+           # Extract statistics from root level (generic - include all statistics fields)
+           rootStatistics = jsonData.get("statistics", {})
+           if rootStatistics and isinstance(rootStatistics, dict):
+               # Merge root statistics into summary statistics
+               summary["statistics"].update(rootStatistics)
+
            return summary

        except Exception as e:

@@ -210,8 +210,14 @@ class MessageCreator:
            taskProgress = str(taskIndex)

        # Enhanced completion message with criteria details
-       if reviewResult and hasattr(reviewResult, 'reason'):
-           completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ {reviewResult.reason or 'Task completed successfully'}"
+       # Prefer userMessage (user-friendly, in the user's language), fall back to reason
+       if reviewResult:
+           if hasattr(reviewResult, 'userMessage') and reviewResult.userMessage:
+               completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ {reviewResult.userMessage}"
+           elif hasattr(reviewResult, 'reason') and reviewResult.reason:
+               completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ {reviewResult.reason}"
+           else:
+               completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ Task completed successfully"
        else:
            completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ Task completed successfully"
@@ -28,11 +28,21 @@ class TaskPlanner:

        logger.info(f"=== STARTING TASK PLAN GENERATION ===")
        logger.info(f"Workflow ID: {workflow.id}")
-       logger.info(f"User Input: {userInput}")
+       # Log normalized request instead of raw user input for security
+       normalizedPrompt = getattr(self.services, 'currentUserPromptNormalized', None) if self.services else None
+       if normalizedPrompt:
+           logger.info(f"Normalized Request: {normalizedPrompt}")
+       else:
+           logger.info(f"Normalized Request: {userInput}")

-       # Use stored user prompt if available, otherwise use the input
-       actualUserPrompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') and self.services.currentUserPrompt else userInput
-       logger.info(f"Actual User Prompt: {actualUserPrompt}")
+       # Use normalized request if available, otherwise fall back to currentUserPrompt, then userInput
+       actualUserPrompt = None
+       if self.services and hasattr(self.services, 'currentUserPromptNormalized') and self.services.currentUserPromptNormalized:
+           actualUserPrompt = self.services.currentUserPromptNormalized
+       elif self.services and hasattr(self.services, 'currentUserPrompt') and self.services.currentUserPrompt:
+           actualUserPrompt = self.services.currentUserPrompt
+       else:
+           actualUserPrompt = userInput

        # Check workflow status before calling AI service
        checkWorkflowStopped(self.services)
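The same fallback chain, condensed into a helper for readability; the function name is hypothetical, the attribute names come from this diff.

```python
def pickPromptForPlanning(services, userInput: str) -> str:
    # Normalized request first, then the stored raw prompt, then the input.
    for attr in ("currentUserPromptNormalized", "currentUserPrompt"):
        value = getattr(services, attr, None) if services else None
        if value:
            return value
    return userInput
```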
@@ -96,6 +96,10 @@ class DynamicMode(BaseMode):
        # NEW: Reset progress tracking for new task
        self.progressTracker.reset()

+       # Initialize executed actions tracking for this task
+       if not hasattr(context, 'executedActions') or context.executedActions is None:
+           context.executedActions = []
+
        # Update workflow object before executing task
        self._updateWorkflowBeforeExecutingTask(taskIndex)

@@ -104,7 +108,8 @@ class DynamicMode(BaseMode):

        state = TaskExecutionState(taskStep)
-       # Dynamic mode uses max_steps instead of max_retries
-       state.max_steps = max(1, int(getattr(workflow, 'maxSteps', 10)))
+       # maxSteps is set in workflowManager.py when workflow is created
+       state.max_steps = int(getattr(workflow, 'maxSteps', 1))
        logger.info(f"Using Dynamic mode execution with max_steps: {state.max_steps}")

        step = 1

@@ -128,6 +133,19 @@ class DynamicMode(BaseMode):
            observation = self._observeBuild(result)
            # Note: resultLabel is already set correctly in _observeBuild from actionResult.resultLabel

+           # Store executed action in context for action history
+           if not hasattr(context, 'executedActions') or context.executedActions is None:
+               context.executedActions = []
+           actionName = selection.get('action', 'unknown')
+           actionParameters = selection.get('parameters', {}) or {}
+           # Filter out documentList for clarity in history
+           relevantParams = {k: v for k, v in actionParameters.items() if k not in ['documentList', 'connections']}
+           context.executedActions.append({
+               'action': actionName,
+               'parameters': relevantParams,
+               'step': step
+           })
+
            # Content validation (against original cleaned user prompt / workflow intent)
            if getattr(self, 'workflowIntent', None) and result.documents:
                # Pass ALL documents to validator - validator decides what to validate (generic approach)
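The shape of one `executedActions` entry as appended above, and the line the review step later builds from it (both taken from this diff):

```python
import json

entry = {"action": "sharepoint.readDocuments", "parameters": {"maxResults": 5}, "step": 2}
line = f"Step {entry['step']}: {entry['action']} {json.dumps(entry['parameters'], ensure_ascii=False)}"
print(line)  # Step 2: sharepoint.readDocuments {"maxResults": 5}
```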
@@ -883,9 +901,20 @@ class DynamicMode(BaseMode):
            elif progressState['nextActionsSuggested']:
                enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"

-           # NEW: Add action history to review content
+           # NEW: Add action history to review content - use all executed actions
+           actionHistory = []
+
+           # First, add all executed actions from the current task
+           if hasattr(context, 'executedActions') and context.executedActions:
+               for executedAction in context.executedActions:
+                   action = executedAction.get('action', 'unknown')
+                   params = executedAction.get('parameters', {}) or {}
+                   paramsStr = json.dumps(params, ensure_ascii=False) if params else "{}"
+                   step = executedAction.get('step', 0)
+                   actionHistory.append(f"Step {step}: {action} {paramsStr}")
+
+           # Also include refinement decisions for completeness (these show what was planned)
            if hasattr(context, 'previousReviewResult') and context.previousReviewResult:
-               actionHistory = []
                for i, prevDecision in enumerate(context.previousReviewResult, 1):
                    if prevDecision and hasattr(prevDecision, 'nextAction') and prevDecision.nextAction:
                        action = prevDecision.nextAction

@@ -895,21 +924,27 @@ class DynamicMode(BaseMode):
                        paramsStr = json.dumps(relevantParams, ensure_ascii=False) if relevantParams else "{}"
                        quality = getattr(prevDecision, 'qualityScore', None)
                        qualityStr = f" (quality: {quality:.2f})" if quality is not None else ""
-                       actionHistory.append(f"Round {i}: {action} {paramsStr}{qualityStr}")
-
-           if actionHistory:
-               enhancedReviewContent += f"\nACTION HISTORY:\n"
-               enhancedReviewContent += "\n".join(f"- {entry}" for entry in actionHistory)
-               # Detect repeated actions
-               actionCounts = {}
-               for entry in actionHistory:
-                   # Extract action name (before first space or {)
-                   actionName = entry.split()[1] if len(entry.split()) > 1 else "unknown"
+                       # Only add if not already in executedActions (avoid duplicates)
+                       actionEntry = f"Refinement {i}: {action} {paramsStr}{qualityStr}"
+                       if actionEntry not in actionHistory:
+                           actionHistory.append(actionEntry)
+
+           if actionHistory:
+               enhancedReviewContent += f"\nACTION HISTORY:\n"
+               enhancedReviewContent += "\n".join(f"- {entry}" for entry in actionHistory)
+               # Detect repeated actions
+               actionCounts = {}
+               for entry in actionHistory:
+                   # Extract action name (after first space, before next space or {)
+                   parts = entry.split()
+                   if len(parts) > 1:
+                       # Skip "Step", "Refinement" prefixes and get the action name
+                       actionName = parts[1] if parts[0] in ['Step', 'Refinement'] else parts[0]
                        actionCounts[actionName] = actionCounts.get(actionName, 0) + 1

-               repeatedActions = [action for action, count in actionCounts.items() if count >= 2]
-               if repeatedActions:
-                   enhancedReviewContent += f"\nWARNING: Repeated actions detected: {', '.join(repeatedActions)}. Consider a fundamentally different approach.\n"
+               repeatedActions = [action for action, count in actionCounts.items() if count >= 2]
+               if repeatedActions:
+                   enhancedReviewContent += f"\nWARNING: Repeated actions detected: {', '.join(repeatedActions)}. Consider a fundamentally different approach.\n"

            # Update placeholders with enhanced review content
            placeholders["REVIEW_CONTENT"] = enhancedReviewContent
@@ -19,7 +19,7 @@ class TaskExecutionState:
        self.max_retries = 3
        # Iterative loop (dynamic mode)
        self.current_step = 0
-       self.max_steps = 5
+       self.max_steps = 0  # Will be overridden by workflow.maxSteps from workflowManager.py

    def addSuccessfulAction(self, action_result: ActionResult):
        """Add a successful action to the state"""

@@ -56,7 +56,7 @@ class TaskExecutionState:
            patterns.append("permission_issues")
        return list(set(patterns))

-def shouldContinue(observation: Optional[Observation], review=None, current_step: int = 0, max_steps: int = 5) -> bool:
+def shouldContinue(observation: Optional[Observation], review=None, current_step: int = 0, max_steps: int = 1) -> bool:
    """Helper to decide if the iterative loop should continue

    Args:
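A sketch of how a loop driver consumes these settings; `runOneStep` and `state` are hypothetical stand-ins for the surrounding dynamic-mode code, shown only to make the `max_steps` semantics concrete.

```python
step = 1
while True:
    observation = runOneStep(step)  # hypothetical step executor
    if not shouldContinue(observation, review=None,
                          current_step=step, max_steps=state.max_steps):
        break
    step += 1
```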
@@ -350,34 +350,36 @@ Return ONLY JSON (no markdown, no explanations). The decision MUST:
- Match parameter names exactly as defined in AVAILABLE_METHODS

{{
-    "status": "continue",
-    "reason": "Brief reason explaining why continuing",
-    "nextAction": "Selected_action_from_ACTIONS",
+    "status": "continue" | "success",
+    "reason": "Brief reason explaining why continuing or why task is complete",
+    "userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining the task status (1 sentence, first person, friendly tone)",
+    "nextAction": "Selected_action_from_ACTIONS" | null,
    "nextActionParameters": {{
        "documentList": ["docItem:<documentId>:<filename>", "docList:<label>"],
        "connectionReference": "connection:reference_from_AVAILABLE_CONNECTIONS_INDEX",
        "parameter1": "value1",
        "parameter2": "value2"
-    }},
-    "nextActionObjective": "Clear description of what this action will achieve based on improvement suggestions"
+    }} | null,
+    "nextActionObjective": "Clear description of what this action will achieve based on improvement suggestions" | null
}}

=== RULES ===
1. Return ONLY JSON - no markdown, no explanations
-2. If "continue": MUST provide nextAction and nextActionParameters
-3. nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
-4. nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
-5. documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
+2. userMessage: REQUIRED - Provide a user-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining the task status (for "continue": explain what's being done next; for "success": explain what was accomplished)
+3. If "continue": MUST provide nextAction and nextActionParameters
+4. nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
+5. nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
+6. documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
   - For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
   - For document lists: use docList:<label> format
   - Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
-6. connectionReference: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX (required if action needs connection)
-7. nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
-8. CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
-9. Do NOT repeat failed actions - suggest DIFFERENT approach
-10. If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
-11. nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
-12. If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
+7. connectionReference: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX (required if action needs connection)
+8. nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
+9. CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
+10. Do NOT repeat failed actions - suggest DIFFERENT approach
+11. If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
+12. nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
+13. If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart

"""
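An illustrative decision object matching the revised schema (all values invented): a "success" decision carries a localized `userMessage` and null next-action fields.

```python
decision = {
    "status": "success",
    "reason": "All requested documents were extracted and delivered",
    "userMessage": "Ich habe die angeforderten Dokumente ausgewertet und die Ergebnisse bereitgestellt.",
    "nextAction": None,
    "nextActionParameters": None,
    "nextActionObjective": None,
}
```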
@@ -18,7 +18,14 @@ logger = logging.getLogger(__name__)
def generateTaskPlanningPrompt(services, context: Any) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle."""
-   # Extract user language from services
-   userLanguage = getattr(services, 'currentUserLanguage', None) or 'en'
+   # Prefer currentUserLanguage (set from user intention analysis), fall back to user.language, then 'en'
+   userLanguage = getattr(services, 'currentUserLanguage', None)
+   if not userLanguage:
+       userLanguage = getattr(services.user, 'language', None) if hasattr(services, 'user') and services.user else None
+   if not userLanguage:
+       userLanguage = 'en'
+
+   logger.debug(f"Task planning prompt using user language: {userLanguage}")

    # Extract workflowIntent from workflow object if available
    workflowIntent = {}

@@ -54,6 +61,7 @@ Break down user requests into logical, executable task steps.
## 📋 Context

### User Request
+The following is the user's normalized request:
{{KEY:USER_PROMPT}}

### Workflow Intent

@@ -55,26 +55,39 @@ class WorkflowProcessor:
            f"Mode: {workflow.workflowMode.value if hasattr(workflow.workflowMode, 'value') else workflow.workflowMode}"
        )

-       # Initialize currentUserLanguage to empty at workflow start
-       self.services.currentUserLanguage = ""
+       # currentUserLanguage should already be set from user intention analysis in _sendFirstMessage
+       # Do NOT reset it here, as it contains the detected language from the user's input
+       # Only initialize if not already set (should not happen in normal flow)
+       if not hasattr(self.services, 'currentUserLanguage') or not self.services.currentUserLanguage:
+           self.services.currentUserLanguage = getattr(self.services.user, 'language', None) or 'en'

        logger.info(f"=== STARTING TASK PLAN GENERATION ===")
+       logger.info(f"Using user language: {self.services.currentUserLanguage}")
        logger.info(f"Workflow ID: {workflow.id}")
-       logger.info(f"User Input: {userInput}")
+       # Log normalized request instead of raw user input for security
+       normalizedPrompt = getattr(self.services, 'currentUserPromptNormalized', None) or userInput
+       logger.info(f"Normalized Request: {normalizedPrompt}")
        modeValue = workflow.workflowMode.value if hasattr(workflow.workflowMode, 'value') else workflow.workflowMode
        logger.info(f"Workflow Mode: {modeValue}")

        # Update progress - generating task plan
        self.services.chat.progressLogUpdate(operationId, 0.3, "Analyzing input")

+       # Use normalized request instead of raw userInput for security
+       normalizedPrompt = getattr(self.services, 'currentUserPromptNormalized', None) or userInput
+
        # Delegate to the appropriate mode
-       taskPlan = await self.mode.generateTaskPlan(userInput, workflow)
+       taskPlan = await self.mode.generateTaskPlan(normalizedPrompt, workflow)

        # Update progress - creating task plan message
        self.services.chat.progressLogUpdate(operationId, 0.8, "Creating plan")

-       # Create task plan message
-       await self.mode.createTaskPlanMessage(taskPlan, workflow)
+       # Create task plan message only if there are 2+ tasks
+       # Single-task workflows don't need a task plan message
+       if taskPlan.tasks and len(taskPlan.tasks) >= 2:
+           await self.mode.createTaskPlanMessage(taskPlan, workflow)
+       else:
+           logger.info(f"Skipping task plan message creation - only {len(taskPlan.tasks) if taskPlan.tasks else 0} task(s)")

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)
@@ -311,12 +324,16 @@ class WorkflowProcessor:

    # Fast Path Implementation

-   async def detectComplexity(self, prompt: str, documents: Optional[List[ChatDocument]] = None) -> str:
+   async def detectComplexity(self, prompt: str, documents: Optional[List[ChatDocument]] = None) -> tuple[str, bool, Optional[str]]:
        """
        Detect request complexity using AI-based semantic understanding.
+       Also detects user language for fast path responses.

        Returns:
-           "simple" | "moderate" | "complex"
+           Tuple of (complexity: str, needsWorkflowHistory: bool, detectedLanguage: Optional[str])
+           complexity: "simple" | "moderate" | "complex"
+           needsWorkflowHistory: True if request needs previous workflow rounds/history
+           detectedLanguage: ISO 639-1 language code (e.g., "de", "en") or None

        Simple: Single question, no documents, straightforward answer (5-15s)
        Moderate: Multiple steps, some documents, structured response (30-60s)
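A sketch of a call site unpacking the widened return value; the method name `routeRequest` is hypothetical, the attribute names mirror ones used elsewhere in this diff.

```python
async def routeRequest(self, prompt, documents):
    complexity, needsWorkflowHistory, detectedLanguage = await self.detectComplexity(prompt, documents)
    if detectedLanguage:
        self.services.currentUserLanguage = detectedLanguage
    setattr(self.services, '_needsWorkflowHistory', needsWorkflowHistory)
    return complexity
```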
@@ -326,38 +343,47 @@ class WorkflowProcessor:
            # Ensure AI service is initialized
            await self.services.ai.ensureAiObjectsInitialized()

-           # Build complexity detection prompt (language-agnostic, semantic)
+           # Build complexity detection prompt (includes language detection)
+           # JSON template comes BEFORE user input for security
            complexityPrompt = (
-               "You are a complexity analyzer. Analyze the user's request and determine its complexity level.\n\n"
+               "You are a complexity analyzer. Analyze the user's request and determine its complexity level and language.\n\n"
                "Consider:\n"
                "- Number of distinct tasks or steps required\n"
                "- Amount and type of documents provided\n"
                "- Need for external research or web search\n"
                "- Need for document analysis or extraction\n"
                "- Need for content generation (reports, summaries, etc.)\n"
-               "- Need for multi-step reasoning or planning\n\n"
+               "- Need for multi-step reasoning or planning\n"
+               "- Need for previous workflow rounds/history (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work)\n"
+               "- Language: Detect the ISO 639-1 language code (e.g., de, en, fr, it) from the user's request\n\n"
                "Complexity levels:\n"
                "- 'simple': Single question, no documents or minimal documents, straightforward answer that can be provided in one AI response (5-15s)\n"
                "- 'moderate': Multiple steps, some documents, structured response requiring some processing (30-60s)\n"
                "- 'complex': Multi-task workflow, many documents, research needed, content generation required, multi-step planning (60-120s)\n\n"
-               f"User request:\n{prompt}\n\n"
+               "Return ONLY a JSON object with this exact structure:\n"
+               "{\n"
+               '  "complexity": "simple" | "moderate" | "complex",\n'
+               '  "reasoning": "Brief explanation of why this complexity level",\n'
+               '  "needsWorkflowHistory": true|false,\n'
+               '  "detectedLanguage": "de|en|fr|it|..." (ISO 639-1 language code)\n'
+               "}\n\n"
+               "################ USER INPUT START #################\n"
            )

+           # Add sanitized user input with clear delimiters
+           # Escape curly braces for f-string safety, but preserve format (no quote wrapping)
+           sanitizedPrompt = prompt.replace('{', '{{').replace('}', '}}') if prompt else ""
+           complexityPrompt += f"{sanitizedPrompt}\n"
+
+           complexityPrompt += "################ USER INPUT FINISH #################\n\n"
+
            if documents and len(documents) > 0:
-               complexityPrompt += f"\nDocuments provided: {len(documents)} document(s)\n"
+               complexityPrompt += f"Documents provided: {len(documents)} document(s)\n"
                # Add document types
                docTypes = [doc.mimeType for doc in documents if hasattr(doc, 'mimeType')]
                if docTypes:
                    complexityPrompt += f"Document types: {', '.join(set(docTypes))}\n"

-           complexityPrompt += (
-               "\nReturn ONLY a JSON object with this exact structure:\n"
-               "{\n"
-               '  "complexity": "simple" | "moderate" | "complex",\n'
-               '  "reasoning": "Brief explanation of why this complexity level"\n'
-               "}\n"
-           )
-
            # Call AI for complexity detection (planning call - no documents needed)
            aiResponse = await self.services.ai.callAiPlanning(
                prompt=complexityPrompt,
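The effect of the brace escaping above on a request that itself contains JSON, shown in isolation (the escaping only matters if the assembled prompt later passes through `str.format`-style templating):

```python
prompt = 'Summarize {"total": 42}'
sanitized = prompt.replace('{', '{{').replace('}', '}}') if prompt else ""
assert sanitized == 'Summarize {{"total": 42}}'
```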
@@ -367,6 +393,8 @@ class WorkflowProcessor:

             # Parse response
             complexity = "moderate"  # Default fallback
+            needsWorkflowHistory = False  # Default fallback
+            detectedLanguage = None  # Default fallback
             try:
                 # callAiPlanning returns a string directly, not an object
                 responseContent = str(aiResponse) if aiResponse else ""

@@ -380,19 +408,21 @@ class WorkflowProcessor:
                 if jsonStr:
                     parsed = json.loads(jsonStr)
                     complexity = parsed.get("complexity", "moderate")
+                    needsWorkflowHistory = parsed.get("needsWorkflowHistory", False)
+                    detectedLanguage = parsed.get("detectedLanguage") or None
                     reasoning = parsed.get("reasoning", "")
-                    logger.info(f"Complexity detected: {complexity} - {reasoning}")
+                    logger.info(f"Complexity detected: {complexity}, needsWorkflowHistory: {needsWorkflowHistory}, language: {detectedLanguage} - {reasoning}")
                 else:
                     logger.warning("Could not parse complexity detection response, defaulting to 'moderate'")
             except Exception as e:
                 logger.warning(f"Error parsing complexity detection: {str(e)}, defaulting to 'moderate'")

-            return complexity
+            return (complexity, needsWorkflowHistory, detectedLanguage)

         except Exception as e:
             logger.error(f"Error in detectComplexity: {str(e)}")
             # Default to moderate on error (safe fallback)
-            return "moderate"
+            return ("moderate", False, None)

     async def fastPathExecute(self, prompt: str, documents: Optional[List[ChatDocument]] = None, userLanguage: Optional[str] = None) -> ActionResult:
         """
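The two hunks above change `detectComplexity`'s contract: callers now receive a `(complexity, needsWorkflowHistory, detectedLanguage)` tuple, with defaults surviving any parse failure. A self-contained sketch of that parse-with-fallback shape (the function name and the first-object regex are illustrative; the real code builds `jsonStr` with its own extraction helper):

```python
import json
import re
from typing import Optional, Tuple

def parseComplexityResponse(responseContent: str) -> Tuple[str, bool, Optional[str]]:
    """Illustrative re-implementation of the fallback logic in the hunk above."""
    complexity = "moderate"          # Default fallback
    needsWorkflowHistory = False     # Default fallback
    detectedLanguage = None          # Default fallback
    try:
        # Greedy first-object regex stands in for the real jsonStr extraction.
        match = re.search(r"\{.*\}", responseContent or "", re.DOTALL)
        if match:
            parsed = json.loads(match.group(0))
            complexity = parsed.get("complexity", "moderate")
            needsWorkflowHistory = parsed.get("needsWorkflowHistory", False)
            detectedLanguage = parsed.get("detectedLanguage") or None
    except Exception:
        pass  # keep defaults, mirroring the warning-and-default path above
    return (complexity, needsWorkflowHistory, detectedLanguage)

# Every caller must now unpack three values:
complexity, needsHistory, language = parseComplexityResponse(
    '{"complexity": "simple", "needsWorkflowHistory": false, "detectedLanguage": "de"}'
)
assert (complexity, needsHistory, language) == ("simple", False, "de")
```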
@@ -412,9 +442,12 @@ class WorkflowProcessor:
         await self.services.ai.ensureAiObjectsInitialized()

         # Build fast path prompt (understand + execute + deliver in one call)
+        # Clearly separate user prompt for security
         fastPathPrompt = (
             "You are a helpful assistant. Answer the user's question directly and comprehensively.\n\n"
-            f"User question:\n{prompt}\n\n"
+            "## User Question\n"
+            "The following is the user's request:\n\n"
+            f"{prompt}\n\n"
         )

         # Add user language context if available

@@ -439,7 +472,7 @@ class WorkflowProcessor:
         )

         # Call AI directly (no document generation - just plain text response)
-        # Use aiObjects.call() instead of callAiContent() to avoid document generation path
+        # Use callWithTextContext() for text-only calls
         aiRequest = AiCallRequest(
             prompt=fastPathPrompt,
             context="",

@@ -447,7 +480,7 @@ class WorkflowProcessor:
             contentParts=None  # Fast path doesn't process documents
         )

-        aiCallResponse = await self.services.ai.aiObjects.call(aiRequest)
+        aiCallResponse = await self.services.ai.aiObjects.callWithTextContext(aiRequest)

         # Extract response content (AiCallResponse.content is a string)
         responseText = aiCallResponse.content if aiCallResponse.content else ""
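For readers without the datamodel open: the fast path exercises only three fields of the request object. A sketch of just that surface, assuming nothing beyond what these hunks show (the real `AiCallRequest` in the datamodels module almost certainly carries more fields):

```python
from dataclasses import dataclass
from typing import Any, List, Optional

@dataclass
class AiCallRequestSketch:
    # Only the fields visible in these hunks; the real model has more.
    prompt: str
    context: str = ""
    contentParts: Optional[List[Any]] = None  # None => text-only call

# Fast path: no contentParts, so callWithTextContext() can skip the document
# generation machinery that aiObjects.call() would otherwise engage.
request = AiCallRequestSketch(prompt="...", context="", contentParts=None)
```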
@@ -97,7 +97,7 @@ class WorkflowManager:
             "mandateId": self.services.user.mandateId,
             "messageIds": [],
             "workflowMode": workflowMode,
-            "maxSteps": 5 if workflowMode == WorkflowModeEnum.WORKFLOW_DYNAMIC else 1,  # Set maxSteps for Dynamic mode
+            "maxSteps": 10,  # Set maxSteps
         }

         workflow = self.services.chat.createWorkflow(workflowData)

@@ -160,6 +160,9 @@ class WorkflowManager:
         # Reset progress logger for new workflow
         self.services.chat._progressLogger = None

+        # Reset workflow history flag at start of each workflow
+        setattr(self.services, '_needsWorkflowHistory', False)
+
         self.workflowProcessor = WorkflowProcessor(self.services)

         # Get workflow mode to determine if complexity detection is needed

@@ -169,6 +172,8 @@ class WorkflowManager:
         if skipComplexityDetection:
             logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan")
             complexity = "moderate"  # Default for automation workflows
+            needsWorkflowHistory = False  # Automation workflows don't need history
+            detectedLanguage = None  # No language detection in automation mode
         else:
             # Process user-uploaded documents from userInput for complexity detection
             # This is the correct way: use the input data directly, not workflow state

@@ -180,18 +185,28 @@ class WorkflowManager:
                 logger.warning(f"Failed to process user fileIds for complexity detection: {e}")

             # Detect complexity (AI-based semantic understanding) using user input documents
-            complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
-            logger.info(f"Request complexity detected: {complexity}")
+            # Also detects language for fast path responses
+            complexity, needsWorkflowHistory, detectedLanguage = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
+            logger.info(f"Request complexity detected: {complexity}, needsWorkflowHistory: {needsWorkflowHistory}, language: {detectedLanguage}")
+
+            # Set detected language for fast path (if detected)
+            if detectedLanguage and isinstance(detectedLanguage, str):
+                self._setUserLanguage(detectedLanguage)
+                try:
+                    setattr(self.services, 'currentUserLanguage', detectedLanguage)
+                except Exception:
+                    pass

-        # Now send the first message (which will also process the documents again, but that's fine)
-        await self._sendFirstMessage(userInput)
-
-        # Route to fast path for simple requests (skip for automation mode)
-        if not skipComplexityDetection and complexity == "simple":
+        # Route to fast path for simple requests if history is not needed
+        # Skip fast path for automation mode or if history is needed
+        if complexity == "simple" and not needsWorkflowHistory:
             logger.info("Routing to fast path for simple request")
             await self._executeFastPath(userInput, documents)
             return  # Fast path completes the workflow

+        # Now send the first message (which will also process the documents again, but that's fine)
+        await self._sendFirstMessage(userInput)
+
         # Route to full workflow for moderate/complex requests or automation mode
         logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else ""))
         taskPlan = await self._planTasks(userInput)
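The routing change above is easiest to audit as a pure predicate: `simple` alone no longer qualifies for the fast path, and automation mode can never reach it because it pre-sets `complexity = "moderate"`. A sketch (function name is illustrative):

```python
def shouldUseFastPath(complexity: str, needsWorkflowHistory: bool) -> bool:
    # Mirrors the new condition: 'simple' alone is no longer enough; requests
    # that build on previous rounds must take the full workflow.
    return complexity == "simple" and not needsWorkflowHistory

# Automation mode pre-sets complexity="moderate" and needsWorkflowHistory=False
# upstream, so it always falls through to the full workflow:
assert shouldUseFastPath("simple", False) is True
assert shouldUseFastPath("simple", True) is False   # e.g. 'continue', 'retry'
assert shouldUseFastPath("moderate", False) is False
```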
@@ -215,9 +230,10 @@ class WorkflowManager:
         # Get user language if available
         userLanguage = getattr(self.services, 'currentUserLanguage', None)

-        # Execute fast path
+        # Execute fast path - use normalizedRequest if available, otherwise use raw prompt
+        normalizedPrompt = getattr(self.services, 'currentUserPromptNormalized', None) or userInput.prompt
         result = await self.workflowProcessor.fastPathExecute(
-            prompt=userInput.prompt,
+            prompt=normalizedPrompt,
             documents=documents,
             userLanguage=userLanguage
         )
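The `getattr`-with-fallback idiom introduced here reappears verbatim in `_planTasks` further down; if it spreads again, a tiny accessor would keep the fallback in one place (hypothetical helper, not part of this diff):

```python
# Hypothetical accessor; not part of this diff.
def getEffectivePrompt(services, rawPrompt: str) -> str:
    # Prefer the normalized request produced by intent analysis; fall back
    # to the raw user prompt when normalization has not run (or was empty).
    return getattr(services, 'currentUserPromptNormalized', None) or rawPrompt
```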
@@ -272,12 +288,20 @@ class WorkflowManager:
                 }
                 chatDocuments.append(chatDoc)

+        # Mark workflow as completed BEFORE storing message (so UI polling stops)
+        workflow.status = "completed"
+        workflow.lastActivity = self.services.utils.timestampGetUtc()
+        self.services.chat.updateWorkflow(workflow.id, {
+            "status": "completed",
+            "lastActivity": workflow.lastActivity
+        })
+
         # Create ChatMessage with fast path response (in user's language)
         messageData = {
             "workflowId": workflow.id,
             "role": "assistant",
             "message": responseText or "Fast path response completed",
-            "status": "last",  # Fast path completes the workflow
+            "status": "last",  # Fast path completes the workflow - UI polling stops on this
             "sequenceNr": len(workflow.messages) + 1,
             "publishedAt": self.services.utils.timestampGetUtc(),
             "documentsLabel": "fast_path_response",

@@ -294,14 +318,6 @@ class WorkflowManager:
         # Store message with documents
         self.services.chat.storeMessageWithDocuments(workflow, messageData, chatDocuments)

-        # Mark workflow as completed
-        workflow.status = "completed"
-        workflow.lastActivity = self.services.utils.timestampGetUtc()
-        self.services.chat.updateWorkflow(workflow.id, {
-            "status": "completed",
-            "lastActivity": workflow.lastActivity
-        })
-
         logger.info(f"Fast path completed successfully, response length: {len(responseText)} chars")

     except Exception as e:
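Reordering rationale: the pair of hunks above moves the status update ahead of `storeMessageWithDocuments`, so the workflow already reads `completed` by the time the terminal `"last"` message becomes visible to the polling UI. Sketched as a single helper to make the intended ordering explicit (names follow this diff; the helper itself is hypothetical):

```python
# Names follow this diff; the helper itself is hypothetical.
def finishFastPath(services, workflow, messageData, chatDocuments):
    # 1) Flip the workflow to completed first, so any poll that races the
    #    terminal message still sees a consistent "completed" status.
    workflow.status = "completed"
    workflow.lastActivity = services.utils.timestampGetUtc()
    services.chat.updateWorkflow(workflow.id, {
        "status": "completed",
        "lastActivity": workflow.lastActivity,
    })
    # 2) Only now publish the "last" message that tells the UI to stop polling.
    services.chat.storeMessageWithDocuments(workflow, messageData, chatDocuments)
```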
@@ -369,7 +385,8 @@ class WorkflowManager:
             "6) dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown).\n"
             "7) expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., [\"xlsx\", \"pdf\"]). If format is unclear or not specified, use empty list [].\n"
             "8) qualityRequirements: Quality requirements they have (accuracy, completeness) as {accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}.\n"
-            "9) successCriteria: Specific success criteria that define completion (array of strings).\n\n"
+            "9) successCriteria: Specific success criteria that define completion (array of strings).\n"
+            "10) needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history to be understood or completed (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work). Return true if the request is a continuation, retry, modification, or builds upon previous work.\n\n"
             "Rules:\n"
             "- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained.\n"
             "- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear.\n"

@@ -394,9 +411,14 @@ class WorkflowManager:
             "    \"accuracyThreshold\": 0.0-1.0,\n"
             "    \"completenessThreshold\": 0.0-1.0\n"
             "  },\n"
-            "  \"successCriteria\": [\"specific criterion 1\", \"specific criterion 2\"]\n"
+            "  \"successCriteria\": [\"specific criterion 1\", \"specific criterion 2\"],\n"
+            "  \"needsWorkflowHistory\": true|false\n"
             "}\n\n"
-            f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}"
+            "## User Message\n"
+            "The following is the user's original input message. Extract intent, normalize the request, and identify any large context blocks that should be moved to separate documents:\n\n"
+            "################ USER INPUT START #################\n"
+            f"{userInput.prompt.replace('{', '{{').replace('}', '}}') if userInput.prompt else ''}\n"
+            "################ USER INPUT FINISH #################"
         )

         # Call AI analyzer (planning call - will use static parameters)
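With this hunk, both the complexity prompt and the intent-analysis prompt wrap user text in the same START/FINISH fences with brace escaping. A shared helper would keep the two call sites from drifting apart (hypothetical, not in this diff):

```python
# Hypothetical shared helper; not in this diff.
def wrapUserInput(text: str) -> str:
    # Same fencing and brace escaping used by detectComplexity and the
    # intent analyzer above.
    body = text.replace('{', '{{').replace('}', '}}') if text else ''
    return (
        "################ USER INPUT START #################\n"
        f"{body}\n"
        "################ USER INPUT FINISH #################"
    )
```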
@@ -431,15 +453,23 @@ class WorkflowManager:
                     'expectedFormats': parsed.get('expectedFormats', []),
                     'qualityRequirements': parsed.get('qualityRequirements', {}),
                     'successCriteria': parsed.get('successCriteria', []),
-                    'languageUserDetected': detectedLanguage
+                    'languageUserDetected': detectedLanguage,
+                    'needsWorkflowHistory': parsed.get('needsWorkflowHistory', False)
                 }

+                # Store needsWorkflowHistory in services for fast path decision
+                needsHistoryFromIntention = parsed.get('needsWorkflowHistory', False)
+                # Always set the value - default to False if not a boolean
+                setattr(self.services, '_needsWorkflowHistory', bool(needsHistoryFromIntention) if isinstance(needsHistoryFromIntention, bool) else False)
+
                 # Store workflowIntent in workflow object for reuse
                 if hasattr(self.services, 'workflow') and self.services.workflow:
                     self.services.workflow._workflowIntent = workflowIntent
         except Exception:
             contextItems = []
             workflowIntent = None
+            # Ensure needsWorkflowHistory is False if parsing fails
+            setattr(self.services, '_needsWorkflowHistory', False)

         # Update services state
         if detectedLanguage and isinstance(detectedLanguage, str):
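The keys assembled into `workflowIntent` here (the hunk truncates the earlier ones) can be written down as a `TypedDict` to make the new `needsWorkflowHistory` field visible at a glance; this sketch covers only the keys shown in this hunk:

```python
from typing import List, Optional, TypedDict

class QualityRequirements(TypedDict, total=False):
    accuracyThreshold: float      # 0.0-1.0
    completenessThreshold: float  # 0.0-1.0

class WorkflowIntentSketch(TypedDict, total=False):
    # Only the keys visible in this hunk; earlier keys are truncated above.
    expectedFormats: List[str]
    qualityRequirements: QualityRequirements
    successCriteria: List[str]
    languageUserDetected: Optional[str]
    needsWorkflowHistory: bool  # new in this change
```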
@@ -516,7 +546,9 @@ class WorkflowManager:
         workflow = self.services.workflow
         handling = self.workflowProcessor
         # Generate task plan first (shared for both modes)
-        taskPlan = await handling.generateTaskPlan(userInput.prompt, workflow)
+        # Use normalizedRequest instead of raw userInput.prompt for security
+        normalizedPrompt = getattr(self.services, 'currentUserPromptNormalized', None) or userInput.prompt
+        taskPlan = await handling.generateTaskPlan(normalizedPrompt, workflow)
         if not taskPlan or not taskPlan.tasks:
             raise Exception("No tasks generated in task plan.")
         workflowMode = getattr(workflow, 'workflowMode')
@@ -1089,3 +1121,22 @@ class WorkflowManager:
             logger.error(f"Error during content neutralization: {str(e)}")
             # Return original content on error
             return contentBytes
+
+    def _checkIfHistoryAvailable(self) -> bool:
+        """Check if workflow history is available (previous rounds exist).
+
+        Returns True if there are previous workflow rounds with messages.
+        """
+        try:
+            from modules.workflows.processing.shared.placeholderFactory import getPreviousRoundContext
+
+            history = getPreviousRoundContext(self.services)
+
+            # Check if history contains actual content (not just "No previous round context available")
+            if history and history != "No previous round context available":
+                return True
+
+            return False
+        except Exception as e:
+            logger.error(f"Error checking if history is available: {str(e)}")
+            return False
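`_checkIfHistoryAvailable` answers "do previous rounds exist?", while the `_needsWorkflowHistory` flag set during intent analysis answers "does this request depend on them?". A consumer presumably needs both; a sketch of that gate (the combination is an assumption: this diff only adds the two signals, and the gating call site is outside the hunks shown):

```python
# Method sketch on WorkflowManager; the combination below is an assumption.
def shouldInjectHistory(self) -> bool:
    needsHistory = getattr(self.services, '_needsWorkflowHistory', False)
    # Inject history only when the request asks for it AND previous rounds exist.
    return bool(needsHistory) and self._checkIfHistoryAvailable()
```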