From 909ee9528f5fad7abb8c1ac0ef88d0bfbe0709bf Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 31 Dec 2025 02:12:10 +0100
Subject: [PATCH] Enhance extraction and generation engine for document, code,
 and image paths
---
modules/services/serviceAi/mainServiceAi.py | 410 ++-
.../services/serviceAi/subAiCallLooping.py | 67 +-
.../services/serviceAi/subLoopingUseCases.py | 231 ++
.../services/serviceAi/subStructureFilling.py | 165 +-
.../serviceAi/subStructureGeneration.py | 88 +-
.../mainServiceExtraction.py | 67 +-
.../serviceGeneration/paths/codePath.py | 584 ++++
.../serviceGeneration/paths/documentPath.py | 258 ++
.../serviceGeneration/paths/imagePath.py | 132 +
modules/workflows/methods/methodAi.py.old | 742 -----
.../methods/methodAi/actions/__init__.py | 2 +
.../methodAi/actions/convertDocument.py | 18 -
.../methods/methodAi/actions/generateCode.py | 135 +
.../methodAi/actions/generateDocument.py | 25 +-
.../methods/methodAi/actions/process.py | 128 +-
.../methodAi/actions/summarizeDocument.py | 22 +-
.../methodAi/actions/translateDocument.py | 23 +-
.../methods/methodAi/actions/webResearch.py | 20 -
.../workflows/methods/methodAi/methodAi.py | 39 +
.../workflows/methods/methodContext.py.old | 460 ---
.../methodContext/actions/extractContent.py | 33 -
.../methodContext/actions/getDocumentIndex.py | 16 -
.../methodContext/actions/neutralizeData.py | 19 -
.../actions/triggerPreprocessingServer.py | 21 -
modules/workflows/methods/methodJira.py.old | 1101 -------
.../methods/methodJira/actions/connectJira.py | 21 -
.../methodJira/actions/createCsvContent.py | 19 -
.../methodJira/actions/createExcelContent.py | 19 -
.../methodJira/actions/exportTicketsAsJson.py | 17 -
.../actions/importTicketsFromJson.py | 18 -
.../methodJira/actions/mergeTicketData.py | 19 -
.../methodJira/actions/parseCsvContent.py | 18 -
.../methodJira/actions/parseExcelContent.py | 18 -
.../workflows/methods/methodOutlook.py.old | 1904 -----------
.../composeAndDraftEmailWithContext.py | 23 -
.../methodOutlook/actions/readEmails.py | 20 -
.../methodOutlook/actions/searchEmails.py | 20 -
.../methodOutlook/actions/sendDraftEmail.py | 17 -
.../workflows/methods/methodSharepoint.py.old | 2840 -----------------
.../actions/analyzeFolderUsage.py | 20 -
.../methodSharepoint/actions/copyFile.py | 21 -
.../actions/downloadFileByPath.py | 18 -
.../actions/findDocumentPath.py | 19 -
.../methodSharepoint/actions/findSiteByUrl.py | 18 -
.../methodSharepoint/actions/listDocuments.py | 18 -
.../methodSharepoint/actions/readDocuments.py | 28 -
.../actions/uploadDocument.py | 18 -
.../methodSharepoint/actions/uploadFile.py | 20 -
.../processing/adaptive/contentValidator.py | 97 +-
.../workflows/processing/modes/modeDynamic.py | 27 +-
.../test10_document_generation_formats.py | 9 +-
51 files changed, 2137 insertions(+), 7955 deletions(-)
create mode 100644 modules/services/serviceAi/subLoopingUseCases.py
create mode 100644 modules/services/serviceGeneration/paths/codePath.py
create mode 100644 modules/services/serviceGeneration/paths/documentPath.py
create mode 100644 modules/services/serviceGeneration/paths/imagePath.py
delete mode 100644 modules/workflows/methods/methodAi.py.old
create mode 100644 modules/workflows/methods/methodAi/actions/generateCode.py
delete mode 100644 modules/workflows/methods/methodContext.py.old
delete mode 100644 modules/workflows/methods/methodJira.py.old
delete mode 100644 modules/workflows/methods/methodOutlook.py.old
delete mode 100644 modules/workflows/methods/methodSharepoint.py.old
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 65bae155..cb54a42b 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -14,10 +14,6 @@ from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata,
from modules.datamodels.datamodelDocument import RenderedDocument
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.jsonUtils import (
- extractJsonString,
- repairBrokenJson,
- extractSectionsFromDocument,
- buildContinuationContext,
parseJsonWithModel
)
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
@@ -209,7 +205,7 @@ Respond with ONLY a JSON object in this exact format:
processingMode=ProcessingModeEnum.BASIC
)
- async def _callAiWithLooping(
+ async def callAiWithLooping(
self,
prompt: str,
options: AiCallOptions,
@@ -218,11 +214,12 @@ Respond with ONLY a JSON object in this exact format:
promptArgs: Optional[Dict[str, Any]] = None,
operationId: Optional[str] = None,
userPrompt: Optional[str] = None,
- contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content
+ contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content
+            useCaseId: Optional[str] = None  # Explicit use case ID for the generic looping system (validated in AiCallLooper)
) -> str:
- """Delegate to AiCallLooper."""
+ """Public method: Delegate to AiCallLooper for AI calls with looping support."""
return await self.aiCallLooper.callAiWithLooping(
- prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts
+ prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts, useCaseId
)
async def _defineKpisFromPrompt(
@@ -341,49 +338,21 @@ Respond with ONLY a JSON object in this exact format:
prompt: str,
options: AiCallOptions,
title: Optional[str],
- aiOperationId: str
+ parentOperationId: Optional[str]
) -> AiResponse:
- """Handle IMAGE_GENERATE operation type."""
- self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
+ """Handle IMAGE_GENERATE operation type using image generation path."""
+ from modules.services.serviceGeneration.paths.imagePath import ImageGenerationPath
- request = AiCallRequest(
- prompt=prompt,
- context="",
- options=options
- )
+ imagePath = ImageGenerationPath(self.services)
- response = await self.callAi(request)
+ # Extract format from options
+    imageFormat = options.resultFormat or "png"  # avoid shadowing the builtin name 'format'
- if not response.content:
- errorMsg = f"No image data returned: {response.content}"
- logger.error(f"Error in AI image generation: {errorMsg}")
- self.services.chat.progressLogFinish(aiOperationId, False)
- raise ValueError(errorMsg)
-
- imageDoc = DocumentData(
- documentName="generated_image.png",
- documentData=response.content,
- mimeType="image/png"
- )
-
- metadata = AiResponseMetadata(
- title=title or "Generated Image",
- operationType=options.operationType.value
- )
-
- self.services.chat.storeWorkflowStat(
- self.services.workflow,
- response,
- "ai.generate.image"
- )
-
- self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated")
- self.services.chat.progressLogFinish(aiOperationId, True)
-
- return AiResponse(
- content=response.content,
- metadata=metadata,
- documents=[imageDoc]
+ return await imagePath.generateImages(
+ userPrompt=prompt,
+        format=imageFormat,
+ title=title,
+ parentOperationId=parentOperationId
)
async def _handleWebOperation(
@@ -441,54 +410,54 @@ Respond with ONLY a JSON object in this exact format:
return intent
return None
- async def _clarifyDocumentIntents(
+ async def clarifyDocumentIntents(
self,
documents: List[ChatDocument],
userPrompt: str,
actionParameters: Dict[str, Any],
parentOperationId: str
) -> List[DocumentIntent]:
- """Delegate to DocumentIntentAnalyzer."""
+ """Public method: Delegate to DocumentIntentAnalyzer."""
return await self.intentAnalyzer.clarifyDocumentIntents(
documents, userPrompt, actionParameters, parentOperationId
)
- async def _extractAndPrepareContent(
+ async def extractAndPrepareContent(
self,
documents: List[ChatDocument],
documentIntents: List[DocumentIntent],
parentOperationId: str
) -> List[ContentPart]:
- """Delegate to ContentExtractor."""
+ """Public method: Delegate to ContentExtractor."""
return await self.contentExtractor.extractAndPrepareContent(
documents, documentIntents, parentOperationId, self._getIntentForDocument
)
- async def _generateStructure(
+ async def generateStructure(
self,
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: str,
parentOperationId: str
) -> Dict[str, Any]:
- """Delegate to StructureGenerator."""
+ """Public method: Delegate to StructureGenerator."""
return await self.structureGenerator.generateStructure(
userPrompt, contentParts, outputFormat, parentOperationId
)
- async def _fillStructure(
+ async def fillStructure(
self,
structure: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
parentOperationId: str
) -> Dict[str, Any]:
- """Delegate to StructureFiller."""
+ """Public method: Delegate to StructureFiller."""
return await self.structureFiller.fillStructure(
structure, contentParts, userPrompt, parentOperationId
)
- async def _renderResult(
+ async def renderResult(
self,
filledStructure: Dict[str, Any],
outputFormat: str,
@@ -577,13 +546,14 @@ Respond with ONLY a JSON object in this exact format:
documentIntents: Optional[List[DocumentIntent]] = None,
outputFormat: Optional[str] = None,
title: Optional[str] = None,
- parentOperationId: Optional[str] = None
+ parentOperationId: Optional[str] = None,
+ generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection)
) -> AiResponse:
"""
- Einheitliche AI-Content-Verarbeitung - Single Entry Point für alle AI-Actions.
+ Unified AI content generation with explicit intent requirement.
- Alle AI-Actions (ai.process, ai.generateDocument, etc.) routen hier durch.
- Sie unterscheiden sich nur in Parametern, nicht in Logik.
+ All AI-Actions (ai.process, ai.generateDocument, etc.) route through here.
+ They differ only in parameters, not in logic.
Args:
prompt: The main prompt for the AI call
@@ -594,6 +564,8 @@ Respond with ONLY a JSON object in this exact format:
outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx')
title: Optional title for generated documents
parentOperationId: Optional parent operation ID for hierarchical logging
+            generationIntent: REQUIRED explicit intent ("document" | "code") for DATA_GENERATE.
+                NO auto-detection - actions must explicitly specify the intent.
Returns:
AiResponse with content, metadata, and optional documents
@@ -625,111 +597,73 @@ Respond with ONLY a JSON object in this exact format:
# Route to operation-specific handlers
if opType == OperationTypeEnum.IMAGE_GENERATE:
- return await self._handleImageGeneration(prompt, options, title, aiOperationId)
+ # Image generation - route to image path
+ return await self._handleImageGeneration(prompt, options, title, parentOperationId)
if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL:
return await self._handleWebOperation(prompt, options, opType, aiOperationId)
- # Dokument-Generierungs-Pfad
- options.compressPrompt = False
- options.compressContext = False
-
- # Schritt 5A: Kläre Dokument-Intents
- documents = []
- if documentList:
- documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
-
- if not documentIntents and documents:
- documentIntents = await self._clarifyDocumentIntents(
- documents,
- prompt,
- {"outputFormat": outputFormat},
- aiOperationId
- )
-
- # Schritt 5B: Extrahiere und bereite Content vor
- if documents:
- preparedContentParts = await self._extractAndPrepareContent(
- documents,
- documentIntents or [],
- aiOperationId
- )
-
- # Merge mit bereitgestellten contentParts (falls vorhanden)
- if contentParts:
- # Prüfe auf pre-extracted Content
- for part in contentParts:
- if part.metadata.get("skipExtraction", False):
- # Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig
- part.metadata.setdefault("contentFormat", "extracted")
- part.metadata.setdefault("isPreExtracted", True)
- preparedContentParts.extend(contentParts)
-
- contentParts = preparedContentParts
-
- # Schritt 5C: Generiere Struktur
- structure = await self._generateStructure(
- prompt,
- contentParts or [],
- outputFormat,
- aiOperationId
- )
-
- # Schritt 5D: Fülle Struktur
- # Language will be extracted from services (user intention analysis) in fillStructure
- filledStructure = await self._fillStructure(
- structure,
- contentParts or [],
- prompt,
- aiOperationId
- )
-
- # Schritt 5E: Rendere Resultat
- # Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
- renderedDocuments = await self._renderResult(
- filledStructure,
- outputFormat,
- title or "Generated Document",
- prompt,
- aiOperationId
- )
-
- # Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
- documentDataList = []
- for renderedDoc in renderedDocuments:
- try:
- # Erstelle DocumentData für jedes gerenderte Dokument
- docDataObj = DocumentData(
- documentName=renderedDoc.filename,
- documentData=renderedDoc.documentData,
- mimeType=renderedDoc.mimeType,
- sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
+ # Data generation - REQUIRES explicit generationIntent
+ if opType == OperationTypeEnum.DATA_GENERATE:
+ if not generationIntent:
+ errorMsg = (
+ "generationIntent is required for DATA_GENERATE operation. "
+ "Actions must explicitly specify 'document' or 'code' intent. "
+ "No auto-detection - use qualified actions (ai.generateDocument, ai.generateCode)."
+ )
+ logger.error(errorMsg)
+ self.services.chat.progressLogFinish(aiOperationId, False)
+ raise ValueError(errorMsg)
+
+ # Route based on explicit intent (no auto-detection, no fallback)
+ if generationIntent == "code":
+ # Route to code generation path
+ return await self._handleCodeGeneration(
+ prompt=prompt,
+ options=options,
+ contentParts=contentParts,
+ outputFormat=outputFormat,
+ title=title,
+ parentOperationId=parentOperationId
+ )
+ else:
+ # Route to document generation path (existing behavior)
+ return await self._handleDocumentGeneration(
+ prompt=prompt,
+ options=options,
+ documentList=documentList,
+ documentIntents=documentIntents,
+ contentParts=contentParts,
+ outputFormat=outputFormat,
+ title=title,
+ parentOperationId=parentOperationId
)
- documentDataList.append(docDataObj)
- logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
- except Exception as e:
- logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
- if not documentDataList:
- raise ValueError("No documents were rendered")
+ # DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
+ if opType == OperationTypeEnum.DATA_EXTRACT:
+ return await self._handleDataExtraction(
+ prompt=prompt,
+ options=options,
+ documentList=documentList,
+ documentIntents=documentIntents,
+ contentParts=contentParts,
+ outputFormat=outputFormat,
+ title=title,
+ parentOperationId=parentOperationId
+ )
- metadata = AiResponseMetadata(
- title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
- operationType=opType.value
- )
-
- # Debug-Log (harmonisiert)
- self.services.utils.writeDebugFile(
- json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
- "document_generation_response"
- )
-
- self.services.chat.progressLogFinish(aiOperationId, True)
-
- return AiResponse(
- content=json.dumps(filledStructure),
- metadata=metadata,
- documents=documentDataList
+ # Other operation types (DATA_ANALYSE, etc.) - existing logic
+ # Fallback to document generation for backward compatibility (should not happen)
+ logger.warning(f"Unhandled operation type: {opType}, falling back to document generation")
+ return await self._handleDocumentGeneration(
+ prompt=prompt,
+ options=options,
+ documentList=documentList,
+ documentIntents=documentIntents,
+ contentParts=contentParts,
+ outputFormat=outputFormat,
+ title=title,
+ parentOperationId=parentOperationId
)
except Exception as e:
@@ -737,6 +671,166 @@ Respond with ONLY a JSON object in this exact format:
self.services.chat.progressLogFinish(aiOperationId, False)
raise
+ async def _handleDataExtraction(
+ self,
+ prompt: str,
+ options: AiCallOptions,
+ documentList: Optional[Any],
+ documentIntents: Optional[List[DocumentIntent]],
+ contentParts: Optional[List[ContentPart]],
+ outputFormat: str,
+ title: str,
+ parentOperationId: Optional[str]
+ ) -> AiResponse:
+ """
+ Handle DATA_EXTRACT: Extract content from documents (no AI), then process with AI.
+ This is the original flow: extract all documents first, then process contentParts with AI.
+ """
+ import time
+
+ # Create operation ID
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ extractOperationId = f"data_extract_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ extractOperationId,
+ "Data Extraction",
+ "Extraction",
+ f"Format: {outputFormat}",
+ parentOperationId=parentOperationId
+ )
+
+ try:
+ # Step 1: Get documents from documentList
+ documents = []
+ if documentList:
+ documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
+
+ # Step 2: Clarify document intents (if not provided) - REQUIRED for all documents
+ if not documentIntents and documents:
+ documentIntents = await self.clarifyDocumentIntents(
+ documents,
+ prompt,
+ {"outputFormat": outputFormat},
+ extractOperationId
+ )
+
+ # Step 3: Extract and prepare content (NO AI - pure extraction) - REQUIRED for all documents
+ if documents:
+ preparedContentParts = await self.extractAndPrepareContent(
+ documents,
+ documentIntents or [],
+ extractOperationId
+ )
+
+ # Merge with provided contentParts (if any)
+ if contentParts:
+ for part in contentParts:
+ if part.metadata.get("skipExtraction", False):
+ part.metadata.setdefault("contentFormat", "extracted")
+ part.metadata.setdefault("isPreExtracted", True)
+ preparedContentParts.extend(contentParts)
+
+ contentParts = preparedContentParts
+
+ # Step 4: Process extracted contentParts with AI (simple text processing, no structure generation)
+ if not contentParts:
+ raise ValueError("No content extracted from documents")
+
+ # Use simple AI call to process extracted content
+ # Prepare content for AI processing
+ contentText = "\n\n".join([
+ f"[Document: {part.metadata.get('documentName', 'Unknown')}]\n{part.data}"
+ for part in contentParts
+ if part.data
+ ])
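+
+            # Resulting layout (illustrative only; document names are hypothetical):
+            #   [Document: report.pdf]
+            #   ...extracted text...
+            #
+            #   [Document: notes.txt]
+            #   ...extracted text...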
+
+ # Call AI with extracted content
+ aiRequest = AiCallRequest(
+ prompt=f"{prompt}\n\nExtracted Content:\n{contentText}",
+ context="",
+ options=options
+ )
+
+ aiResponse = await self.callAi(aiRequest)
+
+ # Create response document
+ resultDocument = DocumentData(
+ documentName=f"{title or 'extracted_data'}.{outputFormat}",
+ documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
+                mimeType={"txt": "text/plain", "json": "application/json", "csv": "text/csv"}.get(outputFormat, "application/octet-stream")
+ )
+
+ metadata = AiResponseMetadata(
+ title=title or "Extracted Data",
+ operationType=OperationTypeEnum.DATA_EXTRACT.value
+ )
+
+ self.services.chat.progressLogFinish(extractOperationId, True)
+
+ return AiResponse(
+ content=aiResponse.content if isinstance(aiResponse.content, str) else aiResponse.content.decode('utf-8', errors='replace'),
+ metadata=metadata,
+ documents=[resultDocument]
+ )
+
+ except Exception as e:
+ logger.error(f"Error in data extraction: {str(e)}")
+ self.services.chat.progressLogFinish(extractOperationId, False)
+ raise
+
+ async def _handleCodeGeneration(
+ self,
+ prompt: str,
+ options: AiCallOptions,
+ contentParts: Optional[List[ContentPart]],
+ outputFormat: str,
+ title: str,
+ parentOperationId: Optional[str]
+ ) -> AiResponse:
+ """Handle code generation using code generation path."""
+ from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath
+
+ codePath = CodeGenerationPath(self.services)
+ return await codePath.generateCode(
+ userPrompt=prompt,
+ outputFormat=outputFormat,
+ contentParts=contentParts,
+ title=title or "Generated Code",
+ parentOperationId=parentOperationId
+ )
+
+ async def _handleDocumentGeneration(
+ self,
+ prompt: str,
+ options: AiCallOptions,
+ documentList: Optional[Any],
+ documentIntents: Optional[List[DocumentIntent]],
+ contentParts: Optional[List[ContentPart]],
+ outputFormat: str,
+ title: str,
+ parentOperationId: Optional[str]
+ ) -> AiResponse:
+ """Handle document generation using document generation path."""
+ from modules.services.serviceGeneration.paths.documentPath import DocumentGenerationPath
+
+ # Set compression options for document generation
+ options.compressPrompt = False
+ options.compressContext = False
+
+ documentPath = DocumentGenerationPath(self.services)
+ return await documentPath.generateDocument(
+ userPrompt=prompt,
+ documentList=documentList,
+ documentIntents=documentIntents,
+ contentParts=contentParts,
+ outputFormat=outputFormat,
+ title=title or "Generated Document",
+ parentOperationId=parentOperationId
+ )
+
+
def _determineDocumentName(
self,
filledStructure: Dict[str, Any],
diff --git a/modules/services/serviceAi/subAiCallLooping.py b/modules/services/serviceAi/subAiCallLooping.py
index 6e2c90b5..63b0c806 100644
--- a/modules/services/serviceAi/subAiCallLooping.py
+++ b/modules/services/serviceAi/subAiCallLooping.py
@@ -16,6 +16,7 @@ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, Operati
from modules.datamodels.datamodelExtraction import ContentPart
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
+from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry
logger = logging.getLogger(__name__)
@@ -28,6 +29,7 @@ class AiCallLooper:
self.services = services
self.aiService = aiService
self.responseParser = responseParser
+ self.useCaseRegistry = LoopingUseCaseRegistry() # Initialize use case registry
async def callAiWithLooping(
self,
@@ -38,7 +40,8 @@ class AiCallLooper:
promptArgs: Optional[Dict[str, Any]] = None,
operationId: Optional[str] = None,
userPrompt: Optional[str] = None,
- contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content
+ contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content
+        useCaseId: Optional[str] = None  # REQUIRED: Explicit use case ID - no auto-detection, no fallback
) -> str:
"""
Shared core function for AI calls with repair-based looping system.
@@ -53,10 +56,31 @@ class AiCallLooper:
operationId: Optional operation ID for progress tracking
userPrompt: Optional user prompt for KPI definition
contentParts: Optional content parts for first iteration
+ useCaseId: REQUIRED: Explicit use case ID - no auto-detection, no fallback
Returns:
Complete AI response after all iterations
"""
+ # REQUIRED: useCaseId must be provided - no auto-detection, no fallback
+ if not useCaseId:
+ errorMsg = (
+ "useCaseId is REQUIRED for callAiWithLooping. "
+ "No auto-detection - must explicitly specify use case ID. "
+ f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}"
+ )
+ logger.error(errorMsg)
+ raise ValueError(errorMsg)
+
+ # Validate use case exists
+ useCase = self.useCaseRegistry.get(useCaseId)
+ if not useCase:
+ errorMsg = (
+ f"Use case '{useCaseId}' not found in registry. "
+ f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}"
+ )
+ logger.error(errorMsg)
+ raise ValueError(errorMsg)
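+
+        # Illustrative call shape (hypothetical caller; IDs come from LoopingUseCaseRegistry):
+        #   await looper.callAiWithLooping(prompt, options, debugPrefix="...",
+        #                                  useCaseId="section_content")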
+
maxIterations = 50 # Prevent infinite loops
iteration = 0
allSections = [] # Accumulate all sections across iterations
@@ -199,36 +223,31 @@ class AiCallLooper:
# Store raw response for continuation (even if broken)
lastRawResponse = result
- # Check if this is section content generation (has "elements" not "sections")
- # Section content generation returns JSON with "elements" array, not document structure with "sections"
- isSectionContentGeneration = False
- parsedJsonForSection = None
- extractedJsonForSection = None
+ # Parse JSON for use case handling
+ parsedJsonForUseCase = None
+ extractedJsonForUseCase = None
+
try:
- extractedJsonForSection = extractJsonString(result)
- parsedJson, parseError, _ = tryParseJson(extractedJsonForSection)
+ extractedJsonForUseCase = extractJsonString(result)
+ parsedJson, parseError, _ = tryParseJson(extractedJsonForUseCase)
if parseError is None and parsedJson:
- parsedJsonForSection = parsedJson
- # Check if JSON has "elements" (section content) or "sections" (document structure)
- if isinstance(parsedJson, dict):
- if "elements" in parsedJson:
- isSectionContentGeneration = True
- elif isinstance(parsedJson, list) and len(parsedJson) > 0:
- # Check if it's a list of elements (section content format)
- if isinstance(parsedJson[0], dict) and "type" in parsedJson[0]:
- isSectionContentGeneration = True
+ parsedJsonForUseCase = parsedJson
except Exception:
pass
- if isSectionContentGeneration:
- # This is section content generation - return the JSON directly
- # No need to extract sections, just return the complete JSON string
- logger.info(f"Iteration {iteration}: Section content generation detected (elements found), returning JSON directly")
+ # Handle use cases that return JSON directly (no section extraction needed)
+ directReturnUseCases = ["section_content", "chapter_structure", "code_structure", "code_content", "image_batch"]
+ if useCaseId in directReturnUseCases:
+ logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - returning JSON directly")
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, True)
- # Note: Debug files (_prompt and _response) are already written above for iteration 1
- # No need to write _final_result as it's redundant with _response
- final_json = json.dumps(parsedJsonForSection, indent=2, ensure_ascii=False) if parsedJsonForSection else (extractedJsonForSection or result)
+
+ final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
+
+ # Write final result for chapter structure and code structure (section_content skips it)
+ if useCaseId in ["chapter_structure", "code_structure"]:
+ self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
+
return final_json
# Extract sections from response (handles both valid and broken JSON)
diff --git a/modules/services/serviceAi/subLoopingUseCases.py b/modules/services/serviceAi/subLoopingUseCases.py
new file mode 100644
index 00000000..c52ed1bc
--- /dev/null
+++ b/modules/services/serviceAi/subLoopingUseCases.py
@@ -0,0 +1,231 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Generic Looping Use Case System
+
+Provides parametrized looping infrastructure supporting different JSON formats and use cases.
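+
+Illustrative sketch (names are from this module; the surrounding wiring is hypothetical):
+
+    registry = LoopingUseCaseRegistry()
+    useCase = registry.get("code_structure")
+    if useCase and not useCase.supportsAccumulation:
+        # the looper returns the parsed JSON directly, without section merging
+        ...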
+"""
+
+import logging
+from dataclasses import dataclass, field
+from typing import Dict, Any, List, Optional, Callable
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class LoopingUseCase:
+ """Configuration for a specific looping use case."""
+
+ # Identification
+ useCaseId: str # "section_content", "chapter_structure", "document_structure", "code_structure", "code_content", "image_batch"
+
+ # JSON Format Detection
+ jsonTemplate: Dict[str, Any] # Expected JSON structure template
+ detectionKeys: List[str] # Keys to check for format detection (e.g., ["elements"], ["chapters"], ["files"])
+ detectionPath: str # JSONPath to check (e.g., "documents[0].chapters", "files[0].content")
+
+ # Prompt Building
+ initialPromptBuilder: Optional[Callable] = None # Function to build initial prompt
+ continuationPromptBuilder: Optional[Callable] = None # Function to build continuation prompt
+
+ # Accumulation & Merging
+ accumulator: Optional[Callable] = None # Function to accumulate fragments
+ merger: Optional[Callable] = None # Function to merge accumulated data
+
+ # Continuation Context
+ continuationContextBuilder: Optional[Callable] = None # Build continuation context for this format
+
+ # Result Building
+ resultBuilder: Optional[Callable] = None # Build final result from accumulated data
+
+ # Metadata
+ supportsAccumulation: bool = True # Whether this use case supports accumulation
+ requiresExtraction: bool = False # Whether this requires extraction (like sections)
+
+
+class LoopingUseCaseRegistry:
+ """Registry of all looping use cases."""
+
+ def __init__(self):
+ self.useCases: Dict[str, LoopingUseCase] = {}
+ self._registerDefaultUseCases()
+
+ def register(self, useCase: LoopingUseCase):
+ """Register a new use case."""
+ self.useCases[useCase.useCaseId] = useCase
+ logger.debug(f"Registered looping use case: {useCase.useCaseId}")
+
+ def get(self, useCaseId: str) -> Optional[LoopingUseCase]:
+ """Get use case by ID."""
+ return self.useCases.get(useCaseId)
+
+ def detectUseCase(self, parsedJson: Dict[str, Any]) -> Optional[str]:
+ """Detect which use case matches the JSON structure."""
+ for useCaseId, useCase in self.useCases.items():
+ if self._matchesFormat(parsedJson, useCase):
+ return useCaseId
+ return None
+
+    def _matchesFormat(self, jsonData: Dict[str, Any], useCase: LoopingUseCase) -> bool:
+ """Check if JSON matches use case format."""
+ # Check top-level keys
+ for key in useCase.detectionKeys:
+            if key in jsonData:
+ return True
+
+ # Check nested path using simple dictionary traversal (no jsonpath_ng needed)
+ if useCase.detectionPath:
+ try:
+ # Simple path matching without jsonpath_ng
+ # Format: "documents[0].chapters" or "files[0].content"
+ pathParts = useCase.detectionPath.split(".")
+                current = jsonData
+
+ for part in pathParts:
+ # Handle array indices like "documents[0]"
+ if "[" in part and "]" in part:
+ key = part.split("[")[0]
+ index = int(part.split("[")[1].split("]")[0])
+ if isinstance(current, dict) and key in current:
+ if isinstance(current[key], list) and 0 <= index < len(current[key]):
+ current = current[key][index]
+ else:
+ return False
+ else:
+ return False
+ else:
+ # Regular key access
+ if isinstance(current, dict) and part in current:
+ current = current[part]
+ else:
+ return False
+
+ # If we successfully traversed the path, it matches
+ return True
+ except Exception as e:
+ logger.debug(f"Path matching failed for {useCase.useCaseId}: {e}")
+
+ return False
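+
+    # Illustrative traversal of _matchesFormat (hypothetical payloads):
+    #   detectionPath "documents[0].chapters" matches {"documents": [{"chapters": []}]}
+    #   but not {"files": [{"content": ""}]}, where traversal stops at "documents"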
+
+ def _registerDefaultUseCases(self):
+ """Register default use cases."""
+
+ # Use Case 1: Section Content Generation
+ # Returns JSON with "elements" array directly
+ self.register(LoopingUseCase(
+ useCaseId="section_content",
+ jsonTemplate={"elements": []},
+ detectionKeys=["elements"],
+ detectionPath="",
+ initialPromptBuilder=None, # Will use default prompt builder
+ continuationPromptBuilder=None, # Will use default continuation builder
+ accumulator=None, # Direct return, no accumulation
+ merger=None,
+ continuationContextBuilder=None, # Will use default continuation context
+ resultBuilder=None, # Return JSON directly
+ supportsAccumulation=False,
+ requiresExtraction=False
+ ))
+
+ # Use Case 2: Chapter Structure Generation
+ # Returns JSON with "documents[0].chapters" structure
+ self.register(LoopingUseCase(
+ useCaseId="chapter_structure",
+ jsonTemplate={"documents": [{"chapters": []}]},
+ detectionKeys=["chapters"],
+ detectionPath="documents[0].chapters",
+ initialPromptBuilder=None,
+ continuationPromptBuilder=None,
+ accumulator=None, # Direct return, no accumulation
+ merger=None,
+ continuationContextBuilder=None,
+ resultBuilder=None, # Return JSON directly
+ supportsAccumulation=False,
+ requiresExtraction=False
+ ))
+
+ # Use Case 3: Document Structure Generation
+ # Returns JSON with "documents[0].sections" structure, requires extraction and accumulation
+ self.register(LoopingUseCase(
+ useCaseId="document_structure",
+ jsonTemplate={"documents": [{"sections": []}]},
+ detectionKeys=["sections"],
+ detectionPath="documents[0].sections",
+ initialPromptBuilder=None,
+ continuationPromptBuilder=None,
+ accumulator=None, # Will use default accumulator
+ merger=None, # Will use default merger
+ continuationContextBuilder=None,
+ resultBuilder=None, # Will use default result builder
+ supportsAccumulation=True,
+ requiresExtraction=True
+ ))
+
+ # Use Case 4: Code Structure Generation (NEW)
+ self.register(LoopingUseCase(
+ useCaseId="code_structure",
+ jsonTemplate={
+ "metadata": {
+ "language": "",
+ "projectType": "single_file|multi_file",
+ "projectName": ""
+ },
+ "files": [
+ {
+ "id": "",
+ "filename": "",
+ "fileType": "",
+ "dependencies": [],
+ "imports": [],
+ "functions": [],
+ "classes": []
+ }
+ ]
+ },
+ detectionKeys=["files"],
+ detectionPath="files",
+ initialPromptBuilder=None,
+ continuationPromptBuilder=None,
+ accumulator=None, # Direct return
+ merger=None,
+ continuationContextBuilder=None,
+ resultBuilder=None,
+ supportsAccumulation=False,
+ requiresExtraction=False
+ ))
+
+ # Use Case 5: Code Content Generation (NEW)
+ self.register(LoopingUseCase(
+ useCaseId="code_content",
+ jsonTemplate={"files": [{"content": "", "functions": []}]},
+ detectionKeys=["content", "functions"],
+ detectionPath="files[0].content",
+ initialPromptBuilder=None,
+ continuationPromptBuilder=None,
+ accumulator=None, # Will use default accumulator
+ merger=None, # Will use default merger
+ continuationContextBuilder=None,
+ resultBuilder=None, # Will use default result builder
+ supportsAccumulation=True,
+ requiresExtraction=False
+ ))
+
+ # Use Case 6: Image Batch Generation (NEW)
+ self.register(LoopingUseCase(
+ useCaseId="image_batch",
+ jsonTemplate={"images": []},
+ detectionKeys=["images"],
+ detectionPath="images",
+ initialPromptBuilder=None,
+ continuationPromptBuilder=None,
+ accumulator=None, # Direct return
+ merger=None,
+ continuationContextBuilder=None,
+ resultBuilder=None,
+ supportsAccumulation=False,
+ requiresExtraction=False
+ ))
+
+ logger.info(f"Registered {len(self.useCases)} default looping use cases")
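+
+        # Further use cases can be registered at runtime, e.g. (hypothetical):
+        #   self.register(LoopingUseCase(
+        #       useCaseId="table_batch",
+        #       jsonTemplate={"tables": []},
+        #       detectionKeys=["tables"],
+        #       detectionPath="tables"
+        #   ))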
+
diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py
index 138f6572..5a917279 100644
--- a/modules/services/serviceAi/subStructureFilling.py
+++ b/modules/services/serviceAi/subStructureFilling.py
@@ -23,11 +23,20 @@ logger = logging.getLogger(__name__)
class StructureFiller:
"""Handles filling document structure with content."""
+ # Default concurrency limit for parallel generation (chapters/sections)
+ DEFAULT_MAX_CONCURRENT_GENERATION = 16
+
def __init__(self, services, aiService):
"""Initialize StructureFiller with service center and AI service access."""
self.services = services
self.aiService = aiService
+    def _getMaxConcurrentGeneration(self, options: Optional[AiCallOptions] = None) -> int:
+        """Get max concurrent generation limit, configurable via options."""
+        limit = getattr(options, 'maxConcurrentGeneration', None) if options else None
+        return limit if limit else self.DEFAULT_MAX_CONCURRENT_GENERATION
+
def _getUserLanguage(self) -> str:
"""Get user language for document generation"""
try:
@@ -101,14 +110,19 @@ class StructureFiller:
try:
filledStructure = copy.deepcopy(structure)
+ # Get options from AI service if available (for concurrency control)
+ # Default concurrency limit (16) will be used if options is None
+ options = None
+ # Note: Options can be passed via fillStructure if needed in the future
+
# Phase 5D.1: Generate the sections structure for each chapter
filledStructure = await self._generateChapterSectionsStructure(
- filledStructure, contentParts, userPrompt, fillOperationId, language
+ filledStructure, contentParts, userPrompt, fillOperationId, language, options
)
# Phase 5D.2: Fill the sections with content parts
filledStructure = await self._fillChapterSections(
- filledStructure, contentParts, userPrompt, fillOperationId, language
+ filledStructure, contentParts, userPrompt, fillOperationId, language, options
)
# Flattening: convert chapters to sections
@@ -243,7 +257,8 @@ class StructureFiller:
contentParts: List[ContentPart],
userPrompt: str,
parentOperationId: str,
- language: str
+ language: str,
+ options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
"""
Phase 5D.1: Generates the sections structure for each chapter (without content) in parallel.
@@ -252,6 +267,10 @@ class StructureFiller:
# Count total chapters for progress tracking
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
+ # Get concurrency limit
+ maxConcurrent = self._getMaxConcurrentGeneration(options)
+ semaphore = asyncio.Semaphore(maxConcurrent)
+
# Collect all chapters with their indices for parallel processing
chapterTasks = []
chapterIndex = 0
@@ -266,25 +285,31 @@ class StructureFiller:
contentPartIds = chapter.get("contentPartIds", [])
contentPartInstructions = chapter.get("contentPartInstructions", {})
- # Create task for parallel processing
- task = self._generateSingleChapterSectionsStructure(
- chapter=chapter,
- chapterIndex=chapterIndex,
- chapterId=chapterId,
- chapterLevel=chapterLevel,
- chapterTitle=chapterTitle,
- generationHint=generationHint,
- contentPartIds=contentPartIds,
- contentPartInstructions=contentPartInstructions,
- contentParts=contentParts,
- userPrompt=userPrompt,
- language=language,
- parentOperationId=parentOperationId,
- totalChapters=totalChapters
+ # Create task for parallel processing with semaphore
+ async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions):
+ async with semaphore:
+ return await self._generateSingleChapterSectionsStructure(
+ chapter=chapter,
+ chapterIndex=chapterIndex,
+ chapterId=chapterId,
+ chapterLevel=chapterLevel,
+ chapterTitle=chapterTitle,
+ generationHint=generationHint,
+ contentPartIds=contentPartIds,
+ contentPartInstructions=contentPartInstructions,
+ contentParts=contentParts,
+ userPrompt=userPrompt,
+ language=language,
+ parentOperationId=parentOperationId,
+ totalChapters=totalChapters
+ )
+
+ task = processChapterWithSemaphore(
+ chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions
)
chapterTasks.append((chapterIndex, chapter, task))
- # Execute all chapter tasks in parallel
+ # Execute all chapter tasks in parallel with concurrency control
if chapterTasks:
# Create list of tasks (without indices for gather)
tasks = [task for _, _, task in chapterTasks]
@@ -386,11 +411,25 @@ class StructureFiller:
if generatedElements:
elements.extend(generatedElements)
else:
- # Fallback: Try to parse JSON response directly
+ # Fallback: Try to parse JSON response directly with repair logic
try:
- fallbackElements = json.loads(
- self.services.utils.jsonExtractString(aiResponse.content)
- )
+ from modules.shared.jsonUtils import tryParseJson, repairBrokenJson
+
+ # Use tryParseJson which handles extraction and basic parsing
+ fallbackElements, parseError, cleanedStr = tryParseJson(aiResponse.content)
+
+ # If parsing failed, try repair
+ if parseError and isinstance(aiResponse.content, str):
+ logger.warning(f"Initial JSON parse failed for section {sectionId}, attempting repair: {str(parseError)}")
+ repairedJson = repairBrokenJson(aiResponse.content)
+ if repairedJson:
+ fallbackElements = repairedJson
+ parseError = None
+ logger.info(f"Successfully repaired JSON for section {sectionId}")
+
+ if parseError:
+ raise parseError
+
if isinstance(fallbackElements, list):
elements.extend(fallbackElements)
elif isinstance(fallbackElements, dict) and "elements" in fallbackElements:
@@ -621,7 +660,7 @@ The JSON should be a fragment that can be merged with the previous response."""
processingMode=ProcessingModeEnum.DETAILED
)
- aiResponseJson = await self.aiService._callAiWithLooping(
+ aiResponseJson = await self.aiService.callAiWithLooping(
prompt=generationPrompt,
options=options,
debugPrefix=f"{chapterId}_section_{sectionId}",
@@ -638,11 +677,28 @@ The JSON should be a fragment that can be merged with the previous response."""
},
operationId=sectionOperationId,
userPrompt=userPrompt,
- contentParts=extractedParts
+ contentParts=extractedParts,
+ useCaseId="section_content" # REQUIRED: Explicit use case ID
)
try:
- parsedResponse = json.loads(self.services.utils.jsonExtractString(aiResponseJson))
+ # Use tryParseJson which handles extraction and basic parsing
+ from modules.shared.jsonUtils import tryParseJson, repairBrokenJson
+
+ parsedResponse, parseError, cleanedStr = tryParseJson(aiResponseJson)
+
+ # If parsing failed, try repair
+ if parseError and isinstance(aiResponseJson, str):
+ logger.warning(f"Initial JSON parse failed for section {sectionId}, attempting repair: {str(parseError)}")
+ repairedJson = repairBrokenJson(aiResponseJson)
+ if repairedJson:
+ parsedResponse = repairedJson
+ parseError = None
+ logger.info(f"Successfully repaired JSON for section {sectionId}")
+
+ if parseError:
+ raise parseError
+
if isinstance(parsedResponse, list):
generatedElements = parsedResponse
elif isinstance(parsedResponse, dict):
@@ -824,7 +880,7 @@ The JSON should be a fragment that can be merged with the previous response."""
processingMode=ProcessingModeEnum.DETAILED
)
- aiResponseJson = await self.aiService._callAiWithLooping(
+ aiResponseJson = await self.aiService.callAiWithLooping(
prompt=generationPrompt,
options=options,
debugPrefix=f"{chapterId}_section_{sectionId}",
@@ -841,7 +897,8 @@ The JSON should be a fragment that can be merged with the previous response."""
},
operationId=sectionOperationId,
userPrompt=userPrompt,
- contentParts=[]
+ contentParts=[],
+ useCaseId="section_content" # REQUIRED: Explicit use case ID
)
try:
@@ -1060,7 +1117,7 @@ The JSON should be a fragment that can be merged with the previous response."""
processingMode=ProcessingModeEnum.DETAILED
)
- aiResponseJson = await self.aiService._callAiWithLooping(
+ aiResponseJson = await self.aiService.callAiWithLooping(
prompt=generationPrompt,
options=options,
debugPrefix=f"{chapterId}_section_{sectionId}",
@@ -1077,7 +1134,8 @@ The JSON should be a fragment that can be merged with the previous response."""
},
operationId=sectionOperationId,
userPrompt=userPrompt,
- contentParts=[part]
+ contentParts=[part],
+ useCaseId="section_content" # REQUIRED: Explicit use case ID
)
try:
@@ -1200,7 +1258,8 @@ The JSON should be a fragment that can be merged with the previous response."""
contentParts: List[ContentPart],
userPrompt: str,
parentOperationId: str,
- language: str
+ language: str,
+ options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
"""
Phase 5D.2: Fills the sections with content parts.
@@ -1217,6 +1276,10 @@ The JSON should be a fragment that can be merged with the previous response."""
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
fillOperationId = parentOperationId
+ # Get concurrency limit for sections
+ maxConcurrent = self._getMaxConcurrentGeneration(options)
+ sectionSemaphore = asyncio.Semaphore(maxConcurrent)
+
# Helper function to calculate overall progress
def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections):
"""Calculate overall progress: 0.0 to 1.0"""
@@ -1251,28 +1314,34 @@ The JSON should be a fragment that can be merged with the previous response."""
parentOperationId=fillOperationId
)
- # Process sections within chapter in parallel
+ # Process sections within chapter in parallel with concurrency control
sectionTasks = []
for sectionIndex, section in enumerate(sections):
- # Create task for parallel processing
- task = self._processSingleSection(
- section=section,
- sectionIndex=sectionIndex,
- totalSections=totalSections,
- chapterIndex=chapterIndex,
- totalChapters=totalChapters,
- chapterId=chapterId,
- chapterOperationId=chapterOperationId,
- fillOperationId=fillOperationId,
- contentParts=contentParts,
- userPrompt=userPrompt,
- all_sections_list=all_sections_list,
- language=language,
- calculateOverallProgress=calculateOverallProgress
+ # Create task wrapper with semaphore for parallel processing
+ async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress):
+ async with sectionSemaphore:
+ return await self._processSingleSection(
+ section=section,
+ sectionIndex=sectionIndex,
+ totalSections=totalSections,
+ chapterIndex=chapterIndex,
+ totalChapters=totalChapters,
+ chapterId=chapterId,
+ chapterOperationId=chapterOperationId,
+ fillOperationId=fillOperationId,
+ contentParts=contentParts,
+ userPrompt=userPrompt,
+ all_sections_list=all_sections_list,
+ language=language,
+ calculateOverallProgress=calculateOverallProgress
+ )
+
+ task = processSectionWithSemaphore(
+ section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress
)
sectionTasks.append((sectionIndex, section, task))
- # Execute all section tasks in parallel
+ # Execute all section tasks in parallel with concurrency control
if sectionTasks:
# Create list of tasks (without indices for gather)
tasks = [task for _, _, task in sectionTasks]
diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py
index bee83706..cbabd2fc 100644
--- a/modules/services/serviceAi/subStructureGeneration.py
+++ b/modules/services/serviceAi/subStructureGeneration.py
@@ -9,9 +9,10 @@ Handles document structure generation, including:
"""
import json
import logging
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelExtraction import ContentPart
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
logger = logging.getLogger(__name__)
@@ -82,28 +83,89 @@ class StructureGenerator:
outputFormat=outputFormat
)
- # AI-Call für Chapter-Struktur-Generierung
- # Note: Debug logging is handled by callAiPlanning
- aiResponse = await self.aiService.callAiPlanning(
- prompt=structurePrompt,
- debugType="chapter_structure_generation"
+    # AI call for chapter structure generation with looping support
+    # Use callAiWithLooping instead of callAiPlanning to support continuation if the response is cut off
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_GENERATE,
+ priority=PriorityEnum.QUALITY,
+ processingMode=ProcessingModeEnum.DETAILED,
+ compressPrompt=False,
+ compressContext=False,
+ resultFormat="json"
)
- # Parse Struktur
- # Use tryParseJson which handles malformed JSON and unterminated strings
- extractedJson = self.services.utils.jsonExtractString(aiResponse)
+ # Create prompt builder for continuation support
+ async def buildChapterStructurePromptWithContinuation(
+ continuationContext: Optional[Dict[str, Any]] = None,
+ **kwargs
+ ) -> str:
+ """Build chapter structure prompt with optional continuation context."""
+ basePrompt = self._buildChapterStructurePrompt(
+ userPrompt=userPrompt,
+ contentParts=contentParts,
+ outputFormat=outputFormat
+ )
+
+ if continuationContext:
+ # Add continuation instructions
+ deliveredSummary = continuationContext.get("delivered_summary", "")
+ elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
+ cutOffElement = continuationContext.get("cut_off_element", "")
+
+ continuationText = f"{deliveredSummary}\n\n"
+ continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
+
+ if elementBeforeCutoff:
+ continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
+ continuationText += f"{elementBeforeCutoff}\n\n"
+
+ if cutOffElement:
+ continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
+ continuationText += f"{cutOffElement}\n\n"
+
+ continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
+ continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
+ continuationText += "Start directly with the next chapter that should follow.\n\n"
+
+ return f"""{basePrompt}
+
+{continuationText}
+
+Continue generating the remaining chapters now.
+"""
+ else:
+ return basePrompt
+
+ # Call AI with looping support
+ aiResponseJson = await self.aiService.callAiWithLooping(
+ prompt=structurePrompt,
+ options=options,
+ debugPrefix="chapter_structure_generation",
+ promptBuilder=buildChapterStructurePromptWithContinuation,
+ promptArgs={
+ "userPrompt": userPrompt,
+ "outputFormat": outputFormat,
+ "services": self.services
+ },
+ useCaseId="chapter_structure", # REQUIRED: Explicit use case ID
+ operationId=structureOperationId,
+ userPrompt=userPrompt,
+ contentParts=contentParts
+ )
+
+ # Parse the complete JSON response (looping system already handles completion)
+ extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
if parseError is not None:
- # Try to repair broken JSON (handles unterminated strings, incomplete structures, etc.)
- logger.warning(f"Initial JSON parsing failed: {str(parseError)}. Attempting repair...")
+ # Even with looping, try repair as fallback
+ logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
from modules.shared import jsonUtils
repairedJson = jsonUtils.repairBrokenJson(extractedJson)
if repairedJson:
- # Try parsing repaired JSON
parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
if parseError is None:
- logger.info("Successfully repaired and parsed JSON structure")
+ logger.info("Successfully repaired and parsed JSON structure after looping")
structure = parsedJson
else:
logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py
index 06877968..618a86e8 100644
--- a/modules/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/services/serviceExtraction/mainServiceExtraction.py
@@ -1254,9 +1254,10 @@ class ExtractionService:
aiObjects, # Pass interface for AI calls
progressCallback=None
) -> AiCallResponse:
- """Process content parts with model-aware chunking and AI calls.
+ """Process content parts with model-aware chunking and AI calls in parallel.
Moved from interfaceAiObjects.callWithContentParts() - entry point for content parts processing.
+ Uses parallel processing similar to section generation for better performance.
"""
prompt = request.prompt
options = request.options
@@ -1269,13 +1270,65 @@ class ExtractionService:
if not failoverModelList:
return self._createErrorResponse("No suitable models found", 0, 0)
- # Process each content part
+ totalParts = len(contentParts)
+ if totalParts == 0:
+ return self._createErrorResponse("No content parts to process", 0, 0)
+
+        # Shared mutable counter for progress tracking across concurrent tasks
+        completedCount = [0]  # Use a list so the nested coroutine can update it
+
+ # Process parts in parallel with concurrency control
+        maxConcurrent = 5
+        if options and getattr(options, 'maxConcurrentParts', None):
+            maxConcurrent = options.maxConcurrentParts
+
+ semaphore = asyncio.Semaphore(maxConcurrent)
+
+ async def processSinglePart(contentPart, partIndex: int) -> AiCallResponse:
+ """Process a single content part with progress logging."""
+ async with semaphore:
+ partLabel = contentPart.label or f"Part {partIndex+1}"
+ partType = contentPart.typeGroup or "unknown"
+
+ # Log start of processing
+ if progressCallback:
+ progressCallback(0.1 + (partIndex / totalParts) * 0.8, f"Processing {partLabel} ({partType}) - {partIndex+1}/{totalParts}")
+
+ try:
+ # Process the part
+ partResult = await self.processContentPartWithFallback(
+ contentPart, prompt, options, failoverModelList, aiObjects, None # Don't pass progressCallback to avoid double logging
+ )
+
+ # Update completed count and log progress
+ completedCount[0] += 1
+ if progressCallback:
+ progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Completed {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
+
+ return partResult
+ except Exception as e:
+ # Update completed count even on error
+ completedCount[0] += 1
+ logger.error(f"Error processing part {partIndex+1} ({partLabel}): {str(e)}")
+ if progressCallback:
+ progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Error processing {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
+ # Return error response
+ return self._createErrorResponse(f"Error processing part: {str(e)}", 0, 0)
+
+ # Create tasks for all parts
+ tasks = [processSinglePart(contentPart, i) for i, contentPart in enumerate(contentParts)]
+
+ # Execute all tasks in parallel with error handling
+ results = await asyncio.gather(*tasks, return_exceptions=True)
+
+ # Process results and handle exceptions
allResults = []
- for contentPart in contentParts:
- partResult = await self.processContentPartWithFallback(
- contentPart, prompt, options, failoverModelList, aiObjects, progressCallback
- )
- allResults.append(partResult)
+ for i, result in enumerate(results):
+ if isinstance(result, Exception):
+ logger.error(f"Exception processing part {i+1}: {str(result)}")
+ allResults.append(self._createErrorResponse(f"Exception: {str(result)}", 0, 0))
+ elif result is not None:
+ allResults.append(result)
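+
+        # Note: return_exceptions=True converts a failing part into an error
+        # response instead of cancelling the remaining parallel parts.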
# Merge all results using unified mergePartResults
mergedContent = self.mergePartResults(allResults)
diff --git a/modules/services/serviceGeneration/paths/codePath.py b/modules/services/serviceGeneration/paths/codePath.py
new file mode 100644
index 00000000..5beb1867
--- /dev/null
+++ b/modules/services/serviceGeneration/paths/codePath.py
@@ -0,0 +1,584 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Code Generation Path
+
+Handles code generation with multi-file project support, dependency handling,
+and proper cross-file references.
+"""
+
+import json
+import logging
+import time
+import re
+from typing import Dict, Any, List, Optional, Tuple
+from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
+from modules.datamodels.datamodelExtraction import ContentPart
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
+
+logger = logging.getLogger(__name__)
+
+
+class CodeGenerationPath:
+ """Code generation path."""
+
+ def __init__(self, services):
+ self.services = services
+
+ async def generateCode(
+ self,
+ userPrompt: str,
+ outputFormat: str = None,
+ contentParts: Optional[List[ContentPart]] = None,
+ title: str = "Generated Code",
+ parentOperationId: Optional[str] = None
+ ) -> AiResponse:
+ """
+ Generate code files with multi-file project support.
+
+ Returns: AiResponse with code files as documents
+ """
+ # Create operation ID
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ codeOperationId = f"code_gen_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ codeOperationId,
+ "Code Generation",
+ "Code Generation",
+ f"Format: {outputFormat or 'txt'}",
+ parentOperationId=parentOperationId
+ )
+
+ try:
+ # Detect language and project type from prompt or outputFormat
+ language, projectType = self._detectLanguageAndProjectType(userPrompt, outputFormat)
+
+ # Phase 1: Code structure generation (with looping)
+ self.services.chat.progressLogUpdate(codeOperationId, 0.2, "Generating code structure")
+ codeStructure = await self._generateCodeStructure(
+ userPrompt=userPrompt,
+ language=language,
+ outputFormat=outputFormat,
+ contentParts=contentParts
+ )
+
+ # Phase 2: Code content generation (with dependency handling)
+ self.services.chat.progressLogUpdate(codeOperationId, 0.5, "Generating code content")
+ codeFiles = await self._generateCodeContent(codeStructure, codeOperationId)
+
+ # Phase 3: Code formatting & validation
+ self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files")
+ formattedFiles = await self._formatAndValidateCode(codeFiles)
+
+ # Convert to unified document format
+ documents = []
+ for file in formattedFiles:
+ mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt"))
+ content = file.get("content", "")
+ if isinstance(content, str):
+ contentBytes = content.encode('utf-8')
+ else:
+ contentBytes = content
+
+ documents.append(DocumentData(
+ documentName=file.get("filename", "generated.txt"),
+ documentData=contentBytes,
+ mimeType=mimeType,
+ sourceJson=file
+ ))
+
+ metadata = AiResponseMetadata(
+ title=title,
+ operationType=OperationTypeEnum.DATA_GENERATE.value
+ )
+
+ self.services.chat.progressLogFinish(codeOperationId, True)
+
+ return AiResponse(
+ documents=documents,
+ content=None,
+ metadata=metadata
+ )
+
+ except Exception as e:
+ logger.error(f"Error in code generation: {str(e)}")
+ self.services.chat.progressLogFinish(codeOperationId, False)
+ raise
+
+ def _detectLanguageAndProjectType(self, userPrompt: str, outputFormat: Optional[str]) -> tuple:
+ """Detect programming language and project type from prompt or format."""
+ promptLower = userPrompt.lower()
+
+ # Detect language
+ language = None
+ if outputFormat:
+ if outputFormat == "py":
+ language = "python"
+            elif outputFormat in ["js", "ts"]:
+                # Normalize to the full language name so downstream checks and
+                # prompts see the same labels as prompt-based detection
+                language = "javascript" if outputFormat == "js" else "typescript"
+ elif outputFormat == "html":
+ language = "html"
+
+ if not language:
+ if "python" in promptLower or ".py" in promptLower:
+ language = "python"
+ elif "javascript" in promptLower or ".js" in promptLower:
+ language = "javascript"
+ elif "typescript" in promptLower or ".ts" in promptLower:
+ language = "typescript"
+ elif "html" in promptLower:
+ language = "html"
+ else:
+ language = "python" # Default
+
+ # Detect project type
+ projectType = "single_file"
+ if "multi" in promptLower or "multiple files" in promptLower or "project" in promptLower:
+ projectType = "multi_file"
+
+ return language, projectType
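+        # Detection examples for the logic above (illustrative):
+        #   ("Write a python script that parses logs", None) -> ("python", "single_file")
+        #   ("Create a project with multiple files", "js")   -> ("javascript", "multi_file")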
+
+ async def _generateCodeStructure(
+ self,
+ userPrompt: str,
+ language: str,
+ outputFormat: Optional[str],
+ contentParts: Optional[List[ContentPart]]
+ ) -> Dict[str, Any]:
+ """Generate code structure using looping system."""
+
+ # Build structure generation prompt
+ structurePrompt = f"""Analyze the following code generation request and create a project structure.
+
+Request: {userPrompt}
+
+Language: {language}
+
+Create a JSON structure with:
+1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
+2. files: Array of file structures, each with:
+ - id: Unique identifier
+ - filename: File name (e.g., "main.py", "utils.py")
+ - fileType: File extension (e.g., "py", "js")
+ - dependencies: List of file IDs this file depends on (for multi-file projects)
+ - imports: List of import statements (for dependency extraction)
+ - functions: Array of function signatures {{"name": "...", "signature": "..."}}
+ - classes: Array of class definitions {{"name": "...", "signature": "..."}}
+
+For single-file projects, return one file. For multi-file projects, break down into logical modules.
+
+Return ONLY valid JSON in this format:
+{{
+ "metadata": {{
+ "language": "{language}",
+ "projectType": "single_file",
+ "projectName": "generated-project"
+ }},
+ "files": [
+ {{
+ "id": "file_1",
+ "filename": "main.py",
+ "fileType": "py",
+ "dependencies": [],
+ "imports": [],
+ "functions": [],
+ "classes": []
+ }}
+ ]
+}}
+"""
+
+ # Use generic looping system with code_structure use case
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_GENERATE,
+ resultFormat="json"
+ )
+
+ structureJson = await self.services.ai.callAiWithLooping(
+ prompt=structurePrompt,
+ options=options,
+ useCaseId="code_structure",
+ debugPrefix="code_structure_generation",
+ contentParts=contentParts
+ )
+
+ parsed = json.loads(structureJson)
+ return parsed
+
+ async def _generateCodeContent(
+ self,
+ codeStructure: Dict[str, Any],
+ parentOperationId: str
+ ) -> List[Dict[str, Any]]:
+ """Generate code content for each file with dependency handling."""
+ files = codeStructure.get("files", [])
+ metadata = codeStructure.get("metadata", {})
+
+ if not files:
+ raise ValueError("No files found in code structure")
+
+ # Step 1: Resolve dependency order
+ orderedFiles = self._resolveDependencyOrder(files)
+
+ # Step 2: Generate dependency files first (requirements.txt, package.json, etc.)
+ dependencyFiles = await self._generateDependencyFiles(metadata, orderedFiles)
+
+ # Step 3: Generate code files in dependency order (not fully parallel)
+ codeFiles = []
+ generatedFileContext = {} # Track what's been generated for cross-file references
+
+ for idx, fileStructure in enumerate(orderedFiles):
+ # Update progress
+ progress = 0.5 + (0.4 * (idx / len(orderedFiles)))
+ self.services.chat.progressLogUpdate(
+ parentOperationId,
+ progress,
+ f"Generating {fileStructure.get('filename', 'file')}"
+ )
+
+ # Provide context about already-generated files for proper imports
+ fileContext = self._buildFileContext(generatedFileContext, fileStructure)
+
+ # Generate this file with context
+ fileContent = await self._generateSingleFileContent(
+ fileStructure,
+ fileContext=fileContext,
+ allFilesStructure=orderedFiles,
+ metadata=metadata
+ )
+
+ codeFiles.append(fileContent)
+
+ # Update context with generated file info (for next files)
+ generatedFileContext[fileStructure["id"]] = {
+ "filename": fileContent.get("filename", fileStructure.get("filename")),
+ "functions": fileContent.get("functions", []),
+ "classes": fileContent.get("classes", []),
+ "exports": fileContent.get("exports", [])
+ }
+
+ # Combine dependency files and code files
+ return dependencyFiles + codeFiles
+
+ def _resolveDependencyOrder(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """Resolve file generation order based on dependencies using topological sort."""
+ # Build dependency graph
+ fileMap = {f["id"]: f for f in files}
+ dependencies = {}
+
+ for file in files:
+ fileId = file["id"]
+ deps = file.get("dependencies", []) # List of file IDs this file depends on
+ dependencies[fileId] = deps
+
+ # Topological sort
+ ordered = []
+ visited = set()
+ tempMark = set()
+
+ def visit(fileId: str):
+ if fileId in tempMark:
+ # Circular dependency detected - break it
+ logger.warning(f"Circular dependency detected involving {fileId}")
+ return
+ if fileId in visited:
+ return
+
+ tempMark.add(fileId)
+ for depId in dependencies.get(fileId, []):
+ if depId in fileMap:
+ visit(depId)
+ tempMark.remove(fileId)
+ visited.add(fileId)
+ ordered.append(fileMap[fileId])
+
+ for file in files:
+ if file["id"] not in visited:
+ visit(file["id"])
+
+ return ordered
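+        # Worked example (illustrative): for files A (depends on B), B (depends
+        # on C) and C (no dependencies), the visit order yields [C, B, A], so
+        # every file is generated after the files it imports from. Circular
+        # edges are logged and skipped rather than raised, giving a
+        # best-effort order instead of a hard failure.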
+
+ async def _generateDependencyFiles(
+ self,
+ metadata: Dict[str, Any],
+ files: List[Dict[str, Any]]
+ ) -> List[Dict[str, Any]]:
+ """Generate dependency files (requirements.txt, package.json, etc.)."""
+ language = metadata.get("language", "").lower()
+ dependencyFiles = []
+
+ # Generate requirements.txt for Python
+ if language in ["python", "py"]:
+ requirementsContent = await self._generateRequirementsTxt(files)
+ if requirementsContent:
+ dependencyFiles.append({
+ "filename": "requirements.txt",
+ "content": requirementsContent,
+ "fileType": "txt",
+ "id": "requirements_txt"
+ })
+
+ # Generate package.json for JavaScript/TypeScript
+ elif language in ["javascript", "typescript", "js", "ts"]:
+ packageJson = await self._generatePackageJson(files, metadata)
+ if packageJson:
+ dependencyFiles.append({
+ "filename": "package.json",
+ "content": json.dumps(packageJson, indent=2),
+ "fileType": "json",
+ "id": "package_json"
+ })
+
+ return dependencyFiles
+
+ async def _generateRequirementsTxt(
+ self,
+ files: List[Dict[str, Any]]
+ ) -> Optional[str]:
+ """Generate requirements.txt content from Python imports."""
+ pythonPackages = set()
+
+ for file in files:
+ imports = file.get("imports", [])
+ if isinstance(imports, list):
+ for imp in imports:
+ if isinstance(imp, str):
+ # Extract package name from import
+ # Handle: "from flask import", "import flask", "from flask import Flask"
+ imp = imp.strip()
+ if "import" in imp:
+ if "from" in imp:
+ # "from package import ..."
+ parts = imp.split("from")
+ if len(parts) > 1:
+ package = parts[1].split("import")[0].strip()
+ if package and not package.startswith("."):
+ pythonPackages.add(package.split(".")[0]) # Get root package
+ else:
+ # "import package" or "import package.module"
+ parts = imp.split("import")
+ if len(parts) > 1:
+ package = parts[1].strip().split(".")[0].strip()
+ if package and not package.startswith("."):
+ pythonPackages.add(package)
+
+ if pythonPackages:
+ return "\n".join(sorted(pythonPackages))
+ return None
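+        # Parsing examples (illustrative):
+        #   "from flask import Flask"   -> "flask"
+        #   "import numpy.linalg"       -> "numpy"
+        #   "from .utils import helper" -> skipped (relative import)
+        # Note: stdlib modules are not filtered out, so the generated
+        # requirements.txt may list packages that need no installation.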
+
+ async def _generatePackageJson(
+ self,
+ files: List[Dict[str, Any]],
+ metadata: Dict[str, Any]
+ ) -> Optional[Dict[str, Any]]:
+ """Generate package.json content from JavaScript/TypeScript imports."""
+ npmPackages = {}
+
+ for file in files:
+ imports = file.get("imports", [])
+ if isinstance(imports, list):
+ for imp in imports:
+ if isinstance(imp, str):
+ # Extract npm package from import
+ # Handle: "import express from 'express'", "const express = require('express')"
+ imp = imp.strip()
+ if "from" in imp:
+ # ES6 import: "import ... from 'package'"
+ parts = imp.split("from")
+ if len(parts) > 1:
+ package = parts[1].strip().strip("'\"")
+ if package and not package.startswith(".") and not package.startswith("/"):
+ npmPackages[package] = "*"
+ elif "require" in imp:
+ # CommonJS: "require('package')"
+ match = re.search(r"require\(['\"]([^'\"]+)['\"]\)", imp)
+ if match:
+ package = match.group(1)
+ if not package.startswith(".") and not package.startswith("/"):
+ npmPackages[package] = "*"
+
+ if npmPackages:
+ return {
+ "name": metadata.get("projectName", "generated-project"),
+ "version": "1.0.0",
+ "dependencies": npmPackages
+ }
+ return None
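+        # Parsing examples (illustrative):
+        #   "import express from 'express'"     -> {"express": "*"}
+        #   "const fs = require('./localFile')" -> skipped (relative path)
+        # Versions are pinned to "*" because the generator cannot know which
+        # release the generated code targets.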
+
+ def _buildFileContext(
+ self,
+ generatedFileContext: Dict[str, Dict[str, Any]],
+ currentFile: Dict[str, Any]
+ ) -> Dict[str, Any]:
+ """Build context about other files for proper imports/references."""
+ context = {
+ "availableFiles": [],
+ "availableFunctions": {},
+ "availableClasses": {}
+ }
+
+ # Add info about already-generated files
+ for fileId, fileInfo in generatedFileContext.items():
+ context["availableFiles"].append({
+ "id": fileId,
+ "filename": fileInfo["filename"],
+ "functions": fileInfo.get("functions", []),
+ "classes": fileInfo.get("classes", []),
+ "exports": fileInfo.get("exports", [])
+ })
+
+ # Build function/class maps for easy lookup
+ for func in fileInfo.get("functions", []):
+ funcName = func.get("name", "")
+ if funcName:
+ context["availableFunctions"][funcName] = {
+ "file": fileInfo["filename"],
+ "signature": func.get("signature", "")
+ }
+
+ for cls in fileInfo.get("classes", []):
+ className = cls.get("name", "")
+ if className:
+ context["availableClasses"][className] = {
+ "file": fileInfo["filename"]
+ }
+
+ return context
+
+ async def _generateSingleFileContent(
+ self,
+ fileStructure: Dict[str, Any],
+        fileContext: Optional[Dict[str, Any]] = None,
+        allFilesStructure: Optional[List[Dict[str, Any]]] = None,
+        metadata: Optional[Dict[str, Any]] = None
+ ) -> Dict[str, Any]:
+ """Generate code content for a single file with context about other files."""
+
+ # Build prompt with context about other files for proper imports
+ filename = fileStructure.get("filename", "generated.py")
+ fileType = fileStructure.get("fileType", "py")
+ dependencies = fileStructure.get("dependencies", [])
+ functions = fileStructure.get("functions", [])
+ classes = fileStructure.get("classes", [])
+
+ contextInfo = ""
+ if fileContext and fileContext.get("availableFiles"):
+ contextInfo = "\n\nAvailable files and their exports:\n"
+ for fileInfo in fileContext["availableFiles"]:
+ contextInfo += f"- {fileInfo['filename']}: "
+ funcs = [f.get("name", "") for f in fileInfo.get("functions", [])]
+ cls = [c.get("name", "") for c in fileInfo.get("classes", [])]
+ exports = []
+ if funcs:
+ exports.extend(funcs)
+ if cls:
+ exports.extend(cls)
+ if exports:
+ contextInfo += ", ".join(exports)
+ contextInfo += "\n"
+
+ contentPrompt = f"""Generate complete, executable code for the file: {filename}
+
+File Type: {fileType}
+Language: {metadata.get('language', 'python') if metadata else 'python'}
+
+Required functions:
+{json.dumps(functions, indent=2) if functions else 'None specified'}
+
+Required classes:
+{json.dumps(classes, indent=2) if classes else 'None specified'}
+
+Dependencies on other files: {', '.join(dependencies) if dependencies else 'None'}
+{contextInfo}
+
+Generate complete, production-ready code with:
+1. Proper imports (including imports from other files in the project if dependencies exist)
+2. All required functions and classes
+3. Error handling
+4. Documentation/docstrings
+5. Type hints where appropriate
+
+Return ONLY valid JSON in this format:
+{{
+ "files": [
+ {{
+ "filename": "{filename}",
+ "content": "// Complete code here",
+ "functions": {json.dumps(functions, indent=2) if functions else '[]'},
+ "classes": {json.dumps(classes, indent=2) if classes else '[]'}
+ }}
+ ]
+}}
+"""
+
+ # Use generic looping system with code_content use case
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_GENERATE,
+ resultFormat="json"
+ )
+
+ contentJson = await self.services.ai.callAiWithLooping(
+ prompt=contentPrompt,
+ options=options,
+ useCaseId="code_content",
+ debugPrefix=f"code_content_{fileStructure.get('id', 'file')}",
+ )
+
+ parsed = json.loads(contentJson)
+
+ # Extract file content and metadata
+ files = parsed.get("files", [])
+ if files and len(files) > 0:
+ fileData = files[0]
+ return {
+ "filename": fileData.get("filename", filename),
+ "content": fileData.get("content", ""),
+ "fileType": fileType,
+ "functions": fileData.get("functions", functions),
+ "classes": fileData.get("classes", classes),
+ "id": fileStructure.get("id")
+ }
+
+ # Fallback if structure is different
+ return {
+ "filename": filename,
+ "content": parsed.get("content", ""),
+ "fileType": fileType,
+ "functions": functions,
+ "classes": classes,
+ "id": fileStructure.get("id")
+ }
+
+ async def _formatAndValidateCode(self, codeFiles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """Format and validate generated code files."""
+        # For now, only strip stray markdown code fences from the content
+        # TODO: Add real code formatting (black, prettier, etc.) and validation
+ formatted = []
+ for file in codeFiles:
+ content = file.get("content", "")
+ # Basic cleanup: remove markdown code fences if present
+ if isinstance(content, str):
+ content = re.sub(r'^```[\w]*\n', '', content, flags=re.MULTILINE)
+ content = re.sub(r'\n```$', '', content, flags=re.MULTILINE)
+ file["content"] = content.strip()
+ formatted.append(file)
+ return formatted
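+        # Cleanup example (illustrative): content arriving as
+        #   ```python\ndef main(): ...\n```
+        # is reduced to the bare code, since models often wrap generated files
+        # in markdown fences even when asked for raw content.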
+
+ def _getMimeType(self, fileType: str) -> str:
+ """Get MIME type for file type."""
+ mimeTypes = {
+ "py": "text/x-python",
+ "js": "application/javascript",
+ "ts": "application/typescript",
+ "html": "text/html",
+ "css": "text/css",
+ "json": "application/json",
+ "txt": "text/plain",
+ "md": "text/markdown",
+ "java": "text/x-java-source",
+ "cpp": "text/x-c++src",
+ "c": "text/x-csrc"
+ }
+ return mimeTypes.get(fileType.lower(), "text/plain")
diff --git a/modules/services/serviceGeneration/paths/documentPath.py b/modules/services/serviceGeneration/paths/documentPath.py
new file mode 100644
index 00000000..d03c82a0
--- /dev/null
+++ b/modules/services/serviceGeneration/paths/documentPath.py
@@ -0,0 +1,258 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Document Generation Path
+
+Handles document generation using existing chapter/section model.
+"""
+
+import json
+import logging
+import time
+from typing import Any, List, Optional
+from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
+from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
+from modules.datamodels.datamodelDocument import RenderedDocument
+
+logger = logging.getLogger(__name__)
+
+
+class DocumentGenerationPath:
+ """Document generation path (existing functionality, refactored)."""
+
+ def __init__(self, services):
+ self.services = services
+
+ async def generateDocument(
+ self,
+ userPrompt: str,
+ documentList: Optional[Any] = None, # DocumentReferenceList
+ documentIntents: Optional[List[DocumentIntent]] = None,
+ contentParts: Optional[List[ContentPart]] = None,
+ outputFormat: str = "txt",
+ title: Optional[str] = None,
+ parentOperationId: Optional[str] = None
+ ) -> AiResponse:
+ """
+ Generate document using existing chapter/section model.
+
+ Returns: AiResponse with documents list
+ """
+ # Create operation ID
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ docOperationId = f"doc_gen_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ docOperationId,
+ "Document Generation",
+ "Document Generation",
+ f"Format: {outputFormat}",
+ parentOperationId=parentOperationId
+ )
+
+ try:
+            # Step 5A: Clarify document intents
+ documents = []
+ if documentList:
+ documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
+
+ if not documentIntents and documents:
+ documentIntents = await self.services.ai.clarifyDocumentIntents(
+ documents,
+ userPrompt,
+ {"outputFormat": outputFormat},
+ docOperationId
+ )
+
+            # Step 5B: Extract and prepare content
+ if documents:
+ preparedContentParts = await self.services.ai.extractAndPrepareContent(
+ documents,
+ documentIntents or [],
+ docOperationId
+ )
+
+                # Merge with supplied contentParts (if any)
+ if contentParts:
+                    # Check for pre-extracted content
+ for part in contentParts:
+ if part.metadata.get("skipExtraction", False):
+                        # Already extracted - use as-is and make sure the metadata is complete
+ part.metadata.setdefault("contentFormat", "extracted")
+ part.metadata.setdefault("isPreExtracted", True)
+ preparedContentParts.extend(contentParts)
+
+ contentParts = preparedContentParts
+
+ # Schritt 5B.5: Process contentParts with AI extraction (if provided)
+ # This extracts text from images, processes content, and updates contentParts with extracted data
+ # This matches the original flow: extract content first (no AI), then process with AI
+ if contentParts:
+ # Filter out binary/other parts that shouldn't be processed
+ processableParts = []
+ skippedParts = []
+ for p in contentParts:
+ if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
+ processableParts.append(p)
+ else:
+ skippedParts.append(p)
+
+ if skippedParts:
+ logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")
+
+ if processableParts:
+ # Count images for progress update
+ imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
+ if imageCount > 0:
+ self.services.chat.progressLogUpdate(docOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")
+
+ # Build proper extraction prompt using buildExtractionPrompt
+ # This creates a focused extraction prompt, not the user's generation prompt
+ from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
+                    from modules.datamodels.datamodelAi import AiCallRequest  # AiCallOptions/OperationTypeEnum already imported at module level
+
+ # Determine renderer for format-specific guidelines
+ renderer = None
+ if outputFormat:
+ try:
+ from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+ generationService = GenerationService(self.services)
+ renderer = generationService.getRendererForFormat(outputFormat)
+ except Exception as e:
+ logger.debug(f"Could not get renderer for format {outputFormat}: {e}")
+
+ extractionPrompt = await buildExtractionPrompt(
+ outputFormat=outputFormat or "txt",
+ userPrompt=userPrompt, # User's prompt as context for what to extract
+ title=title or "Document",
+ aiService=self.services.ai if hasattr(self.services.ai, 'aiObjects') and self.services.ai.aiObjects else None,
+ services=self.services,
+ renderer=renderer
+ )
+
+ logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")
+
+ # Update progress - starting extraction
+ self.services.chat.progressLogUpdate(docOperationId, 0.26, f"Starting AI extraction from {len(processableParts)} content parts")
+
+ # Use DATA_EXTRACT operation type for extraction
+ extractionOptions = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction
+ compressPrompt=False,
+ compressContext=False
+ )
+
+ # Create progress callback for per-part progress updates
+ def extractionProgressCallback(progress: float, message: str):
+ """Progress callback for extraction - updates parent operation."""
+ # Map progress from 0.0-1.0 to 0.26-0.35 range (extraction phase)
+ mappedProgress = 0.26 + (progress * 0.09) # 0.26 to 0.35
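+                        # e.g. progress=0.5 maps to 0.26 + 0.5 * 0.09 = 0.305 (mid-extraction)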
+ self.services.chat.progressLogUpdate(docOperationId, mappedProgress, message)
+
+ extractionRequest = AiCallRequest(
+ prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt
+ context="",
+ options=extractionOptions,
+ contentParts=processableParts
+ )
+
+ # Write debug file for extraction prompt (all parts)
+ self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
+
+ # Call AI to extract content from contentParts (with progress callback)
+ extractionResponse = await self.services.ai.callAi(extractionRequest, progressCallback=extractionProgressCallback)
+
+ # Update progress - extraction completed
+ self.services.chat.progressLogUpdate(docOperationId, 0.35, f"Completed AI extraction from {len(processableParts)} content parts")
+
+ # Write debug file for extraction response
+ if extractionResponse.content:
+ self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
+ else:
+ self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
+ logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")
+
+ # Update contentParts with extracted content (matching original flow)
+ if extractionResponse.errorCount == 0 and extractionResponse.content:
+ # The extracted content is already merged - update the first processable part with it
+ # This matches the original behavior where extracted text was used for generation
+ if processableParts:
+ # Store extracted content in metadata for use in structure generation
+ processableParts[0].metadata["extractedContent"] = extractionResponse.content
+ logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars)")
+ else:
+ # Extraction failed - log warning but continue
+ logger.warning(f"Content extraction failed, continuing with original contentParts")
+
+            # Step 5C: Generate structure
+ structure = await self.services.ai.generateStructure(
+ userPrompt,
+ contentParts or [],
+ outputFormat,
+ docOperationId
+ )
+
+            # Step 5D: Fill structure
+ # Language will be extracted from services (user intention analysis) in fillStructure
+ filledStructure = await self.services.ai.fillStructure(
+ structure,
+ contentParts or [],
+ userPrompt,
+ docOperationId
+ )
+
+            # Step 5E: Render result
+            # Each document is rendered individually and may return 1..n files (e.g. HTML + images)
+            renderedDocuments: List[RenderedDocument] = await self.services.ai.renderResult(
+ filledStructure,
+ outputFormat,
+ title or "Generated Document",
+ userPrompt,
+ docOperationId
+ )
+
+            # Build response: convert all rendered documents to DocumentData
+ documentDataList = []
+ for renderedDoc in renderedDocuments:
+ try:
+                    # Create DocumentData for each rendered document
+ docDataObj = DocumentData(
+ documentName=renderedDoc.filename,
+ documentData=renderedDoc.documentData,
+ mimeType=renderedDoc.mimeType,
+                        sourceJson=filledStructure if len(documentDataList) == 0 else None  # Only for the first document
+ )
+ documentDataList.append(docDataObj)
+ logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
+ except Exception as e:
+ logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
+
+ if not documentDataList:
+ raise ValueError("No documents were rendered")
+
+ metadata = AiResponseMetadata(
+ title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
+ operationType=OperationTypeEnum.DATA_GENERATE.value
+ )
+
+            # Debug log (harmonized format)
+ self.services.utils.writeDebugFile(
+ json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
+ "document_generation_response"
+ )
+
+ self.services.chat.progressLogFinish(docOperationId, True)
+
+ return AiResponse(
+ content=json.dumps(filledStructure),
+ metadata=metadata,
+ documents=documentDataList
+ )
+
+ except Exception as e:
+ logger.error(f"Error in document generation: {str(e)}")
+ self.services.chat.progressLogFinish(docOperationId, False)
+ raise
+
diff --git a/modules/services/serviceGeneration/paths/imagePath.py b/modules/services/serviceGeneration/paths/imagePath.py
new file mode 100644
index 00000000..1247494f
--- /dev/null
+++ b/modules/services/serviceGeneration/paths/imagePath.py
@@ -0,0 +1,132 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Image Generation Path
+
+Handles image generation. Currently renders a single image per call; the
+count parameter is reserved for future batch support.
+"""
+
+import logging
+import time
+from typing import Optional
+from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallRequest
+
+logger = logging.getLogger(__name__)
+
+
+class ImageGenerationPath:
+ """Image generation path."""
+
+ def __init__(self, services):
+ self.services = services
+
+ async def generateImages(
+ self,
+ userPrompt: str,
+        count: int = 1,  # reserved for future batch support; not yet used
+ style: Optional[str] = None,
+ format: str = "png",
+ title: Optional[str] = None,
+ parentOperationId: Optional[str] = None
+ ) -> AiResponse:
+ """
+ Generate image files.
+
+ Returns: AiResponse with image files as documents
+ """
+ # Create operation ID
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ imageOperationId = f"image_gen_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ imageOperationId,
+ "Image Generation",
+ "Image Generation",
+ f"Format: {format}",
+ parentOperationId=parentOperationId
+ )
+
+ try:
+ self.services.chat.progressLogUpdate(imageOperationId, 0.4, "Calling AI for image generation")
+
+ # Build prompt with style if provided
+ imagePrompt = userPrompt
+ if style:
+ imagePrompt = f"{userPrompt}\n\nStyle: {style}"
+
+ # Use IMAGE_GENERATE operation
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.IMAGE_GENERATE,
+ resultFormat=format
+ )
+
+ request = AiCallRequest(
+ prompt=imagePrompt,
+ context="",
+ options=options
+ )
+
+ response = await self.services.ai.callAi(request)
+
+ if not response.content:
+ errorMsg = f"No image data returned: {response.content}"
+ logger.error(f"Error in AI image generation: {errorMsg}")
+ self.services.chat.progressLogFinish(imageOperationId, False)
+ raise ValueError(errorMsg)
+
+ # Handle response content (could be base64 string or bytes)
+ imageData = response.content
+ if isinstance(imageData, str):
+                # Assume a base64-encoded string; validate=True makes non-base64
+                # input raise so it falls through to the UTF-8 fallback below
+                import base64
+                try:
+                    imageData = base64.b64decode(imageData, validate=True)
+ except Exception:
+ # If not base64, try encoding as bytes
+ imageData = imageData.encode('utf-8')
+ elif not isinstance(imageData, bytes):
+ imageData = bytes(imageData)
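+            # Normalization examples (illustrative): a base64 payload such as
+            # "iVBORw0KGgo..." decodes to raw PNG bytes; a non-base64 string
+            # falls back to its UTF-8 encoding; a bytearray is cast to bytes.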
+
+ # Create document
+ imageDoc = DocumentData(
+ documentName=f"generated_image.{format}",
+ documentData=imageData,
+ mimeType=f"image/{format}"
+ )
+
+ metadata = AiResponseMetadata(
+ title=title or "Generated Image",
+ operationType=OperationTypeEnum.IMAGE_GENERATE.value
+ )
+
+ self.services.chat.storeWorkflowStat(
+ self.services.workflow,
+ response,
+ "ai.generate.image"
+ )
+
+ self.services.chat.progressLogUpdate(imageOperationId, 0.9, "Image generated")
+ self.services.chat.progressLogFinish(imageOperationId, True)
+
+ # Create content string describing the image generation
+ import json
+ contentJson = json.dumps({
+ "type": "image",
+ "format": format,
+ "prompt": userPrompt,
+ "filename": imageDoc.documentName
+ }, ensure_ascii=False)
+
+ return AiResponse(
+ content=contentJson, # JSON string describing the image generation
+ metadata=metadata,
+ documents=[imageDoc]
+ )
+
+ except Exception as e:
+ logger.error(f"Error in image generation: {str(e)}")
+ self.services.chat.progressLogFinish(imageOperationId, False)
+ raise
+
diff --git a/modules/workflows/methods/methodAi.py.old b/modules/workflows/methods/methodAi.py.old
deleted file mode 100644
index fedaa0ef..00000000
--- a/modules/workflows/methods/methodAi.py.old
+++ /dev/null
@@ -1,742 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-AI processing method module.
-Handles direct AI calls for any type of task.
-"""
-
-import time
-import logging
-from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
-from modules.datamodels.datamodelWorkflow import ExtractContentParameters
-from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
-
-logger = logging.getLogger(__name__)
-
-class MethodAi(MethodBase):
- """AI processing methods."""
-
- def __init__(self, services):
- super().__init__(services)
- self.name = "ai"
- self.description = "AI processing methods"
-
- def _format_timestamp_for_filename(self) -> str:
- """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
- return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
-
-
- @action
- async def process(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
- - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
- - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
- - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
-
- Parameters:
- - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
- - documentList (list, optional): Document reference(s) in any format to use as input/context.
- - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
- """
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"ai_process_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Generate",
- "AI Processing",
- f"Format: {parameters.get('resultType', 'txt')}",
- parentOperationId=parentOperationId
- )
-
- aiPrompt = parameters.get("aiPrompt")
- logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")
-
- # Update progress - preparing parameters
- self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
-
- from modules.datamodels.datamodelDocref import DocumentReferenceList
-
- documentListParam = parameters.get("documentList")
- # Convert to DocumentReferenceList if needed
- if documentListParam is None:
- documentList = DocumentReferenceList(references=[])
- elif isinstance(documentListParam, DocumentReferenceList):
- documentList = documentListParam
- elif isinstance(documentListParam, str):
- documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
- documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
- logger.error(f"Invalid documentList type: {type(documentListParam)}")
- documentList = DocumentReferenceList(references=[])
-
- resultType = parameters.get("resultType", "txt")
-
-
- if not aiPrompt:
- logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
- return ActionResult.isFailure(
- error="AI prompt is required"
- )
-
- # Determine output extension and default MIME type without duplicating service logic
- normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
- output_extension = f".{normalized_result_type}"
- output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
- logger.info(f"Using result type: {resultType} -> {output_extension}")
-
- # Phase 7.3: Extract content first if documents provided, then use contentParts
- # Check if contentParts are already provided (preferred path)
- contentParts: Optional[List[ContentPart]] = None
- if "contentParts" in parameters:
- contentParts = parameters.get("contentParts")
- if contentParts and not isinstance(contentParts, list):
- # Try to extract from ContentExtracted if it's an ActionDocument
- if hasattr(contentParts, 'parts'):
- contentParts = contentParts.parts
- else:
- logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
- contentParts = None
-
- # If contentParts not provided but documentList is, extract content first
- if not contentParts and documentList.references:
- self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
-
- # Get ChatDocuments
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
- if not chatDocuments:
- logger.warning("No documents found in documentList")
- else:
- logger.info(f"Extracting content from {len(chatDocuments)} documents")
-
- # Prepare extraction options (use defaults if not provided)
- extractionOptions = parameters.get("extractionOptions")
- if not extractionOptions:
- extractionOptions = ExtractionOptions(
- prompt="Extract all content from the document",
- mergeStrategy=MergeStrategy(
- mergeType="concatenate",
- groupBy="typeGroup",
- orderBy="id"
- ),
- processDocumentsIndividually=True
- )
-
- # Extract content using extraction service with hierarchical progress logging
- # Pass operationId for per-document progress tracking
- extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
-
- # Combine all ContentParts from all extracted results
- contentParts = []
- for extracted in extractedResults:
- if extracted.parts:
- contentParts.extend(extracted.parts)
-
- logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
-
- # Update progress - preparing AI call
- self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
-
- # Build options with only resultFormat - let service layer handle all other parameters
- output_format = output_extension.replace('.', '') or 'txt'
- options = AiCallOptions(
- resultFormat=output_format
- # Removed all model parameters - service layer will analyze prompt and determine optimal parameters
- )
-
- # Update progress - calling AI
- self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
-
- # Use unified callAiContent method with contentParts (extraction is now separate)
- aiResponse = await self.services.ai.callAiContent(
- prompt=aiPrompt,
- options=options,
- contentParts=contentParts, # Already extracted (or None if no documents)
- outputFormat=output_format,
- parentOperationId=operationId
- )
-
- # Update progress - processing result
- self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
-
- from modules.datamodels.datamodelChat import ActionDocument
-
- # Extract documents from AiResponse
- if aiResponse.documents and len(aiResponse.documents) > 0:
- action_documents = []
- for doc in aiResponse.documents:
- validationMetadata = {
- "actionType": "ai.process",
- "resultType": normalized_result_type,
- "outputFormat": output_format,
- "hasDocuments": True,
- "documentCount": len(aiResponse.documents)
- }
- action_documents.append(ActionDocument(
- documentName=doc.documentName,
- documentData=doc.documentData,
- mimeType=doc.mimeType or output_mime_type,
- sourceJson=getattr(doc, 'sourceJson', None), # Preserve source JSON for structure validation
- validationMetadata=validationMetadata
- ))
-
- final_documents = action_documents
- else:
- # Text response - create document from content
- extension = output_extension.lstrip('.')
- meaningful_name = self._generateMeaningfulFileName(
- base_name="ai",
- extension=extension,
- action_name="result"
- )
- validationMetadata = {
- "actionType": "ai.process",
- "resultType": normalized_result_type,
- "outputFormat": output_format,
- "hasDocuments": False,
- "contentType": "text"
- }
- action_document = ActionDocument(
- documentName=meaningful_name,
- documentData=aiResponse.content,
- mimeType=output_mime_type,
- validationMetadata=validationMetadata
- )
- final_documents = [action_document]
-
- # Complete progress tracking
- self.services.chat.progressLogFinish(operationId, True)
-
- return ActionResult.isSuccess(documents=final_documents)
-
- except Exception as e:
- logger.error(f"Error in AI processing: {str(e)}")
-
- # Complete progress tracking with failure
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass # Don't fail on progress logging errors
-
- return ActionResult.isFailure(
- error=str(e)
- )
-
-
- @action
- async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Web research with two-step process: search for URLs, then crawl content.
- - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
- - Output format: JSON with research results including URLs and content.
-
- Parameters:
- - prompt (str, required): Natural language research instruction.
- - urlList (list, optional): Specific URLs to crawl, if needed.
- - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
- - language (str, optional): Language code (lowercase, e.g., de, en, fr).
- - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
- """
- try:
- prompt = parameters.get("prompt")
- if not prompt:
- return ActionResult.isFailure(error="Research prompt is required")
-
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"web_research_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Web Research",
- "Searching and Crawling",
- "Extracting URLs and Content",
- parentOperationId=parentOperationId
- )
-
- # Call webcrawl service - service handles all AI intention analysis and processing
- result = await self.services.web.performWebResearch(
- prompt=prompt,
- urls=parameters.get("urlList", []),
- country=parameters.get("country"),
- language=parameters.get("language"),
- researchDepth=parameters.get("researchDepth", "general"),
- operationId=operationId
- )
-
- # Complete progress tracking
- self.services.chat.progressLogFinish(operationId, True)
-
- # Get meaningful filename from research result (generated by intent analyzer)
- suggestedFilename = result.get("suggested_filename")
- if suggestedFilename:
- # Clean and validate filename
- import re
- cleaned = suggestedFilename.strip().strip('"\'')
- cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
- # Ensure it doesn't already have extension
- if cleaned.lower().endswith('.json'):
- cleaned = cleaned[:-5]
- # Validate: should be reasonable length and contain only safe characters
- if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
- meaningfulName = f"{cleaned}.json"
- else:
- # Fallback to generic meaningful filename
- meaningfulName = self._generateMeaningfulFileName(
- base_name="web_research",
- extension="json",
- action_name="research"
- )
- else:
- # Fallback to generic meaningful filename
- meaningfulName = self._generateMeaningfulFileName(
- base_name="web_research",
- extension="json",
- action_name="research"
- )
-
- from modules.datamodels.datamodelChat import ActionDocument
- validationMetadata = {
- "actionType": "ai.webResearch",
- "prompt": prompt,
- "urlList": parameters.get("urlList", []),
- "country": parameters.get("country"),
- "language": parameters.get("language"),
- "researchDepth": parameters.get("researchDepth", "general"),
- "resultFormat": "json"
- }
- actionDocument = ActionDocument(
- documentName=meaningfulName,
- documentData=result,
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[actionDocument])
-
- except Exception as e:
- logger.error(f"Error in web research: {str(e)}")
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass
- return ActionResult.isFailure(error=str(e))
-
-
- # ============================================================================
- # Document Transformation Wrappers
- # ============================================================================
-
- @action
- async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Summarize one or more documents, extracting key points and main ideas.
- - Input requirements: documentList (required); optional summaryLength, focus.
- - Output format: Text document with summary (default: txt, can be overridden with resultType).
-
- Parameters:
- - documentList (list, required): Document reference(s) to summarize.
- - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
- - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
- - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- summaryLength = parameters.get("summaryLength", "medium")
- focus = parameters.get("focus")
- resultType = parameters.get("resultType", "txt")
-
- lengthInstructions = {
- "brief": "Create a brief summary (2-3 paragraphs)",
- "medium": "Create a medium-length summary (comprehensive but concise)",
- "detailed": "Create a detailed summary covering all major points"
- }
- lengthInstruction = lengthInstructions.get(summaryLength.lower(), lengthInstructions["medium"])
-
- aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
- if focus:
- aiPrompt += f" Focus specifically on: {focus}."
- aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
-
-
- @action
- async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Translate documents to a target language while preserving formatting and structure.
- - Input requirements: documentList (required); targetLanguage (required).
- - Output format: Translated document in same format as input (default) or specified resultType.
-
- Parameters:
- - documentList (list, required): Document reference(s) to translate.
- - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
- - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
- - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
- - resultType (str, optional): Output file extension. If not specified, uses same format as input.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- targetLanguage = parameters.get("targetLanguage")
- if not targetLanguage:
- return ActionResult.isFailure(error="targetLanguage is required")
-
- sourceLanguage = parameters.get("sourceLanguage")
- preserveFormatting = parameters.get("preserveFormatting", True)
- resultType = parameters.get("resultType")
-
- aiPrompt = f"Translate the provided document(s) to {targetLanguage}."
- if sourceLanguage:
- aiPrompt += f" The source language is {sourceLanguage}."
- if preserveFormatting:
- aiPrompt += " Preserve all formatting, structure, tables, and layout exactly as they appear in the original document."
- else:
- aiPrompt += " Focus on accurate translation of content."
- aiPrompt += " Maintain the same document structure, headings, and organization."
-
- processParams = {
- "aiPrompt": aiPrompt,
- "documentList": documentList
- }
- if resultType:
- processParams["resultType"] = resultType
-
- return await self.process(processParams)
-
-
- @action
- async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
- - Input requirements: documentList (required); inputFormat and outputFormat (required).
- - Output format: Document in target format with specified formatting options.
- - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).
-
- Parameters:
- - documentList (list, required): Document reference(s) to convert.
- - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
- - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
- - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
- - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
- - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
- - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- inputFormat = parameters.get("inputFormat")
- outputFormat = parameters.get("outputFormat")
- if not inputFormat or not outputFormat:
- return ActionResult.isFailure(error="inputFormat and outputFormat are required")
-
- # Normalize formats (remove leading dot if present)
- normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
- normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()
-
- # Get documents
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- else:
- docRefList = DocumentReferenceList.from_string_list([documentList])
-
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
- if not chatDocuments:
- return ActionResult.isFailure(error="No documents found in documentList")
-
- # Check if input is standardized JSON format - if so, use direct rendering
- if normalizedInputFormat == "json" and len(chatDocuments) == 1:
- try:
- import json
- doc = chatDocuments[0]
- # ChatDocument doesn't have documentData - need to load file content using fileId
- docBytes = self.services.chat.getFileData(doc.fileId)
- if not docBytes:
- raise ValueError(f"No file data found for fileId={doc.fileId}")
-
- # Decode bytes to string
- docData = docBytes.decode('utf-8')
-
- # Try to parse as JSON
- if isinstance(docData, str):
- jsonData = json.loads(docData)
- elif isinstance(docData, dict):
- jsonData = docData
- else:
- jsonData = None
-
- # Check if it's standardized JSON format (has "documents" or "sections")
- if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)):
- # Use direct rendering - no AI call needed!
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generationService = GenerationService(self.services)
-
- # Ensure format is "documents" array
- if "documents" not in jsonData:
- jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}
-
- # Get title
- title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document")
-
- # Render with options
- renderOptions = {}
- if normalizedOutputFormat == "csv":
- renderOptions["delimiter"] = parameters.get("delimiter", ",")
- renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
- renderOptions["includeHeader"] = parameters.get("includeHeader", True)
-
- rendered_content, mime_type = await generationService.renderReport(
- jsonData, normalizedOutputFormat, title, None, None
- )
-
- # Apply CSV options if needed (renderer will handle them)
- if normalizedOutputFormat == "csv" and renderOptions:
- rendered_content = self._applyCsvOptions(rendered_content, renderOptions)
-
- from modules.datamodels.datamodelChat import ActionDocument
- validationMetadata = {
- "actionType": "ai.convert",
- "inputFormat": normalizedInputFormat,
- "outputFormat": normalizedOutputFormat,
- "hasSourceJson": True,
- "conversionType": "direct_rendering"
- }
- actionDoc = ActionDocument(
- documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
- documentData=rendered_content,
- mimeType=mime_type,
- sourceJson=jsonData, # Preserve source JSON for structure validation
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[actionDoc])
-
- except Exception as e:
- logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")
- # Fall through to AI-based conversion
-
- # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
- columnsPerRow = parameters.get("columnsPerRow")
- delimiter = parameters.get("delimiter", ",")
- includeHeader = parameters.get("includeHeader", True)
- language = parameters.get("language", "en")
-
- aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."
-
- if normalizedOutputFormat == "csv":
- aiPrompt += f" Use '{delimiter}' as the delimiter character."
- if columnsPerRow:
- aiPrompt += f" Format the output with {columnsPerRow} columns per row."
- if not includeHeader:
- aiPrompt += " Do not include a header row."
- else:
- aiPrompt += " Include a header row with column names."
-
- if language and language != "en":
- aiPrompt += f" Use language: {language}."
-
- aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": normalizedOutputFormat
- })
-
- def _applyCsvOptions(self, csvContent: str, options: Dict[str, Any]) -> str:
- """Apply CSV formatting options to rendered CSV content."""
- delimiter = options.get("delimiter", ",")
- columnsPerRow = options.get("columnsPerRow")
- includeHeader = options.get("includeHeader", True)
-
- # Check if any options need to be applied
- needsProcessing = (delimiter != ",") or (columnsPerRow is not None) or (not includeHeader)
-
- if not needsProcessing:
- return csvContent
-
- import csv
- import io
- # Re-read CSV with comma, write with new delimiter
- reader = csv.reader(io.StringIO(csvContent))
- output = io.StringIO()
- writer = csv.writer(output, delimiter=delimiter)
-
- rows = list(reader)
-
- # Handle header
- if not includeHeader and rows:
- rows = rows[1:] # Skip header
-
- # Handle columnsPerRow
- if columnsPerRow:
- newRows = []
- for row in rows:
- # Split row into chunks of columnsPerRow
- for i in range(0, len(row), columnsPerRow):
- chunk = row[i:i+columnsPerRow]
- # Pad to columnsPerRow if needed
- while len(chunk) < columnsPerRow:
- chunk.append("")
- newRows.append(chunk)
- rows = newRows
-
- for row in rows:
- writer.writerow(row)
-
- return output.getvalue()
-
-
- @action
- async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
- - Input requirements: documentList (required); targetFormat (required).
- - Output format: Document in target format.
-
- Parameters:
- - documentList (list, required): Document reference(s) to convert.
- - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
- - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- targetFormat = parameters.get("targetFormat")
- if not targetFormat:
- return ActionResult.isFailure(error="targetFormat is required")
-
- preserveStructure = parameters.get("preserveStructure", True)
-
- # Normalize format (remove leading dot if present)
- normalizedFormat = targetFormat.strip().lstrip('.').lower()
-
- aiPrompt = f"Convert the provided document(s) to {normalizedFormat.upper()} format."
- if preserveStructure:
- aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout."
- aiPrompt += " Ensure the converted document maintains the same content and information as the original."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": normalizedFormat
- })
-
-
- @action
- async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
- - Input requirements: documentList (required); optional dataStructure, fields.
- - Output format: JSON by default, or specified resultType.
-
- Parameters:
- - documentList (list, required): Document reference(s) to extract data from.
- - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
- - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
- - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- dataStructure = parameters.get("dataStructure", "nested")
- fields = parameters.get("fields", [])
- resultType = parameters.get("resultType", "json")
-
- aiPrompt = "Extract structured data from the provided document(s)."
- if fields:
- fieldsStr = ", ".join(fields)
- aiPrompt += f" Extract the following specific fields: {fieldsStr}."
- else:
- aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."
-
- structureInstructions = {
- "flat": "Use a flat key-value structure with simple properties.",
- "nested": "Use a nested JSON structure with logical grouping of related data.",
- "list": "Structure the data as a list/array of objects, one per entity or record."
- }
- aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}"
-
- aiPrompt += " Ensure all extracted data is accurate and complete."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
-
-
- # ============================================================================
- # Content Generation Wrapper
- # ============================================================================
-
- @action
- async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate documents from scratch or based on templates/inputs.
- - Input requirements: prompt or description (required); optional documentList (for templates/references).
- - Output format: Document in specified format (default: docx).
-
- Parameters:
- - prompt (str, required): Description of the document to generate.
- - documentList (list, optional): Template documents or reference documents to use as a guide.
- - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
- - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
- """
- prompt = parameters.get("prompt")
- if not prompt:
- return ActionResult.isFailure(error="prompt is required")
-
- documentList = parameters.get("documentList", [])
- documentType = parameters.get("documentType")
- resultType = parameters.get("resultType", "docx")
-
- aiPrompt = f"Generate a document based on the following requirements: {prompt}"
- if documentType:
- aiPrompt += f" Document type: {documentType}."
- if documentList:
- aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style."
- aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization."
-
- processParams = {
- "aiPrompt": aiPrompt,
- "resultType": resultType
- }
- if documentList:
- processParams["documentList"] = documentList
-
- return await self.process(processParams)
diff --git a/modules/workflows/methods/methodAi/actions/__init__.py b/modules/workflows/methods/methodAi/actions/__init__.py
index 8ebe6679..f0f18286 100644
--- a/modules/workflows/methods/methodAi/actions/__init__.py
+++ b/modules/workflows/methods/methodAi/actions/__init__.py
@@ -10,6 +10,7 @@ from .summarizeDocument import summarizeDocument
from .translateDocument import translateDocument
from .convertDocument import convertDocument
from .generateDocument import generateDocument
+from .generateCode import generateCode
__all__ = [
'process',
@@ -18,5 +19,6 @@ __all__ = [
'translateDocument',
'convertDocument',
'generateDocument',
+ 'generateCode',
]
diff --git a/modules/workflows/methods/methodAi/actions/convertDocument.py b/modules/workflows/methods/methodAi/actions/convertDocument.py
index e86b1d5a..9a7522ba 100644
--- a/modules/workflows/methods/methodAi/actions/convertDocument.py
+++ b/modules/workflows/methods/methodAi/actions/convertDocument.py
@@ -1,31 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Convert Document action for AI operations.
-Converts documents between different formats (PDF→Word, Excel→CSV, etc.).
-"""
-
import logging
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
-@action
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
- - Input requirements: documentList (required); targetFormat (required).
- - Output format: Document in target format.
-
- Parameters:
- - documentList (list, required): Document reference(s) to convert.
- - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
- - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
- """
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
diff --git a/modules/workflows/methods/methodAi/actions/generateCode.py b/modules/workflows/methods/methodAi/actions/generateCode.py
new file mode 100644
index 00000000..52e36316
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/generateCode.py
@@ -0,0 +1,135 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+import logging
+import time
+from typing import Dict, Any
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
+from modules.datamodels.datamodelWorkflow import AiResponse
+
+logger = logging.getLogger(__name__)
+
+async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
+ prompt = parameters.get("prompt")
+ if not prompt:
+ return ActionResult.isFailure(error="prompt is required")
+
+ documentList = parameters.get("documentList", [])
+ resultType = parameters.get("resultType")
+
+ # Auto-detect format from prompt if not provided
+ if not resultType:
+ promptLower = prompt.lower()
+ if ".html" in promptLower or "html file" in promptLower:
+ resultType = "html"
+ elif ".js" in promptLower or "javascript" in promptLower:
+ resultType = "js"
+ elif ".py" in promptLower or "python" in promptLower:
+ resultType = "py"
+ elif ".ts" in promptLower or "typescript" in promptLower:
+ resultType = "ts"
+ elif ".java" in promptLower:
+ resultType = "java"
+ elif ".cpp" in promptLower or ".c++" in promptLower:
+ resultType = "cpp"
+ else:
+ resultType = "txt" # Default
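+    # Note: matching is substring-based and first-match (html > js > py > ts > java > cpp),
+    # so e.g. a prompt mentioning ".json" also hits the ".js" rule; unmatched prompts fall back to txt.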
+
+ # Create operation ID for progress tracking
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"code_gen_{workflowId}_{int(time.time())}"
+ parentOperationId = parameters.get('parentOperationId')
+
+ try:
+ # Convert documentList to DocumentReferenceList if needed
+ docRefList = None
+ if documentList:
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+
+ if isinstance(documentList, DocumentReferenceList):
+ docRefList = documentList
+ elif isinstance(documentList, str):
+ docRefList = DocumentReferenceList.from_string_list([documentList])
+ elif isinstance(documentList, list):
+ docRefList = DocumentReferenceList.from_string_list(documentList)
+ else:
+ docRefList = DocumentReferenceList(references=[])
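+            # Accepted shapes: a DocumentReferenceList (used as-is), a single reference
+            # string, or a list of reference strings; anything else becomes an empty list.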
+
+ # Prepare title
+ title = "Generated Code"
+
+ # Call AI service with explicit code intent
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_GENERATE,
+ priority=PriorityEnum.BALANCED,
+ processingMode=ProcessingModeEnum.DETAILED
+ )
+
+ aiResponse: AiResponse = await self.services.ai.callAiContent(
+ prompt=prompt,
+ options=options,
+ documentList=docRefList,
+ outputFormat=resultType,
+ title=title,
+ parentOperationId=parentOperationId,
+ generationIntent="code" # Explicit intent, skips detection
+ )
+
+ # Convert AiResponse to ActionResult
+ documents = []
+
+ # Convert DocumentData to ActionDocument
+ if aiResponse.documents:
+ for docData in aiResponse.documents:
+ documents.append(ActionDocument(
+ documentName=docData.documentName,
+ documentData=docData.documentData,
+ mimeType=docData.mimeType,
+ sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None
+ ))
+
+ # If no documents but content exists, create a document from content
+ if not documents and aiResponse.content:
+ # Determine document name from metadata
+ docName = f"code.{resultType}"
+ if aiResponse.metadata and aiResponse.metadata.filename:
+ docName = aiResponse.metadata.filename
+ elif aiResponse.metadata and aiResponse.metadata.title:
+ import re
+ sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
+ sanitized = re.sub(r"_+", "_", sanitized).strip("_")
+ if sanitized:
+ if not sanitized.lower().endswith(f".{resultType}"):
+ docName = f"{sanitized}.{resultType}"
+ else:
+ docName = sanitized
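+        # e.g. a metadata title "My Web App!" with resultType "html" yields docName "My_Web_App.html"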
+
+        # Determine mime type (fall back to text/plain for unknown formats)
+        mimeTypes = {
+            "html": "text/html",
+            "js": "application/javascript",
+            "py": "text/x-python",
+            "ts": "application/typescript",
+            "java": "text/x-java-source",
+            "cpp": "text/x-c++src",
+        }
+        mimeType = mimeTypes.get(resultType, "text/plain")
+
+ documents.append(ActionDocument(
+ documentName=docName,
+ documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
+ mimeType=mimeType
+ ))
+
+ return ActionResult.isSuccess(documents=documents)
+
+ except Exception as e:
+ logger.error(f"Error in code generation: {str(e)}")
+ return ActionResult.isFailure(error=str(e))
+
diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py
index 6569ddab..4e67251b 100644
--- a/modules/workflows/methods/methodAi/actions/generateDocument.py
+++ b/modules/workflows/methods/methodAi/actions/generateDocument.py
@@ -1,15 +1,9 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Generate Document action for AI operations.
-Wrapper around AI service callAiContent method.
-"""
-
import logging
import time
from typing import Dict, Any, Optional, List
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
@@ -17,23 +11,7 @@ from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
logger = logging.getLogger(__name__)
-@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
- - Input requirements: prompt or description (required); optional documentList (for templates/references).
- - Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt).
-
- Parameters:
- - prompt (str, required): Description of the document to generate.
- - documentList (list, optional): Template documents or reference documents to use as a guide.
- - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
- - resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
- - maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
- - parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
- - progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
- """
prompt = parameters.get("prompt")
if not prompt:
return ActionResult.isFailure(error="prompt is required")
@@ -97,7 +75,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
            documentList=docRefList,  # Pass documentList directly - callAiContent runs Phases 5A-5E
outputFormat=resultType,
title=title,
- parentOperationId=parentOperationId
+ parentOperationId=parentOperationId,
+ generationIntent="document" # NEW: Explicit intent, skips detection
)
# Convert AiResponse to ActionResult
diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py
index 807c1a64..5f05afed 100644
--- a/modules/workflows/methods/methodAi/actions/process.py
+++ b/modules/workflows/methods/methodAi/actions/process.py
@@ -1,36 +1,17 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Process action for AI operations.
-Universal AI document processing action.
-"""
-
import logging
import time
import json
from typing import Dict, Any, List, Optional
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions
from modules.datamodels.datamodelExtraction import ContentPart
logger = logging.getLogger(__name__)
-@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
- - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
- - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
- - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
-
- Parameters:
- - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
- - documentList (list, optional): Document reference(s) in any format to use as input/context.
- - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
- """
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
@@ -88,7 +69,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
logger.info(f"Using result type: {resultType} -> {output_extension}")
- # Check if contentParts are already provided (from context.extractContent or other sources)
+ # Phase 7.3: Extract content first if documents provided, then use contentParts
+ # Check if contentParts are already provided (preferred path)
contentParts: Optional[List[ContentPart]] = None
if "contentParts" in parameters:
contentParts = parameters.get("contentParts")
@@ -100,42 +82,96 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
contentParts = None
+ # If contentParts not provided but documentList is, extract content first
+ if not contentParts and documentList.references:
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
+
+ # Get ChatDocuments
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
+ if not chatDocuments:
+ logger.warning("No documents found in documentList")
+ else:
+ logger.info(f"Extracting content from {len(chatDocuments)} documents")
+
+ # Prepare extraction options (use defaults if not provided)
+ from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
+ extractionOptions = parameters.get("extractionOptions")
+ if not extractionOptions:
+ extractionOptions = ExtractionOptions(
+ prompt="Extract all content from the document",
+ mergeStrategy=MergeStrategy(
+ mergeType="concatenate",
+ groupBy="typeGroup",
+ orderBy="id"
+ ),
+ processDocumentsIndividually=True
+ )
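+                    # The defaults above request plain content extraction: parts are
+                    # concatenated, grouped by typeGroup, ordered by id, per document.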
+
+ # Extract content using extraction service
+ extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
+
+ # Combine all ContentParts from all extracted results
+ contentParts = []
+ for extracted in extractedResults:
+ if extracted.parts:
+ contentParts.extend(extracted.parts)
+
+ logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
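+        # With extraction handled here (Phase 7.3), the AI call below always receives
+        # pre-extracted contentParts, or None when nothing was extracted.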
+
# Update progress - preparing AI call
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
- # Build options
+ # Detect image generation from resultType
+ imageFormats = ["png", "jpg", "jpeg", "gif", "webp"]
+ isImageGeneration = normalized_result_type in imageFormats
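+        # e.g. resultType "png" routes the request to IMAGE_GENERATE; all other formats stay on DATA_GENERATE.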
+
+ # Build options with correct operationType
output_format = output_extension.replace('.', '') or 'txt'
+ from modules.datamodels.datamodelAi import OperationTypeEnum
options = AiCallOptions(
- resultFormat=output_format
+ resultFormat=output_format,
+ operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
)
+
+ # Get generationIntent from parameters
+ generationIntent = parameters.get("generationIntent")
+
+ # For DATA_GENERATE, generationIntent is REQUIRED
+ # If not provided, default to "document" for document formats (xlsx, docx, pdf, txt, html, etc.)
+ # This is format-based defaulting, not prompt-based auto-detection
+ if options.operationType == OperationTypeEnum.DATA_GENERATE and not generationIntent:
+ # Document formats (default to document generation)
+ documentFormats = ["xlsx", "docx", "pdf", "txt", "md", "html", "csv", "xml", "json", "pptx"]
+ # Code formats (should use ai.generateCode instead, but default to code if ai.process is used)
+ codeFormats = ["py", "js", "ts", "java", "cpp", "c", "go", "rs", "rb", "php", "swift", "kt"]
+
+ if normalized_result_type in documentFormats:
+ generationIntent = "document"
+ logger.info(f"Defaulting generationIntent to 'document' for resultType '{normalized_result_type}'")
+ elif normalized_result_type in codeFormats:
+ generationIntent = "code"
+ logger.info(f"Defaulting generationIntent to 'code' for resultType '{normalized_result_type}'")
+ else:
+ # Unknown format - default to document (most common use case)
+ generationIntent = "document"
+ logger.warning(
+ f"Unknown resultType '{normalized_result_type}', defaulting generationIntent to 'document'. "
+ f"For code generation, use ai.generateCode action or explicitly pass generationIntent='code'."
+ )
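+        # Example mapping: "xlsx" -> "document", "py" -> "code"; an unlisted type such
+        # as "svg" falls back to "document" with the warning above.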
# Update progress - calling AI
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
- # Use unified callAiContent method
- # If contentParts provided (pre-extracted), use them directly
- # Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
- # Note: ContentExtracted documents (from context.extractContent) are now handled
- # automatically in _extractAndPrepareContent() (Phase 5B)
- if contentParts:
- # Pre-extracted ContentParts - use them directly
- aiResponse = await self.services.ai.callAiContent(
- prompt=aiPrompt,
- options=options,
- contentParts=contentParts, # Pre-extracted ContentParts
- outputFormat=output_format,
- parentOperationId=operationId
- )
- else:
- # Pass documentList - callAiContent handles Phases 5A-5E internally
- # This includes automatic detection of ContentExtracted documents
- aiResponse = await self.services.ai.callAiContent(
- prompt=aiPrompt,
- options=options,
-            documentList=documentList,  # callAiContent runs Phases 5A-5E
- outputFormat=output_format,
- parentOperationId=operationId
- )
+ # Use unified callAiContent method with contentParts (extraction is now separate)
+ # ContentParts are already extracted above (or None if no documents)
+ aiResponse = await self.services.ai.callAiContent(
+ prompt=aiPrompt,
+ options=options,
+ contentParts=contentParts, # Already extracted (or None if no documents)
+ outputFormat=output_format,
+ parentOperationId=operationId,
+ generationIntent=generationIntent # REQUIRED for DATA_GENERATE
+ )
# Update progress - processing result
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
diff --git a/modules/workflows/methods/methodAi/actions/summarizeDocument.py b/modules/workflows/methods/methodAi/actions/summarizeDocument.py
index 80588712..619e80c2 100644
--- a/modules/workflows/methods/methodAi/actions/summarizeDocument.py
+++ b/modules/workflows/methods/methodAi/actions/summarizeDocument.py
@@ -1,32 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Summarize Document action for AI operations.
-Summarizes one or more documents, extracting key points and main ideas.
-"""
-
import logging
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
-@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Summarize one or more documents, extracting key points and main ideas.
- - Input requirements: documentList (required); optional summaryLength, focus.
- - Output format: Text document with summary (default: txt, can be overridden with resultType).
-
- Parameters:
- - documentList (list, required): Document reference(s) to summarize.
- - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
- - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
- - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
- """
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
@@ -50,6 +31,7 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
- "resultType": resultType
+ "resultType": resultType,
+ "generationIntent": "document" # NEW: Explicit intent
})
diff --git a/modules/workflows/methods/methodAi/actions/translateDocument.py b/modules/workflows/methods/methodAi/actions/translateDocument.py
index 12264e39..7388dcc5 100644
--- a/modules/workflows/methods/methodAi/actions/translateDocument.py
+++ b/modules/workflows/methods/methodAi/actions/translateDocument.py
@@ -1,33 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Translate Document action for AI operations.
-Translates documents to a target language while preserving formatting and structure.
-"""
-
import logging
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
-@action
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Translate documents to a target language while preserving formatting and structure.
- - Input requirements: documentList (required); targetLanguage (required).
- - Output format: Translated document in same format as input (default) or specified resultType.
-
- Parameters:
- - documentList (list, required): Document reference(s) to translate.
- - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
- - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
- - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
- - resultType (str, optional): Output file extension. If not specified, uses same format as input.
- """
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
@@ -51,7 +31,8 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
processParams = {
"aiPrompt": aiPrompt,
- "documentList": documentList
+ "documentList": documentList,
+ "generationIntent": "document" # NEW: Explicit intent
}
if resultType:
processParams["resultType"] = resultType
diff --git a/modules/workflows/methods/methodAi/actions/webResearch.py b/modules/workflows/methods/methodAi/actions/webResearch.py
index 2bd5c3dd..62b43bce 100644
--- a/modules/workflows/methods/methodAi/actions/webResearch.py
+++ b/modules/workflows/methods/methodAi/actions/webResearch.py
@@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Web Research action for AI operations.
-Web research with two-step process: search for URLs, then crawl content.
-"""
-
import logging
import time
import re
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Web research with two-step process: search for URLs, then crawl content.
- - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
- - Output format: JSON with research results including URLs and content.
-
- Parameters:
- - prompt (str, required): Natural language research instruction.
- - urlList (list, optional): Specific URLs to crawl, if needed.
- - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
- - language (str, optional): Language code (lowercase, e.g., de, en, fr).
- - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
- """
try:
prompt = parameters.get("prompt")
if not prompt:
diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py
index 881b007d..86efe406 100644
--- a/modules/workflows/methods/methodAi/methodAi.py
+++ b/modules/workflows/methods/methodAi/methodAi.py
@@ -17,6 +17,7 @@ from .actions.summarizeDocument import summarizeDocument
from .actions.translateDocument import translateDocument
from .actions.convertDocument import convertDocument
from .actions.generateDocument import generateDocument
+from .actions.generateCode import generateCode
logger = logging.getLogger(__name__)
@@ -59,6 +60,14 @@ class MethodAi(MethodBase):
required=False,
default="txt",
description="Output file extension. All output documents will use this format"
+ ),
+ "generationIntent": WorkflowActionParameter(
+ name="generationIntent",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["document", "code", "image"],
+ required=False,
+ description="Explicit generation intent (\"document\" | \"code\" | \"image\"). For DATA_GENERATE operations, if not provided, defaults based on resultType: document formats (xlsx, docx, pdf, etc.) → \"document\", code formats (py, js, ts, etc.) → \"code\". For IMAGE_GENERATE operations, this parameter is ignored. Best practice: Use qualified actions (ai.generateDocument, ai.generateCode) instead of ai.process."
)
},
execute=process.__get__(self, self.__class__)
@@ -256,6 +265,35 @@ class MethodAi(MethodBase):
)
},
execute=generateDocument.__get__(self, self.__class__)
+ ),
+ "generateCode": WorkflowActionDefinition(
+ actionId="ai.generateCode",
+ description="Generate code files - explicitly sets intent to 'code'",
+ parameters={
+ "prompt": WorkflowActionParameter(
+ name="prompt",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=True,
+ description="Description of code to generate"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=False,
+ description="Reference documents"
+ ),
+ "resultType": WorkflowActionParameter(
+ name="resultType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
+ required=False,
+                    description="Output format (html, js, py, etc.). Default: auto-detected from the prompt"
+ )
+ },
+ execute=generateCode.__get__(self, self.__class__)
)
}
@@ -269,6 +307,7 @@ class MethodAi(MethodBase):
self.translateDocument = translateDocument.__get__(self, self.__class__)
self.convertDocument = convertDocument.__get__(self, self.__class__)
self.generateDocument = generateDocument.__get__(self, self.__class__)
+ self.generateCode = generateCode.__get__(self, self.__class__)
def _format_timestamp_for_filename(self) -> str:
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
diff --git a/modules/workflows/methods/methodContext.py.old b/modules/workflows/methods/methodContext.py.old
deleted file mode 100644
index 0c7e1cae..00000000
--- a/modules/workflows/methods/methodContext.py.old
+++ /dev/null
@@ -1,460 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-Context and workflow information method module.
-Handles workflow context queries and document indexing.
-"""
-
-import time
-import json
-import logging
-import aiohttp
-from typing import Dict, Any, List
-from datetime import datetime, UTC
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
-from modules.shared.configuration import APP_CONFIG
-
-logger = logging.getLogger(__name__)
-
-class MethodContext(MethodBase):
- """Context and workflow information methods."""
-
- def __init__(self, services):
- super().__init__(services)
- self.name = "context"
- self.description = "Context and workflow information methods"
-
- @action
- async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
- - Input requirements: No input documents required. Optional resultType parameter.
- - Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
-
- Parameters:
- - resultType (str, optional): Output format (json, txt, md). Default: json.
- """
- try:
- workflow = self.services.workflow
- if not workflow:
- return ActionResult.isFailure(
- error="No workflow available"
- )
-
- resultType = parameters.get("resultType", "json").lower().strip().lstrip('.')
-
- # Get available documents index from chat service
- documentsIndex = self.services.chat.getAvailableDocuments(workflow)
-
- if not documentsIndex or documentsIndex == "No documents available" or documentsIndex == "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.":
- # Return empty index structure
- if resultType == "json":
- indexData = {
- "workflowId": getattr(workflow, 'id', 'unknown'),
- "totalDocuments": 0,
- "rounds": [],
- "documentReferences": []
- }
- indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
- else:
- indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
- else:
- # Parse the document index string to extract structured information
- indexData = self._parseDocumentIndex(documentsIndex, workflow)
-
- if resultType == "json":
- indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
- elif resultType == "md":
- indexContent = self._formatAsMarkdown(indexData)
- else: # txt
- indexContent = self._formatAsText(indexData, documentsIndex)
-
- # Generate meaningful filename
- workflowContext = self.services.chat.getWorkflowContext()
- filename = self._generateMeaningfulFileName(
- "document_index",
- resultType if resultType in ["json", "txt", "md"] else "json",
- workflowContext,
- "getDocumentIndex"
- )
-
- validationMetadata = {
- "actionType": "context.getDocumentIndex",
- "resultType": resultType,
- "workflowId": getattr(workflow, 'id', 'unknown'),
- "totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
- }
-
- # Create ActionDocument
- document = ActionDocument(
- documentName=filename,
- documentData=indexContent,
- mimeType="application/json" if resultType == "json" else "text/plain",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- logger.error(f"Error generating document index: {str(e)}")
- return ActionResult.isFailure(
- error=f"Failed to generate document index: {str(e)}"
- )
-
- def _parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]:
- """Parse the document index string into structured data."""
- try:
- indexData = {
- "workflowId": getattr(workflow, 'id', 'unknown'),
- "generatedAt": datetime.now(UTC).isoformat(),
- "totalDocuments": 0,
- "rounds": [],
- "documentReferences": []
- }
-
- # Extract document references from the index string
- lines = documentsIndex.split('\n')
- currentRound = None
- currentDocList = None
-
- for line in lines:
- line = line.strip()
- if not line:
- continue
-
- # Check for round headers
- if "Current round documents:" in line:
- currentRound = "current"
- continue
- elif "Past rounds documents:" in line:
- currentRound = "past"
- continue
-
- # Check for document list references (docList:...)
- if line.startswith("- docList:"):
- docListRef = line.replace("- docList:", "").strip()
- currentDocList = {
- "reference": docListRef,
- "round": currentRound,
- "documents": []
- }
- indexData["rounds"].append(currentDocList)
- continue
-
- # Check for individual document references (docItem:...)
- if line.startswith(" - docItem:") or line.startswith("- docItem:"):
- docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip()
- indexData["documentReferences"].append({
- "reference": docItemRef,
- "round": currentRound,
- "docList": currentDocList["reference"] if currentDocList else None
- })
- indexData["totalDocuments"] += 1
- if currentDocList:
- currentDocList["documents"].append(docItemRef)
-
- return indexData
-
- except Exception as e:
- logger.error(f"Error parsing document index: {str(e)}")
- return {
- "workflowId": getattr(workflow, 'id', 'unknown'),
- "error": f"Failed to parse document index: {str(e)}",
- "rawIndex": documentsIndex
- }
-
- def _formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
- """Format document index as Markdown."""
- try:
- md = f"# Document Index\n\n"
- md += f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n"
- md += f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n"
- md += f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n"
-
- if indexData.get('rounds'):
- md += "## Documents by Round\n\n"
- for roundInfo in indexData['rounds']:
- roundLabel = roundInfo.get('round', 'unknown').title()
- md += f"### {roundLabel} Round\n\n"
- md += f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n"
- if roundInfo.get('documents'):
- md += "**Documents:**\n\n"
- for docRef in roundInfo['documents']:
- md += f"- `{docRef}`\n"
- md += "\n"
-
- if indexData.get('documentReferences'):
- md += "## All Document References\n\n"
- for docRef in indexData['documentReferences']:
- md += f"- `{docRef.get('reference', 'unknown')}`\n"
-
- return md
-
- except Exception as e:
- logger.error(f"Error formatting as Markdown: {str(e)}")
- return f"# Document Index\n\nError formatting index: {str(e)}\n"
-
- def _formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
- """Format document index as plain text."""
- try:
- text = "Document Index\n"
- text += "=" * 50 + "\n\n"
- text += f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n"
- text += f"Generated At: {indexData.get('generatedAt', 'unknown')}\n"
- text += f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n"
-
- # Include the raw formatted index for readability
- text += rawIndex
-
- return text
-
- except Exception as e:
- logger.error(f"Error formatting as text: {str(e)}")
- return f"Document Index\n\nError formatting index: {str(e)}\n\nRaw index:\n{rawIndex}\n"
-
- @action
- async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Extract content from documents (separate from AI calls).
-
- This action performs pure content extraction without AI processing.
- The extracted ContentParts can then be used by subsequent AI processing actions.
-
- Parameters:
- - documentList (list, required): Document reference(s) to extract content from.
- - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
-
- Returns:
- - ActionResult with ActionDocument containing ContentExtracted objects
- - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
- """
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"context_extract_{workflowId}_{int(time.time())}"
-
- # Extract documentList from parameters dict
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- documentListParam = parameters.get("documentList")
- if not documentListParam:
- return ActionResult.isFailure(error="documentList is required")
-
- # Convert to DocumentReferenceList if needed
- if isinstance(documentListParam, DocumentReferenceList):
- documentList = documentListParam
- elif isinstance(documentListParam, str):
- documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
- documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
- return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Extracting content from documents",
- "Content Extraction",
- f"Documents: {len(documentList.references)}",
- parentOperationId=parentOperationId
- )
-
- # Get ChatDocuments from documentList
- self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
-
- if not chatDocuments:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No documents found in documentList")
-
- logger.info(f"Extracting content from {len(chatDocuments)} documents")
-
- # Prepare extraction options
- self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
- extractionOptionsParam = parameters.get("extractionOptions")
-
- # Convert dict to ExtractionOptions object if needed, or create defaults
- if extractionOptionsParam:
- if isinstance(extractionOptionsParam, dict):
- # Convert dict to ExtractionOptions object
- extractionOptions = ExtractionOptions(**extractionOptionsParam)
- elif isinstance(extractionOptionsParam, ExtractionOptions):
- extractionOptions = extractionOptionsParam
- else:
- # Invalid type, use defaults
- extractionOptions = None
- else:
- extractionOptions = None
-
- # If extractionOptions not provided, create defaults
- if not extractionOptions:
- # Default extraction options for pure content extraction (no AI processing)
- extractionOptions = ExtractionOptions(
- prompt="Extract all content from the document",
- mergeStrategy=MergeStrategy(
- mergeType="concatenate",
- groupBy="typeGroup",
- orderBy="id"
- ),
- processDocumentsIndividually=True
- )
-
- # Call extraction service with hierarchical progress logging
- self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
- # Pass operationId for hierarchical per-document progress logging
- extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
-
- # Build ActionDocuments from ContentExtracted results
- self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
- actionDocuments = []
- # Map extracted results back to original documents by index (results are in same order)
- for i, extracted in enumerate(extractedResults):
- # Get original document name if available
- originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
- if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
- # Use original filename with "extracted_" prefix
- baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
- documentName = f"{baseName}_extracted_{extracted.id}.json"
- else:
- # Fallback to generic name with index
- documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
-
- # Store ContentExtracted object in ActionDocument.documentData
- validationMetadata = {
- "actionType": "context.extractContent",
- "documentIndex": i,
- "extractedId": extracted.id,
- "partCount": len(extracted.parts) if extracted.parts else 0,
- "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
- }
- actionDoc = ActionDocument(
- documentName=documentName,
- documentData=extracted, # ContentExtracted object
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
- actionDocuments.append(actionDoc)
-
- self.services.chat.progressLogFinish(operationId, True)
-
- return ActionResult.isSuccess(documents=actionDocuments)
-
- except Exception as e:
- logger.error(f"Error in content extraction: {str(e)}")
-
- # Complete progress tracking with failure
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass # Don't fail on progress logging errors
-
- return ActionResult.isFailure(error=str(e))
-
- @action
- async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Trigger preprocessing server at customer tenant to update database with configuration.
-
- This action makes a POST request to the preprocessing server endpoint with the provided
- configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
-
- Parameters:
- - endpoint (str, required): The full URL endpoint for the preprocessing server API.
- - configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
- - authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
-
- Returns:
- - ActionResult with ActionDocument containing "ok" on success, or error message on failure.
- """
- try:
- endpoint = parameters.get("endpoint")
- if not endpoint:
- return ActionResult.isFailure(error="endpoint parameter is required")
-
- configJsonParam = parameters.get("configJson")
- if not configJsonParam:
- return ActionResult.isFailure(error="configJson parameter is required")
-
- authSecretConfigKey = parameters.get("authSecretConfigKey")
- if not authSecretConfigKey:
- return ActionResult.isFailure(error="authSecretConfigKey parameter is required")
-
- # Handle configJson as either dict or JSON string
- if isinstance(configJsonParam, str):
- try:
- configJson = json.loads(configJsonParam)
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"configJson is not valid JSON: {str(e)}")
- elif isinstance(configJsonParam, dict):
- configJson = configJsonParam
- else:
- return ActionResult.isFailure(error=f"configJson must be a dict or JSON string, got {type(configJsonParam)}")
-
- # Get authorization secret from APP_CONFIG using the provided config key
- authSecret = APP_CONFIG.get(authSecretConfigKey)
- if not authSecret:
- errorMsg = f"{authSecretConfigKey} not found in APP_CONFIG"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- # Prepare headers with authorization (default headers as in original function)
- headers = {
- "X-PP-API-Key": authSecret,
- "Content-Type": "application/json"
- }
-
- # Make POST request
- timeout = aiohttp.ClientTimeout(total=60)
- async with aiohttp.ClientSession(timeout=timeout) as session:
- async with session.post(
- endpoint,
- headers=headers,
- json=configJson
- ) as response:
- if response.status in [200, 201]:
- responseText = await response.text()
- logger.info(f"Preprocessing server trigger successful: {response.status}")
- logger.debug(f"Response: {responseText}")
-
- # Generate meaningful filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "preprocessing_result",
- "txt",
- workflowContext,
- "triggerPreprocessingServer"
- )
-
- # Create validation metadata
- validationMetadata = self._createValidationMetadata(
- "triggerPreprocessingServer",
- endpoint=endpoint,
- statusCode=response.status,
- responseText=responseText
- )
-
- # Return success with "ok" document
- document = ActionDocument(
- documentName=filename,
- documentData="ok",
- mimeType="text/plain",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
- else:
- errorText = await response.text()
- errorMsg = f"Preprocessing server trigger failed: {response.status} - {errorText}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- except Exception as e:
- errorMsg = f"Error triggering preprocessing server: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py
index 949ac63d..5b90ce13 100644
--- a/modules/workflows/methods/methodContext/actions/extractContent.py
+++ b/modules/workflows/methods/methodContext/actions/extractContent.py
@@ -1,49 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Extract Content action for Context operations.
-Extracts content from documents (separate from AI calls).
-"""
-
import logging
import time
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentExtracted, ContentPart
logger = logging.getLogger(__name__)
-@action
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Extract raw content parts from documents without AI processing.
-
- This action performs pure content extraction WITHOUT AI/OCR processing.
- It returns ContentParts with different typeGroups:
- - "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.)
- - "image": Images as base64-encoded data (NOT converted to text, no OCR)
- - "table": Tables as structured data
- - "structure": Structured content (JSON, etc.)
- - "container": Container elements (PDF pages, etc.)
-
- IMPORTANT:
- - Images are returned as base64 data, NOT as extracted text
- - No OCR is performed - images are preserved as visual elements
- - Text extraction only works for text-based formats (not images)
- - The extracted ContentParts can then be used by subsequent AI processing actions
-
- Parameters:
- - documentList (list, required): Document reference(s) to extract content from.
- - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
-
- Returns:
- - ActionResult with ActionDocument containing ContentExtracted objects
- - ContentExtracted.parts contains List[ContentPart] with various typeGroups
- - Each ContentPart has a typeGroup indicating its type (text, image, table, etc.)
- """
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
diff --git a/modules/workflows/methods/methodContext/actions/getDocumentIndex.py b/modules/workflows/methods/methodContext/actions/getDocumentIndex.py
index 6c9a6700..9991285b 100644
--- a/modules/workflows/methods/methodContext/actions/getDocumentIndex.py
+++ b/modules/workflows/methods/methodContext/actions/getDocumentIndex.py
@@ -1,30 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Get Document Index action for Context operations.
-Generates a comprehensive index of all documents available in the current workflow.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
- - Input requirements: No input documents required. Optional resultType parameter.
- - Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
-
- Parameters:
- - resultType (str, optional): Output format (json, txt, md). Default: json.
- """
try:
workflow = self.services.workflow
if not workflow:
diff --git a/modules/workflows/methods/methodContext/actions/neutralizeData.py b/modules/workflows/methods/methodContext/actions/neutralizeData.py
index 240fe6b1..8e3b7185 100644
--- a/modules/workflows/methods/methodContext/actions/neutralizeData.py
+++ b/modules/workflows/methods/methodContext/actions/neutralizeData.py
@@ -1,35 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Neutralize Data action for Context operations.
-Neutralizes extracted content data from ContentExtracted documents.
-"""
-
import logging
import time
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
logger = logging.getLogger(__name__)
-@action
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Neutralize data from ContentExtracted documents.
-
- This action takes documents containing ContentExtracted objects (from extractContent)
- and neutralizes the text data in ContentPart.data fields.
-
- Parameters:
- - documentList (list, required): Document reference(s) containing ContentExtracted objects.
-
- Returns:
- - ActionResult with ActionDocument containing neutralized ContentExtracted objects
- """
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
diff --git a/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py b/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
index 7ef16d5f..2f011a25 100644
--- a/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
+++ b/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
@@ -1,37 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Trigger Preprocessing Server action for Context operations.
-Triggers preprocessing server at customer tenant to update database with configuration.
-"""
-
import logging
import json
import aiohttp
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
-@action
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Trigger preprocessing server at customer tenant to update database with configuration.
-
- This action makes a POST request to the preprocessing server endpoint with the provided
- configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
-
- Parameters:
- - endpoint (str, required): The full URL endpoint for the preprocessing server API.
- - configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
- - authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
-
- Returns:
- - ActionResult with ActionDocument containing "ok" on success, or error message on failure.
- """
try:
endpoint = parameters.get("endpoint")
if not endpoint:
diff --git a/modules/workflows/methods/methodJira.py.old b/modules/workflows/methods/methodJira.py.old
deleted file mode 100644
index 2be46c1f..00000000
--- a/modules/workflows/methods/methodJira.py.old
+++ /dev/null
@@ -1,1101 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-JIRA operations method module.
-Handles JIRA ticket operations including connection, export, import, and data processing.
-"""
-
-import logging
-import json
-import io
-import pandas as pd
-import csv as csv_module
-from io import StringIO, BytesIO
-from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-from modules.datamodels.datamodelDocref import DocumentReferenceList
-from modules.shared.configuration import APP_CONFIG
-
-logger = logging.getLogger(__name__)
-
-class MethodJira(MethodBase):
- """JIRA operations methods."""
-
- def __init__(self, services):
- super().__init__(services)
- self.name = "jira"
- self.description = "JIRA operations methods"
- # Store connections in memory (keyed by connectionId)
- self._connections: Dict[str, Any] = {}
-
- def _convertAdfToText(self, adfData):
- """Convert Atlassian Document Format (ADF) to plain text.
-
- Based on Atlassian Document Format specification for JIRA fields.
- Handles paragraphs, lists, text formatting, and other ADF node types.
-
- Args:
- adfData: ADF object or None
-
- Returns:
- str: Plain text content, or empty string if None/invalid
- """
- if not adfData or not isinstance(adfData, dict):
- return ""
-
- if adfData.get("type") != "doc":
- return str(adfData) if adfData else ""
-
- content = adfData.get("content", [])
- if not isinstance(content, list):
- return ""
-
- def extractTextFromContent(contentList, listLevel=0):
- """Recursively extract text from ADF content with proper formatting."""
- textParts = []
- listCounter = 1
-
- for item in contentList:
- if not isinstance(item, dict):
- continue
-
- itemType = item.get("type", "")
-
- if itemType == "text":
- # Extract text content, preserving formatting
- text = item.get("text", "")
- marks = item.get("marks", [])
-
- # Handle text formatting (bold, italic, etc.)
- if marks:
- for mark in marks:
- if mark.get("type") == "strong":
- text = f"**{text}**"
- elif mark.get("type") == "em":
- text = f"*{text}*"
- elif mark.get("type") == "code":
- text = f"`{text}`"
- elif mark.get("type") == "link":
- attrs = mark.get("attrs", {})
- href = attrs.get("href", "")
- if href:
- text = f"[{text}]({href})"
-
- textParts.append(text)
-
- elif itemType == "hardBreak":
- textParts.append("\n")
-
- elif itemType == "paragraph":
- paragraphContent = item.get("content", [])
- if paragraphContent:
- paragraphText = extractTextFromContent(paragraphContent, listLevel)
- if paragraphText.strip():
- textParts.append(paragraphText)
-
- elif itemType == "bulletList":
- listContent = item.get("content", [])
- for listItem in listContent:
- if listItem.get("type") == "listItem":
- listItemContent = listItem.get("content", [])
- for listParagraph in listItemContent:
- if listParagraph.get("type") == "paragraph":
- listParagraphContent = listParagraph.get("content", [])
- if listParagraphContent:
- indent = " " * listLevel
- bulletText = extractTextFromContent(listParagraphContent, listLevel + 1)
- if bulletText.strip():
- textParts.append(f"{indent}• {bulletText}")
-
- elif itemType == "orderedList":
- listContent = item.get("content", [])
- for listItem in listContent:
- if listItem.get("type") == "listItem":
- listItemContent = listItem.get("content", [])
- for listParagraph in listItemContent:
- if listParagraph.get("type") == "paragraph":
- listParagraphContent = listParagraph.get("content", [])
- if listParagraphContent:
- indent = " " * listLevel
- orderedText = extractTextFromContent(listParagraphContent, listLevel + 1)
- if orderedText.strip():
- textParts.append(f"{indent}{listCounter}. {orderedText}")
- listCounter += 1
-
- elif itemType == "listItem":
- # Handle nested list items
- listItemContent = item.get("content", [])
- if listItemContent:
- textParts.append(extractTextFromContent(listItemContent, listLevel))
-
- elif itemType == "embedCard":
- # Handle embedded content (videos, etc.)
- attrs = item.get("attrs", {})
- url = attrs.get("url", "")
- if url:
- textParts.append(f"[Embedded Content: {url}]")
-
- elif itemType == "codeBlock":
- # Handle code blocks
- codeContent = item.get("content", [])
- if codeContent:
- codeText = extractTextFromContent(codeContent, listLevel)
- if codeText.strip():
- textParts.append(f"```\n{codeText}\n```")
-
- elif itemType == "blockquote":
- # Handle blockquotes
- quoteContent = item.get("content", [])
- if quoteContent:
- quoteText = extractTextFromContent(quoteContent, listLevel)
- if quoteText.strip():
- textParts.append(f"> {quoteText}")
-
- elif itemType == "heading":
- # Handle headings
- headingContent = item.get("content", [])
- if headingContent:
- headingText = extractTextFromContent(headingContent, listLevel)
- if headingText.strip():
- level = item.get("attrs", {}).get("level", 1)
- textParts.append(f"{'#' * level} {headingText}")
-
- elif itemType == "rule":
- # Handle horizontal rules
- textParts.append("---")
-
- else:
- # Handle unknown types by trying to extract content
- if "content" in item:
- contentText = extractTextFromContent(item.get("content", []), listLevel)
- if contentText.strip():
- textParts.append(contentText)
-
- return "\n".join(textParts)
-
- result = extractTextFromContent(content)
- return result.strip()
-
- def _getDocumentData(self, documentReference: str) -> Any:
- """Get document data from a document reference (string or document object)."""
- try:
- if isinstance(documentReference, str):
- # Get document from workflow
- documentList = DocumentReferenceList.from_string_list([documentReference])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
- if not chatDocuments or len(chatDocuments) == 0:
- return None
- document = chatDocuments[0]
- return document.documentData
- else:
- # Assume it's already a document object
- return documentReference.documentData if hasattr(documentReference, 'documentData') else documentReference
- except Exception as e:
- logger.error(f"Error getting document data: {str(e)}")
- return None
-
- def _parseJsonFromDocument(self, documentReference: str) -> Optional[Dict[str, Any]]:
- """Parse JSON from a document reference."""
- data = self._getDocumentData(documentReference)
- if data is None:
- return None
-
- if isinstance(data, str):
- try:
- return json.loads(data)
- except json.JSONDecodeError:
- return None
- elif isinstance(data, dict):
- return data
- else:
- return None
-
- @action
- async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Connect to JIRA instance and create ticket interface.
-
- Parameters:
- - apiUsername (str, required): JIRA API username/email
- - apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
- - apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
- - projectCode (str, required): JIRA project code (e.g., "DCS")
- - issueType (str, required): JIRA issue type (e.g., "Task")
- - taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict
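-          Illustrative example (format assumed): {"Summary": ["get", "summary"], "Status": ["get", "status"]}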
-
- Returns:
- - ActionResult with ActionDocument containing connection ID
- """
- try:
- apiUsername = parameters.get("apiUsername")
- if not apiUsername:
- return ActionResult.isFailure(error="apiUsername parameter is required")
-
- apiTokenConfigKey = parameters.get("apiTokenConfigKey")
- if not apiTokenConfigKey:
- return ActionResult.isFailure(error="apiTokenConfigKey parameter is required")
-
- apiUrl = parameters.get("apiUrl")
- if not apiUrl:
- return ActionResult.isFailure(error="apiUrl parameter is required")
-
- projectCode = parameters.get("projectCode")
- if not projectCode:
- return ActionResult.isFailure(error="projectCode parameter is required")
-
- issueType = parameters.get("issueType")
- if not issueType:
- return ActionResult.isFailure(error="issueType parameter is required")
-
- taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
- if not taskSyncDefinitionParam:
- return ActionResult.isFailure(error="taskSyncDefinition parameter is required")
-
- # Parse taskSyncDefinition
- if isinstance(taskSyncDefinitionParam, str):
- try:
- taskSyncDefinition = json.loads(taskSyncDefinitionParam)
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"taskSyncDefinition is not valid JSON: {str(e)}")
- elif isinstance(taskSyncDefinitionParam, dict):
- taskSyncDefinition = taskSyncDefinitionParam
- else:
- return ActionResult.isFailure(error=f"taskSyncDefinition must be a dict or JSON string, got {type(taskSyncDefinitionParam)}")
-
- # Get API token from APP_CONFIG
- apiToken = APP_CONFIG.get(apiTokenConfigKey)
- if not apiToken:
- errorMsg = f"{apiTokenConfigKey} not found in APP_CONFIG"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- # Create ticket interface
- syncInterface = await self.services.ticket.connectTicket(
- taskSyncDefinition=taskSyncDefinition,
- connectorType="Jira",
- connectorParams={
- "apiUsername": apiUsername,
- "apiToken": apiToken,
- "apiUrl": apiUrl,
- "projectCode": projectCode,
- "ticketType": issueType,
- },
- )
-
- # Store connection with unique ID
- import uuid
- connectionId = str(uuid.uuid4())
- self._connections[connectionId] = {
- "interface": syncInterface,
- "taskSyncDefinition": taskSyncDefinition,
- "apiUrl": apiUrl,
- "projectCode": projectCode,
- }
-
- logger.info(f"JIRA connection established: {connectionId} (Project: {projectCode})")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "jira_connection",
- "json",
- workflowContext,
- "connectJira"
- )
-
- # Create connection info document
- connectionInfo = {
- "connectionId": connectionId,
- "apiUrl": apiUrl,
- "projectCode": projectCode,
- "issueType": issueType,
- }
-
- validationMetadata = self._createValidationMetadata(
- "connectJira",
- connectionId=connectionId,
- apiUrl=apiUrl,
- projectCode=projectCode
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(connectionInfo, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error connecting to JIRA: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Export tickets from JIRA as JSON list.
-
- Parameters:
- - connectionId (str, required): Connection ID from connectJira action result
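-          May be the raw connection ID or a reference to the connection info
-          document produced by connectJira; both forms are accepted.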
- - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
-
- Returns:
- - ActionResult with ActionDocument containing list of tickets as JSON
- """
- try:
- connectionIdParam = parameters.get("connectionId")
- if not connectionIdParam:
- return ActionResult.isFailure(error="connectionId parameter is required")
-
- # Get connection ID from document if it's a reference
- connectionId = None
- if isinstance(connectionIdParam, str):
- # Try to parse from document reference
- connectionInfo = self._parseJsonFromDocument(connectionIdParam)
- if connectionInfo and "connectionId" in connectionInfo:
- connectionId = connectionInfo["connectionId"]
- else:
- # Assume it's the connection ID directly
- connectionId = connectionIdParam
-
- if not connectionId or connectionId not in self._connections:
- return ActionResult.isFailure(error=f"Connection ID {connectionIdParam} not found. Ensure connectJira was called first.")
-
- connection = self._connections[connectionId]
- syncInterface = connection["interface"]
-
- # Export tickets
- dataList = await syncInterface.exportTicketsAsList()
-
- logger.info(f"Exported {len(dataList)} tickets from JIRA")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "jira_tickets_export",
- "json",
- workflowContext,
- "exportTicketsAsJson"
- )
-
- validationMetadata = self._createValidationMetadata(
- "exportTicketsAsJson",
- connectionId=connectionId,
- ticketCount=len(dataList)
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(dataList, indent=2, ensure_ascii=False),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error exporting tickets from JIRA: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Import ticket data from JSON back to JIRA.
-
- Parameters:
- - connectionId (str, required): Connection ID from connectJira action result
- - ticketData (str, required): Document reference containing ticket data as JSON
- - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
-
- Returns:
- - ActionResult with ActionDocument containing import result with counts
- """
- try:
- connectionIdParam = parameters.get("connectionId")
- if not connectionIdParam:
- return ActionResult.isFailure(error="connectionId parameter is required")
-
- ticketDataParam = parameters.get("ticketData")
- if not ticketDataParam:
- return ActionResult.isFailure(error="ticketData parameter is required")
-
- # Get connection ID from document if it's a reference
- connectionId = None
- if isinstance(connectionIdParam, str):
- connectionInfo = self._parseJsonFromDocument(connectionIdParam)
- if connectionInfo and "connectionId" in connectionInfo:
- connectionId = connectionInfo["connectionId"]
- else:
- connectionId = connectionIdParam
-
- if not connectionId or connectionId not in self._connections:
- return ActionResult.isFailure(error=f"Connection ID {connectionIdParam} not found. Ensure connectJira was called first.")
-
- connection = self._connections[connectionId]
- syncInterface = connection["interface"]
-
- # Get ticket data from document
- ticketDataJson = self._parseJsonFromDocument(ticketDataParam)
- if ticketDataJson is None:
- return ActionResult.isFailure(error="Could not parse ticket data from document reference")
-
- # Ensure it's a list
- if not isinstance(ticketDataJson, list):
- return ActionResult.isFailure(error="ticketData must be a JSON array")
-
- # Import tickets
- await syncInterface.importListToTickets(ticketDataJson)
-
- logger.info(f"Imported {len(ticketDataJson)} tickets to JIRA")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "jira_import_result",
- "json",
- workflowContext,
- "importTicketsFromJson"
- )
-
- importResult = {
- "imported": len(ticketDataJson),
- "connectionId": connectionId,
- }
-
- validationMetadata = self._createValidationMetadata(
- "importTicketsFromJson",
- connectionId=connectionId,
- importedCount=len(ticketDataJson)
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(importResult, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error importing tickets to JIRA: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Merge JIRA export data with existing SharePoint data.
-
- Parameters:
- - jiraData (str, required): Document reference containing JIRA ticket data as JSON array
- - existingData (str, required): Document reference containing existing SharePoint data as JSON array
- - taskSyncDefinition (str or dict, required): Field mapping definition
- - idField (str, optional): Field name to use as ID for merging (default: "ID")
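-
-        Example (illustrative) taskSyncDefinition:
-            {"Summary": ["get", "summary"], "Status": ["get", "status"]}
-        Fields whose first list element is "get" are copied from the JIRA row
-        into the matching existing row during the merge.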
-
- Returns:
- - ActionResult with ActionDocument containing merged data and merge details
- """
- try:
- jiraDataParam = parameters.get("jiraData")
- if not jiraDataParam:
- return ActionResult.isFailure(error="jiraData parameter is required")
-
- existingDataParam = parameters.get("existingData")
- if not existingDataParam:
- return ActionResult.isFailure(error="existingData parameter is required")
-
- taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
- if not taskSyncDefinitionParam:
- return ActionResult.isFailure(error="taskSyncDefinition parameter is required")
-
- idField = parameters.get("idField", "ID")
-
- # Parse taskSyncDefinition
- if isinstance(taskSyncDefinitionParam, str):
- try:
- taskSyncDefinition = json.loads(taskSyncDefinitionParam)
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"taskSyncDefinition is not valid JSON: {str(e)}")
- elif isinstance(taskSyncDefinitionParam, dict):
- taskSyncDefinition = taskSyncDefinitionParam
- else:
- return ActionResult.isFailure(error=f"taskSyncDefinition must be a dict or JSON string, got {type(taskSyncDefinitionParam)}")
-
- # Get data from documents
- jiraDataJson = self._parseJsonFromDocument(jiraDataParam)
- if jiraDataJson is None or not isinstance(jiraDataJson, list):
- return ActionResult.isFailure(error="Could not parse jiraData as JSON array")
-
- existingDataJson = self._parseJsonFromDocument(existingDataParam)
- if existingDataJson is None or not isinstance(existingDataJson, list):
- # Empty existing data is OK
- existingDataJson = []
-
- # Perform merge
- existingLookup = {row.get(idField): row for row in existingDataJson if row.get(idField)}
- mergedData: List[dict] = []
- changes: List[str] = []
- updatedCount = addedCount = unchangedCount = 0
-
- for jiraRow in jiraDataJson:
- jiraId = jiraRow.get(idField)
- if jiraId and jiraId in existingLookup:
- existingRow = existingLookup[jiraId].copy()
- rowChanges: List[str] = []
-
- for fieldName, fieldConfig in taskSyncDefinition.items():
- if fieldConfig[0] == 'get':
-                            oldValue = "" if existingRow.get(fieldName) is None else str(existingRow.get(fieldName))
-                            newRaw = jiraRow.get(fieldName)
-                            newValue = "" if newRaw is None else str(newRaw)
-
-                            # Convert ADF data to readable text for logging
-                            # (checked on the raw value; after str() the dict test could never match)
-                            if isinstance(newRaw, dict) and newRaw.get("type") == "doc":
-                                newValueReadable = self._convertAdfToText(newRaw)
-                                if oldValue != newValueReadable:
-                                    rowChanges.append(f"{fieldName}: '{oldValue[:100]}...' -> '{newValueReadable[:100]}...'")
- elif oldValue != newValue:
- # Truncate long values for logging
- oldTruncated = oldValue[:100] + "..." if len(oldValue) > 100 else oldValue
- newTruncated = newValue[:100] + "..." if len(newValue) > 100 else newValue
- rowChanges.append(f"{fieldName}: '{oldTruncated}' -> '{newTruncated}'")
-
- existingRow[fieldName] = jiraRow.get(fieldName)
-
- mergedData.append(existingRow)
- if rowChanges:
- updatedCount += 1
- changes.append(f"Row ID {jiraId} updated: {', '.join(rowChanges)}")
- else:
- unchangedCount += 1
- del existingLookup[jiraId]
- else:
- mergedData.append(jiraRow)
- addedCount += 1
- changes.append(f"Row ID {jiraId} added as new record")
-
- # Add remaining existing rows
- for remaining in existingLookup.values():
- mergedData.append(remaining)
- unchangedCount += 1
-
- mergeDetails = {
- "updated": updatedCount,
- "added": addedCount,
- "unchanged": unchangedCount,
- "changes": changes
- }
-
- logger.info(f"Merged ticket data: {updatedCount} updated, {addedCount} added, {unchangedCount} unchanged")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "merged_ticket_data",
- "json",
- workflowContext,
- "mergeTicketData"
- )
-
- result = {
- "data": mergedData,
- "mergeDetails": mergeDetails
- }
-
- validationMetadata = self._createValidationMetadata(
- "mergeTicketData",
- updated=updatedCount,
- added=addedCount,
- unchanged=unchangedCount
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2, ensure_ascii=False),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error merging ticket data: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Parse CSV content with custom headers.
-
- Parameters:
- - csvContent (str, required): Document reference containing CSV file content as bytes
- - skipRows (int, optional): Number of header rows to skip (default: 2)
- - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)
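-
-        Expected layout with custom headers (illustrative):
-            line 1: report title line      -> returned as headers["header1"]
-            line 2: export metadata line   -> returned as headers["header2"]
-            line 3+: column names and data (the first skipRows lines are
-            skipped before pandas parses the table)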
-
- Returns:
- - ActionResult with ActionDocument containing parsed data and headers as JSON
- """
- try:
- csvContentParam = parameters.get("csvContent")
- if not csvContentParam:
- return ActionResult.isFailure(error="csvContent parameter is required")
-
- skipRows = parameters.get("skipRows", 2)
- hasCustomHeaders = parameters.get("hasCustomHeaders", True)
-
- # Get CSV content from document
- csvBytes = self._getDocumentData(csvContentParam)
- if csvBytes is None:
- return ActionResult.isFailure(error="Could not get CSV content from document reference")
-
- # Convert to bytes if needed
- if isinstance(csvBytes, str):
- csvBytes = csvBytes.encode('utf-8')
- elif not isinstance(csvBytes, bytes):
- return ActionResult.isFailure(error="CSV content must be bytes or string")
-
- # Parse headers if hasCustomHeaders
- headers = {"header1": "Header 1", "header2": "Header 2"}
- if hasCustomHeaders:
- csvLines = csvBytes.decode('utf-8').split('\n')
- if len(csvLines) >= 2:
- headers["header1"] = csvLines[0].rstrip('\r\n')
- headers["header2"] = csvLines[1].rstrip('\r\n')
-
- # Parse CSV data
- df = pd.read_csv(
- io.BytesIO(csvBytes),
- skiprows=skipRows,
- quoting=1,
- escapechar='\\',
- on_bad_lines='skip',
- engine='python'
- )
-
- # Convert to dict records
- for column in df.columns:
- df[column] = df[column].astype('object').fillna('')
- data = df.to_dict(orient='records')
-
- logger.info(f"Parsed CSV: {len(data)} rows, {len(df.columns)} columns")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "parsed_csv_data",
- "json",
- workflowContext,
- "parseCsvContent"
- )
-
- result = {
- "data": data,
- "headers": headers,
- "rowCount": len(data),
- "columnCount": len(df.columns)
- }
-
- validationMetadata = self._createValidationMetadata(
- "parseCsvContent",
- rowCount=len(data),
- columnCount=len(df.columns),
- skipRows=skipRows
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2, ensure_ascii=False),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error parsing CSV content: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Parse Excel content with custom headers.
-
- Parameters:
- - excelContent (str, required): Document reference containing Excel file content as bytes
- - skipRows (int, optional): Number of header rows to skip (default: 3)
- - hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)
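-
-        Expected layout with custom headers (illustrative):
-            row 1: report title row     -> returned as headers["header1"]
-            row 2: export metadata row  -> returned as headers["header2"]
-            row 3: column names; data rows follow from row skipRows+1 onwards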
-
- Returns:
- - ActionResult with ActionDocument containing parsed data and headers as JSON
- """
- try:
- excelContentParam = parameters.get("excelContent")
- if not excelContentParam:
- return ActionResult.isFailure(error="excelContent parameter is required")
-
- skipRows = parameters.get("skipRows", 3)
- hasCustomHeaders = parameters.get("hasCustomHeaders", True)
-
- # Get Excel content from document
- excelBytes = self._getDocumentData(excelContentParam)
- if excelBytes is None:
- return ActionResult.isFailure(error="Could not get Excel content from document reference")
-
- # Convert to bytes if needed
- if isinstance(excelBytes, str):
- excelBytes = excelBytes.encode('latin-1') # Excel might have binary data
- elif not isinstance(excelBytes, bytes):
- return ActionResult.isFailure(error="Excel content must be bytes or string")
-
- # Parse Excel
- df = pd.read_excel(BytesIO(excelBytes), engine='openpyxl', header=None)
-
- # Extract headers if hasCustomHeaders
- headers = {"header1": "Header 1", "header2": "Header 2"}
- if hasCustomHeaders and len(df) >= 3:
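-                # Assumed layout (0-based): row 0 = header1, row 1 = header2,
-                # row 2 = column names, data from row skipRows onwards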
- headerRow1 = df.iloc[0:1].copy()
- headerRow2 = df.iloc[1:2].copy()
- tableHeaders = df.iloc[2:3].copy()
- dfData = df.iloc[skipRows:].copy()
- dfData.columns = tableHeaders.iloc[0]
-
- headers = {
- "header1": ",".join([str(x) if pd.notna(x) else "" for x in headerRow1.iloc[0].tolist()]),
- "header2": ",".join([str(x) if pd.notna(x) else "" for x in headerRow2.iloc[0].tolist()]),
- }
- else:
- # No custom headers, use standard parsing
- if skipRows > 0:
- dfData = df.iloc[skipRows:].copy()
- if len(df) > skipRows:
- dfData.columns = df.iloc[skipRows-1]
- else:
- dfData = df.copy()
-
- # Reset index and clean data
- dfData = dfData.reset_index(drop=True)
- for column in dfData.columns:
- dfData[column] = dfData[column].astype('object').fillna('')
-
- data = dfData.to_dict(orient='records')
-
- logger.info(f"Parsed Excel: {len(data)} rows, {len(dfData.columns)} columns")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "parsed_excel_data",
- "json",
- workflowContext,
- "parseExcelContent"
- )
-
- result = {
- "data": data,
- "headers": headers,
- "rowCount": len(data),
- "columnCount": len(dfData.columns)
- }
-
- validationMetadata = self._createValidationMetadata(
- "parseExcelContent",
- rowCount=len(data),
- columnCount=len(dfData.columns),
- skipRows=skipRows
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2, ensure_ascii=False),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error parsing Excel content: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Create CSV content with custom headers.
-
- Parameters:
- - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- - headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
- - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
-
- Returns:
- - ActionResult with ActionDocument containing CSV content as bytes
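-
-        Output layout (illustrative):
-            row 1: header1 (e.g. report title)
-            row 2: header2, with the export timestamp in its second cell
-            row 3: column names
-            row 4+: data rows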
- """
- try:
- dataParam = parameters.get("data")
- if not dataParam:
- return ActionResult.isFailure(error="data parameter is required")
-
- headersParam = parameters.get("headers")
- columnsParam = parameters.get("columns")
- taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
-
- # Get data from document
- dataJson = self._parseJsonFromDocument(dataParam)
- if dataJson is None:
- return ActionResult.isFailure(error="Could not parse data from document reference")
-
- # Extract data array if wrapped in object
- if isinstance(dataJson, dict) and "data" in dataJson:
- dataList = dataJson["data"]
- elif isinstance(dataJson, list):
- dataList = dataJson
- else:
- return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
-
- # Get headers
- headers = {"header1": "Header 1", "header2": "Header 2"}
- if headersParam:
- headersJson = self._parseJsonFromDocument(headersParam)
- if headersJson and isinstance(headersJson, dict) and "headers" in headersJson:
- headers = headersJson["headers"]
- elif headersJson and isinstance(headersJson, dict):
- headers = headersJson
-
- # Get columns
- if columnsParam:
- if isinstance(columnsParam, str):
- try:
- columns = json.loads(columnsParam) if columnsParam.startswith('[') or columnsParam.startswith('{') else columnsParam.split(',')
-                    except json.JSONDecodeError:
- columns = columnsParam.split(',')
- elif isinstance(columnsParam, list):
- columns = columnsParam
- else:
- columns = None
- elif taskSyncDefinitionParam:
- # Extract columns from taskSyncDefinition
- if isinstance(taskSyncDefinitionParam, str):
- taskSyncDefinition = json.loads(taskSyncDefinitionParam)
- else:
- taskSyncDefinition = taskSyncDefinitionParam
- columns = list(taskSyncDefinition.keys())
- elif dataList and len(dataList) > 0:
- columns = list(dataList[0].keys())
- else:
- columns = []
-
- # Create DataFrame
- if not dataList:
- df = pd.DataFrame(columns=columns)
- else:
- df = pd.DataFrame(dataList)
- # Ensure all columns exist
- for col in columns:
- if col not in df.columns:
- df[col] = ""
- # Reorder columns
- df = df[columns]
-
- # Clean data
- for column in df.columns:
- df[column] = df[column].astype("object").fillna("")
- df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False).str.replace('"', '""', regex=False)
-
- # Create headers with timestamp
- timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
- header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
- header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
- if len(header2Row) > 1:
- header2Row[1] = timestamp
-
- headerRow1 = pd.DataFrame([header1Row + [""] * (len(df.columns) - len(header1Row))], columns=df.columns)
- headerRow2 = pd.DataFrame([header2Row + [""] * (len(df.columns) - len(header2Row))], columns=df.columns)
- tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
- finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)
-
- # Convert to CSV bytes
- out = StringIO()
- finalDf.to_csv(out, index=False, header=False, quoting=1, escapechar='\\')
- csvBytes = out.getvalue().encode('utf-8')
-
- logger.info(f"Created CSV content: {len(dataList)} rows, {len(columns)} columns")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "ticket_sync",
- "csv",
- workflowContext,
- "createCsvContent"
- )
-
- validationMetadata = self._createValidationMetadata(
- "createCsvContent",
- rowCount=len(dataList),
- columnCount=len(columns)
- )
-
- # Store as base64 for document
- import base64
- csvBase64 = base64.b64encode(csvBytes).decode('utf-8')
-
- document = ActionDocument(
- documentName=filename,
- documentData=csvBase64,
- mimeType="application/octet-stream",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error creating CSV content: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Create Excel content with custom headers.
-
- Parameters:
- - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- - headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
- - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
-
- Returns:
- - ActionResult with ActionDocument containing Excel content as bytes
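-
-        Output layout (illustrative): same as createCsvContent, i.e. two custom
-        header rows (with the export timestamp in the second cell of row 2),
-        followed by the column names and the data rows.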
- """
- try:
- dataParam = parameters.get("data")
- if not dataParam:
- return ActionResult.isFailure(error="data parameter is required")
-
- headersParam = parameters.get("headers")
- columnsParam = parameters.get("columns")
- taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
-
- # Get data from document
- dataJson = self._parseJsonFromDocument(dataParam)
- if dataJson is None:
- return ActionResult.isFailure(error="Could not parse data from document reference")
-
- # Extract data array if wrapped in object
- if isinstance(dataJson, dict) and "data" in dataJson:
- dataList = dataJson["data"]
- elif isinstance(dataJson, list):
- dataList = dataJson
- else:
- return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
-
- # Get headers
- headers = {"header1": "Header 1", "header2": "Header 2"}
- if headersParam:
- headersJson = self._parseJsonFromDocument(headersParam)
- if headersJson and isinstance(headersJson, dict) and "headers" in headersJson:
- headers = headersJson["headers"]
- elif headersJson and isinstance(headersJson, dict):
- headers = headersJson
-
- # Get columns
- if columnsParam:
- if isinstance(columnsParam, str):
- try:
- columns = json.loads(columnsParam) if columnsParam.startswith('[') or columnsParam.startswith('{') else columnsParam.split(',')
-                    except json.JSONDecodeError:
- columns = columnsParam.split(',')
- elif isinstance(columnsParam, list):
- columns = columnsParam
- else:
- columns = None
- elif taskSyncDefinitionParam:
- # Extract columns from taskSyncDefinition
- if isinstance(taskSyncDefinitionParam, str):
- taskSyncDefinition = json.loads(taskSyncDefinitionParam)
- else:
- taskSyncDefinition = taskSyncDefinitionParam
- columns = list(taskSyncDefinition.keys())
- elif dataList and len(dataList) > 0:
- columns = list(dataList[0].keys())
- else:
- columns = []
-
- # Create DataFrame
- if not dataList:
- df = pd.DataFrame(columns=columns)
- else:
- df = pd.DataFrame(dataList)
- # Ensure all columns exist
- for col in columns:
- if col not in df.columns:
- df[col] = ""
- # Reorder columns
- df = df[columns]
-
- # Clean data
- for column in df.columns:
- df[column] = df[column].astype("object").fillna("")
- df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False).str.replace('"', '""', regex=False)
-
- # Create headers with timestamp
- timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
- header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
- header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
- if len(header2Row) > 1:
- header2Row[1] = timestamp
-
- headerRow1 = pd.DataFrame([header1Row + [""] * (len(df.columns) - len(header1Row))], columns=df.columns)
- headerRow2 = pd.DataFrame([header2Row + [""] * (len(df.columns) - len(header2Row))], columns=df.columns)
- tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
- finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)
-
- # Convert to Excel bytes
- buf = BytesIO()
- finalDf.to_excel(buf, index=False, header=False, engine='openpyxl')
- excelBytes = buf.getvalue()
-
- logger.info(f"Created Excel content: {len(dataList)} rows, {len(columns)} columns")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "ticket_sync",
- "xlsx",
- workflowContext,
- "createExcelContent"
- )
-
- validationMetadata = self._createValidationMetadata(
- "createExcelContent",
- rowCount=len(dataList),
- columnCount=len(columns)
- )
-
- # Store as base64 for document
- import base64
- excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
-
- document = ActionDocument(
- documentName=filename,
- documentData=excelBase64,
- mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error creating Excel content: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
diff --git a/modules/workflows/methods/methodJira/actions/connectJira.py b/modules/workflows/methods/methodJira/actions/connectJira.py
index 8200514a..45b60cad 100644
--- a/modules/workflows/methods/methodJira/actions/connectJira.py
+++ b/modules/workflows/methods/methodJira/actions/connectJira.py
@@ -1,37 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Connect JIRA action for JIRA operations.
-Connects to JIRA instance and creates ticket interface.
-"""
-
import logging
import json
import uuid
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
-@action
async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Connect to JIRA instance and create ticket interface.
-
- Parameters:
- - apiUsername (str, required): JIRA API username/email
- - apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
- - apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
- - projectCode (str, required): JIRA project code (e.g., "DCS")
- - issueType (str, required): JIRA issue type (e.g., "Task")
- - taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict
-
- Returns:
- - ActionResult with ActionDocument containing connection ID
- """
try:
apiUsername = parameters.get("apiUsername")
if not apiUsername:
diff --git a/modules/workflows/methods/methodJira/actions/createCsvContent.py b/modules/workflows/methods/methodJira/actions/createCsvContent.py
index c856760e..cbec7960 100644
--- a/modules/workflows/methods/methodJira/actions/createCsvContent.py
+++ b/modules/workflows/methods/methodJira/actions/createCsvContent.py
@@ -1,11 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Create CSV Content action for JIRA operations.
-Creates CSV content with custom headers.
-"""
-
import logging
import json
import base64
@@ -14,25 +9,11 @@ import csv as csv_module
from io import StringIO
from datetime import datetime, UTC
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Create CSV content with custom headers.
-
- Parameters:
- - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- - headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
- - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
-
- Returns:
- - ActionResult with ActionDocument containing CSV content as bytes
- """
try:
dataParam = parameters.get("data")
if not dataParam:
diff --git a/modules/workflows/methods/methodJira/actions/createExcelContent.py b/modules/workflows/methods/methodJira/actions/createExcelContent.py
index fbf54299..631795b3 100644
--- a/modules/workflows/methods/methodJira/actions/createExcelContent.py
+++ b/modules/workflows/methods/methodJira/actions/createExcelContent.py
@@ -1,11 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Create Excel Content action for JIRA operations.
-Creates Excel content with custom headers.
-"""
-
import logging
import json
import base64
@@ -14,25 +9,11 @@ import csv as csv_module
from io import BytesIO
from datetime import datetime, UTC
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Create Excel content with custom headers.
-
- Parameters:
- - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- - headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
- - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
-
- Returns:
- - ActionResult with ActionDocument containing Excel content as bytes
- """
try:
dataParam = parameters.get("data")
if not dataParam:
diff --git a/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py b/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
index 85926851..55d99654 100644
--- a/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
+++ b/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
@@ -1,31 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Export Tickets As JSON action for JIRA operations.
-Exports tickets from JIRA as JSON list.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Export tickets from JIRA as JSON list.
-
- Parameters:
- - connectionId (str, required): Connection ID from connectJira action result
- - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
-
- Returns:
- - ActionResult with ActionDocument containing list of tickets as JSON
- """
try:
connectionIdParam = parameters.get("connectionId")
if not connectionIdParam:
diff --git a/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py b/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
index b17519ea..b997889e 100644
--- a/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
+++ b/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
@@ -1,32 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Import Tickets From JSON action for JIRA operations.
-Imports ticket data from JSON back to JIRA.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Import ticket data from JSON back to JIRA.
-
- Parameters:
- - connectionId (str, required): Connection ID from connectJira action result
- - ticketData (str, required): Document reference containing ticket data as JSON
- - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
-
- Returns:
- - ActionResult with ActionDocument containing import result with counts
- """
try:
connectionIdParam = parameters.get("connectionId")
if not connectionIdParam:
diff --git a/modules/workflows/methods/methodJira/actions/mergeTicketData.py b/modules/workflows/methods/methodJira/actions/mergeTicketData.py
index a8f8b486..2bd7ab74 100644
--- a/modules/workflows/methods/methodJira/actions/mergeTicketData.py
+++ b/modules/workflows/methods/methodJira/actions/mergeTicketData.py
@@ -1,33 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Merge Ticket Data action for JIRA operations.
-Merges JIRA export data with existing SharePoint data.
-"""
-
import logging
import json
from typing import Dict, Any, List
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Merge JIRA export data with existing SharePoint data.
-
- Parameters:
- - jiraData (str, required): Document reference containing JIRA ticket data as JSON array
- - existingData (str, required): Document reference containing existing SharePoint data as JSON array
- - taskSyncDefinition (str or dict, required): Field mapping definition
- - idField (str, optional): Field name to use as ID for merging (default: "ID")
-
- Returns:
- - ActionResult with ActionDocument containing merged data and merge details
- """
try:
jiraDataParam = parameters.get("jiraData")
if not jiraDataParam:
diff --git a/modules/workflows/methods/methodJira/actions/parseCsvContent.py b/modules/workflows/methods/methodJira/actions/parseCsvContent.py
index 3038e566..bbdc2cc7 100644
--- a/modules/workflows/methods/methodJira/actions/parseCsvContent.py
+++ b/modules/workflows/methods/methodJira/actions/parseCsvContent.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Parse CSV Content action for JIRA operations.
-Parses CSV content with custom headers.
-"""
-
import logging
import json
import io
import pandas as pd
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Parse CSV content with custom headers.
-
- Parameters:
- - csvContent (str, required): Document reference containing CSV file content as bytes
- - skipRows (int, optional): Number of header rows to skip (default: 2)
- - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)
-
- Returns:
- - ActionResult with ActionDocument containing parsed data and headers as JSON
- """
try:
csvContentParam = parameters.get("csvContent")
if not csvContentParam:
diff --git a/modules/workflows/methods/methodJira/actions/parseExcelContent.py b/modules/workflows/methods/methodJira/actions/parseExcelContent.py
index c0d64325..5ac4e548 100644
--- a/modules/workflows/methods/methodJira/actions/parseExcelContent.py
+++ b/modules/workflows/methods/methodJira/actions/parseExcelContent.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Parse Excel Content action for JIRA operations.
-Parses Excel content with custom headers.
-"""
-
import logging
import json
import pandas as pd
from io import BytesIO
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Parse Excel content with custom headers.
-
- Parameters:
- - excelContent (str, required): Document reference containing Excel file content as bytes
- - skipRows (int, optional): Number of header rows to skip (default: 3)
- - hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)
-
- Returns:
- - ActionResult with ActionDocument containing parsed data and headers as JSON
- """
try:
excelContentParam = parameters.get("excelContent")
if not excelContentParam:
diff --git a/modules/workflows/methods/methodOutlook.py.old b/modules/workflows/methods/methodOutlook.py.old
deleted file mode 100644
index 98dfbc41..00000000
--- a/modules/workflows/methods/methodOutlook.py.old
+++ /dev/null
@@ -1,1904 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-Microsoft Outlook Email Operations Module
-"""
-
-import base64
-import re
-import logging
-from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC
-import json
-import requests
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-
-logger = logging.getLogger(__name__)
-
-class MethodOutlook(MethodBase):
- """Outlook method implementation for email operations"""
-
- def __init__(self, services):
- """Initialize the Outlook method"""
- super().__init__(services)
- self.name = "outlook"
- self.description = "Handle Microsoft Outlook email operations"
-
- def _format_timestamp_for_filename(self) -> str:
- """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
- return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
-
- def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
- """
- Helper function to get Microsoft connection details.
- """
- try:
- logger.debug(f"Getting Microsoft connection for reference: {connectionReference}")
-
- # Get the connection from the service
- userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference)
- if not userConnection:
- logger.error(f"Connection not found: {connectionReference}")
- return None
-
- logger.debug(f"Found connection: {userConnection.id}, status: {userConnection.status.value}, authority: {userConnection.authority.value}")
-
- # Get a fresh token for this connection
- token = self.services.chat.getFreshConnectionToken(userConnection.id)
- if not token:
- logger.error(f"Fresh token not found for connection: {userConnection.id}")
- logger.debug(f"Connection details: {userConnection}")
- return None
-
- logger.debug(f"Fresh token retrieved for connection {userConnection.id}")
-
- # Check if connection is active
- if userConnection.status.value != "active":
- logger.error(f"Connection is not active: {userConnection.id}, status: {userConnection.status.value}")
- return None
-
- return {
- "id": userConnection.id,
- "accessToken": token.tokenAccess,
- "refreshToken": token.tokenRefresh,
- "scopes": ["Mail.ReadWrite", "Mail.Send", "Mail.ReadWrite.Shared", "User.Read"] # Valid Microsoft Graph API scopes
- }
- except Exception as e:
- logger.error(f"Error getting Microsoft connection: {str(e)}")
- return None
-
- async def _checkPermissions(self, connection: Dict[str, Any]) -> bool:
- """
- Check if the current connection has the necessary permissions for Outlook operations.
- """
- try:
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Test permissions by trying to access the user's mail folder
- test_url = f"{graph_url}/me/mailFolders"
- response = requests.get(test_url, headers=headers)
-
- if response.status_code == 200:
-                return True
- elif response.status_code == 403:
- logger.error("Permission denied - connection lacks necessary mail permissions")
- logger.error("Required scopes: Mail.ReadWrite, Mail.Send, Mail.ReadWrite.Shared")
- return False
- else:
- logger.warning(f"Permission check returned status {response.status_code}")
- return False
-
- except Exception as e:
- logger.error(f"Error checking permissions: {str(e)}")
- return False
-
- def _sanitizeSearchQuery(self, query: str) -> str:
- """
- Sanitize and validate search query for Microsoft Graph API
-
- Microsoft Graph API has specific requirements for search queries:
- - Escape special characters properly
- - Handle search operators correctly
- - Ensure query format is valid
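-
-        Example (illustrative): the query 'report "Q3"' becomes 'report Q3'
-        (quotes stripped so the value is safe inside a contains() filter).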
- """
- if not query:
- return ""
-
- # Clean the query
- clean_query = query.strip()
-
- # Handle folder specifications first
- if clean_query.lower().startswith('folder:'):
- folder_name = clean_query[7:].strip()
- if folder_name:
- # Return the folder specification as-is
- return clean_query
-
- # Remove any double quotes that might cause issues
- clean_query = clean_query.replace('"', '')
-
- # Handle common search operators
- # Recognize Graph operators including both singular and plural forms for hasAttachments
- lowered = clean_query.lower()
- if any(op in lowered for op in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
- # This is an advanced search query, return as-is
- return clean_query
-
- # For basic text search, ensure it's safe for contains() filter
-        # Remove characters that could break the OData filter syntax
- safe_query = re.sub(r'[\\\'"]', '', clean_query)
-
- return safe_query
-
- def _buildSearchParameters(self, query: str, folder: str, limit: int) -> Dict[str, Any]:
- """
- Build search parameters for Microsoft Graph API
-
- This method handles the complexity of building search parameters
- while avoiding conflicts between $search and $filter parameters.
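-
-        Example (illustrative): query="folder:Drafts" yields
-        {"$top": limit, "$filter": "parentFolderId eq 'Drafts'",
-         "$orderby": "receivedDateTime desc"}.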
- """
- params = {
- "$top": limit
- }
-
- if not query or not query.strip():
- # No query specified, just get emails from folder
- if folder and folder.lower() != "all":
-                # Well-known folders ("inbox", "drafts", ...) can be referenced
-                # by name; custom folder names are resolved to an ID by the
-                # calling method. Either way the same filter is used here.
-                params["$filter"] = f"parentFolderId eq '{folder}'"
- # Add orderby for basic queries
- params["$orderby"] = "receivedDateTime desc"
- return params
-
- clean_query = self._sanitizeSearchQuery(query)
-
- # Check if this is a folder specification (e.g., "folder:Drafts", "folder:Inbox")
- if clean_query.lower().startswith('folder:'):
- folder_name = clean_query[7:].strip() # Remove "folder:" prefix
- if folder_name:
- # This is a folder specification, not a text search
- # Just filter by folder and return
- params["$filter"] = f"parentFolderId eq '{folder_name}'"
- params["$orderby"] = "receivedDateTime desc"
- return params
-
- # Check if this is a complex search query with multiple operators
- # Recognize Graph operators including both singular and plural forms for hasAttachments
- lowered = clean_query.lower()
- if any(op in lowered for op in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
- # This is an advanced search query, use $search
- # Microsoft Graph API supports complex search syntax
- params["$search"] = f'"{clean_query}"'
-
- # Note: When using $search, we cannot combine it with $orderby or $filter for folder
-            # Folder filtering will therefore be done after the API call
- else:
- # Use $filter for basic text search, but keep it simple to avoid "InefficientFilter" error
- # Microsoft Graph API has limitations on complex filters
- if len(clean_query) > 50:
- # If query is too long, truncate it to avoid complex filter issues
- clean_query = clean_query[:50]
-
- # Use only subject search to keep filter simple
- # Handle wildcard queries specially
- if clean_query == "*" or clean_query == "":
- # For wildcard or empty query, don't use contains filter
- # Just use folder filter if specified
- if folder and folder.lower() != "all":
- params["$filter"] = f"parentFolderId eq '{folder}'"
- else:
- # No filter needed for wildcard search across all folders
- pass
- else:
- params["$filter"] = f"contains(subject,'{clean_query}')"
-
- # Add folder filter if specified
- if folder and folder.lower() != "all":
- params["$filter"] = f"{params['$filter']} and parentFolderId eq '{folder}'"
-
- # Add orderby for basic queries
- params["$orderby"] = "receivedDateTime desc"
-
- return params
-
- def _buildGraphFilter(self, filter_text: str) -> Dict[str, str]:
- """
- Build proper Microsoft Graph API filter parameters based on filter text
-
- Args:
- filter_text (str): The filter text to process
-
- Returns:
- Dict[str, str]: Dictionary with either $filter or $search parameter
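-
-        Examples (illustrative):
-            "from:alice@example.com" -> {"$search": '"from:alice@example.com"'}
-            "alice@example.com"      -> {"$filter": "from/fromAddress/address eq 'alice@example.com'"}
-            "importance eq 'high'"   -> {"$filter": "importance eq 'high'"}
-            "budget"                 -> {"$filter": "contains(subject,'budget')"}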
- """
- if not filter_text:
- return {}
-
- filter_text = filter_text.strip()
-
- # Handle folder specifications (e.g., "folder:Drafts", "folder:Inbox")
- if filter_text.lower().startswith('folder:'):
- folder_name = filter_text[7:].strip() # Remove "folder:" prefix
- if folder_name:
- # This is a folder specification, return empty to let the main method handle it
- return {}
-
- # Handle search queries (from:, to:, subject:, etc.) - check this FIRST
- # Support both singular and plural forms for hasAttachments
- lt = filter_text.lower()
- if any(lt.startswith(prefix) for prefix in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
- return {"$search": f'"{filter_text}"'}
-
- # Handle email address filters (only if it's NOT a search query)
- if '@' in filter_text and '.' in filter_text and ' ' not in filter_text and not filter_text.startswith('from:'):
- return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}
-
- # Handle OData filter conditions (contains 'eq', 'ne', 'gt', 'lt', etc.)
- if any(op in filter_text.lower() for op in [' eq ', ' ne ', ' gt ', ' lt ', ' ge ', ' le ', ' and ', ' or ']):
- return {"$filter": filter_text}
-
- # Handle text content - search in subject
- return {"$filter": f"contains(subject,'{filter_text}')"}
-
- def _getFolderId(self, folder_name: str, connection: Dict[str, Any]) -> Optional[str]:
- """
- Get the folder ID for a given folder name
-
- This is needed for proper filtering when using advanced search queries
- """
- try:
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get mail folders
- api_url = f"{graph_url}/me/mailFolders"
- response = requests.get(api_url, headers=headers)
-
- if response.status_code == 200:
- folders_data = response.json()
- all_folders = folders_data.get("value", [])
-
- # Try exact match first
- for folder in all_folders:
- if folder.get("displayName", "").lower() == folder_name.lower():
-                        return folder.get("id")
-
- # Try common variations for Drafts folder
- if folder_name.lower() == "drafts":
- draft_variations = ["drafts", "draft", "entwürfe", "entwurf", "brouillons", "brouillon"]
- for folder in all_folders:
- folder_display_name = folder.get("displayName", "").lower()
- if any(variation in folder_display_name for variation in draft_variations):
-                            return folder.get("id")
-
- # Try common variations for other folders
- if folder_name.lower() == "sent items":
- sent_variations = ["sent items", "sent", "gesendete elemente", "éléments envoyés"]
- for folder in all_folders:
- folder_display_name = folder.get("displayName", "").lower()
- if any(variation in folder_display_name for variation in sent_variations):
-                            return folder.get("id")
-
- logger.warning(f"Folder '{folder_name}' not found. Available folders: {[f.get('displayName', 'Unknown') for f in all_folders]}")
- return None
- else:
- logger.warning(f"Could not retrieve folders: {response.status_code}")
- return None
-
- except Exception as e:
- logger.warning(f"Error getting folder ID for '{folder_name}': {str(e)}")
- return None
-
- @action
- async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Read emails and metadata from a mailbox folder.
- - Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
- - Output format: JSON with emails and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - folder (str, optional): Folder to read from. Default: Inbox.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - filter (str, optional): Sender, query operators, or subject text.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
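-
-        Filter examples (illustrative): "from:alice@example.com" (Graph $search),
-        "alice@example.com" (exact sender match), "budget" (subject contains).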
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"outlook_read_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Read Emails",
- "Outlook Email Reading",
- f"Folder: {parameters.get('folder', 'Inbox')}",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- folder = parameters.get("folder", "Inbox")
-            limit = parameters.get("limit", 1000)
- filter = parameters.get("filter")
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- self.services.chat.progressLogUpdate(operationId, 0.2, "Validating parameters")
-
- # Validate limit parameter
-            if limit <= 0:
-                logger.warning(f"Invalid limit value ({limit}), using default value 1000")
-                limit = 1000
-
- # Validate filter parameter if provided
- if filter:
- # Remove any potentially dangerous characters that could break the filter
- filter = filter.strip()
- if len(filter) > 100:
- logger.warning(f"Filter too long ({len(filter)} chars), truncating to 100 characters")
- filter = filter[:100]
-
- # Get Microsoft connection
- self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Read emails using Microsoft Graph API
- self.services.chat.progressLogUpdate(operationId, 0.4, "Reading emails from Microsoft Graph API")
- try:
- # Microsoft Graph API endpoint for messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get the folder ID for the specified folder
- folder_id = self._getFolderId(folder, connection)
-
- if folder_id:
- # Build the API request with folder ID
- api_url = f"{graph_url}/me/mailFolders/{folder_id}/messages"
- else:
- # Fallback: use folder name directly (for well-known folders like "Inbox")
- api_url = f"{graph_url}/me/mailFolders/{folder}/messages"
- logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
- params = {
- "$top": limit,
- "$orderby": "receivedDateTime desc"
- }
-
- if filter:
- # Build proper Graph API filter parameters
- filter_params = self._buildGraphFilter(filter)
- params.update(filter_params)
-
- # If using $search, remove $orderby as they can't be combined
- if "$search" in params:
- params.pop("$orderby", None)
-
- # If using $filter with contains(), remove $orderby as they can't be combined
- # Microsoft Graph API doesn't support contains() with orderby
- if "$filter" in params and "contains(" in params["$filter"].lower():
- params.pop("$orderby", None)
-
-                # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
-
- if response.status_code != 200:
- logger.error(f"Graph API error: {response.status_code} - {response.text}")
- logger.error(f"Request URL: {response.url}")
- logger.error(f"Request headers: {headers}")
- logger.error(f"Request params: {params}")
-
- response.raise_for_status()
-
- self.services.chat.progressLogUpdate(operationId, 0.7, "Processing email data")
- emails_data = response.json()
- email_data = {
- "emails": emails_data.get("value", []),
- "count": len(emails_data.get("value", [])),
- "folder": folder,
- "filter": filter,
- "apiMetadata": {
- "@odata.context": emails_data.get("@odata.context"),
- "@odata.count": emails_data.get("@odata.count"),
- "@odata.nextLink": emails_data.get("@odata.nextLink")
- }
- }
-
- except ImportError:
- logger.error("requests module not available")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="requests module not available")
- except requests.exceptions.HTTPError as e:
- if e.response.status_code == 400:
- logger.error(f"Bad Request (400) - Invalid filter or parameter: {e.response.text}")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"Invalid filter syntax. Please check your filter parameter. Error: {e.response.text}")
- elif e.response.status_code == 401:
- logger.error("Unauthorized (401) - Access token may be expired or invalid")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Authentication failed. Please check your connection and try again.")
- elif e.response.status_code == 403:
- logger.error("Forbidden (403) - Insufficient permissions to access emails")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Insufficient permissions to read emails from this folder.")
- else:
- logger.error(f"HTTP Error {e.response.status_code}: {e.response.text}")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"HTTP Error {e.response.status_code}: {e.response.text}")
- except Exception as e:
- logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
- # Create result data as JSON string
- result_data = {
- "connectionReference": connectionReference,
- "folder": folder,
- "limit": limit,
- "filter": filter,
- "emails": email_data,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.readEmails",
- "connectionReference": connectionReference,
- "folder": folder,
- "limit": limit,
- "filter": filter,
- "emailCount": email_data.get("count", 0),
- "outputMimeType": outputMimeType
- }
-
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {email_data.get('count', 0)} emails")
- self.services.chat.progressLogFinish(operationId, True)
-
- return ActionResult.isSuccess(
- documents=[ActionDocument(
- documentName=f"outlook_emails_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error reading emails: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
-                except Exception:
- pass # Don't fail on progress logging errors
- return ActionResult.isFailure(
- error=str(e)
- )
-
- @action
- async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Search emails by query and return matching items with metadata.
- - Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
- - Output format: JSON with search results and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - query (str, required): Search expression.
- - folder (str, optional): Folder scope or All. Default: All.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- try:
- connectionReference = parameters.get("connectionReference")
- query = parameters.get("query")
- folder = parameters.get("folder", "All")
- limit = parameters.get("limit", 1000)
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- # Validate parameters
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Validate limit parameter
-            if limit <= 0:
-                logger.warning(f"Invalid limit value ({limit}), using default value 1000")
-                limit = 1000
-
- if not query or not query.strip():
- return ActionResult.isFailure(error="Search query is required and cannot be empty")
-
- # Check if this is a folder specification query
- if query.strip().lower().startswith('folder:'):
- folder_name = query.strip()[7:].strip() # Remove "folder:" prefix
- if not folder_name:
- return ActionResult.isFailure(error="Invalid folder specification. Use format 'folder:FolderName'")
- logger.info(f"Search query is a folder specification: {folder_name}")
-
- # Validate limit
- try:
- limit = int(limit)
-                if limit <= 0:
-                    logger.warning(f"Invalid limit value ({limit}), using default value 1000")
-                    limit = 1000
-                elif limit > 1000:  # Microsoft Graph API has limits
-                    logger.warning(f"Limit {limit} exceeds maximum (1000), using 1000")
-                    limit = 1000
- except (ValueError, TypeError):
- limit = 1000
- logger.warning(f"Invalid limit value, using default value 1000")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Search emails using Microsoft Graph API
- try:
- # Microsoft Graph API endpoint for searching messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get the folder ID for the specified folder if needed
- folder_id = None
- if folder and folder.lower() != "all":
- folder_id = self._getFolderId(folder, connection)
- if folder_id:
- logger.debug(f"Found folder ID for '{folder}': {folder_id}")
- else:
- logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
-
- # Build the search API request
- api_url = f"{graph_url}/me/messages"
- params = self._buildSearchParameters(query, folder_id or folder, limit)
-
- # Log search parameters for debugging
- logger.debug(f"Search query: '{query}'")
- logger.debug(f"Search folder: '{folder}'")
- logger.debug(f"Search parameters: {params}")
- logger.debug(f"API URL: {api_url}")
-
- # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
-
- # Log response details for debugging
- if response.status_code != 200:
- # Log detailed error information
- try:
- error_data = response.json()
- logger.error(f"Microsoft Graph API error: {response.status_code} - {error_data}")
-                    except Exception:
- logger.error(f"Microsoft Graph API error: {response.status_code} - {response.text}")
-
- # Check for specific error types and provide helpful messages
- if response.status_code == 400:
- logger.error("Bad Request (400) - Check search query format and parameters")
- logger.error(f"Search query: '{query}'")
- logger.error(f"Search parameters: {params}")
- logger.error(f"API URL: {api_url}")
- elif response.status_code == 401:
- logger.error("Unauthorized (401) - Check access token and permissions")
- elif response.status_code == 403:
- logger.error("Forbidden (403) - Check API permissions and scopes")
- elif response.status_code == 429:
- logger.error("Too Many Requests (429) - Rate limit exceeded")
-
- raise Exception(f"Microsoft Graph API returned {response.status_code}: {response.text}")
-
- response.raise_for_status()
-
- search_data = response.json()
- emails = search_data.get("value", [])
-
- # Apply folder filtering if needed and we used $search
- if folder and folder.lower() != "all" and "$search" in params:
- # Get the actual folder ID for proper filtering
- folder_id = self._getFolderId(folder, connection)
-
- if folder_id:
- # Filter results by folder ID
- filtered_emails = []
- for email in emails:
- if email.get("parentFolderId") == folder_id:
- filtered_emails.append(email)
- emails = filtered_emails
- logger.debug(f"Applied folder filtering: {len(filtered_emails)} emails found in folder {folder}")
- else:
- # Fallback: try to filter by folder name (less reliable)
- filtered_emails = []
- for email in emails:
- # Check if email has folder information
-                            if email.get('parentFolderId'):
- if email.get('parentFolderId') == folder:
- filtered_emails.append(email)
- else:
- # If no folder info, include the email (less strict filtering)
- filtered_emails.append(email)
-
- emails = filtered_emails
- logger.debug(f"Applied fallback folder filtering: {len(filtered_emails)} emails found in folder {folder}")
-
- # Special handling for folder specification queries
- if query.strip().lower().startswith('folder:'):
- folder_name = query.strip()[7:].strip()
- folder_id = self._getFolderId(folder_name, connection)
- if folder_id:
- # Filter results to only include emails from the specified folder
- filtered_emails = []
- for email in emails:
- if email.get("parentFolderId") == folder_id:
- filtered_emails.append(email)
- emails = filtered_emails
- logger.debug(f"Applied folder specification filtering: {len(filtered_emails)} emails found in folder {folder_name}")
- else:
- logger.warning(f"Could not find folder ID for folder specification: {folder_name}")
-
- search_result = {
- "query": query,
- "results": emails,
- "count": len(emails),
- "folder": folder,
- "limit": limit,
- "apiMetadata": {
- "@odata.context": search_data.get("@odata.context"),
- "@odata.count": search_data.get("@odata.count"),
- "@odata.nextLink": search_data.get("@odata.nextLink")
- },
- "searchParams": params
- }
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to search emails: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
- result_data = {
- "connectionReference": connectionReference,
- "query": query,
- "folder": folder,
- "limit": limit,
- "searchResults": search_result,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.searchEmails",
- "connectionReference": connectionReference,
- "query": query,
- "folder": folder,
- "limit": limit,
- "resultCount": search_result.get("count", 0),
- "outputMimeType": outputMimeType
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_email_search_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error searching emails: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- async def listDrafts(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: List draft emails from a folder.
- - Input requirements: connectionReference (required); optional folder, limit, outputMimeType.
- - Output format: JSON with draft items and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - folder (str, optional): Drafts folder to list. Default: Drafts.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- try:
- connectionReference = parameters.get("connectionReference")
- folder = parameters.get("folder", "Drafts")
- limit = parameters.get("limit", 1000)
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # List drafts using Microsoft Graph API
- try:
- # Microsoft Graph API endpoint for messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get the folder ID for the specified folder
- folder_id = self._getFolderId(folder, connection)
-
- if folder_id:
- # List messages in the specific folder
- api_url = f"{graph_url}/me/mailFolders/{folder_id}/messages"
- else:
- # Fallback: list all messages (might include drafts)
- api_url = f"{graph_url}/me/messages"
- logger.warning(f"Could not find folder '{folder}', listing all messages")
-
- params = {
- "$top": limit,
- "$orderby": "lastModifiedDateTime desc",
- "$select": "id,subject,from,toRecipients,ccRecipients,bccRecipients,receivedDateTime,lastModifiedDateTime,parentFolderId,isDraft"
- }
-
- # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
- response.raise_for_status()
-
- messages_data = response.json()
- messages = messages_data.get("value", [])
-
- # Filter for drafts if we're looking at all messages
- if not folder_id:
- drafts = [msg for msg in messages if msg.get("isDraft", False)]
- messages = drafts
-
- drafts_result = {
- "folder": folder,
- "folderId": folder_id,
- "drafts": messages,
- "count": len(messages),
- "limit": limit,
- "apiResponse": messages_data
- }
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error listing drafts via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to list drafts: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
- result_data = {
- "connectionReference": connectionReference,
- "folder": folder,
- "limit": limit,
- "draftsResult": drafts_result,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.listDrafts",
- "connectionReference": connectionReference,
- "folder": folder,
- "limit": limit,
- "draftCount": drafts_result.get("count", 0),
- "outputMimeType": outputMimeType
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_drafts_list_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error listing drafts: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- async def findDrafts(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Find draft emails across folders.
- - Input requirements: connectionReference (required); optional limit, outputMimeType.
- - Output format: JSON with drafts and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - limit (int, optional): Maximum items to return. Default: 50.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- try:
- connectionReference = parameters.get("connectionReference")
- limit = parameters.get("limit", 50)
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Find drafts using Microsoft Graph API
- try:
- # Microsoft Graph API endpoint for messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get all messages and filter for drafts
- api_url = f"{graph_url}/me/messages"
- params = {
- "$top": limit,
- "$select": "id,subject,from,toRecipients,ccRecipients,bccRecipients,receivedDateTime,lastModifiedDateTime,parentFolderId,isDraft,webLink",
- "$filter": "isDraft eq true"
- }
-
- # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
- response.raise_for_status()
-
- messages_data = response.json()
- drafts = messages_data.get("value", [])
-
- # Get folder information for each draft
- for draft in drafts:
- if "parentFolderId" in draft:
- folder_info = self._getFolderNameById(draft["parentFolderId"], connection)
- draft["folderName"] = folder_info
-
- drafts_result = {
- "totalDrafts": len(drafts),
- "drafts": drafts,
- "limit": limit,
- "apiResponse": messages_data
- }
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error finding drafts via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to find drafts: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
- result_data = {
- "connectionReference": connectionReference,
- "limit": limit,
- "draftsResult": drafts_result,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.findDrafts",
- "connectionReference": connectionReference,
- "limit": limit,
- "totalDrafts": drafts_result.get("totalDrafts", 0),
- "outputMimeType": outputMimeType
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_drafts_found_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error finding drafts: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- def _getFolderNameById(self, folder_id: str, connection: Dict[str, Any]) -> str:
- """
- Get folder name by folder ID
-
- This is a helper method to identify which folder a draft is in
- """
- try:
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get folder information
- api_url = f"{graph_url}/me/mailFolders/{folder_id}"
- response = requests.get(api_url, headers=headers)
-
- if response.status_code == 200:
- folder_data = response.json()
- return folder_data.get("displayName", f"Unknown Folder ({folder_id})")
- else:
- return f"Unknown Folder ({folder_id})"
-
- except Exception as e:
- logger.warning(f"Error getting folder name for ID '{folder_id}': {str(e)}")
- return f"Unknown Folder ({folder_id})"
-
- async def checkDraftsFolder(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Check contents of the Drafts folder.
- - Input requirements: connectionReference (required); optional limit, outputMimeType.
- - Output format: JSON with drafts and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - limit (int, optional): Maximum items to return. Default: 20.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- try:
- connectionReference = parameters.get("connectionReference")
- limit = parameters.get("limit", 20)
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Check Drafts folder directly
- try:
- # Microsoft Graph API endpoint for messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get the Drafts folder ID
- drafts_folder_id = self._getFolderId("Drafts", connection)
-
- if not drafts_folder_id:
- return ActionResult.isFailure(error="Could not find Drafts folder")
-
- # Get messages directly from Drafts folder
- api_url = f"{graph_url}/me/mailFolders/{drafts_folder_id}/messages"
- params = {
- "$top": limit,
- "$select": "id,subject,from,toRecipients,ccRecipients,bccRecipients,receivedDateTime,lastModifiedDateTime,isDraft,webLink",
- "$orderby": "lastModifiedDateTime desc"
- }
-
- # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
- response.raise_for_status()
-
- messages_data = response.json()
- drafts = messages_data.get("value", [])
-
- drafts_result = {
- "draftsFolderId": drafts_folder_id,
- "totalDrafts": len(drafts),
- "drafts": drafts,
- "limit": limit,
- "apiResponse": messages_data,
- "apiUrl": api_url
- }
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error checking Drafts folder via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to check Drafts folder: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
- result_data = {
- "connectionReference": connectionReference,
- "limit": limit,
- "draftsResult": drafts_result,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.checkDraftsFolder",
- "connectionReference": connectionReference,
- "limit": limit,
- "totalDrafts": drafts_result.get("totalDrafts", 0),
- "draftsFolderId": drafts_result.get("draftsFolderId"),
- "outputMimeType": outputMimeType
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_drafts_folder_check_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error checking Drafts folder: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- @action
- async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Compose email content using AI from context and optional documents, then create a draft.
- - Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
- - Output format: JSON confirmation with AI-generated draft metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - to (list, required): Recipient email addresses.
-        - context (str, required): Detailed context for composing the email.
- - documentList (list, optional): Document references for context/attachments.
- - cc (list, optional): CC recipients.
- - bcc (list, optional): BCC recipients.
- - emailStyle (str, optional): formal | casual | business. Default: business.
- - maxLength (int, optional): Maximum length for generated content. Default: 1000.
- """
- try:
- connectionReference = parameters.get("connectionReference")
- to = parameters.get("to")
- context = parameters.get("context")
- documentList = parameters.get("documentList", [])
- cc = parameters.get("cc", [])
- bcc = parameters.get("bcc", [])
- emailStyle = parameters.get("emailStyle", "business")
- maxLength = parameters.get("maxLength", 1000)
-
- if not connectionReference or not to or not context:
- return ActionResult.isFailure(error="connectionReference, to, and context are required")
-
- # Convert single values to lists for all recipient parameters
- if isinstance(to, str):
- to = [to]
- if isinstance(cc, str):
- cc = [cc]
- if isinstance(bcc, str):
- bcc = [bcc]
- if isinstance(documentList, str):
- documentList = [documentList]
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found")
-
- # Check permissions
- permissions_ok = await self._checkPermissions(connection)
- if not permissions_ok:
- return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")
-
- # Prepare documents for AI processing
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- chatDocuments = []
- if documentList:
- # Convert to DocumentReferenceList if needed
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- elif isinstance(documentList, str):
- docRefList = DocumentReferenceList.from_string_list([documentList])
- else:
- docRefList = DocumentReferenceList(references=[])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
-
- # Create AI prompt for email composition
- # Build document reference list for AI with expanded list contents when possible
- doc_references = documentList
- doc_list_text = ""
- if doc_references:
- lines = ["Available_Document_References:"]
- for ref in doc_references:
- # Each item is a label: resolve to its document list and render contained items
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- list_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([ref])) or []
- if list_docs:
- for d in list_docs:
- doc_ref_label = self.services.chat.getDocumentReferenceFromChatDocument(d)
- lines.append(f"- {doc_ref_label}")
- else:
- lines.append(" - (no documents)")
- doc_list_text = "\n" + "\n".join(lines)
- else:
- doc_list_text = "Available_Document_References: (No documents available for attachment)"
-
- # Escape only the user-controlled context to prevent prompt injection
- escaped_context = context.replace('"', '\\"').replace('\n', '\\n').replace('\r', '\\r')
-
- ai_prompt = f"""Compose an email based on this context:
--------
-{escaped_context}
--------
-
-Recipients: {to}
-Style: {emailStyle}
-Max length: {maxLength} characters
-{doc_list_text}
-
-Based on the context, decide which documents to attach.
-
-CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:: format (include filename)
-
-Return JSON:
-{{
- "subject": "subject line",
- "body": "email body (HTML allowed)",
- "attachments": ["docItem::"]
-}}
-"""
-
- # Call AI service to generate email content
- try:
- ai_response = await self.services.ai.callAiPlanning(
- prompt=ai_prompt,
- placeholders=None,
- debugType="email_composition"
- )
-
- # Parse AI response
- try:
- ai_content = ai_response
- # Extract JSON from AI response
- if "```json" in ai_content:
- json_start = ai_content.find("```json") + 7
- json_end = ai_content.find("```", json_start)
- json_content = ai_content[json_start:json_end].strip()
- elif "{" in ai_content and "}" in ai_content:
- json_start = ai_content.find("{")
- json_end = ai_content.rfind("}") + 1
- json_content = ai_content[json_start:json_end]
- else:
- json_content = ai_content
-
- email_data = json.loads(json_content)
- subject = email_data.get("subject", "")
- body = email_data.get("body", "")
- ai_attachments = email_data.get("attachments", [])
-
- if not subject or not body:
- return ActionResult.isFailure(error="AI did not generate valid subject and body")
-
- # Use AI-selected attachments if provided, otherwise use all documents
- if documentList:
- try:
- available_refs = [documentList] if isinstance(documentList, str) else documentList
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- available_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(available_refs)) or []
- except Exception:
- available_docs = []
-
- # Normalize AI attachments to a list of strings
- if isinstance(ai_attachments, str):
- ai_attachments = [ai_attachments]
- elif isinstance(ai_attachments, list):
- ai_attachments = [a for a in ai_attachments if isinstance(a, str)]
-
- # Initialize normalized_ai_attachments
- normalized_ai_attachments = []
-
- if ai_attachments:
- try:
- ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- ai_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(ai_refs)) or []
- except Exception:
- ai_docs = []
-
- # Intersect by document id
- available_ids = {getattr(d, 'id', None) for d in available_docs}
- selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids]
-
- if selected_docs:
- # Map selected ChatDocuments back to docItem references (with full filename)
- documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs]
- # Normalize ai_attachments to full format for storage
- normalized_ai_attachments = documentList.copy()
- logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)")
- else:
- # No intersection; use all available documents
- documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
- normalized_ai_attachments = documentList.copy()
- logger.warning("AI selected attachments not found in available documents, using all documents")
- else:
- # No AI selection; use all available documents
- documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
- normalized_ai_attachments = documentList.copy()
- logger.warning("AI did not specify attachments, using all available documents")
- else:
- logger.info("No documents provided in documentList; skipping attachment processing")
-
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse AI response as JSON: {str(e)}")
- logger.error(f"AI response content: {ai_response}")
- return ActionResult.isFailure(error="AI response was not valid JSON format")
-
- except Exception as e:
- logger.error(f"Error calling AI service: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to generate email content: {str(e)}")
-
- # Now create the email with AI-generated content
- try:
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Clean and format body content
- cleaned_body = body.strip()
-
- # Check if body is already HTML
-                if cleaned_body.startswith('<html>') or cleaned_body.startswith('<p>') or '<br>' in cleaned_body:
-                    html_body = cleaned_body
-                else:
-                    # Convert plain text to proper HTML formatting
-                    html_body = cleaned_body.replace('\n', '<br>')
-                    html_body = f"<html><body>{html_body}</body></html>"
-
- # Build the email message
- message = {
- "subject": subject,
- "body": {
- "contentType": "HTML",
- "content": html_body
- },
- "toRecipients": [{"emailAddress": {"address": email}} for email in to],
- "ccRecipients": [{"emailAddress": {"address": email}} for email in cc] if cc else [],
- "bccRecipients": [{"emailAddress": {"address": email}} for email in bcc] if bcc else []
- }
-
- # Add documents as attachments if provided
- if documentList:
- message["attachments"] = []
- for attachment_ref in documentList:
- # Get attachment document from service center
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- attachment_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([attachment_ref]))
- if attachment_docs:
- for doc in attachment_docs:
- file_id = getattr(doc, 'fileId', None)
- if file_id:
- try:
- file_content = self.services.chat.getFileData(file_id)
- if file_content:
- if isinstance(file_content, bytes):
- content_bytes = file_content
- else:
- content_bytes = str(file_content).encode('utf-8')
-
- base64_content = base64.b64encode(content_bytes).decode('utf-8')
-
- attachment = {
- "@odata.type": "#microsoft.graph.fileAttachment",
- "name": doc.fileName,
- "contentType": doc.mimeType or "application/octet-stream",
- "contentBytes": base64_content
- }
- message["attachments"].append(attachment)
- except Exception as e:
- logger.error(f"Error reading attachment file {doc.fileName}: {str(e)}")
-
- # Create the draft message
- drafts_folder_id = self._getFolderId("Drafts", connection)
-
- if drafts_folder_id:
- api_url = f"{graph_url}/me/mailFolders/{drafts_folder_id}/messages"
- else:
- api_url = f"{graph_url}/me/messages"
- logger.warning("Could not find Drafts folder, creating draft in default location")
-
- response = requests.post(api_url, headers=headers, json=message)
-
- if response.status_code in [200, 201]:
- draft_data = response.json()
- draft_id = draft_data.get("id", "Unknown")
-
- # Create draft result data with full draft information
- draftResultData = {
- "status": "draft",
- "message": "Email draft created successfully with AI-generated content",
- "draftId": draft_id,
- "folder": "Drafts (Entwürfe)",
- "mailbox": connection.get('userEmail', 'Unknown'),
- "subject": subject,
- "body": body,
- "recipients": to,
- "cc": cc,
- "bcc": bcc,
- "attachments": len(documentList),
- "aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
- "aiGenerated": True,
- "context": context,
- "emailStyle": emailStyle,
- "timestamp": self.services.utils.timestampGetUtc(),
- "draftData": draft_data
- }
-
- # Extract attachment filenames for validation metadata
- attachmentFilenames = []
- attachmentReferences = []
- if documentList:
- try:
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
- attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
- # Store normalized document references (with filenames) - use normalized_ai_attachments if available
- attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
- except Exception:
- pass
-
- # Create validation metadata for content validator
- validationMetadata = {
- "actionType": "outlook.composeAndDraftEmailWithContext",
- "emailRecipients": to,
- "emailCc": cc,
- "emailBcc": bcc,
- "emailSubject": subject,
- "emailAttachments": attachmentFilenames,
- "emailAttachmentReferences": attachmentReferences,
- "emailAttachmentCount": len(attachmentFilenames),
- "emailStyle": emailStyle,
- "hasAttachments": len(attachmentFilenames) > 0
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(draftResultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
- else:
- logger.error(f"Failed to create draft. Status: {response.status_code}, Response: {response.text}")
- return ActionResult.isFailure(error=f"Failed to create email draft: {response.status_code} - {response.text}")
-
- except Exception as e:
- logger.error(f"Error creating email via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to create email: {str(e)}")
-
- except Exception as e:
- logger.error(f"Error in composeAndDraftEmailWithContext: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- @action
- async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
- - Input requirements: connectionReference (required); documentList with draft email JSON documents (required).
- - Output format: JSON confirmation with sent mail metadata for all emails.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function).
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"outlook_send_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Send Draft Email",
- "Outlook Email Sending",
- f"Processing {len(parameters.get('documentList', []))} draft(s)",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- documentList = parameters.get("documentList", [])
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- if not documentList:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="documentList is required and cannot be empty")
-
- # Convert single value to list if needed
- if isinstance(documentList, str):
- documentList = [documentList]
-
- # Get Microsoft connection
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Check permissions
- self.services.chat.progressLogUpdate(operationId, 0.3, "Checking permissions")
- permissions_ok = await self._checkPermissions(connection)
- if not permissions_ok:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")
-
- # Read draft email JSON documents from documentList
- self.services.chat.progressLogUpdate(operationId, 0.4, "Reading draft email documents")
- draftEmails = []
- for docRef in documentList:
- try:
- # Get documents from document reference
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([docRef]))
- if not chatDocuments:
- logger.warning(f"No documents found for reference: {docRef}")
- continue
-
- # Process each document in the reference
- for doc in chatDocuments:
- try:
- # Read file data
- fileId = getattr(doc, 'fileId', None)
- if not fileId:
- logger.warning(f"Document {doc.fileName} has no fileId")
- continue
-
- fileData = self.services.chat.getFileData(fileId)
- if not fileData:
- logger.warning(f"No file data found for document: {doc.fileName}")
- continue
-
- # Parse JSON content
- if isinstance(fileData, bytes):
- jsonContent = fileData.decode('utf-8')
- else:
- jsonContent = str(fileData)
-
- # Parse JSON - handle both direct JSON and JSON wrapped in documentData
- try:
- draftEmailData = json.loads(jsonContent)
-
- # If the JSON contains a 'documentData' field, extract it
- if isinstance(draftEmailData, dict) and 'documentData' in draftEmailData:
- documentDataStr = draftEmailData['documentData']
- if isinstance(documentDataStr, str):
- draftEmailData = json.loads(documentDataStr)
-
- # Validate draft email structure
- if not isinstance(draftEmailData, dict):
- logger.warning(f"Document {doc.fileName} does not contain a valid draft email JSON object")
- continue
-
- draftId = draftEmailData.get("draftId")
- if not draftId:
- logger.warning(f"Document {doc.fileName} does not contain 'draftId' field")
- continue
-
- draftEmails.append({
- "draftEmailJson": draftEmailData,
- "draftId": draftId,
- "sourceDocument": doc.fileName,
- "sourceReference": docRef
- })
-
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse JSON from document {doc.fileName}: {str(e)}")
- continue
-
- except Exception as e:
- logger.error(f"Error processing document {doc.fileName}: {str(e)}")
- continue
-
- except Exception as e:
- logger.error(f"Error reading documents from reference {docRef}: {str(e)}")
- continue
-
- if not draftEmails:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid draft email JSON documents found in documentList")
-
- self.services.chat.progressLogUpdate(operationId, 0.6, f"Found {len(draftEmails)} draft email(s) to send")
-
- # Send all draft emails
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- sentResults = []
- failedResults = []
-
- self.services.chat.progressLogUpdate(operationId, 0.7, "Sending emails")
- for idx, draftEmail in enumerate(draftEmails):
- draftEmailJson = draftEmail["draftEmailJson"]
- draftId = draftEmail["draftId"]
- sourceDocument = draftEmail["sourceDocument"]
-
- try:
- send_url = f"{graph_url}/me/messages/{draftId}/send"
- sendResponse = requests.post(send_url, headers=headers)
-
- # Extract email details from draft JSON for confirmation
- subject = draftEmailJson.get("subject", "Unknown")
- recipients = draftEmailJson.get("recipients", [])
- cc = draftEmailJson.get("cc", [])
- bcc = draftEmailJson.get("bcc", [])
- attachmentsCount = draftEmailJson.get("attachments", 0)
-
- if sendResponse.status_code in [200, 202, 204]:
- sentResults.append({
- "status": "sent",
- "message": "Email sent successfully",
- "draftId": draftId,
- "subject": subject,
- "recipients": recipients,
- "cc": cc,
- "bcc": bcc,
- "attachments": attachmentsCount,
- "sentTimestamp": self.services.utils.timestampGetUtc(),
- "sourceDocument": sourceDocument
- })
- logger.info(f"Email sent successfully. Draft ID: {draftId}, Subject: {subject}")
- self.services.chat.progressLogUpdate(operationId, 0.7 + (idx + 1) * 0.2 / len(draftEmails), f"Sent {idx + 1}/{len(draftEmails)}: {subject}")
- else:
- errorResult = {
- "status": "error",
- "message": "Failed to send draft email",
- "draftId": draftId,
- "subject": subject,
- "recipients": recipients,
- "sendError": {
- "statusCode": sendResponse.status_code,
- "response": sendResponse.text
- },
- "sentTimestamp": self.services.utils.timestampGetUtc(),
- "sourceDocument": sourceDocument
- }
- failedResults.append(errorResult)
- logger.error(f"Failed to send email. Draft ID: {draftId}, Status: {sendResponse.status_code}, Response: {sendResponse.text}")
-
- except Exception as e:
- errorResult = {
- "status": "error",
- "message": f"Exception while sending draft email: {str(e)}",
- "draftId": draftId,
- "subject": draftEmailJson.get("subject", "Unknown"),
- "recipients": draftEmailJson.get("recipients", []),
- "exception": str(e),
- "sentTimestamp": self.services.utils.timestampGetUtc(),
- "sourceDocument": sourceDocument
- }
- failedResults.append(errorResult)
- logger.error(f"Error sending draft email {draftId}: {str(e)}")
-
- # Build result summary
- totalEmails = len(draftEmails)
- successfulEmails = len(sentResults)
- failedEmails = len(failedResults)
-
- resultData = {
- "totalEmails": totalEmails,
- "successfulEmails": successfulEmails,
- "failedEmails": failedEmails,
- "sentResults": sentResults,
- "failedResults": failedResults,
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- # Determine overall success status
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Sent {successfulEmails}/{totalEmails} email(s)")
- if successfulEmails == 0:
- self.services.chat.progressLogFinish(operationId, False)
- validationMetadata = {
- "actionType": "outlook.sendDraftEmail",
- "connectionReference": connectionReference,
- "totalEmails": totalEmails,
- "successfulEmails": successfulEmails,
- "failedEmails": failedEmails,
- "status": "all_failed"
- }
- return ActionResult.isFailure(
- error=f"Failed to send all {totalEmails} email(s)",
- documents=[ActionDocument(
- documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(resultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
- elif failedEmails > 0:
- # Partial success
- logger.warning(f"Sent {successfulEmails} out of {totalEmails} emails. {failedEmails} failed.")
- validationMetadata = {
- "actionType": "outlook.sendDraftEmail",
- "connectionReference": connectionReference,
- "totalEmails": totalEmails,
- "successfulEmails": successfulEmails,
- "failedEmails": failedEmails,
- "status": "partial_success"
- }
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(resultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
- else:
- # All successful
- logger.info(f"Successfully sent all {totalEmails} email(s)")
- validationMetadata = {
- "actionType": "outlook.sendDraftEmail",
- "connectionReference": connectionReference,
- "totalEmails": totalEmails,
- "successfulEmails": successfulEmails,
- "failedEmails": failedEmails,
- "status": "all_successful"
- }
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(resultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error in sendDraftEmail: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- async def checkPermissions(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Verify that the connection has required permissions for Outlook operations.
- - Input requirements: connectionReference (required).
- - Output format: JSON with permission status and details.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label to check.
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="Failed to get Microsoft connection")
-
- # Check permissions
- permissions_ok = await self._checkPermissions(connection)
-
- if permissions_ok:
- result_data = {
- "permissions": "✅ All necessary permissions are available",
- "scopes": connection.get("scopes", []),
- "connectionId": connection.get("id"),
- "status": "ready"
- }
-
- validationMetadata = {
- "actionType": "outlook.checkPermissions",
- "connectionReference": connectionReference,
- "permissionsStatus": "ready",
- "hasPermissions": True
- }
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_permissions_check_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
- else:
- result_data = {
- "permissions": "❌ Missing necessary permissions",
- "requiredScopes": ["Mail.ReadWrite", "Mail.Send", "Mail.ReadWrite.Shared", "User.Read"],
- "currentScopes": connection.get("scopes", []),
- "connectionId": connection.get("id"),
- "status": "needs_reauthentication",
- "message": "Please re-authenticate your Microsoft connection to get updated permissions."
- }
-
- validationMetadata = {
- "actionType": "outlook.checkPermissions",
- "connectionReference": connectionReference,
- "permissionsStatus": "needs_reauthentication",
- "hasPermissions": False
- }
- return ActionResult(
- success=False,
- documents=[ActionDocument(
- documentName=f"outlook_permissions_check_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )],
- error="Connection lacks necessary permissions for Outlook operations"
- )
-
- except Exception as e:
- logger.error(f"Error checking permissions: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
diff --git a/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py b/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
index 2bad3838..59604896 100644
--- a/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
+++ b/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
@@ -1,39 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Compose And Draft Email With Context action for Outlook operations.
-Composes email content using AI from context and optional documents, then creates a draft.
-"""
-
import logging
import json
import base64
import requests
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Compose email content using AI from context and optional documents, then create a draft.
- - Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
- - Output format: JSON confirmation with AI-generated draft metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - to (list, required): Recipient email addresses.
-    - context (str, required): Detailed context for composing the email.
- - documentList (list, optional): Document references for context/attachments.
- - cc (list, optional): CC recipients.
- - bcc (list, optional): BCC recipients.
- - emailStyle (str, optional): formal | casual | business. Default: business.
- - maxLength (int, optional): Maximum length for generated content. Default: 1000.
- """
try:
connectionReference = parameters.get("connectionReference")
to = parameters.get("to")
diff --git a/modules/workflows/methods/methodOutlook/actions/readEmails.py b/modules/workflows/methods/methodOutlook/actions/readEmails.py
index e698cb9f..2d325d9f 100644
--- a/modules/workflows/methods/methodOutlook/actions/readEmails.py
+++ b/modules/workflows/methods/methodOutlook/actions/readEmails.py
@@ -1,36 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Read Emails action for Outlook operations.
-Reads emails and metadata from a mailbox folder.
-"""
-
import logging
import time
import json
import requests
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Read emails and metadata from a mailbox folder.
- - Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
- - Output format: JSON with emails and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - folder (str, optional): Folder to read from. Default: Inbox.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - filter (str, optional): Sender, query operators, or subject text.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
operationId = None
try:
# Init progress logger
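
A hedged parameters sketch for readEmails, following the removed docstring above (values hypothetical):

parameters = {
    "connectionReference": "msft-work",
    "folder": "Inbox",                   # default
    "limit": 50,                         # must be > 0
    "filter": "alice@example.com",       # sender, query operators, or subject text
    "outputMimeType": "text/csv",        # or "application/json" / "text/plain"
}
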
diff --git a/modules/workflows/methods/methodOutlook/actions/searchEmails.py b/modules/workflows/methods/methodOutlook/actions/searchEmails.py
index 72830caf..f8831d59 100644
--- a/modules/workflows/methods/methodOutlook/actions/searchEmails.py
+++ b/modules/workflows/methods/methodOutlook/actions/searchEmails.py
@@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Search Emails action for Outlook operations.
-Searches emails by query and returns matching items with metadata.
-"""
-
import logging
import json
import requests
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Search emails by query and return matching items with metadata.
- - Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
- - Output format: JSON with search results and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - query (str, required): Search expression.
- - folder (str, optional): Folder scope or All. Default: All.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
try:
connectionReference = parameters.get("connectionReference")
query = parameters.get("query")
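
Likewise for searchEmails, per the removed docstring above (values hypothetical):

parameters = {
    "connectionReference": "msft-work",
    "query": "invoice 2025",
    "folder": "All",                     # folder scope or "All" (default)
    "limit": 100,
}
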
diff --git a/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py b/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
index ffae4c8d..9b7fb011 100644
--- a/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
+++ b/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
@@ -1,33 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Send Draft Email action for Outlook operations.
-Sends draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
-"""
-
import logging
import time
import json
import requests
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
- - Input requirements: connectionReference (required); documentList with draft email JSON documents (required).
- - Output format: JSON confirmation with sent mail metadata for all emails.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document reference(s) to draft-email JSON documents (outputs of the outlook.composeAndDraftEmailWithContext action).
- """
operationId = None
try:
# Init progress logger
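
sendDraftEmail chains off the compose action via documentList; a hedged sketch (the document reference string is hypothetical):

parameters = {
    "connectionReference": "msft-work",
    # Draft JSON document(s) produced by outlook.composeAndDraftEmailWithContext:
    "documentList": ["draft_email_20251231-021210.json"],
}
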
diff --git a/modules/workflows/methods/methodSharepoint.py.old b/modules/workflows/methods/methodSharepoint.py.old
deleted file mode 100644
index d12b53eb..00000000
--- a/modules/workflows/methods/methodSharepoint.py.old
+++ /dev/null
@@ -1,2840 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-SharePoint operations method module.
-Handles SharePoint document operations using the SharePoint service.
-"""
-
-import logging
-import re
-import json
-from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC, timedelta, timezone
-import urllib
-import aiohttp
-import asyncio
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-
-logger = logging.getLogger(__name__)
-
-class MethodSharepoint(MethodBase):
- """SharePoint operations methods."""
-
- def __init__(self, services):
- super().__init__(services)
- self.name = "sharepoint"
- self.description = "SharePoint operations methods"
-
- def _format_timestamp_for_filename(self) -> str:
- """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
- return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
-
- def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
- """Get Microsoft connection from connection reference and configure SharePoint service"""
- try:
- userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference)
- if not userConnection:
- logger.warning(f"No user connection found for reference: {connectionReference}")
- return None
-
- if userConnection.authority.value != "msft":
- logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
- return None
-
- # Check if connection is active or pending (pending means OAuth in progress)
- if userConnection.status.value not in ["active", "pending"]:
- logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
- return None
-
- # Configure SharePoint service with the UserConnection
- if not self.services.sharepoint.setAccessTokenFromConnection(userConnection):
- logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
- return None
-
- logger.info(f"Successfully configured SharePoint service with Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")
-
- return {
- "id": userConnection.id,
- "userConnection": userConnection,
- "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"] # SharePoint scopes
- }
- except Exception as e:
- logger.error(f"Error getting Microsoft connection: {str(e)}")
- return None
-
- async def _discoverSharePointSites(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
- """
- Discover SharePoint sites accessible to the user via Microsoft Graph API
-
- Parameters:
- limit (Optional[int]): Limit number of sites to return (for optimization when only hostname is needed)
-
- Returns:
- List[Dict[str, Any]]: List of SharePoint site information
- """
- try:
- # Query Microsoft Graph to get sites the user has access to
- endpoint = "sites?search=*"
- if limit:
- endpoint += f"&$top={limit}"
-
- result = await self._makeGraphApiCall(endpoint)
-
- if "error" in result:
- logger.error(f"Error discovering SharePoint sites: {result['error']}")
- return []
-
- sites = result.get("value", [])
- if limit:
- sites = sites[:limit]
-
- logger.info(f"Discovered {len(sites)} SharePoint sites" + (f" (limited to {limit})" if limit else ""))
-
- # Process and return site information
- processedSites = []
- for site in sites:
- siteInfo = {
- "id": site.get("id"),
- "displayName": site.get("displayName"),
- "name": site.get("name"),
- "webUrl": site.get("webUrl"),
- "description": site.get("description"),
- "createdDateTime": site.get("createdDateTime"),
- "lastModifiedDateTime": site.get("lastModifiedDateTime")
- }
- processedSites.append(siteInfo)
- logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")
-
- return processedSites
-
- except Exception as e:
- logger.error(f"Error discovering SharePoint sites: {str(e)}")
- return []
-
- def _extractHostnameFromWebUrl(self, webUrl: str) -> Optional[str]:
- """Extract hostname from SharePoint webUrl (e.g., https://pcuster.sharepoint.com)"""
- try:
- if not webUrl:
- return None
- parsed = urllib.parse.urlparse(webUrl)
- return parsed.hostname
- except Exception as e:
- logger.error(f"Error extracting hostname from webUrl '{webUrl}': {str(e)}")
- return None
-
- def _extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]:
- """
- Extract site name from Microsoft-standard server-relative path.
- Delegates to SharePoint service.
- """
- return self.services.sharepoint.extractSiteFromStandardPath(pathQuery)
-
- async def _getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]:
- """
- Get SharePoint site directly by Microsoft-standard path.
- Delegates to SharePoint service.
- """
- return await self.services.sharepoint.getSiteByStandardPath(sitePath)
-
- def _filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
- """
- Filter discovered sites by a human-entered site hint.
- Delegates to SharePoint service.
- """
- return self.services.sharepoint.filterSitesByHint(sites, siteHint)
-
- def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
- """
- Parse searchQuery to extract path, search terms, search type, and search options.
-
- CRITICAL: NEVER convert words to paths! Words stay as search terms.
- - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson")
- - "root, gose" → fileQuery="root, gose" (NOT "/root/gose")
- - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject"
-
- Parameters:
- searchQuery (str): Enhanced search query with options:
- - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
- - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={}
- - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={}
- - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
- - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
- - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
- - "exact:\"Operations 2025\"" -> exact phrase matching
- - "regex:^Operations.*2025$" -> regex pattern matching
- - "case:DELTA" -> case-sensitive search
- - "and:DELTA AND 2025 Mars AND Group" -> all AND terms must be present
-
- Returns:
- tuple[str, str, str, dict]: (pathQuery, fileQuery, searchType, searchOptions)
- """
- try:
- if not searchQuery or not searchQuery.strip() or searchQuery.strip() == "*":
- return "*", "*", "all", {}
-
- searchQuery = searchQuery.strip()
- searchOptions = {}
-
- # CRITICAL: Do NOT convert space-separated or comma-separated words to paths!
- # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson"
- # "root, gose" should stay as "root, gose", NOT "/root/gose"
-
- # Check for search type specification (files:, folders:, all:) FIRST
- searchType = "all" # Default
- if searchQuery.startswith(("files:", "folders:", "all:")):
- typeParts = searchQuery.split(':', 1)
- searchType = typeParts[0].strip()
- searchQuery = typeParts[1].strip()
-
- # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
- def _extractSiteHint(q: str) -> tuple[str, Optional[str]]:
- try:
- qStrip = q.strip()
- # Leading form: site:KM LayerFinance ...
- if qStrip.lower().startswith("site:"):
- after = qStrip[5:].lstrip()
- # site name until next space or end
- if ' ' in after:
- siteName, rest = after.split(' ', 1)
- else:
- siteName, rest = after, ''
- return rest.strip(), siteName.strip()
- # Inline key=value form anywhere
- m = re.search(r"\bsite=([^;\s]+)", qStrip, flags=re.IGNORECASE)
- if m:
- siteName = m.group(1).strip()
- # remove the token from query
- qNew = re.sub(r"\bsite=[^;\s]+;?", "", qStrip, flags=re.IGNORECASE).strip()
- return qNew, siteName
- except Exception:
- pass
- return q, None
-
- searchQuery, extractedSite = _extractSiteHint(searchQuery)
- if extractedSite:
- searchOptions["site_hint"] = extractedSite
- logger.info(f"Extracted site hint: '{extractedSite}'")
-
- # Extract name="..." if present (for quoted multi-word names)
- nameMatch = re.search(r"name=\"([^\"]+)\"", searchQuery)
- if nameMatch:
- searchQuery = nameMatch.group(1)
- logger.info(f"Extracted name from quotes: '{searchQuery}'")
-
- # Check for search mode specification (exact:, regex:, case:, and:)
- if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
- modeParts = searchQuery.split(':', 1)
- mode = modeParts[0].strip()
- searchQuery = modeParts[1].strip()
-
- if mode == "exact":
- searchOptions["exact_match"] = True
- # Remove quotes if present
- if searchQuery.startswith('"') and searchQuery.endswith('"'):
- searchQuery = searchQuery[1:-1]
- elif mode == "regex":
- searchOptions["regex_match"] = True
- elif mode == "case":
- searchOptions["case_sensitive"] = True
- elif mode == "and":
- searchOptions["and_terms"] = True
-
- # Check if it contains path:search format
- # Microsoft-standard paths: /sites/SiteName/Path:files:.pdf
- if ':' in searchQuery:
- # For Microsoft-standard paths (/sites/...), find the colon that separates path from search
- if searchQuery.startswith('/sites/'):
- # Find the colon that separates path from search (after the full path)
- # Look for pattern: /sites/SiteName/Path/...:files:.pdf
- # We need to find the colon that's followed by search type or file extension
- colonPositions = []
- for i, char in enumerate(searchQuery):
- if char == ':':
- colonPositions.append(i)
-
- # If we have colons, find the one that's followed by search type or file extension
- splitPos = None
- if colonPositions:
- for pos in colonPositions:
- afterColon = searchQuery[pos+1:pos+10].strip().lower()
- # Check if this colon is followed by search type or looks like a file extension
- if afterColon.startswith(('files:', 'folders:', 'all:', '.')) or afterColon == '':
- splitPos = pos
- break
-
- # If no clear split found, use the last colon
- if splitPos is None and colonPositions:
- splitPos = colonPositions[-1]
-
- if splitPos:
- pathPart = searchQuery[:splitPos].strip()
- searchPart = searchQuery[splitPos+1:].strip()
- else:
- # Fallback: split on first colon
- parts = searchQuery.split(':', 1)
- pathPart = parts[0].strip()
- searchPart = parts[1].strip()
- else:
- # Regular path:search format - split on first colon
- parts = searchQuery.split(':', 1)
- pathPart = parts[0].strip()
- searchPart = parts[1].strip()
-
- # Check if searchPart starts with search type (files:, folders:, all:)
- if searchPart.startswith(("files:", "folders:", "all:")):
- typeParts = searchPart.split(':', 1)
- searchType = typeParts[0].strip() # Update searchType
- searchPart = typeParts[1].strip() if len(typeParts) > 1 else ""
-
- # Handle path part
- if not pathPart or pathPart == "*":
- pathQuery = "*"
- elif pathPart.startswith('/'):
- pathQuery = pathPart
- else:
- pathQuery = f"/Documents/{pathPart}"
-
- # Handle search part
- if not searchPart or searchPart == "*":
- fileQuery = "*"
- else:
- fileQuery = searchPart
-
- return pathQuery, fileQuery, searchType, searchOptions
-
- # No colon - check if it looks like a path
- elif searchQuery.startswith('/'):
- # It's a path only
- return searchQuery, "*", searchType, searchOptions
-
- else:
- # It's a search term only - keep words as-is, do NOT convert to paths
- # "root document lesson" stays as "root document lesson"
- # "root, gose" stays as "root, gose"
- return "*", searchQuery, searchType, searchOptions
-
- except Exception as e:
- logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
- raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}")
-
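# A standalone sketch of the site-hint extraction implemented in
# _parseSearchQuery above (same regexes, runnable outside the class):
import re
from typing import Optional

def extract_site_hint(q: str) -> tuple[str, Optional[str]]:
    q = q.strip()
    if q.lower().startswith("site:"):
        after = q[5:].lstrip()
        site, _, rest = after.partition(' ')
        return rest.strip(), site.strip()
    m = re.search(r"\bsite=([^;\s]+)", q, flags=re.IGNORECASE)
    if m:
        rest = re.sub(r"\bsite=[^;\s]+;?", "", q, flags=re.IGNORECASE).strip()
        return rest, m.group(1).strip()
    return q, None

assert extract_site_hint("site:Finance budget 2025") == ("budget 2025", "Finance")
assert extract_site_hint("budget site=Finance") == ("budget", "Finance")
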
- def _resolvePathQuery(self, pathQuery: str) -> List[str]:
- """
- Resolve pathQuery into a list of search paths for SharePoint operations.
-
- Parameters:
- pathQuery (str): Query string that can contain:
- - Direct paths (e.g., "/Documents/Project1")
- - Wildcards (e.g., "/Documents/*")
- - Multiple paths separated by semicolons (e.g., "/Docs; /Files")
- - Single word relative paths (e.g., "Project1" -> resolved to default folder)
- - Empty string or "*" for global search
- - Space-separated words are treated as search terms, NOT folder paths
-
- Returns:
- List[str]: List of resolved paths
- """
- try:
- if not pathQuery or not pathQuery.strip() or pathQuery.strip() == "*":
- return ["*"] # Global search across all sites
-
- # Split by semicolon to handle multiple paths
- rawPaths = [path.strip() for path in pathQuery.split(';') if path.strip()]
- resolvedPaths = []
-
- for rawPath in rawPaths:
- # Handle wildcards - return as-is
- if '*' in rawPath:
- resolvedPaths.append(rawPath)
- # Handle absolute paths
- elif rawPath.startswith('/'):
- resolvedPaths.append(rawPath)
- # Handle single word relative paths - prepend default folder
- # BUT NOT space-separated words (those are search terms, not paths)
- elif ' ' not in rawPath:
- resolvedPaths.append(f"/Documents/{rawPath}")
- else:
- # Check if this looks like a path (has path separators) or search terms
- if '\\' in rawPath or '/' in rawPath:
- # This looks like a path with spaces in folder names - treat as valid path
- resolvedPaths.append(rawPath)
- logger.info(f"Path with spaces '{rawPath}' treated as valid folder path")
- else:
- # Space-separated words without path separators are search terms
- # Return as "*" to search globally
- logger.info(f"Space-separated words '{rawPath}' treated as search terms, not folder path")
- resolvedPaths.append("*")
-
- # Remove duplicates while preserving order
- seen = set()
- uniquePaths = []
- for path in resolvedPaths:
- if path not in seen:
- seen.add(path)
- uniquePaths.append(path)
-
- logger.info(f"Resolved pathQuery '{pathQuery}' to {len(uniquePaths)} paths: {uniquePaths}")
- return uniquePaths
-
- except Exception as e:
- logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
- raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}")
-
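# A minimal, standalone sketch of the resolution rules documented above
# (behavior mirrors _resolvePathQuery for the documented cases):
def resolve_path_query(path_query: str) -> list:
    if not path_query or not path_query.strip() or path_query.strip() == "*":
        return ["*"]                                 # global search
    resolved = []
    for raw in (p.strip() for p in path_query.split(';')):
        if not raw:
            continue
        if '*' in raw or raw.startswith('/'):
            resolved.append(raw)                     # wildcard or absolute path
        elif ' ' not in raw:
            resolved.append(f"/Documents/{raw}")     # single-word relative path
        elif '/' in raw or '\\' in raw:
            resolved.append(raw)                     # path with spaces in folder names
        else:
            resolved.append("*")                     # space-separated search terms
    return list(dict.fromkeys(resolved))             # order-preserving dedupe

assert resolve_path_query("Project1; /Docs") == ["/Documents/Project1", "/Docs"]
assert resolve_path_query("root document lesson") == ["*"]
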
- def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
- """Parse SharePoint site URL to extract hostname and site path"""
- try:
- parsed = urllib.parse.urlparse(siteUrl)
- hostname = parsed.hostname
- path = parsed.path.strip('/')
-
- return {
- "hostname": hostname,
- "sitePath": path
- }
- except Exception as e:
- logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
- return {"hostname": "", "sitePath": ""}
-
- def _cleanSearchQuery(self, query: str) -> str:
- """
- Clean search query to make it compatible with Graph API KQL syntax.
- Removes path-like syntax and invalid KQL constructs.
-
- Parameters:
- query (str): Raw search query that may contain paths and invalid syntax
-
- Returns:
- str: Cleaned query suitable for Graph API search endpoint
- """
- if not query or not query.strip():
- return ""
-
- query = query.strip()
-
- # Handle patterns like: "Company Share/Freigegebene Dokumente/.../expenses:files:.pdf"
- # Extract the search term and file extension
-
- # First, extract file extension if present (format: :files:.pdf or just .pdf at the end)
- fileExtension = ""
- if ':files:' in query.lower() or ':folders:' in query.lower():
- # Extract extension after the type filter
- extMatch = re.search(r':(?:files|folders):(\.\w+)', query, re.IGNORECASE)
- if extMatch:
- fileExtension = extMatch.group(1)
- # Remove the type filter part
- query = re.sub(r':(?:files|folders):\.?\w*', '', query, flags=re.IGNORECASE)
- elif query.endswith(('.pdf', '.doc', '.docx', '.xls', '.xlsx', '.txt', '.csv', '.ppt', '.pptx')):
- # Extract extension from end
- extMatch = re.search(r'(\.\w+)$', query)
- if extMatch:
- fileExtension = extMatch.group(1)
- query = query[:-len(fileExtension)]
-
- # Extract search term: get the last segment after the last slash (filename part)
- queryNormalized = query.replace('\\', '/')
- if '/' in queryNormalized:
- # Extract the last segment (usually the filename/search term)
- lastSegment = queryNormalized.split('/')[-1]
- # Remove any remaining colons or type filters
- if ':' in lastSegment:
- lastSegment = lastSegment.split(':')[0]
- searchTerm = lastSegment.strip()
- else:
- # No path separators, use the query as-is but remove type filters
- if ':' in query:
- searchTerm = query.split(':')[0].strip()
- else:
- searchTerm = query.strip()
-
- # Remove any remaining type filters or invalid syntax
- searchTerm = re.sub(r':(?:files|folders|all):?', '', searchTerm, flags=re.IGNORECASE)
- searchTerm = searchTerm.strip()
-
- # If we have a file extension, include it in the search term
- # Note: Graph API search endpoint may not support filetype: syntax
- # So we include the extension as part of the search term or filter results after
- if fileExtension:
- extWithoutDot = fileExtension.lstrip('.')
- # Try simple approach: add extension as search term
- # If this doesn't work, we'll filter results after search
- if searchTerm:
- # Include extension in search - Graph API will search in filename
- searchTerm = f"{searchTerm} {extWithoutDot}"
- else:
- searchTerm = extWithoutDot
-
- # Final cleanup: remove any remaining invalid characters for KQL
- # Keep alphanumeric, spaces, hyphens, underscores, dots, and common search operators
- searchTerm = re.sub(r'[^\w\s\-\.\*]', ' ', searchTerm)
- searchTerm = ' '.join(searchTerm.split()) # Normalize whitespace
-
- return searchTerm if searchTerm else "*"
-
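# A hedged sketch of the cleanup rules above: strip ":files:.ext" type
# filters, keep the last path segment as the KQL term, and append the bare
# extension (simplified; the method above also normalizes characters):
import re

def clean_search_query(query: str) -> str:
    ext = ""
    m = re.search(r':(?:files|folders):(\.\w+)', query, re.IGNORECASE)
    if m:
        ext = m.group(1)
        query = re.sub(r':(?:files|folders):\.?\w*', '', query, flags=re.IGNORECASE)
    term = query.replace('\\', '/').split('/')[-1].split(':')[0].strip()
    if ext:
        term = f"{term} {ext.lstrip('.')}".strip()
    return term or "*"

assert clean_search_query("Company Share/Reports/expenses:files:.pdf") == "expenses pdf"
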
- async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
- """Make a Microsoft Graph API call with timeout and detailed logging"""
- try:
- if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.accessToken:
- return {"error": "SharePoint service not configured with access token"}
-
- headers = {
- "Authorization": f"Bearer {self.services.sharepoint._target.accessToken}",
- "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
- }
-
- url = f"https://graph.microsoft.com/v1.0/{endpoint}"
- logger.info(f"Making Graph API call: {method} {url}")
-
- # Set timeout to 30 seconds
- timeout = aiohttp.ClientTimeout(total=30)
-
- async with aiohttp.ClientSession(timeout=timeout) as session:
- if method == "GET":
- logger.debug(f"Starting GET request to {url}")
- async with session.get(url, headers=headers) as response:
- logger.info(f"Graph API response: {response.status}")
- if response.status == 200:
- result = await response.json()
- logger.debug(f"Graph API success: {len(str(result))} characters response")
- return result
- else:
- errorText = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {errorText}")
- return {"error": f"API call failed: {response.status} - {errorText}"}
-
- elif method == "PUT":
- logger.debug(f"Starting PUT request to {url}")
- async with session.put(url, headers=headers, data=data) as response:
- logger.info(f"Graph API response: {response.status}")
- if response.status in [200, 201]:
- result = await response.json()
- logger.debug(f"Graph API success: {len(str(result))} characters response")
- return result
- else:
- errorText = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {errorText}")
- return {"error": f"API call failed: {response.status} - {errorText}"}
-
- elif method == "POST":
- logger.debug(f"Starting POST request to {url}")
- async with session.post(url, headers=headers, data=data) as response:
- logger.info(f"Graph API response: {response.status}")
- if response.status in [200, 201]:
- result = await response.json()
- logger.debug(f"Graph API success: {len(str(result))} characters response")
- return result
- else:
- errorText = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {errorText}")
- return {"error": f"API call failed: {response.status} - {errorText}"}
-
- except asyncio.TimeoutError:
- logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
- return {"error": f"API call timed out after 30 seconds: {endpoint}"}
- except Exception as e:
- logger.error(f"Error making Graph API call: {str(e)}")
- return {"error": f"Error making Graph API call: {str(e)}"}
-
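# Minimal sketch of the GET branch above: 30-second timeout, JSON on 200,
# error dict otherwise. Token acquisition is assumed, not shown.
import aiohttp

async def graph_get(endpoint: str, access_token: str) -> dict:
    url = f"https://graph.microsoft.com/v1.0/{endpoint}"
    headers = {"Authorization": f"Bearer {access_token}"}
    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                return await response.json()
            body = await response.text()
            return {"error": f"API call failed: {response.status} - {body}"}

# Usage (hypothetical token): asyncio.run(graph_get("sites?search=*", token))
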
- async def _getSiteId(self, hostname: str, sitePath: str) -> str:
- """Get SharePoint site ID from hostname and site path"""
- try:
- endpoint = f"sites/{hostname}:/{sitePath}"
- result = await self._makeGraphApiCall(endpoint)
-
- if "error" in result:
- logger.error(f"Error getting site ID: {result['error']}")
- return ""
-
- return result.get("id", "")
- except Exception as e:
- logger.error(f"Error getting site ID: {str(e)}")
- return ""
-
- async def _parseDocumentListForFoundDocuments(self, documentList: Any) -> tuple[Optional[List[Dict[str, Any]]], Optional[List[Dict[str, Any]]], Optional[str]]:
- """
- Parse documentList to extract foundDocuments and site information.
-
- Parameters:
- documentList: Document list (can be list, DocumentReferenceList, or string)
-
- Returns:
- tuple: (foundDocuments, sites, errorMessage)
- - foundDocuments: List of found documents from findDocumentPath result
- - sites: List of site dictionaries with id, displayName, webUrl
- - errorMessage: Error message if parsing failed, None otherwise
- """
- try:
- if isinstance(documentList, str):
- documentList = [documentList]
-
- # Resolve documentList to get actual documents
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- else:
- docRefList = DocumentReferenceList(references=[])
-
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
- if not chatDocuments:
- return None, None, "No documents found for the provided document list"
-
- firstDocument = chatDocuments[0]
- fileData = self.services.chat.getFileData(firstDocument.fileId)
- if not fileData:
- return None, None, None # No fileData, but not an error (might be regular file)
-
- try:
- resultData = json.loads(fileData)
- foundDocuments = resultData.get("foundDocuments", [])
-
- # If no foundDocuments, check if it's a listDocuments result (has listResults)
- if not foundDocuments and "listResults" in resultData:
- logger.info(f"documentList contains listResults from listDocuments, converting to foundDocuments format")
- listResults = resultData.get("listResults", [])
- foundDocuments = []
- siteIdFromList = None
- siteNameFromList = None
-
- for listResult in listResults:
- siteResults = listResult.get("siteResults", [])
- for siteResult in siteResults:
- items = siteResult.get("items", [])
- # Extract site info from first item if available
- if items and not siteIdFromList:
- siteNameFromList = items[0].get("siteName")
-
- for item in items:
- # Convert listDocuments item format to foundDocuments format
- if item.get("type") == "file":
- foundDoc = {
- "id": item.get("id"),
- "name": item.get("name"),
- "type": "file",
- "siteName": item.get("siteName"),
- "siteId": None, # Will be determined from site discovery
- "webUrl": item.get("webUrl"),
- "fullPath": item.get("webUrl", ""),
- "parentPath": item.get("parentPath", "")
- }
- foundDocuments.append(foundDoc)
-
- # Discover sites to get siteId if we have siteName
- if foundDocuments and siteNameFromList and not siteIdFromList:
- logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
- if matchingSites:
- siteIdFromList = matchingSites[0].get("id")
- # Update all foundDocuments with siteId
- for doc in foundDocuments:
- doc["siteId"] = siteIdFromList
- logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
-
- logger.info(f"Converted {len(foundDocuments)} files from listResults format")
-
- if not foundDocuments:
- return None, None, None # No foundDocuments, but not an error
-
- # Extract site information from foundDocuments
- firstDoc = foundDocuments[0]
- siteName = firstDoc.get("siteName")
- siteId = firstDoc.get("siteId")
-
- # If siteId is missing (from listDocuments conversion), discover sites to find it
- if siteName and not siteId:
- logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteName)
- if matchingSites:
- siteId = matchingSites[0].get("id")
- logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
-
- sites = None
- if siteName and siteId:
- sites = [{
- "id": siteId,
- "displayName": siteName,
- "webUrl": firstDoc.get("webUrl", "")
- }]
- logger.info(f"Using specific site from documentList: {siteName} (ID: {siteId})")
- elif siteName:
- # Try to get site by name
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteName)
- if matchingSites:
- sites = [{
- "id": matchingSites[0].get("id"),
- "displayName": siteName,
- "webUrl": matchingSites[0].get("webUrl", "")
- }]
- logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
- else:
- return None, None, f"Site '{siteName}' not found. Cannot determine target site."
- else:
- return None, None, "Site information missing from documentList. Cannot determine target site."
-
- return foundDocuments, sites, None
-
- except json.JSONDecodeError as e:
- return None, None, f"Invalid JSON in documentList: {str(e)}"
- except Exception as e:
- return None, None, f"Error processing documentList: {str(e)}"
-
- except Exception as e:
- logger.error(f"Error parsing documentList: {str(e)}")
- return None, None, f"Error parsing documentList: {str(e)}"
-
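# Shape of a single foundDocuments entry produced by the conversion above
# (all values hypothetical):
found_doc = {
    "id": "01ABCDEF",                # driveItem id
    "name": "budget.pdf",
    "type": "file",
    "siteName": "Finance",
    "siteId": None,                  # filled in later via site discovery
    "webUrl": "https://contoso.sharepoint.com/sites/Finance/budget.pdf",
    "fullPath": "https://contoso.sharepoint.com/sites/Finance/budget.pdf",
    "parentPath": "",
}
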
- async def _resolveSitesFromPathQuery(self, pathQuery: str) -> tuple[List[Dict[str, Any]], Optional[str]]:
- """
- Resolve sites from pathQuery using SharePoint service helper methods.
-
- Parameters:
- pathQuery (str): Path query string
-
- Returns:
- tuple: (sites, errorMessage)
- - sites: List of site dictionaries
- - errorMessage: Error message if resolution failed, None otherwise
- """
- try:
- # Validate pathQuery format
- isValid, errorMsg = self.services.sharepoint.validatePathQuery(pathQuery)
- if not isValid:
- return [], errorMsg
-
- # Resolve sites using service helper
- sites = await self.services.sharepoint.resolveSitesFromPathQuery(pathQuery)
- if not sites:
- return [], "No SharePoint sites found or accessible"
-
- return sites, None
- except Exception as e:
- logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}")
- return [], f"Error resolving sites from pathQuery: {str(e)}"
-
-
- @action
- async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Find documents and folders by name/path across sites.
- - Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
- - Output format: JSON with found items and paths.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - site (str, optional): Site hint.
- - searchQuery (str, required): Search terms or path.
- - maxResults (int, optional): Maximum items to return. Default: 1000.
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_find_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Find Document Path",
- "SharePoint Search",
- f"Query: {parameters.get('searchQuery', '*')}",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- site = parameters.get("site")
- searchQuery = parameters.get("searchQuery", "*")
- maxResults = parameters.get("maxResults", 1000)
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Parse searchQuery to extract path, search terms, search type, and options
- pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)
- logger.debug(f"Parsed searchQuery '{searchQuery}' -> pathQuery='{pathQuery}', fileQuery='{fileQuery}', searchType='{searchType}'")
-
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Extract site name from pathQuery if it contains Microsoft-standard path (/sites/SiteName/...)
- siteFromPath = None
- directSite = None
- if pathQuery and pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- siteFromPath = parsedPath.get("siteName")
- logger.info(f"Extracted site from Microsoft-standard pathQuery '{pathQuery}': '{siteFromPath}'")
-
- # Try to get site directly by path (optimization - no need to load all 60 sites)
- directSite = await self._getSiteByStandardPath(siteFromPath)
- if directSite:
- logger.info(f"Got site directly by standard path - no need to discover all sites")
- sites = [directSite]
- else:
- logger.warning(f"Could not get site directly, falling back to site discovery")
- directSite = None
- else:
- logger.warning(f"Failed to parse site from standard pathQuery '{pathQuery}'")
-
- # If we didn't get the site directly, use discovery and filtering
- if not directSite:
- # Determine which site hint to use (priority: site parameter > site from pathQuery > site_hint from searchOptions)
- siteHintToUse = site or siteFromPath or searchOptions.get("site_hint")
-
- # Discover SharePoint sites - use targeted approach when site hint is available
- self.services.chat.progressLogUpdate(operationId, 0.3, "Discovering SharePoint sites")
- if siteHintToUse:
- # When site hint is available, discover all sites first, then filter
- allSites = await self._discoverSharePointSites()
- if not allSites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- sites = self._filterSitesByHint(allSites, siteHintToUse)
- logger.info(f"Filtered sites by site hint '{siteHintToUse}' -> {len(sites)} sites")
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"No SharePoint sites found matching '{siteHintToUse}'")
- else:
- # No site hint - discover all sites
- sites = await self._discoverSharePointSites()
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- # Resolve path query into search paths
- searchPaths = self._resolvePathQuery(pathQuery)
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Searching across {len(sites)} site(s)")
-
- try:
- # Search across all discovered sites
- foundDocuments = []
- allSitesSearched = []
-
- # Handle different search approaches based on search type
- if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
- # Use unified search for folders - this is global and searches all sites
- try:
-
- # Use Microsoft Graph Search API syntax (simple term search only)
- terms = [t for t in fileQuery.split() if t.strip()]
-
- if len(terms) > 1:
- # Multiple terms: search for ALL terms (AND) - more specific results
- queryString = " AND ".join(terms)
- else:
- # Single term: search for the term
- queryString = terms[0] if terms else fileQuery
- logger.info(f"Using unified search for folders: {queryString}")
-
- payload = {
- "requests": [
- {
- "entityTypes": ["driveItem"],
- "query": {"queryString": queryString},
- "from": 0,
- "size": 50
- }
- ]
- }
- logger.info(f"Using unified search API for folders with queryString: {queryString}")
-
- # Use global search endpoint (site-specific search not available)
- unifiedResult = await self._makeGraphApiCall(
- "search/query",
- method="POST",
- data=json.dumps(payload).encode("utf-8")
- )
-
- if "error" in unifiedResult:
- logger.warning(f"Unified search failed: {unifiedResult['error']}")
- items = []
- else:
- # Flatten hits -> driveItem resources
- items = []
- for container in (unifiedResult.get("value", []) or []):
- for hitsContainer in (container.get("hitsContainers", []) or []):
- for hit in (hitsContainer.get("hits", []) or []):
- resource = hit.get("resource")
- if resource:
- items.append(resource)
-
- logger.info(f"Unified search returned {len(items)} items (pre-filter)")
-
- # Apply our improved folder detection logic
- folderItems = []
- for item in items:
- resource = item
-
- # Use the same detection logic as our test
- isFolder = self.services.sharepoint.detectFolderType(resource)
-
- if isFolder:
- folderItems.append(item)
-
- items = folderItems
- logger.info(f"Filtered to {len(items)} folders using improved detection logic")
-
- # Process unified search results - extract site information from webUrl
- for item in items:
- itemName = item.get("name", "")
- webUrl = item.get("webUrl", "")
-
- # Extract site information from webUrl
- siteName = "Unknown Site"
- siteId = "unknown"
-
- if webUrl and '/sites/' in webUrl:
- try:
- # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
- urlParts = webUrl.split('/sites/')
- if len(urlParts) > 1:
- sitePath = urlParts[1].split('/')[0]
- # Find matching site from discovered sites
- # First try to match by site name (URL path)
- for site in sites:
- if site.get("name") == sitePath:
- siteName = site.get("displayName", sitePath)
- siteId = site.get("id", "unknown")
- break
- else:
- # If no match by name, try to match by displayName
- for site in sites:
- if site.get("displayName") == sitePath:
- siteName = site.get("displayName", sitePath)
- siteId = site.get("id", "unknown")
- break
- else:
- # If no exact match, use the site path as site name
- siteName = sitePath
- # Try to find a site with similar name
- for site in sites:
- if sitePath.lower() in site.get("name", "").lower() or sitePath.lower() in site.get("displayName", "").lower():
- siteName = site.get("displayName", sitePath)
- siteId = site.get("id", "unknown")
- break
- except Exception as e:
- logger.warning(f"Error extracting site info from URL {webUrl}: {e}")
-
- # Use improved folder detection logic
- isFolder = self.services.sharepoint.detectFolderType(item)
- itemType = "folder" if isFolder else "file"
- itemPath = item.get("parentReference", {}).get("path", "")
- logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
-
- # Simple filtering like test file - just check search type
- if searchType == "files" and isFolder:
- continue # Skip folders when searching for files
- elif searchType == "folders" and not isFolder:
- continue # Skip files when searching for folders
-
- # Simple approach like test file - no complex filtering
- logger.debug(f"Item '{itemName}' found - adding to results")
-
- # Create result with full path information for proper action chaining
- parentPath = item.get("parentReference", {}).get("path", "")
-
- # Extract the full SharePoint path from webUrl or parentReference
- fullPath = ""
- if webUrl:
- # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
- if '/sites/' in webUrl:
- pathPart = webUrl.split('/sites/')[1]
- # Decode URL encoding and convert to backslash format
- decodedPath = urllib.parse.unquote(pathPart)
- fullPath = "\\" + decodedPath.replace('/', '\\')
- elif parentPath:
- # Use parentReference path if available
- fullPath = parentPath.replace('/', '\\')
-
- docInfo = {
- "id": item.get("id"),
- "name": item.get("name"),
- "type": "folder" if isFolder else "file",
- "siteName": siteName,
- "siteId": siteId,
- "webUrl": webUrl,
- "fullPath": fullPath,
- "parentPath": parentPath
- }
-
- foundDocuments.append(docInfo)
-
- logger.info(f"Found {len(foundDocuments)} documents from unified search")
-
- except Exception as e:
- logger.error(f"Error performing unified folder search: {str(e)}")
- # Fallback to site-by-site search
- pass
-
- # If no unified search was performed or it failed, fall back to site-by-site search
- if not foundDocuments:
- # Use simple approach like test file - no complex filtering
- siteScopedSites = sites
-
- for site in siteScopedSites:
- siteId = site["id"]
- siteName = site["displayName"]
- siteUrl = site["webUrl"]
-
- logger.info(f"Searching in site: {siteName} ({siteUrl})")
-
- # Check if pathQuery contains a specific folder path (not just /sites/SiteName)
- folderPath = None
- if pathQuery and pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- innerPath = parsedPath.get("innerPath", "")
- if innerPath and innerPath.strip():
- # Remove leading slash if present
- folderPath = innerPath.lstrip('/')
-
- # Generic approach: Try to find the folder, if it fails, remove first segment
- # This works for all languages because we test the actual API response
- # In SharePoint Graph API, /drive/root already points to the default document library,
- # so library names in paths should be removed
- pathSegments = [s for s in folderPath.split('/') if s.strip()]
- if len(pathSegments) > 1:
- # Try with first segment removed (first segment is likely the document library)
- testPath = '/'.join(pathSegments[1:])
- # Quick test: try to get folder info (this is fast and doesn't require full search)
- testEndpoint = f"sites/{siteId}/drive/root:/{urllib.parse.quote(testPath, safe='')}:"
- testResult = await self._makeGraphApiCall(testEndpoint)
- if testResult and "error" not in testResult:
- # Path without first segment works - first segment was likely the document library
- folderPath = testPath
- logger.info(f"Removed document library name '{pathSegments[0]}' from folder path (tested via API)")
- else:
- # Keep original path - first segment is not a document library
- logger.info(f"Keeping original folder path '{folderPath}' (first segment is not a document library)")
- elif len(pathSegments) == 1:
- # Only one segment - likely the document library itself, use root
- folderPath = None
- logger.info(f"Only one segment '{pathSegments[0]}' found, likely document library - using root")
-
- if folderPath:
- logger.info(f"Extracted folder path from pathQuery: '{folderPath}'")
- else:
- logger.info(f"Folder path resolved to root (only document library in path)")
-
- # Use Microsoft Graph API for this specific site
- # Handle empty or wildcard queries
- if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
- # For wildcard/empty queries, list all items
- if folderPath:
- # List items in specific folder
- encodedPath = urllib.parse.quote(folderPath, safe='')
- endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/children"
- logger.info(f"Listing items in folder: '{folderPath}'")
- else:
- # List all items in the drive root
- endpoint = f"sites/{siteId}/drive/root/children"
-
- # Make the API call to list items
- listResult = await self._makeGraphApiCall(endpoint)
- if "error" in listResult:
- logger.warning(f"List failed for site {siteName}: {listResult['error']}")
- continue
- # Process list results for this site
- items = listResult.get("value", [])
- logger.info(f"Retrieved {len(items)} items from site {siteName}")
- else:
- # For files, use regular search API
- # Clean the query: remove path-like syntax and invalid KQL syntax
- searchQuery = self._cleanSearchQuery(fileQuery)
- # URL-encode the query parameter
- encodedQuery = urllib.parse.quote(searchQuery, safe='')
-
- if folderPath:
- # Search in specific folder
- encodedPath = urllib.parse.quote(folderPath, safe='')
- endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/search(q='{encodedQuery}')"
- logger.info(f"Searching in folder '{folderPath}' with query: '{searchQuery}' (encoded: '{encodedQuery}')")
- else:
- # Search in drive root
- endpoint = f"sites/{siteId}/drive/root/search(q='{encodedQuery}')"
- logger.info(f"Using search API for files with query: '{searchQuery}' (encoded: '{encodedQuery}')")
-
- # Make the search API call (files)
- searchResult = await self._makeGraphApiCall(endpoint)
- if "error" in searchResult:
- logger.warning(f"Search failed for site {siteName}: {searchResult['error']}")
- continue
- # Process search results for this site (files)
- items = searchResult.get("value", [])
- logger.info(f"Retrieved {len(items)} items from site {siteName}")
-
- siteDocuments = []
-
- for item in items:
- itemName = item.get("name", "")
-
- # Use improved folder detection logic
- isFolder = self.services.sharepoint.detectFolderType(item)
-
- itemType = "folder" if isFolder else "file"
- itemPath = item.get("parentReference", {}).get("path", "")
- logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
-
- # Simple filtering like test file - just check search type
- if searchType == "files" and isFolder:
- continue # Skip folders when searching for files
- elif searchType == "folders" and not isFolder:
- continue # Skip files when searching for folders
-
- # Simple approach like test file - no complex filtering
- logger.debug(f"Item '{itemName}' found - adding to results")
-
- # Create result with full path information for proper action chaining
- webUrl = item.get("webUrl", "")
- parentPath = item.get("parentReference", {}).get("path", "")
-
- # Extract the full SharePoint path from webUrl or parentReference
- fullPath = ""
- if webUrl:
- # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
- if '/sites/' in webUrl:
- pathPart = webUrl.split('/sites/')[1]
- # Decode URL encoding and convert to backslash format
- decodedPath = urllib.parse.unquote(pathPart)
- fullPath = "\\" + decodedPath.replace('/', '\\')
- elif parentPath:
- # Use parentReference path if available
- fullPath = parentPath.replace('/', '\\')
-
- docInfo = {
- "id": item.get("id"),
- "name": item.get("name"),
- "type": "folder" if isFolder else "file",
- "siteName": siteName,
- "siteId": siteId,
- "webUrl": webUrl,
- "fullPath": fullPath,
- "parentPath": parentPath
- }
-
- siteDocuments.append(docInfo)
-
- foundDocuments.extend(siteDocuments)
- allSitesSearched.append({
- "siteName": siteName,
- "siteUrl": siteUrl,
- "siteId": siteId,
- "documentsFound": len(siteDocuments)
- })
-
- logger.info(f"Found {len(siteDocuments)} documents in site {siteName}")
-
- # Limit total results to maxResults
- if len(foundDocuments) > maxResults:
- foundDocuments = foundDocuments[:maxResults]
- logger.info(f"Limited results to {maxResults} items")
-
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {len(foundDocuments)} document(s)")
-
- resultData = {
- "searchQuery": searchQuery,
- "totalResults": len(foundDocuments),
- "maxResults": maxResults,
- "foundDocuments": foundDocuments,
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- except Exception as e:
- logger.error(f"Error searching SharePoint: {str(e)}")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=str(e))
-
- # Use default JSON format for output
- outputExtension = ".json" # Default
- outputMimeType = "application/json" # Default
-
- validationMetadata = {
- "actionType": "sharepoint.findDocumentPath",
- "searchQuery": searchQuery,
- "maxResults": maxResults,
- "totalResults": len(foundDocuments),
- "hasResults": len(foundDocuments) > 0
- }
-
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{outputExtension}",
- documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType,
- validationMetadata=validationMetadata
- )
- ]
- )
-
- except Exception as e:
- logger.error(f"Error finding document path: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass
- return ActionResult.isFailure(error=str(e))
-
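# Hedged call sketch for findDocumentPath (values hypothetical):
parameters = {
    "connectionReference": "msft-work",
    "searchQuery": "/sites/Finance/Reports:files:.pdf",  # path + type + extension
    "maxResults": 100,
}
# Other accepted query forms, per _parseSearchQuery above:
#   "folders:DELTA"          -> folder search via the unified search API
#   "site:Finance budget"    -> site hint plus search terms
#   "root document lesson"   -> search terms; never converted to a path
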
- @action
- async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Read documents from SharePoint and extract content/metadata.
- - Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
- - Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
- - Binary files (PDFs, etc.) are Base64-encoded in documentData.
- - Text files are stored as plain text in documentData.
- - Returns ActionResult with documents list for template processing.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
- - pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
- - includeMetadata (bool, optional): Include metadata. Default: True.
-
- Returns:
- - ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
- - documentName: File name
- - documentData: Base64-encoded content (binary files) or plain text (text files)
- - mimeType: MIME type (e.g., application/pdf, text/plain)
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_read_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Read Documents",
- "SharePoint Document Reading",
- "Processing document list",
- parentOperationId=parentOperationId
- )
-
- documentList = parameters.get("documentList")
- pathQuery = parameters.get("pathQuery", "*")
- connectionReference = parameters.get("connectionReference")
- includeMetadata = parameters.get("includeMetadata", True)
-
- # Validate connection reference
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Require either documentList or pathQuery
- if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList or pathQuery is required")
-
- # Get connection first
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Parse documentList to extract foundDocuments and site information
- sharePointFileIds = None
- sites = None
-
- if documentList:
- foundDocuments, sites, errorMsg = await self._parseDocumentListForFoundDocuments(documentList)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- if foundDocuments:
- # Extract SharePoint file IDs from foundDocuments
- sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
- if not sharePointFileIds:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No files found in documentList from findDocumentPath result")
- logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from documentList")
-
- # If we have SharePoint file IDs from documentList (findDocumentPath result), read them directly
- if sharePointFileIds and sites:
- # Read SharePoint files directly using their IDs
- readResults = []
- siteId = sites[0]['id']
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Reading {len(sharePointFileIds)} file(s) from SharePoint")
- for idx, fileId in enumerate(sharePointFileIds):
- try:
- self.services.chat.progressLogUpdate(operationId, 0.5 + (idx * 0.3 / len(sharePointFileIds)), f"Reading file {idx + 1}/{len(sharePointFileIds)}")
- # Get file info from SharePoint
- endpoint = f"sites/{siteId}/drive/items/{fileId}"
- fileInfo = await self._makeGraphApiCall(endpoint)
-
- if "error" in fileInfo:
- logger.warning(f"Failed to get file info for {fileId}: {fileInfo['error']}")
- continue
-
- # Get file content using SharePoint service (handles binary data correctly)
- fileName = fileInfo.get("name", f"file_{fileId}")
- fileContent = await self.services.sharepoint.downloadFile(siteId, fileId)
-
- # Create result document
- resultItem = {
- "fileId": fileId,
- "fileName": fileName,
- "sharepointFileId": fileId,
- "siteName": sites[0]['displayName'],
- "siteUrl": sites[0]['webUrl'],
- "size": fileInfo.get("size", 0),
- "createdDateTime": fileInfo.get("createdDateTime"),
- "lastModifiedDateTime": fileInfo.get("lastModifiedDateTime"),
- "webUrl": fileInfo.get("webUrl")
- }
-
- # Add content if available
- if fileContent:
- resultItem["content"] = fileContent
-
- # Add metadata if requested
- if includeMetadata:
- resultItem["metadata"] = {
- "mimeType": fileInfo.get("file", {}).get("mimeType"),
- "downloadUrl": fileInfo.get("@microsoft.graph.downloadUrl"),
- "createdBy": fileInfo.get("createdBy", {}),
- "lastModifiedBy": fileInfo.get("lastModifiedBy", {}),
- "parentReference": fileInfo.get("parentReference", {})
- }
-
- readResults.append(resultItem)
- except Exception as e:
- logger.error(f"Error reading file {fileId}: {str(e)}")
- continue
-
- if not readResults:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No files could be read from documentList")
-
- # Convert read results to ActionDocument objects
- # IMPORTANT: For binary files (PDFs), store Base64-encoded content directly in documentData
- # The system will create FileData and ChatDocument automatically
- self.services.chat.progressLogUpdate(operationId, 0.8, f"Processing {len(readResults)} document(s)")
- from modules.datamodels.datamodelChat import ActionDocument
- import base64
-
- actionDocuments = []
- for resultItem in readResults:
- fileContent = resultItem.get("content")
- fileName = resultItem.get("fileName", f"file_{resultItem.get('fileId')}")
-
- # Determine MIME type from metadata or file extension
- mimeType = "application/octet-stream"
- if resultItem.get("metadata", {}).get("mimeType"):
- mimeType = resultItem["metadata"]["mimeType"]
- elif fileName:
- if fileName.endswith('.pdf'):
- mimeType = "application/pdf"
- elif fileName.endswith('.txt'):
- mimeType = "text/plain"
- elif fileName.endswith('.json'):
- mimeType = "application/json"
-
- # For binary files (PDFs, etc.), store Base64-encoded content directly
- # The GenerationService will detect PDF mimeType and handle base64 decoding
- if fileContent and isinstance(fileContent, bytes):
- # Encode binary content as Base64 string
- base64Content = base64.b64encode(fileContent).decode('utf-8')
- validationMetadata = {
- "actionType": "sharepoint.readDocuments",
- "fileName": fileName,
- "sharepointFileId": resultItem.get("sharepointFileId"),
- "siteName": resultItem.get("siteName"),
- "mimeType": mimeType,
- "contentType": "binary",
- "size": len(fileContent),
- "includeMetadata": includeMetadata
- }
- actionDoc = ActionDocument(
- documentName=fileName,
- documentData=base64Content, # Base64 string for binary files
- mimeType=mimeType,
- validationMetadata=validationMetadata
- )
- actionDocuments.append(actionDoc)
- logger.info(f"Stored binary file {fileName} ({len(fileContent)} bytes) as Base64 in ActionDocument")
- elif fileContent:
- # Text content - store directly in documentData
- validationMetadata = {
- "actionType": "sharepoint.readDocuments",
- "fileName": fileName,
- "sharepointFileId": resultItem.get("sharepointFileId"),
- "siteName": resultItem.get("siteName"),
- "mimeType": mimeType,
- "contentType": "text",
- "includeMetadata": includeMetadata
- }
- actionDoc = ActionDocument(
- documentName=fileName,
- documentData=fileContent if isinstance(fileContent, str) else str(fileContent),
- mimeType=mimeType,
- validationMetadata=validationMetadata
- )
- actionDocuments.append(actionDoc)
- else:
- # No content - store metadata only
- docData = {
- "fileName": fileName,
- "sharepointFileId": resultItem.get("sharepointFileId"),
- "siteName": resultItem.get("siteName"),
- "siteUrl": resultItem.get("siteUrl"),
- "size": resultItem.get("size"),
- "createdDateTime": resultItem.get("createdDateTime"),
- "lastModifiedDateTime": resultItem.get("lastModifiedDateTime"),
- "webUrl": resultItem.get("webUrl")
- }
- if resultItem.get("metadata"):
- docData["metadata"] = resultItem["metadata"]
-
- validationMetadata = {
- "actionType": "sharepoint.readDocuments",
- "fileName": fileName,
- "sharepointFileId": resultItem.get("sharepointFileId"),
- "siteName": resultItem.get("siteName"),
- "mimeType": mimeType,
- "contentType": "metadata_only",
- "includeMetadata": includeMetadata
- }
- actionDoc = ActionDocument(
- documentName=fileName,
- documentData=json.dumps(docData, indent=2),
- mimeType=mimeType,
- validationMetadata=validationMetadata
- )
- actionDocuments.append(actionDoc)
-
- # Return success with action documents
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Read {len(actionDocuments)} document(s)")
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult.isSuccess(documents=actionDocuments)
-
- # If no sites from documentList, try pathQuery fallback
- if not sites and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # If still no sites, return error
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with file information, or pathQuery must be provided. Use findDocumentPath first to get file paths, or provide pathQuery directly.")
-
- # This should never be reached if logic above is correct
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Unexpected error: could not process documentList or pathQuery")
- except Exception as e:
- logger.error(f"Error reading SharePoint documents: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except Exception:
- pass # Don't fail on progress logging errors
- return ActionResult(
- success=False,
- error=str(e)
- )
-
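The Base64 convention used by readDocuments above is symmetric. A minimal sketch of the round trip, assuming a consumer that switches on mimeType as the comments above describe (function names here are illustrative, not part of this module):

import base64

def encodeBinaryPayload(fileContent: bytes) -> str:
    # bytes -> Base64 text, safe to store in documentData (a str field)
    return base64.b64encode(fileContent).decode("utf-8")

def decodeBinaryPayload(documentData: str, mimeType: str):
    # Consumers (e.g. the GenerationService referenced above) switch on mimeType:
    # binary types are decoded back to bytes, text types pass through unchanged.
    if mimeType in ("application/pdf", "application/octet-stream"):
        return base64.b64decode(documentData)
    return documentData

assert decodeBinaryPayload(encodeBinaryPayload(b"%PDF-1.7"), "application/pdf") == b"%PDF-1.7"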
- @action
- async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Upload documents to SharePoint. Choose this action only when a connectionReference is available.
- - Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
- - Output format: JSON with upload status and file info.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
- - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Upload Document",
- "SharePoint Upload",
- "Processing document list",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- documentList = parameters.get("documentList")
- pathQuery = parameters.get("pathQuery")
- if isinstance(documentList, str):
- documentList = [documentList]
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- if not documentList:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Document list is required")
-
- # Parse documentList to extract folder path and site information
- uploadPath, sites, filesToUpload, errorMsg = await self._parseDocumentListForFolder(documentList)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # If no folder path found from documentList, use pathQuery if provided
- if not uploadPath and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- uploadPath = pathQuery
- logger.info(f"Using pathQuery for upload path: {uploadPath}")
- # Resolve sites from pathQuery
- sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # Validate required parameters
- if not uploadPath:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.")
-
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Site information missing. Cannot determine target site for upload.")
-
- if not filesToUpload:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No files to upload found in documentList.")
-
- # Get connection
- self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Process upload paths
- uploadPaths = []
- if uploadPath.startswith('01'): # SharePoint item IDs start with '01' (e.g. '01PPXICCB...')
- # It's a folder ID - use it directly
- uploadPaths = [uploadPath]
- logger.info(f"Using folder ID directly for upload: {uploadPath}")
- else:
- # It's a path - resolve it normally
- uploadPaths = self._resolvePathQuery(uploadPath)
-
- # Process each document upload
- uploadResults = []
-
- # Extract file names from documents
- fileNames = [doc.fileName for doc in filesToUpload]
- logger.info(f"Using file names from documentList: {fileNames}")
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {len(filesToUpload)} document(s)")
-
- for i, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)):
- try:
- fileId = chatDocument.fileId
- fileData = self.services.chat.getFileData(fileId)
-
- if not fileData:
- logger.warning(f"File data not found for fileId: {fileId}")
- uploadResults.append({
- "fileName": fileName,
- "fileId": fileId,
- "error": "File data not found",
- "uploadStatus": "failed"
- })
- continue
-
- # Upload to the first available site (or could be made configurable)
- uploadSuccessful = False
-
- for site in sites:
- siteId = site["id"]
- siteName = site["displayName"]
- siteUrl = site["webUrl"]
-
- # Use the first upload path or default to Documents
- uploadPath = uploadPaths[0] if uploadPaths else "/Documents"
-
- # Handle wildcard paths - replace with default Documents folder
- if uploadPath == "*":
- uploadPath = "/Documents"
- logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")
-
- # Check if uploadPath is a folder ID or a regular path
- if uploadPath.startswith('01'): # SharePoint item IDs start with '01'
- # It's a folder ID - use the folder-specific upload endpoint
- uploadEndpoint = f"sites/{siteId}/drive/items/{uploadPath}:/{fileName}:/content"
- logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
- else:
- # It's a regular path - use the root-based upload endpoint
- uploadPath = uploadPath.rstrip('/') + '/' + fileName
- uploadPathClean = uploadPath.lstrip('/')
- uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
- logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")
-
- # Simple upload only handles small files (< 4MB); larger files need a resumable upload session (see sketch after this method)
- if len(fileData) < 4 * 1024 * 1024: # 4MB
-
- # Upload the file
- uploadResult = await self._makeGraphApiCall(
- uploadEndpoint,
- method="PUT",
- data=fileData
- )
-
- if "error" not in uploadResult:
- uploadResults.append({
- "fileName": fileName,
- "fileId": fileId,
- "uploadStatus": "success",
- "siteName": siteName,
- "siteUrl": siteUrl,
- "uploadPath": uploadPath,
- "uploadEndpoint": uploadEndpoint,
- "sharepointFileId": uploadResult.get("id"),
- "webUrl": uploadResult.get("webUrl"),
- "size": uploadResult.get("size"),
- "createdDateTime": uploadResult.get("createdDateTime")
- })
- uploadSuccessful = True
- break
- else:
- logger.warning(f"Upload failed to site {siteName}: {uploadResult['error']}")
- else:
- # For large files, we would need to implement resumable upload
- logger.warning(f"File too large ({len(fileData)} bytes) for site {siteName}")
- continue
-
- if not uploadSuccessful:
- uploadResults.append({
- "fileName": fileName,
- "fileId": fileId,
- "error": f"File too large ({len(fileData)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
- "uploadStatus": "failed"
- })
-
- except Exception as e:
- logger.error(f"Error uploading document {fileName}: {str(e)}")
- uploadResults.append({
- "fileName": fileName,
- "fileId": fileId,
- "error": str(e),
- "uploadStatus": "failed"
- })
-
- # Update progress for each file
- self.services.chat.progressLogUpdate(operationId, 0.5 + (i * 0.4 / len(filesToUpload)), f"Uploaded {i + 1}/{len(filesToUpload)} file(s)")
-
- # Create result data
- resultData = {
- "connectionReference": connectionReference,
- "uploadPath": uploadPath,
- "documentList": documentList,
- "fileNames": fileNames,
- "sitesAvailable": len(sites),
- "uploadResults": uploadResults,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- # Use default JSON format for output
- outputExtension = ".json" # Default
- outputMimeType = "application/json" # Default
-
- validationMetadata = {
- "actionType": "sharepoint.uploadDocument",
- "connectionReference": connectionReference,
- "uploadPath": uploadPath,
- "fileNames": fileNames,
- "uploadCount": len(uploadResults),
- "successfulUploads": len([r for r in uploadResults if r.get("uploadStatus") == "success"]),
- "failedUploads": len([r for r in uploadResults if r.get("uploadStatus") == "failed"])
- }
-
- successfulUploads = len([r for r in uploadResults if r.get("uploadStatus") == "success"])
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)")
- self.services.chat.progressLogFinish(operationId, successfulUploads > 0)
-
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{outputExtension}",
- documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType,
- validationMetadata=validationMetadata
- )
- ]
- )
-
- except Exception as e:
- logger.error(f"Error uploading to SharePoint: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except Exception:
- pass
- return ActionResult(
- success=False,
- error=str(e)
- )
-
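The small-file branch above caps uploads at 4 MB and leaves resumable upload unimplemented. A hedged sketch of the Microsoft Graph upload-session flow it refers to; httpx, the accessToken variable, and the function name are assumptions outside this module, and Graph requires chunk sizes that are multiples of 320 KiB:

import httpx

CHUNK = 10 * 327_680  # a multiple of 320 KiB, as Graph requires

async def uploadLargeFile(accessToken: str, siteId: str, path: str, data: bytes) -> dict:
    headers = {"Authorization": f"Bearer {accessToken}"}
    async with httpx.AsyncClient() as client:
        # 1) Create an upload session for the target path
        session = await client.post(
            f"https://graph.microsoft.com/v1.0/sites/{siteId}/drive/root:/{path}:/createUploadSession",
            headers=headers, json={},
        )
        session.raise_for_status()
        uploadUrl = session.json()["uploadUrl"]  # pre-authenticated, no bearer header needed
        # 2) PUT the content in ordered Content-Range chunks; the final PUT
        #    returns the created driveItem (200/201)
        for start in range(0, len(data), CHUNK):
            chunk = data[start:start + CHUNK]
            resp = await client.put(
                uploadUrl, content=chunk,
                headers={"Content-Range": f"bytes {start}-{start + len(chunk) - 1}/{len(data)}"},
            )
            resp.raise_for_status()
    return resp.json()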
- @action
- async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: List documents and folders in SharePoint paths across sites.
- - Input requirements: connectionReference (required); documentList or pathQuery (one required); includeSubfolders (optional).
- - Output format: JSON with folder items and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
- - pathQuery (str, optional): Direct folder path or ID if documentList doesn't contain a findDocumentPath result.
- - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_list_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "List Documents",
- "SharePoint Listing",
- "Processing document list",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- documentList = parameters.get("documentList")
- pathQuery = parameters.get("pathQuery", "*")
- if isinstance(documentList, str):
- documentList = [documentList]
- includeSubfolders = parameters.get("includeSubfolders", False) # Default to False for better UX
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Require either documentList or pathQuery
- if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList or pathQuery is required")
-
- # Parse documentList to extract folder path and site information
- listQuery, sites, _, errorMsg = await self._parseDocumentListForFolder(documentList)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # If no folder path found from documentList, use pathQuery if provided
- if not listQuery and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- listQuery = pathQuery
- logger.info(f"Using pathQuery for list query: {listQuery}")
- # Resolve sites from pathQuery
- sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # Validate required parameters
- if not listQuery:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")
-
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Site information missing. Cannot determine target site for list operation.")
-
- # Get connection
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
- logger.debug(f"Connection ID: {connection['id']}")
-
- self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path")
-
- # Parse listQuery to extract path, search terms, search type, and options
- pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(listQuery)
-
- # Check if listQuery is a folder ID (SharePoint item IDs start with '01', e.g. '01PPXICCB...')
- if listQuery.startswith('01'):
- # Direct folder ID - use it directly
- folderPaths = [listQuery]
- logger.info(f"Using direct folder ID: {listQuery}")
- else:
- # Remove site prefix from pathQuery before resolving (it's only for site filtering)
- pathQueryForResolve = pathQuery
- # Microsoft-standard path: /sites/SiteName/Path -> /Path
- if pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- innerPath = parsedPath.get("innerPath", "")
- pathQueryForResolve = '/' + innerPath if innerPath else '/'
- else:
- pathQueryForResolve = '/'
-
- # Remove first path segment if it looks like a document library name
- # In SharePoint Graph API, /drive/root already points to the default document library,
- # so library names in paths should be removed
- # Generic approach: if path has multiple segments, store original for fallback
- pathSegments = [s for s in pathQueryForResolve.split('/') if s.strip()]
- if len(pathSegments) > 1:
- # Path has multiple segments - first might be a library name
- # Store original for potential fallback
- originalPath = pathQueryForResolve
- # Try without first segment (assuming it's a library name)
- pathQueryForResolve = '/' + '/'.join(pathSegments[1:])
- logger.info(f"Removed first path segment (potential library name), path changed from '{originalPath}' to '{pathQueryForResolve}'")
- elif len(pathSegments) == 1:
- # Only one segment - if it's a common library-like name, use root
- firstSegmentLower = pathSegments[0].lower()
- libraryIndicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
- if any(indicator in firstSegmentLower for indicator in libraryIndicators):
- pathQueryForResolve = '/'
- logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")
-
- # Resolve path query into folder paths
- folderPaths = self._resolvePathQuery(pathQueryForResolve)
- logger.info(f"Resolved folder paths: {folderPaths}")
-
- # Process each folder path across all sites
- listResults = []
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)")
-
- for folderPath in folderPaths:
- try:
- folderResults = []
-
- for site in sites:
- siteId = site["id"]
- siteName = site["displayName"]
- siteUrl = site["webUrl"]
-
- logger.info(f"Listing folder {folderPath} in site: {siteName}")
-
- # Determine the endpoint based on folder path
- if folderPath in ["/", ""] or folderPath == "*":
- # Root folder
- endpoint = f"sites/{siteId}/drive/root/children"
- elif folderPath.startswith('01'):
- # Direct folder ID
- endpoint = f"sites/{siteId}/drive/items/{folderPath}/children"
- else:
- # Specific folder path - remove leading slash if present and URL encode
- folderPathClean = folderPath.lstrip('/')
- # URL encode the path for Graph API (spaces and special characters need encoding)
- folderPathEncoded = urllib.parse.quote(folderPathClean, safe='/')
- endpoint = f"sites/{siteId}/drive/root:/{folderPathEncoded}:/children"
-
- # Make the API call to list folder contents
- apiResult = await self._makeGraphApiCall(endpoint)
-
- if "error" in apiResult:
- logger.warning(f"Failed to list folder {folderPath} in site {siteName}: {apiResult['error']}")
- continue
-
- # Process the results
- items = apiResult.get("value", [])
- processedItems = []
-
- for item in items:
- # Use improved folder detection logic
- isFolder = self.services.sharepoint.detectFolderType(item)
-
- itemInfo = {
- "id": item.get("id"),
- "name": item.get("name"),
- "size": item.get("size", 0),
- "createdDateTime": item.get("createdDateTime"),
- "lastModifiedDateTime": item.get("lastModifiedDateTime"),
- "webUrl": item.get("webUrl"),
- "type": "folder" if isFolder else "file",
- "siteName": siteName,
- "siteUrl": siteUrl
- }
-
- # Add file-specific information
- if "file" in item:
- itemInfo.update({
- "mimeType": item["file"].get("mimeType"),
- "downloadUrl": item.get("@microsoft.graph.downloadUrl")
- })
-
- # Add folder-specific information
- if "folder" in item:
- itemInfo.update({
- "childCount": item["folder"].get("childCount", 0)
- })
-
- processedItems.append(itemInfo)
-
- # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
- if includeSubfolders:
- folderItems = [item for item in processedItems if item['type'] == 'folder']
- logger.info(f"Including subfolders - processing {len(folderItems)} folders")
- subfolderCount = 0
- maxSubfolders = 10 # Limit to prevent infinite loops
-
- for item in processedItems[:]: # Use slice to avoid modifying list during iteration
- if item["type"] == "folder" and subfolderCount < maxSubfolders:
- subfolderCount += 1
- subfolderPath = f"{folderPath.rstrip('/')}/{item['name']}"
- subfolderEndpoint = f"sites/{siteId}/drive/items/{item['id']}/children"
-
- logger.debug(f"Getting contents of subfolder: {item['name']}")
- subfolderResult = await self._makeGraphApiCall(subfolderEndpoint)
- if "error" not in subfolderResult:
- subfolderItems = subfolderResult.get("value", [])
- logger.debug(f"Found {len(subfolderItems)} items in subfolder {item['name']}")
-
- for subfolderItem in subfolderItems:
- # Use improved folder detection logic for subfolder items
- subfolderIsFolder = self.services.sharepoint.detectFolderType(subfolderItem)
-
- # Only add files and direct subfolders, NO RECURSION
- subfolderItemInfo = {
- "id": subfolderItem.get("id"),
- "name": subfolderItem.get("name"),
- "size": subfolderItem.get("size", 0),
- "createdDateTime": subfolderItem.get("createdDateTime"),
- "lastModifiedDateTime": subfolderItem.get("lastModifiedDateTime"),
- "webUrl": subfolderItem.get("webUrl"),
- "type": "folder" if subfolderIsFolder else "file",
- "parentPath": subfolderPath,
- "siteName": siteName,
- "siteUrl": siteUrl
- }
-
- if "file" in subfolderItem:
- subfolderItemInfo.update({
- "mimeType": subfolderItem["file"].get("mimeType"),
- "downloadUrl": subfolderItem.get("@microsoft.graph.downloadUrl")
- })
-
- processedItems.append(subfolderItemInfo)
- else:
- logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolderResult.get('error')}")
- elif subfolderCount >= maxSubfolders:
- logger.warning(f"Reached maximum subfolder limit ({maxSubfolders}), skipping remaining folders")
- break
-
- logger.info(f"Processed {subfolderCount} subfolders, total items: {len(processedItems)}")
-
- folderResults.append({
- "siteName": siteName,
- "siteUrl": siteUrl,
- "itemCount": len(processedItems),
- "items": processedItems
- })
-
- listResults.append({
- "folderPath": folderPath,
- "sitesProcessed": len(folderResults),
- "siteResults": folderResults
- })
-
- except Exception as e:
- logger.error(f"Error listing folder {folderPath}: {str(e)}")
- listResults.append({
- "folderPath": folderPath,
- "error": str(e),
- "siteResults": []
- })
-
- totalItems = sum(site.get("itemCount", 0) for result in listResults for site in result.get("siteResults", []))
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s)")
-
- # Create result data
- resultData = {
- "pathQuery": listQuery,
- "includeSubfolders": includeSubfolders,
- "sitesSearched": len(sites),
- "listResults": listResults,
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- # Use default JSON format for output
- outputExtension = ".json" # Default
- outputMimeType = "application/json" # Default
-
- validationMetadata = {
- "actionType": "sharepoint.listDocuments",
- "pathQuery": listQuery,
- "includeSubfolders": includeSubfolders,
- "sitesSearched": len(sites),
- "folderCount": len(listResults),
- "totalItems": totalItems
- }
-
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{outputExtension}",
- documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType,
- validationMetadata=validationMetadata
- )
- ]
- )
-
- except Exception as e:
- logger.error(f"Error listing SharePoint documents: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except Exception:
- pass
- return ActionResult(
- success=False,
- error=str(e)
- )
-
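An illustrative invocation of listDocuments and how to read its result document; the connection label and document reference are placeholders, not values from this repository:

parameters = {
    "connectionReference": "microsoft-main",                 # placeholder label
    "documentList": ["<findDocumentPath result reference>"], # placeholder reference
    "includeSubfolders": True,                               # one level deep, max 10 subfolders
}
result = await self.listDocuments(parameters)
if result.success:
    listing = json.loads(result.documents[0].documentData)
    for folder in listing["listResults"]:
        for site in folder.get("siteResults", []):
            print(site["siteName"], site["itemCount"])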
- @action
- async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Analyze usage intensity of folders and files in SharePoint.
- - Input requirements: connectionReference (required); documentList or pathQuery (one required); optional startDateTime, endDateTime, interval.
- - Output format: JSON with usage analytics grouped by time intervals.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
- - pathQuery (str, optional): Direct folder path if documentList doesn't contain a findDocumentPath result.
- - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
- - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
- - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Analyze Folder Usage",
- "SharePoint Analytics",
- "Processing document list",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- documentList = parameters.get("documentList")
- pathQuery = parameters.get("pathQuery")
- if isinstance(documentList, str):
- documentList = [documentList]
- startDateTime = parameters.get("startDateTime")
- endDateTime = parameters.get("endDateTime")
- interval = parameters.get("interval", "day")
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Require either documentList or pathQuery
- if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList or pathQuery is required")
-
- # Resolve folder/item information from documentList or pathQuery
- siteId = None
- driveId = None
- itemId = None
- folderPath = None
- folderName = None
-
- if documentList:
- foundDocuments, sites, errorMsg = await self._parseDocumentListForFoundDocuments(documentList)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- if not foundDocuments:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No documents found in documentList")
-
- # Get siteId from first document (all should be from same site)
- firstItem = foundDocuments[0]
- siteId = firstItem.get("siteId")
- if not siteId:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Site ID missing from documentList")
-
- # Get drive ID (needed for analytics)
- driveId = await self.services.sharepoint.getDriveId(siteId)
- if not driveId:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Could not determine drive ID for the site")
-
- # If no items from documentList, try pathQuery fallback
- if not foundDocuments and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- if sites:
- siteId = sites[0].get("id")
- # Parse pathQuery to find the folder/item
- pathQueryParsed, fileQuery, searchType, searchOptions = self._parseSearchQuery(pathQuery)
-
- # Extract folder path from pathQuery
- folderPath = '/'
- if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQueryParsed)
- if parsedPath:
- innerPath = parsedPath.get("innerPath", "")
- folderPath = '/' + innerPath if innerPath else '/'
- elif pathQueryParsed:
- folderPath = pathQueryParsed
-
- # Get drive ID
- driveId = await self.services.sharepoint.getDriveId(siteId)
- if not driveId:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Could not determine drive ID for the site")
-
- # Get folder/item by path
- folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
- if not folderInfo:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"Folder or file not found at path: {folderPath}")
-
- # Add pathQuery item to foundDocuments for processing
- foundDocuments = [{
- "id": folderInfo.get("id"),
- "name": folderInfo.get("name", ""),
- "type": "folder" if folderInfo.get("folder") else "file",
- "siteId": siteId,
- "fullPath": folderPath,
- "webUrl": folderInfo.get("webUrl", "")
- }]
-
- if not siteId or not driveId:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")
-
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Set access token
- if not self.services.sharepoint.setAccessTokenFromConnection(connection):
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Failed to set SharePoint access token")
-
- # Process all items from documentList or pathQuery
- # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage")
- itemsToAnalyze = []
- if foundDocuments:
- for item in foundDocuments:
- itemId = item.get("id")
- itemType = item.get("type", "").lower()
-
- # Only process folders, skip files and site-level items
- if itemId and itemType == "folder":
- itemsToAnalyze.append({
- "id": itemId,
- "name": item.get("name", ""),
- "type": itemType,
- "path": item.get("fullPath", ""),
- "webUrl": item.get("webUrl", "")
- })
-
- if not itemsToAnalyze:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")
-
- self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {len(itemsToAnalyze)} folder(s)")
-
- # Analyze each item
- allAnalytics = []
- totalActivities = 0
- uniqueUsers = set()
- activityTypes = {}
-
- # Compute actual date range values (getFolderUsageAnalytics will set defaults if None)
- # We need to compute them here to store in output, since getFolderUsageAnalytics modifies them
- actualStartDateTime = startDateTime
- actualEndDateTime = endDateTime
- if not actualEndDateTime:
- actualEndDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
- if not actualStartDateTime:
- startDate = datetime.now(timezone.utc) - timedelta(days=30)
- actualStartDateTime = startDate.isoformat().replace('+00:00', 'Z')
-
- for idx, item in enumerate(itemsToAnalyze):
- progress = 0.4 + (idx / len(itemsToAnalyze)) * 0.5
- self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{len(itemsToAnalyze)})")
-
- # Get usage analytics for this folder
- analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
- siteId=siteId,
- driveId=driveId,
- itemId=item["id"],
- startDateTime=startDateTime,
- endDateTime=endDateTime,
- interval=interval
- )
-
- if "error" in analyticsResult:
- logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}")
- # Continue with other items even if one fails
- itemAnalytics = {
- "itemId": item["id"],
- "itemName": item["name"],
- "itemType": item["type"],
- "itemPath": item["path"],
- "error": analyticsResult.get("error", "Unknown error")
- }
- else:
- # Process analytics for this item
- itemActivities = 0
- itemUsers = set()
- itemActivityTypes = {}
-
- if "value" in analyticsResult:
- for intervalData in analyticsResult["value"]:
- activities = intervalData.get("activities", [])
- for activity in activities:
- itemActivities += 1
- totalActivities += 1
-
- action = activity.get("action", {})
- actionType = action.get("verb", "unknown")
- itemActivityTypes[actionType] = itemActivityTypes.get(actionType, 0) + 1
- activityTypes[actionType] = activityTypes.get(actionType, 0) + 1
-
- actor = activity.get("actor", {})
- userPrincipalName = actor.get("userPrincipalName", "")
- if userPrincipalName:
- itemUsers.add(userPrincipalName)
- uniqueUsers.add(userPrincipalName)
-
- itemAnalytics = {
- "itemId": item["id"],
- "itemName": item["name"],
- "itemType": item["type"],
- "itemPath": item["path"],
- "webUrl": item["webUrl"],
- "analytics": analyticsResult,
- "summary": {
- "totalActivities": itemActivities,
- "uniqueUsers": len(itemUsers),
- "activityTypes": itemActivityTypes
- }
- }
-
- # Include note if analytics are not available
- if "note" in analyticsResult:
- itemAnalytics["note"] = analyticsResult["note"]
-
- allAnalytics.append(itemAnalytics)
-
- self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")
-
- # Process and format analytics data
- resultData = {
- "siteId": siteId,
- "driveId": driveId,
- "startDateTime": actualStartDateTime, # Store computed date range (not None)
- "endDateTime": actualEndDateTime, # Store computed date range (not None)
- "interval": interval,
- "itemsAnalyzed": len(itemsToAnalyze),
- "foldersAnalyzed": len([item for item in allAnalytics if item.get("itemType") == "folder"]),
- "items": allAnalytics,
- "summary": {
- "totalActivities": totalActivities,
- "uniqueUsers": len(uniqueUsers),
- "activityTypes": activityTypes
- },
- "note": f"Analyzed {len(itemsToAnalyze)} folder(s) from {actualStartDateTime} to {actualEndDateTime}. " +
- f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)." +
- (f" Note: {len([item for item in allAnalytics if 'error' in item])} folder(s) had errors or no analytics data available." if any('error' in item for item in allAnalytics) else ""),
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {len(itemsToAnalyze)} folder(s)")
-
- validationMetadata = {
- "actionType": "sharepoint.analyzeFolderUsage",
- "itemsAnalyzed": len(itemsToAnalyze),
- "interval": interval,
- "totalActivities": totalActivities,
- "uniqueUsers": len(uniqueUsers)
- }
-
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_usage_analysis_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(resultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
- ]
- )
-
- except Exception as e:
- logger.error(f"Error analyzing folder usage: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except Exception:
- pass
- return ActionResult(
- success=False,
- error=str(e)
- )
-
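The fallback window computed above (30 days back from now, ISO-8601 with a trailing 'Z') can be isolated into a small helper; a standard-library sketch, with the helper name being illustrative:

from datetime import datetime, timedelta, timezone

def defaultAnalyticsWindow(days: int = 30) -> tuple[str, str]:
    # UTC timestamps formatted like the docstring examples ("2025-11-01T00:00:00Z")
    end = datetime.now(timezone.utc)
    start = end - timedelta(days=days)
    toZ = lambda dt: dt.isoformat().replace("+00:00", "Z")
    return toZ(start), toZ(end)

startDateTime, endDateTime = defaultAnalyticsWindow()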
- @action
- async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Find SharePoint site by hostname and site path.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
- - sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")
-
- Returns:
- - ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="connectionReference parameter is required")
-
- hostname = parameters.get("hostname")
- if not hostname:
- return ActionResult.isFailure(error="hostname parameter is required")
-
- sitePath = parameters.get("sitePath")
- if not sitePath:
- return ActionResult.isFailure(error="sitePath parameter is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Find site by URL
- siteInfo = await self.services.sharepoint.findSiteByUrl(
- hostname=hostname,
- sitePath=sitePath
- )
-
- if not siteInfo:
- return ActionResult.isFailure(error=f"Site not found: {hostname}:/sites/{sitePath}")
-
- logger.info(f"Found SharePoint site: {siteInfo.get('displayName')} (ID: {siteInfo.get('id')})")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "sharepoint_site",
- "json",
- workflowContext,
- "findSiteByUrl"
- )
-
- validationMetadata = self._createValidationMetadata(
- "findSiteByUrl",
- hostname=hostname,
- sitePath=sitePath,
- siteId=siteInfo.get("id")
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(siteInfo, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error finding SharePoint site: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
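For context, resolving a site from hostname and site path is a single Graph request; a minimal sketch of the raw call, assuming the instance's _makeGraphApiCall helper used elsewhere in this class (the method name findSiteRaw is illustrative):

async def findSiteRaw(self, hostname: str, sitePath: str) -> dict:
    # GET sites/{hostname}:/sites/{path} returns id, displayName, name, webUrl
    path = sitePath.removeprefix("/sites/").strip("/")
    return await self._makeGraphApiCall(f"sites/{hostname}:/sites/{path}")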
- @action
- async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Download file from SharePoint by exact file path.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")
-
- Returns:
- - ActionResult with ActionDocument containing file content as base64-encoded bytes
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="connectionReference parameter is required")
-
- siteIdParam = parameters.get("siteId")
- if not siteIdParam:
- return ActionResult.isFailure(error="siteId parameter is required")
-
- filePath = parameters.get("filePath")
- if not filePath:
- return ActionResult.isFailure(error="filePath parameter is required")
-
- # Extract siteId from document if it's a reference
- siteId = None
- if isinstance(siteIdParam, str):
- # Try to parse from document reference
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- try:
- docList = DocumentReferenceList.from_string_list([siteIdParam])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
- if chatDocuments and len(chatDocuments) > 0:
- siteInfoJson = json.loads(chatDocuments[0].documentData)
- siteId = siteInfoJson.get("id")
- except Exception:
- pass # Fall back to treating the parameter as a raw site ID
-
- if not siteId:
- # Assume it's the site ID directly
- siteId = siteIdParam
- else:
- siteId = siteIdParam
-
- if not siteId:
- return ActionResult.isFailure(error="Could not extract siteId from parameter")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Download file
- fileContent = await self.services.sharepoint.downloadFileByPath(
- siteId=siteId,
- filePath=filePath
- )
-
- if fileContent is None:
- return ActionResult.isFailure(error=f"File not found or could not be downloaded: {filePath}")
-
- logger.info(f"Downloaded file from SharePoint: {filePath} ({len(fileContent)} bytes)")
-
- # Generate filename from filePath
- import os
- fileName = os.path.basename(filePath) or "downloaded_file"
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- fileName.split('.')[0] if '.' in fileName else fileName,
- fileName.split('.')[-1] if '.' in fileName else "bin",
- workflowContext,
- "downloadFileByPath"
- )
-
- # Encode as base64
- import base64
- fileBase64 = base64.b64encode(fileContent).decode('utf-8')
-
- validationMetadata = self._createValidationMetadata(
- "downloadFileByPath",
- siteId=siteId,
- filePath=filePath,
- fileSize=len(fileContent)
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=fileBase64,
- mimeType="application/octet-stream",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error downloading file from SharePoint: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
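On the consuming side, the document returned above carries Base64 text, not raw bytes; a short sketch of saving it to disk (function name illustrative):

import base64

def saveDownloadedDocument(document, targetDir: str = ".") -> str:
    # ActionDocument.documentData from downloadFileByPath is Base64-encoded
    payload = base64.b64decode(document.documentData)
    outPath = f"{targetDir}/{document.documentName}"
    with open(outPath, "wb") as fh:
        fh.write(payload)
    return outPath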
- @action
- async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Copy file within SharePoint.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - sourceFolder (str, required): Source folder path relative to site root
- - sourceFile (str, required): Source file name
- - destFolder (str, required): Destination folder path relative to site root
- - destFile (str, required): Destination file name
-
- Returns:
- - ActionResult with ActionDocument containing copy result
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="connectionReference parameter is required")
-
- siteIdParam = parameters.get("siteId")
- if not siteIdParam:
- return ActionResult.isFailure(error="siteId parameter is required")
-
- sourceFolder = parameters.get("sourceFolder")
- if not sourceFolder:
- return ActionResult.isFailure(error="sourceFolder parameter is required")
-
- sourceFile = parameters.get("sourceFile")
- if not sourceFile:
- return ActionResult.isFailure(error="sourceFile parameter is required")
-
- destFolder = parameters.get("destFolder")
- if not destFolder:
- return ActionResult.isFailure(error="destFolder parameter is required")
-
- destFile = parameters.get("destFile")
- if not destFile:
- return ActionResult.isFailure(error="destFile parameter is required")
-
- # Extract siteId from document if it's a reference
- siteId = None
- if isinstance(siteIdParam, str):
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- try:
- docList = DocumentReferenceList.from_string_list([siteIdParam])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
- if chatDocuments and len(chatDocuments) > 0:
- siteInfoJson = json.loads(chatDocuments[0].documentData)
- siteId = siteInfoJson.get("id")
- except Exception:
- pass # Fall back to treating the parameter as a raw site ID
-
- if not siteId:
- siteId = siteIdParam
- else:
- siteId = siteIdParam
-
- if not siteId:
- return ActionResult.isFailure(error="Could not extract siteId from parameter")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Copy file
- await self.services.sharepoint.copyFileAsync(
- siteId=siteId,
- sourceFolder=sourceFolder,
- sourceFile=sourceFile,
- destFolder=destFolder,
- destFile=destFile
- )
-
- logger.info(f"Copied file in SharePoint: {sourceFolder}/{sourceFile} -> {destFolder}/{destFile}")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "file_copy_result",
- "json",
- workflowContext,
- "copyFile"
- )
-
- result = {
- "success": True,
- "siteId": siteId,
- "sourcePath": f"{sourceFolder}/{sourceFile}",
- "destPath": f"{destFolder}/{destFile}"
- }
-
- validationMetadata = self._createValidationMetadata(
- "copyFile",
- siteId=siteId,
- sourcePath=f"{sourceFolder}/{sourceFile}",
- destPath=f"{destFolder}/{destFile}"
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- # Handle file not found gracefully
- if "itemNotFound" in str(e) or "404" in str(e):
- logger.warning(f"File not found for copy: {parameters.get('sourceFolder')}/{parameters.get('sourceFile')}")
- # Return success with skipped status
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "file_copy_result",
- "json",
- workflowContext,
- "copyFile"
- )
-
- result = {
- "success": True,
- "skipped": True,
- "reason": "File not found (may not exist yet)"
- }
-
- validationMetadata = self._createValidationMetadata(
- "copyFile",
- skipped=True
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- errorMsg = f"Error copying file in SharePoint: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
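Graph's item copy is asynchronous: the POST answers 202 Accepted with a monitor URL rather than the new item, consistent with copyFile above ignoring the return value of copyFileAsync. A hedged sketch of the underlying request; httpx, the token handling, and the function name are assumptions outside this module:

import httpx

async def copyItemRaw(accessToken: str, driveId: str, itemId: str,
                      destFolderId: str, newName: str) -> str:
    body = {"parentReference": {"driveId": driveId, "id": destFolderId}, "name": newName}
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"https://graph.microsoft.com/v1.0/drives/{driveId}/items/{itemId}/copy",
            headers={"Authorization": f"Bearer {accessToken}"}, json=body,
        )
    resp.raise_for_status()  # expect 202 Accepted
    return resp.headers["Location"]  # poll this URL to track copy completion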
- @action
- async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Upload raw file content (bytes) to SharePoint.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - folderPath (str, required): Folder path relative to site root
- - fileName (str, required): File name
- - content (str, required): Document reference containing file content as base64-encoded bytes
-
- Returns:
- - ActionResult with ActionDocument containing upload result
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="connectionReference parameter is required")
-
- siteIdParam = parameters.get("siteId")
- if not siteIdParam:
- return ActionResult.isFailure(error="siteId parameter is required")
-
- folderPath = parameters.get("folderPath")
- if not folderPath:
- return ActionResult.isFailure(error="folderPath parameter is required")
-
- fileName = parameters.get("fileName")
- if not fileName:
- return ActionResult.isFailure(error="fileName parameter is required")
-
- contentParam = parameters.get("content")
- if not contentParam:
- return ActionResult.isFailure(error="content parameter is required")
-
- # Extract siteId from document if it's a reference
- siteId = None
- if isinstance(siteIdParam, str):
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- try:
- docList = DocumentReferenceList.from_string_list([siteIdParam])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
- if chatDocuments and len(chatDocuments) > 0:
- siteInfoJson = json.loads(chatDocuments[0].documentData)
- siteId = siteInfoJson.get("id")
- except Exception:
- pass # Fall back to treating the parameter as a raw site ID
-
- if not siteId:
- siteId = siteIdParam
- else:
- siteId = siteIdParam
-
- if not siteId:
- return ActionResult.isFailure(error="Could not extract siteId from parameter")
-
- # Get file content from document
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- docList = DocumentReferenceList.from_string_list([contentParam] if isinstance(contentParam, str) else contentParam)
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
- if not chatDocuments or len(chatDocuments) == 0:
- return ActionResult.isFailure(error="Could not get file content from document reference")
-
- fileContentBase64 = chatDocuments[0].documentData
-
- # Decode base64
- import base64
- try:
- fileContent = base64.b64decode(fileContentBase64)
- except Exception as e:
- return ActionResult.isFailure(error=f"Could not decode base64 file content: {str(e)}")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Upload file
- uploadResult = await self.services.sharepoint.uploadFile(
- siteId=siteId,
- folderPath=folderPath,
- fileName=fileName,
- content=fileContent
- )
-
- if "error" in uploadResult:
- return ActionResult.isFailure(error=f"Upload failed: {uploadResult['error']}")
-
- logger.info(f"Uploaded file to SharePoint: {folderPath}/{fileName} ({len(fileContent)} bytes)")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "file_upload_result",
- "json",
- workflowContext,
- "uploadFile"
- )
-
- result = {
- "success": True,
- "siteId": siteId,
- "filePath": f"{folderPath}/{fileName}",
- "fileSize": len(fileContent),
- "uploadResult": uploadResult
- }
-
- validationMetadata = self._createValidationMetadata(
- "uploadFile",
- siteId=siteId,
- filePath=f"{folderPath}/{fileName}",
- fileSize=len(fileContent)
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error uploading file to SharePoint: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
\ No newline at end of file
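Taken together, findSiteByUrl, downloadFileByPath and uploadFile form a byte-level copy pipeline; an illustrative chain where every label, path and document reference is a placeholder (the exact reference syntax for passing one action's result document into the next depends on the chat service):

site = await self.findSiteByUrl({
    "connectionReference": "microsoft-main",   # placeholder label
    "hostname": "example.sharepoint.com",
    "sitePath": "SteeringBPM",
})
download = await self.downloadFileByPath({
    "connectionReference": "microsoft-main",
    "siteId": "<reference to the findSiteByUrl result document>",
    "filePath": "/General/report.xlsx",        # illustrative path
})
await self.uploadFile({
    "connectionReference": "microsoft-main",
    "siteId": "<reference to the findSiteByUrl result document>",
    "folderPath": "/Archive",                  # illustrative target folder
    "fileName": "report.xlsx",
    "content": "<reference to the downloadFileByPath result document>",
})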
diff --git a/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py b/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
index 075c8b96..a4bf18b6 100644
--- a/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
+++ b/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
@@ -1,36 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Analyze Folder Usage action for SharePoint operations.
-Analyzes usage intensity of folders and files in SharePoint.
-"""
-
import logging
import time
import json
from datetime import datetime, timezone, timedelta
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Analyze usage intensity of folders and files in SharePoint.
- - Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
- - Output format: JSON with usage analytics grouped by time intervals.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document list reference(s) containing findDocumentPath result.
- - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
- - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
- - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodSharepoint/actions/copyFile.py b/modules/workflows/methods/methodSharepoint/actions/copyFile.py
index 1b6d821d..f149e482 100644
--- a/modules/workflows/methods/methodSharepoint/actions/copyFile.py
+++ b/modules/workflows/methods/methodSharepoint/actions/copyFile.py
@@ -1,35 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Copy File action for SharePoint operations.
-Copies file within SharePoint.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Copy file within SharePoint.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - sourceFolder (str, required): Source folder path relative to site root
- - sourceFile (str, required): Source file name
- - destFolder (str, required): Destination folder path relative to site root
- - destFile (str, required): Destination file name
-
- Returns:
- - ActionResult with ActionDocument containing copy result
- """
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:
diff --git a/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py b/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
index d6e291a8..c64a6637 100644
--- a/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
+++ b/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Download File By Path action for SharePoint operations.
-Downloads file from SharePoint by exact file path.
-"""
-
import logging
import json
import base64
import os
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Download file from SharePoint by exact file path.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")
-
- Returns:
- - ActionResult with ActionDocument containing file content as base64-encoded bytes
- """
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:
diff --git a/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py b/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
index 01c1baf3..722dbc99 100644
--- a/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
+++ b/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
@@ -1,35 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Find Document Path action for SharePoint operations.
-Finds documents and folders by name/path across SharePoint sites.
-"""
-
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Find documents and folders by name/path across sites.
- - Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
- - Output format: JSON with found items and paths.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - site (str, optional): Site hint.
- - searchQuery (str, required): Search terms or path.
- - maxResults (int, optional): Maximum items to return. Default: 1000.
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py b/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
index 405b35f2..62b6dd94 100644
--- a/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
+++ b/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
@@ -1,32 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Find Site By URL action for SharePoint operations.
-Finds SharePoint site by hostname and site path.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Find SharePoint site by hostname and site path.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
- - sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")
-
- Returns:
- - ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
- """
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:
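The removed docstring noted that sitePath accepts both "SteeringBPM" and "/sites/SteeringBPM". A small sketch of the normalization this implies; the lookup URL format is an assumption based on the Microsoft Graph sites endpoint, not something this patch shows.

```python
# Normalize both accepted sitePath spellings to one lookup path.
# The URL format is an assumption (Microsoft Graph sites endpoint).
def buildSiteLookupPath(hostname: str, sitePath: str) -> str:
    cleaned = sitePath.strip("/")
    if not cleaned.startswith("sites/"):
        cleaned = f"sites/{cleaned}"
    return f"https://graph.microsoft.com/v1.0/sites/{hostname}:/{cleaned}"

# Both spellings resolve to the same path:
assert (buildSiteLookupPath("example.sharepoint.com", "SteeringBPM")
        == buildSiteLookupPath("example.sharepoint.com", "/sites/SteeringBPM"))
```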
diff --git a/modules/workflows/methods/methodSharepoint/actions/listDocuments.py b/modules/workflows/methods/methodSharepoint/actions/listDocuments.py
index 78aabadc..318271c3 100644
--- a/modules/workflows/methods/methodSharepoint/actions/listDocuments.py
+++ b/modules/workflows/methods/methodSharepoint/actions/listDocuments.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-List Documents action for SharePoint operations.
-Lists documents and folders in SharePoint paths across sites.
-"""
-
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: List documents and folders in SharePoint paths across sites.
- - Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional).
- - Output format: JSON with folder items and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document list reference(s) containing findDocumentPath result.
- - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
- """
operationId = None
try:
# Init progress logger
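listDocuments consumes documentList references produced by findDocumentPath. A sketch of the two parameter dicts in that chain, per the removed docstrings; the reference string format is hypothetical, since the real reference encoding is defined elsewhere in the codebase.

```python
# Step 1: locate items across sites (hypothetical values throughout).
findParameters = {
    "connectionReference": "microsoft-main",
    "searchQuery": "Quarterly Reports",
    "maxResults": 50,           # the removed docstring's default was 1000
}

# Step 2: list folder contents using the findDocumentPath result reference.
listParameters = {
    "connectionReference": "microsoft-main",
    "documentList": ["ref:findDocumentPath:0"],  # hypothetical reference format
    "includeSubfolders": True,  # one level of subfolders, per the docstring
}
```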
diff --git a/modules/workflows/methods/methodSharepoint/actions/readDocuments.py b/modules/workflows/methods/methodSharepoint/actions/readDocuments.py
index 2bc2688c..73cdb730 100644
--- a/modules/workflows/methods/methodSharepoint/actions/readDocuments.py
+++ b/modules/workflows/methods/methodSharepoint/actions/readDocuments.py
@@ -1,44 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Read Documents action for SharePoint operations.
-Reads documents from SharePoint and extracts content/metadata.
-"""
-
import logging
import time
import json
import base64
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Read documents from SharePoint and extract content/metadata.
- - Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
- - Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
- - Binary files (PDFs, etc.) are Base64-encoded in documentData.
- - Text files are stored as plain text in documentData.
- - Returns ActionResult with documents list for template processing.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
- - pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
- - includeMetadata (bool, optional): Include metadata. Default: True.
-
- Returns:
- - ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
- - documentName: File name
- - documentData: Base64-encoded content (binary files) or plain text (text files)
- - mimeType: MIME type (e.g., application/pdf, text/plain)
- """
operationId = None
try:
# Init progress logger
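The removed readDocuments docstring pinned down the output convention: binary files arrive base64-encoded in documentData, text files as plain text, with mimeType disambiguating. A minimal consumer sketch of that convention; the text-detection heuristic is my assumption.

```python
import base64
from typing import Union

# Decode an ActionDocument-style dict per the documented convention:
# text mime types carry plain text, everything else is base64-encoded.
def extractDocumentContent(document: dict) -> Union[str, bytes]:
    mimeType = document.get("mimeType", "")
    data = document.get("documentData", "")
    if mimeType.startswith("text/") or mimeType == "application/json":
        return data                    # already plain text
    return base64.b64decode(data)      # binary payload, e.g. application/pdf

print(extractDocumentContent({"documentName": "notes.txt",
                              "documentData": "hello",
                              "mimeType": "text/plain"}))
```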
diff --git a/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
index 82c93434..cfe4cf86 100644
--- a/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
+++ b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Upload Document action for SharePoint operations.
-Uploads documents to SharePoint.
-"""
-
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- Purpose: Upload documents to SharePoint. Choose this action only when a connectionReference is available.
- - Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
- - Output format: JSON with upload status and file info.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
- - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodSharepoint/actions/uploadFile.py b/modules/workflows/methods/methodSharepoint/actions/uploadFile.py
index 3d8a9499..1f469b80 100644
--- a/modules/workflows/methods/methodSharepoint/actions/uploadFile.py
+++ b/modules/workflows/methods/methodSharepoint/actions/uploadFile.py
@@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Upload File action for SharePoint operations.
-Uploads raw file content (bytes) to SharePoint.
-"""
-
import logging
import json
import base64
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Upload raw file content (bytes) to SharePoint.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - folderPath (str, required): Folder path relative to site root
- - fileName (str, required): File name
- - content (str, required): Document reference containing file content as base64-encoded bytes
-
- Returns:
- - ActionResult with ActionDocument containing upload result
- """
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:
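uploadFile is the inverse of downloadFileByPath: the content parameter must hold base64-encoded bytes. A sketch of preparing that parameter; all values are placeholders.

```python
import base64

# Encode raw bytes into the base64 string uploadFile expects in "content".
rawBytes = b"%PDF-1.7 example payload"
uploadParameters = {
    "connectionReference": "microsoft-main",
    "siteId": "contoso.sharepoint.com,siteGuid,webGuid",
    "folderPath": "/General/Uploads",   # relative to site root
    "fileName": "example.pdf",
    "content": base64.b64encode(rawBytes).decode("ascii"),
}
```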
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index 36673ed0..fe0ee5bd 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -24,7 +24,7 @@ class ContentValidator:
self.services = services
self.learningEngine = learningEngine
- async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
+ async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
Args:
@@ -34,8 +34,9 @@ class ContentValidator:
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
actionHistory: Optional list of previously executed actions in the workflow (for multi-step workflow context)
+ context: Optional context object to access all documents delivered in the current round
"""
- return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory)
+ return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory, context)
def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]:
"""Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs."""
@@ -533,7 +534,7 @@ class ContentValidator:
return False
- async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
+ async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
"""AI-based comprehensive validation - generic approach"""
try:
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
@@ -636,9 +637,46 @@ class ContentValidator:
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
+ # Build document index context (all documents delivered in current round)
+ documentIndexContext = ""
+ if context and self.services and hasattr(self.services, 'chat') and hasattr(self.services, 'workflow') and self.services.workflow:
+ try:
+ documentIndex = self.services.chat.getAvailableDocuments(self.services.workflow)
+ if documentIndex and documentIndex.strip() and documentIndex != "No documents available":
+ # Extract only "Current round documents" section if present
+ lines = documentIndex.split('\n')
+ currentRoundSection = []
+ inCurrentRound = False
+ for line in lines:
+ if "Current round documents:" in line:
+ inCurrentRound = True
+ currentRoundSection.append(line)
+ elif inCurrentRound:
+ if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
+ currentRoundSection.append(line)
+ elif line.strip() == "":
+ # Empty line is okay, continue
+ continue
+ elif "Past rounds documents:" in line or "AVAILABLE_CONNECTIONS_INDEX:" in line:
+ # End of current round section
+ break
+ else:
+ # Still in current round section
+ currentRoundSection.append(line)
+
+ if currentRoundSection:
+ documentIndexContext = "\n\n=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection)
+ documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round, not just the ones being validated in this step. Use this to check if all required formats/documents are present across the entire round."
+ except Exception as e:
+ logger.warning(f"Error extracting document index for validation: {str(e)}")
+ # Continue without document index - not critical
+
+ # Transform criteria that require data access into metadata-only checks
+ transformedCriteria = self._transformCriteriaForMetadataOnly(successCriteria)
+
# Format success criteria for display with index numbers
- if successCriteria:
- criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
+ if transformedCriteria:
+ criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(transformedCriteria)])
else:
criteriaDisplay = "[]"
@@ -647,7 +685,7 @@ class ContentValidator:
=== TASK INFORMATION ===
{objectiveLabel}: '{objectiveText}'
EXPECTED DATA TYPE: {dataType}
-EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}
+EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}{documentIndexContext}
=== VALIDATION INSTRUCTIONS ===
@@ -661,6 +699,7 @@ VALIDATION RULES:
5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done").
6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria.
7. NO ASSUMPTIONS: Do NOT assume content was AI-generated vs extracted. If a section exists with content_type, the content was delivered. Only validate what is present in the metadata.
+8. DATA-LEVEL CRITERIA TRANSFORMATION: Criteria mentioning accuracy percentages (e.g., "95% accuracy"), completeness percentages (e.g., "98% completeness"), or "all X extracted" have been transformed to metadata-only checks. For accuracy/completeness: Check if contentPartIds reference all source documents and if structure metadata shows expected data types (tables, lists, etc.) exist. For "all X extracted": Check if contentPartIds reference all source documents mentioned in ACTION HISTORY or document index. NEVER attempt to verify accuracy/completeness by comparing actual data values - only use metadata indicators.
VALIDATION STEPS:
- Check ACTION HISTORY for process-oriented criteria
@@ -812,6 +851,52 @@ DELIVERED DOCUMENTS ({len(documents)} items):
logger.error(f"AI validation failed: {str(e)}")
raise
+ def _transformCriteriaForMetadataOnly(self, criteria: List[str]) -> List[str]:
+ """
+ Transform criteria that require data access into metadata-only checks.
+
+ Preserves the original criterion text, prefixing it with "[METADATA ONLY]" and
+ appending concrete metadata-only checks (contentPartIds coverage, jsonStructure shape).
+ Patterns handled:
+ - accuracy percentages (e.g., "95% accuracy")
+ - completeness percentages and "all X extracted" phrasings (e.g., "98% completeness")
+ - "no missing/incorrect data" and unqualified "data is accurate"
+ """
+ if not criteria:
+ return []
+
+ transformed = []
+ for criterion in criteria:
+ original = criterion.strip()
+ transformedCriterion = original
+
+ # Pattern: accuracy percentage (e.g., "95% accuracy", "accuracy meets or exceeds 95% threshold")
+ if re.search(r'\d+%?\s*accuracy|accuracy.*\d+%', original, re.IGNORECASE):
+ # Extract the main subject (e.g., "transactions", "data", etc.)
+ subjectMatch = re.search(r'(transactions?|data|items?|records?|entries?)', original, re.IGNORECASE)
+ subject = subjectMatch.group(1).lower() if subjectMatch else "data"
+
+ transformedCriterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference all source documents and jsonStructure shows expected {subject} structure exists (tables/lists with rowCount/itemCount > 0). Cannot verify actual {subject} accuracy values from metadata."
+
+ # Pattern: completeness percentage or "all X extracted" (e.g., "98% completeness", "all transactions extracted")
+ elif re.search(r'\d+%?\s*completeness|completeness.*\d+%|all\s+.*extracted|extract.*all', original, re.IGNORECASE):
+ # Extract the main subject
+ subjectMatch = re.search(r'(transactions?|data|items?|records?|entries?|statements?|documents?)', original, re.IGNORECASE)
+ subject = subjectMatch.group(1).lower() if subjectMatch else "items"
+
+ transformedCriterion = f"[METADATA ONLY] {original}: Verify that contentPartIds reference all source documents mentioned in ACTION HISTORY/document index, and jsonStructure shows {subject} structure exists (check rowCount/itemCount in tables/lists). Cannot verify actual {subject} count from metadata."
+
+ # Pattern: "no missing data" or "no incorrect data"
+ elif re.search(r'no\s+missing|no\s+incorrect|no\s+errors?', original, re.IGNORECASE):
+ transformed_criterion = f"[METADATA ONLY] {original}: Check that jsonStructure.content_type shows expected data types present (tables, lists, etc.) and contentPreview.looksLikeRenderedContent=true. Cannot verify actual data values from metadata."
+
+ # Pattern: data accuracy without percentage (e.g., "data is accurate", "accurate data")
+ elif re.search(r'data.*accurate|accurate.*data', original, re.IGNORECASE) and '%' not in original:
+ transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference source documents and jsonStructure shows expected data structure exists. Cannot verify actual data accuracy values from metadata."
+
+ transformed.append(transformedCriterion)
+
+ return transformed
+
def _createFailedValidationResult(self, errorMessage: str) -> Dict[str, Any]:
"""Create a standardized failed validation result"""
return {
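To see what the new criteria transformation does end to end, here is a standalone replay of its first two regex patterns against sample criteria; the samples are invented, the regexes are copied from the function above.

```python
import re

sampleCriteria = [
    "95% accuracy for extracted transactions",     # accuracy pattern
    "all transactions extracted from statements",  # completeness pattern
    "output is delivered in DOCX format",          # no data-level pattern
]

for criterion in sampleCriteria:
    if re.search(r'\d+%?\s*accuracy|accuracy.*\d+%', criterion, re.IGNORECASE):
        print(f"metadata-only (accuracy):     {criterion}")
    elif re.search(r'\d+%?\s*completeness|completeness.*\d+%|all\s+.*extracted|extract.*all',
                   criterion, re.IGNORECASE):
        print(f"metadata-only (completeness): {criterion}")
    else:
        print(f"unchanged:                    {criterion}")
```

The "[METADATA ONLY]" prefix keeps the original criterion visible to the validator model while redirecting it toward checks it can actually perform on metadata, which is what validation rule 8 above instructs.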
diff --git a/modules/workflows/processing/modes/modeDynamic.py b/modules/workflows/processing/modes/modeDynamic.py
index 50889b22..92e04e96 100644
--- a/modules/workflows/processing/modes/modeDynamic.py
+++ b/modules/workflows/processing/modes/modeDynamic.py
@@ -158,7 +158,7 @@ class DynamicMode(BaseMode):
actionName = selection.get('action', 'unknown')
actionParameters = selection.get('parameters', {})
actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
- validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory)
+ validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory, context)
observation.contentValidation = validationResult
quality_score = validationResult.get('qualityScore', 0.0)
if quality_score is None:
@@ -194,6 +194,31 @@ class DynamicMode(BaseMode):
if decision: # Only append if decision is not None
context.previousReviewResult.append(decision)
+ # Send ChatLog message if userMessage is present in refinement response
+ if decision and decision.userMessage:
+ try:
+ currentRound = getattr(workflow, 'currentRound', 0)
+ currentTask = getattr(workflow, 'currentTask', 0)
+
+ messageData = {
+ "workflowId": workflow.id,
+ "role": "assistant",
+ "message": decision.userMessage,
+ "status": "refinement",
+ "sequenceNr": len(workflow.messages) + 1,
+ "publishedAt": self.services.utils.timestampGetUtc(),
+ "documentsLabel": None,
+ "documents": [],
+ "roundNumber": currentRound,
+ "taskNumber": currentTask,
+ "actionNumber": step
+ }
+
+ self.services.chat.storeMessageWithDocuments(workflow, messageData, [])
+ logger.info(f"Sent refinement userMessage to UI: {decision.userMessage[:100]}...")
+ except Exception as e:
+ logger.warning(f"Failed to send refinement userMessage to UI: {str(e)}")
+
# Store next action guidance from decision for use in next iteration
if decision and decision.status == "continue" and decision.nextAction:
# Set nextActionGuidance directly (now defined in TaskContext model)
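The new ChatLog path is deliberately defensive: missing workflow counters fall back to 0 via getattr, and failures only log a warning rather than aborting the step. A tiny standalone sketch of that fallback behavior, using a fabricated workflow stub.

```python
# Fabricated stand-in for the workflow object; the real model lives elsewhere.
class WorkflowStub:
    messages = ["m1", "m2"]
    currentRound = 3
    # no currentTask attribute, to exercise the fallback

workflow = WorkflowStub()
currentRound = getattr(workflow, "currentRound", 0)  # -> 3
currentTask = getattr(workflow, "currentTask", 0)    # -> 0 (fallback)
sequenceNr = len(workflow.messages) + 1              # -> 3, next message slot

userMessage = "Refining the extraction to include the missing columns " * 4
print(userMessage[:100] + "...")  # same truncation as the log line above
```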
diff --git a/tests/functional/test10_document_generation_formats.py b/tests/functional/test10_document_generation_formats.py
index 8d963643..9ce9b367 100644
--- a/tests/functional/test10_document_generation_formats.py
+++ b/tests/functional/test10_document_generation_formats.py
@@ -413,12 +413,11 @@ class DocumentGenerationFormatsTester10:
async def testAllFormats(self) -> Dict[str, Any]:
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
print("\n" + "="*80)
- print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
+ print("TESTING DOCUMENT GENERATION IN ALL FORMATS")
print("="*80)
- # Only test HTML format
- formats = ["html"]
- # formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats
+ # Test all document formats
+ formats = ["docx", "xlsx", "pptx", "pdf", "html"]
results = {}
for format in formats:
@@ -471,7 +470,7 @@ class DocumentGenerationFormatsTester10:
async def runTest(self):
"""Run the complete test."""
print("\n" + "="*80)
- print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
+ print("DOCUMENT GENERATION FORMATS TEST 10 - ALL FORMATS")
print("="*80)
try: