diff --git a/modules/aicore/aicorePluginPerplexity.py b/modules/aicore/aicorePluginPerplexity.py index 3f1d9815..e129b047 100644 --- a/modules/aicore/aicorePluginPerplexity.py +++ b/modules/aicore/aicorePluginPerplexity.py @@ -70,7 +70,7 @@ class AiPerplexity(BaseConnectorAi): priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.ADVANCED, operationTypes=createOperationTypeRatings( - (OperationTypeEnum.WEB_SEARCH, 9), + (OperationTypeEnum.WEB_SEARCH_DATA, 9), (OperationTypeEnum.WEB_CRAWL, 7) ), version="sonar", @@ -93,7 +93,7 @@ class AiPerplexity(BaseConnectorAi): priority=PriorityEnum.QUALITY, processingMode=ProcessingModeEnum.DETAILED, operationTypes=createOperationTypeRatings( - (OperationTypeEnum.WEB_SEARCH, 9), + (OperationTypeEnum.WEB_SEARCH_DATA, 9), (OperationTypeEnum.WEB_CRAWL, 8) ), version="sonar-pro", @@ -211,7 +211,7 @@ class AiPerplexity(BaseConnectorAi): """ operationType = modelCall.options.operationType - if operationType == OperationTypeEnum.WEB_SEARCH: + if operationType == OperationTypeEnum.WEB_SEARCH_DATA: return await self.webSearch(modelCall) elif operationType == OperationTypeEnum.WEB_CRAWL: return await self.webCrawl(modelCall) @@ -257,7 +257,7 @@ class AiPerplexity(BaseConnectorAi): async def webSearch(self, modelCall: AiModelCall) -> AiModelResponse: """ - WEB_SEARCH operation - returns list of URLs based on search query. + WEB_SEARCH_DATA operation - returns list of URLs based on search query. Args: modelCall: AiModelCall with AiCallPromptWebSearch as prompt @@ -340,7 +340,7 @@ Return ONLY a JSON array of URLs, no additional text: content=content, success=True, modelId=model.name, - metadata={"response_id": apiResponse.get("id", ""), "operation": "WEB_SEARCH"} + metadata={"response_id": apiResponse.get("id", ""), "operation": "WEB_SEARCH_DATA"} ) except Exception as e: diff --git a/modules/aicore/aicorePluginTavily.py b/modules/aicore/aicorePluginTavily.py index 65a3aa6e..90718683 100644 --- a/modules/aicore/aicorePluginTavily.py +++ b/modules/aicore/aicorePluginTavily.py @@ -67,7 +67,7 @@ class AiTavily(BaseConnectorAi): priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.BASIC, operationTypes=createOperationTypeRatings( - (OperationTypeEnum.WEB_SEARCH, 9), + (OperationTypeEnum.WEB_SEARCH_DATA, 9), (OperationTypeEnum.WEB_CRAWL, 10) ), version="tavily-search", @@ -445,7 +445,7 @@ class AiTavily(BaseConnectorAi): """ operationType = modelCall.options.operationType - if operationType == OperationTypeEnum.WEB_SEARCH: + if operationType == OperationTypeEnum.WEB_SEARCH_DATA: return await self.webSearch(modelCall) elif operationType == OperationTypeEnum.WEB_CRAWL: return await self.webCrawl(modelCall) @@ -459,7 +459,7 @@ class AiTavily(BaseConnectorAi): async def webSearch(self, modelCall: AiModelCall) -> "AiModelResponse": """ - WEB_SEARCH operation - returns list of URLs using Tavily search. + WEB_SEARCH_DATA operation - returns list of URLs using Tavily search. Args: modelCall: AiModelCall with AiCallPromptWebSearch as prompt @@ -516,7 +516,7 @@ class AiTavily(BaseConnectorAi): return AiModelResponse( content=json.dumps(urls, indent=2), success=True, - metadata={"total_urls": len(urls), "operation": "WEB_SEARCH"} + metadata={"total_urls": len(urls), "operation": "WEB_SEARCH_DATA"} ) except Exception as e: diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py index 2434c451..9e680164 100644 --- a/modules/datamodels/datamodelAi.py +++ b/modules/datamodels/datamodelAi.py @@ -25,7 +25,7 @@ class OperationTypeEnum(str, Enum): IMAGE_GENERATE = "imageGenerate" # Web Operations - WEB_SEARCH = "webSearch" # Returns list of URLs only + WEB_SEARCH_DATA = "webSearch" # Returns list of URLs only WEB_CRAWL = "webCrawl" # Web crawl for a given URL @@ -50,7 +50,7 @@ def createOperationTypeRatings(*ratings: Tuple[OperationTypeEnum, int]) -> List[ Usage: operationTypes = createOperationTypeRatings( (OperationTypeEnum.DATA_ANALYSE, 8), - (OperationTypeEnum.WEB_SEARCH, 10), + (OperationTypeEnum.WEB_SEARCH_DATA, 10), (OperationTypeEnum.WEB_CRAWL, 9) ) """ @@ -197,7 +197,7 @@ class AiModelResponse(BaseModel): # Structured prompt models for specialized operations class AiCallPromptWebSearch(BaseModel): - """Structured prompt format for WEB_SEARCH operation - returns list of URLs.""" + """Structured prompt format for WEB_SEARCH_DATA operation - returns list of URLs.""" instruction: str = Field(description="Search instruction/query for finding relevant URLs") country: Optional[str] = Field(default=None, description="Two-digit country code (lowercase, e.g., ch, us, de, fr)") diff --git a/modules/datamodels/datamodelWorkflowActions.py b/modules/datamodels/datamodelWorkflowActions.py index a3812955..8bac1fd5 100644 --- a/modules/datamodels/datamodelWorkflowActions.py +++ b/modules/datamodels/datamodelWorkflowActions.py @@ -56,6 +56,7 @@ class WorkflowActionDefinition(BaseModel): ) category: Optional[str] = Field(None, description="Action category for grouping") tags: List[str] = Field(default_factory=list, description="Tags for search/filtering") + dynamicMode: bool = Field(False, description="Whether this action is available in dynamic workflow mode (only tagged actions are visible in action planning and refinement prompts)") # Register model labels for UI @@ -68,6 +69,7 @@ registerModelLabels( "parameters": {"en": "Parameters", "fr": "Paramètres"}, "category": {"en": "Category", "fr": "Catégorie"}, "tags": {"en": "Tags", "fr": "Étiquettes"}, + "dynamicMode": {"en": "Dynamic Mode", "fr": "Mode dynamique"}, }, ) diff --git a/modules/services/serviceAi/CONTENT_EXTRACTION_ANALYSIS.md b/modules/services/serviceAi/CONTENT_EXTRACTION_ANALYSIS.md new file mode 100644 index 00000000..b83d328f --- /dev/null +++ b/modules/services/serviceAi/CONTENT_EXTRACTION_ANALYSIS.md @@ -0,0 +1,2564 @@ +# Content Extraction Logic Analysis - ai.process Action + +## Overview +This document provides a stepwise structured analysis of the content extraction logic in the main AI call (`ai.process` action). It covers input formats, document processing, AI service communication, and content handling. + +--- + +## 1. Input Content Formats + +### 1.1 Document Input Formats +The `ai.process` action accepts documents in the following formats: + +#### Supported Document Types (via Extraction Service) +- **PDF** (`application/pdf`) - Extracted via `PdfExtractor` +- **Word Documents** (`application/vnd.openxmlformats-officedocument.wordprocessingml.document`) - Extracted via `DocxExtractor` +- **Excel** (`application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`) - Extracted via `XlsxExtractor` +- **PowerPoint** (`application/vnd.openxmlformats-officedocument.presentationml.presentation`) - Extracted via `PptxExtractor` +- **CSV** (`text/csv`) - Extracted via `CsvExtractor` +- **HTML** (`text/html`) - Extracted via `HtmlExtractor` +- **XML** (`application/xml`, `text/xml`) - Extracted via `XmlExtractor` +- **JSON** (`application/json`) - Extracted via `JsonExtractor` +- **Images** (`image/jpeg`, `image/png`, `image/gif`, `image/webp`) - Extracted via `ImageExtractor` +- **Text** (`text/plain`) - Extracted via `TextExtractor` +- **SQL** (`application/sql`) - Extracted via `SqlExtractor` +- **Binary** (other formats) - Extracted via `BinaryExtractor` + +#### Document Reference Formats +Documents are provided via the `documentList` parameter which accepts: +- `DocumentReferenceList` object (preferred) +- List of strings (document references) +- Single string (single document reference) +- `None` (no documents) + +### 1.2 Content Parts Input Format +Alternatively, pre-extracted content can be provided via `contentParts` parameter: +- **Type**: `List[ContentPart]` +- **ContentPart Structure**: + ```python + ContentPart( + id: str, # Unique identifier + parentId: Optional[str], # Parent part ID (for hierarchical content) + label: str, # Human-readable label + typeGroup: str, # "text", "table", "image", "structure", "container", "binary" + mimeType: str, # MIME type of the content + data: Union[str, bytes], # Actual content data + metadata: Dict[str, Any] # Metadata including: + # - documentId + # - documentMimeType + # - originalFileName + # - contentFormat ("extracted", "object", "reference") + # - intent ("extract", "display", "analyze") + # - usageHint + # - extractionPrompt + # - sourceAction + ) + ``` + +### 1.3 Prompt Input Format +- **Type**: `str` +- **Required**: Yes +- **Description**: Instruction for the AI describing what processing to perform + +### 1.4 Result Type Format +- **Type**: `str` +- **Default**: `"txt"` +- **Supported Formats**: `txt`, `json`, `md`, `csv`, `xml`, `html`, `pdf`, `docx`, `xlsx`, `pptx`, `png`, `jpg`, `jpeg`, `gif`, `webp` +- **Purpose**: Determines output file extension and generation intent + +--- + +## 2. Document Processing Flow + +### 2.1 Entry Point: `ai.process` Action +**Location**: `gateway/modules/workflows/methods/methodAi/actions/process.py` + +**Flow**: +1. **Parameter Extraction** (lines 35-55) + - Extract `aiPrompt` from parameters + - Extract `documentList` and convert to `DocumentReferenceList` + - Extract `resultType` (default: "txt") + - Extract `contentParts` if already provided + +2. **Content Extraction Decision** (lines 72-119) + - **Path A**: If `contentParts` already provided → Skip extraction, use provided parts + - **Path B**: If `documentList` provided but no `contentParts` → Extract content from documents + - **Path C**: If BOTH `contentParts` AND `documentList` provided: + - **In `ai.process` action** (lines 85-86, 167-174): + - Condition: `if not contentParts and documentList.references:` (line 86) + - **Behavior**: Only extracts from `documentList` if `contentParts` is NOT provided + - **Result**: If both provided, `contentParts` takes precedence + - **Important**: `documentList` is **NOT passed** to `callAiContent()` (line 167) + - Only `contentParts` is passed to the AI service + - **Conclusion**: `documentList` is **ignored** when `contentParts` is provided + - **Note**: Merging logic exists in document generation path (`DocumentGenerationPath.generateDocument`, lines 109-119), but this only applies when `documentList` is passed separately to `callAiContent()` (not from `ai.process` action) + - **Note**: Similar merging exists in data extraction path (`_handleDataExtraction`, lines 727-733), but also requires `documentList` to be passed to `callAiContent()` + +### 2.2 Content Extraction Process (Path B) + +**Location**: `gateway/modules/services/serviceExtraction/mainServiceExtraction.py` + +#### Step 1: Document Resolution (lines 86-94 in process.py) +```python +chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) +``` +- Converts `DocumentReferenceList` to `List[ChatDocument]` +- Each `ChatDocument` contains: + - `id`: Document ID + - `fileId`: File ID for database lookup + - `fileName`: Original filename + - `mimeType`: MIME type + +#### Step 2: Extraction Options Preparation (lines 96-108 in process.py) +```python +extractionOptions = ExtractionOptions( + prompt="Extract all content from the document", + mergeStrategy=MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ), + processDocumentsIndividually=True +) +``` + +#### Step 3: Content Extraction (line 111 in process.py) +```python +extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions) +``` + +**Extraction Service Flow** (`mainServiceExtraction.py:extractContent`): + +1. **For each document** (lines 69-288): + - **Load document bytes** (line 96): + ```python + documentBytes = dbInterface.getFileData(doc.fileId) + ``` + + - **Run extraction pipeline** (lines 113-120): + ```python + ec = runExtraction( + extractorRegistry=self._extractorRegistry, + chunkerRegistry=self._chunkerRegistry, + documentBytes=documentData["bytes"], + fileName=documentData["fileName"], + mimeType=documentData["mimeType"], + options=options + ) + ``` + + - **Extraction Process**: + - **Extractor Selection**: Based on MIME type, select appropriate extractor (PDF, DOCX, XLSX, etc.) + - **Content Parsing**: Extractor parses document and extracts structured content + - **Chunking** (if needed): Large content is chunked based on size limits + - **ContentPart Creation**: Each extracted piece becomes a `ContentPart` with: + - `typeGroup`: "text", "table", "image", "structure", "container", "binary" + - `data`: Extracted content (text, table data, base64 image, etc.) + - `mimeType`: Original MIME type + - `label`: Descriptive label + + - **Metadata Attachment** (lines 132-166): + ```python + # Required metadata fields + p.metadata["documentId"] = documentData["id"] + p.metadata["documentMimeType"] = documentData["mimeType"] + p.metadata["originalFileName"] = documentData["fileName"] + p.metadata["contentFormat"] = "extracted" # Default + p.metadata["intent"] = "extract" # Default + p.metadata["extractionPrompt"] = options.prompt + p.metadata["usageHint"] = f"Use extracted content from {documentData['fileName']}" + p.metadata["sourceAction"] = "extraction.extractContent" + ``` + +2. **Return Results**: + - Returns `List[ContentExtracted]` (one per input document) + - Each `ContentExtracted` contains: + - `id`: Document ID + - `parts`: `List[ContentPart]` - All extracted content parts + +#### Step 4: Combine ContentParts (lines 113-119 in process.py) +```python +contentParts = [] +for extracted in extractedResults: + if extracted.parts: + contentParts.extend(extracted.parts) +``` + +**Result**: Single `List[ContentPart]` containing all extracted content from all documents. + +--- + +## 3. What is Sent to the AI Service + +### 3.1 AI Service Call +**Location**: `gateway/modules/workflows/methods/methodAi/actions/process.py` (line 167) + +```python +aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + contentParts=contentParts, # Already extracted (or None if no documents) + outputFormat=output_format, + parentOperationId=operationId, + generationIntent=generationIntent # REQUIRED for DATA_GENERATE +) +``` + +### 3.2 Parameters Sent to AI Service + +#### 3.2.1 Prompt +- **Type**: `str` +- **Content**: User-provided instruction describing what processing to perform +- **Example**: "Extract all content from the document" + +#### 3.2.2 Options (`AiCallOptions`) +```python +options = AiCallOptions( + resultFormat=output_format, # e.g., "txt", "json", "docx" + operationType=OperationTypeEnum.DATA_GENERATE # or IMAGE_GENERATE +) +``` + +**Operation Types**: +- `DATA_GENERATE`: Generate structured content (documents, code) +- `IMAGE_GENERATE`: Generate images +- `DATA_EXTRACT`: Extract and process content +- `DATA_ANALYSE`: Analyze content +- `IMAGE_ANALYSE`: Analyze images + +#### 3.2.3 ContentParts (`List[ContentPart]`) +**Structure per ContentPart**: +```python +ContentPart( + id="part_123", + parentId=None, + label="Chapter 1 Text", + typeGroup="text", # or "table", "image", "structure", "container", "binary" + mimeType="text/plain", + data="Actual content text here...", # or base64 for images + metadata={ + "documentId": "doc_456", + "documentMimeType": "application/pdf", + "originalFileName": "document.pdf", + "contentFormat": "extracted", + "intent": "extract", + "usageHint": "Use extracted content from document.pdf", + "extractionPrompt": "Extract all content from the document", + "sourceAction": "extraction.extractContent" + } +) +``` + +#### 3.2.4 Output Format +- **Type**: `str` +- **Examples**: `"txt"`, `"json"`, `"docx"`, `"pdf"`, `"xlsx"`, `"png"` + +#### 3.2.5 Generation Intent +- **Type**: `str` +- **Values**: `"document"`, `"code"`, `"image"` +- **Default Logic** (lines 142-160 in process.py): + - Document formats (xlsx, docx, pdf, txt, md, html, csv, xml, pptx) → `"document"` + - Code formats (py, js, ts, java, cpp, c, go, rs, rb, php, swift, kt) → `"code"` + - Image formats (png, jpg, jpeg, gif, webp) → `"image"` (handled separately) + +--- + +## 4. What the AI Service Does with Documents and Contents + +### 4.1 AI Service Entry Point +**Location**: `gateway/modules/services/serviceAi/mainServiceAi.py:callAiContent` (line 540) + +### 4.2 Operation Type Routing + +#### 4.2.1 IMAGE_GENERATE (lines 599-601) +- Routes to `_handleImageGeneration()` +- Generates images from prompt (no document processing) + +#### 4.2.2 DATA_GENERATE (lines 607-640) +- **Requires**: `generationIntent` parameter +- **Routes based on intent**: + - `generationIntent == "code"` → `_handleCodeGeneration()` + - `generationIntent == "document"` → `_handleDocumentGeneration()` + +#### 4.2.3 DATA_EXTRACT (lines 643-653) +- Routes to `_handleDataExtraction()` +- Extracts content from documents, then processes with AI + +### 4.3 Document Generation Flow (`_handleDocumentGeneration`) + +**Location**: `mainServiceAi.py:_handleDocumentGeneration` (referenced at line 631) + +**CRITICAL**: When called from `ai.process` action: +- **Only `contentParts` is passed** to `callAiContent()` (line 167 in `process.py`) +- **`documentList` is NOT passed** (it's `None`) +- Therefore, **extraction does NOT happen again** in the document generation path +- The `contentParts` already extracted in `ai.process` are used directly +- **Steps 1-2 below are SKIPPED** for `ai.process` flow (no `documentList` to process) + +**Note**: `DocumentGenerationPath.generateDocument()` can also be called directly from other code paths with `documentList`, so it handles both cases. The following steps describe the general flow when `documentList` IS provided (not from `ai.process`). + +#### Step 1: Document Intent Clarification +- **Condition**: `if documentList:` AND `documentIntents` not provided +- If documents exist: + - Calls `clarifyDocumentIntents()` to analyze document purposes + - Determines how each document should be used (extract, display, analyze) +- **For `ai.process` flow**: This step is **skipped** (no `documentList` passed) + +#### Step 2: Content Extraction and Preparation +- **Condition**: `if documents:` (i.e., if `documentList` was provided and converted to documents) +- If documents exist: + - Calls `extractAndPrepareContent()`: + - **RAW Extraction (NO AI)**: Uses `extractContent()` service for pure document parsing + - **What it does**: Parses PDF, DOCX, XLSX, etc. to extract structured content + - **What it creates**: ContentParts with raw extracted data + - **AI involved**: NONE - this is pure parsing/parsing, no AI calls + - **Prompt Used**: `intent.extractionPrompt` or default `"Extract all content from the document"` + - **Important**: This prompt is stored in metadata but NOT used for AI extraction here + - It's only used later during section generation (Step 4) for Vision AI extraction + - **Purpose**: Just metadata storage, not actual AI prompt execution + - **ContentPart Preparation**: + - **For Images**: + - Creates image ContentPart with base64 image data + - Marks with `needsVisionExtraction: True` + - Stores `extractionPrompt` in metadata for later use + - **Reason**: Vision AI extraction is expensive, so it's deferred to section generation + - **No AI extraction happens here** - image is just parsed and stored + - **For Text**: + - Creates text ContentPart with extracted text (from PDF text layer, DOCX text, etc.) + - Marks with `skipExtraction: True` (already extracted from parsing, no AI needed) + - **No AI extraction happens here** - text is already extracted from document parsing + - **For Objects**: Creates object ContentParts for rendering (images, videos, etc.) + - Then merges with provided `contentParts` (if any) +- **For `ai.process` flow**: This step is **skipped** (no `documentList` passed, `contentParts` already extracted) +- **Why Extract (Parse) Before Structure Generation?** + - **ContentParts are needed BEFORE structure generation** so AI can assign them to chapters + - Structure generation needs to know: + - What documents exist (documentId) + - What content types are available (typeGroup: text, image, table, etc.) + - What content formats exist (contentFormat: extracted, object, reference) + - **Structure generation doesn't need AI-extracted text from images** - it just needs to know images exist + - Vision AI extraction (converting images to text) is deferred to section generation (Step 4) for efficiency + - **Key Point**: Only RAW parsing happens here - NO AI calls, NO Vision AI, NO text extraction from images + +#### Step 3: Structure Generation (for document formats) +- Calls `structureGenerator.generateStructure()`: + - Generates document structure (chapters, sections) + - Creates JSON structure with: + - `metadata`: Title, language + - `documents`: Array of document structures + - `chapters`: Array of chapter structures with: + - `id`, `level`, `title` + - `contentParts`: Assignment of ContentParts to chapters + - `generationHint`: Description of chapter content + +#### Step 4: Structure Filling +- Calls `structureFiller.fillStructure()`: + - For each chapter: + - Extracts relevant ContentParts assigned to chapter + - **Vision AI Extraction (if needed)**: + - Checks for ContentParts with `needsVisionExtraction == True` (images) + - Calls Vision AI with `extractionPrompt` from metadata (line 651 in `subStructureFilling.py`) + - Converts image ContentPart to text ContentPart with extracted text + - **Prompt Used**: `part.metadata.get("extractionPrompt")` or default `"Extract all text content from this image..."` + - **Section Generation**: + - Generates section content using AI with processed ContentParts + - Processes ContentParts with model-aware chunking if needed + - Merges results intelligently +- **Two-Phase Extraction Explained**: + - **Phase 1 (Step 2)**: RAW extraction (parsing) - creates ContentParts for structure generation + - **Phase 2 (Step 4)**: Vision AI extraction (for images only) - happens during section generation + - **Why Two Phases?** + - Structure generation needs ContentParts early (to assign to chapters) + - Vision AI extraction is expensive and only needed when generating content + - Text content doesn't need AI extraction (already extracted in Phase 1) + +#### Step 5: Document Rendering +- Converts filled structure to final document format (PDF, DOCX, XLSX, etc.) +- Returns `AiResponse` with rendered documents + +### 4.4 Content Parts Processing (`processContentPartsWithAi`) + +**Location**: `gateway/modules/services/serviceExtraction/mainServiceExtraction.py:processContentPartsWithAi` (line 1499) + +#### Step 1: Model Selection +```python +availableModels = modelRegistry.getAvailableModels() +failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels) +``` +- Selects appropriate AI models based on: + - Operation type + - Content type (text, images, etc.) + - Model capabilities + +#### Step 2: Parallel Processing +- Processes all ContentParts in parallel (max 5 concurrent by default) +- For each ContentPart: + - Calls `processContentPartWithFallback()` + +#### Step 3: ContentPart Processing (`processContentPartWithFallback`) + +**Location**: `mainServiceExtraction.py:processContentPartWithFallback` (line 1232) + +**Flow**: + +1. **Size Check** (lines 1328-1379): + ```python + # Calculate if content fits in model context + partSize = len(contentPart.data.encode('utf-8')) + modelContextTokens = model.contextLength + availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8) + ``` + +2. **Chunking Decision**: + - If content exceeds model limits → **Chunk content** + - If content fits → **Process directly** + +3. **Chunking Process** (`chunkContentPartForAi`, line 1146): + - Calculates model-specific chunk sizes: + ```python + # Reserve tokens for: + # - Prompt + # - System message wrapper + # - Max output tokens + # - Message overhead + availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.60) + ``` + - Uses appropriate chunker based on `typeGroup`: + - `TextChunker` for text + - `StructureChunker` for JSON/structured content + - `TableChunker` for tables + - `ImageChunker` for images + +4. **AI Call**: + - **For chunks**: Process each chunk separately, then merge results + - **For single part**: Call AI directly + - **For images**: Special handling with vision models (base64 encoding) + +5. **Model Fallback**: + - If model fails → Try next model in failover list + - Continues until success or all models exhausted + +#### Step 4: Result Merging (`mergePartResults`) + +**Location**: `mainServiceExtraction.py:mergePartResults` (line 615) + +**Merging Strategies**: + +1. **Elements Response Format** (detected at line 657): + - Merges JSON responses with `"elements"` array + - Specifically merges tables by headers + - Combines rows from tables with same headers + +2. **JSON Extraction Response Format** (detected at line 669): + - Merges `{"extracted_content": {...}}` structures + - Combines: + - Text blocks + - Tables (by headers) + - Headings + - Lists + - Images + +3. **Regular Merging** (line 680): + - Uses `MergeStrategy`: + - `groupBy`: "typeGroup" or "documentId" + - `orderBy`: "id" or "originalIndex" + - `mergeType`: "concatenate" + - Applies intelligent token-aware merging if enabled + - Preserves ContentPart metadata + +#### Step 5: Return Merged Content +- Returns single `AiCallResponse` with: + - `content`: Merged content string + - `modelName`: "multiple" (if multiple models used) + - `priceUsd`: Sum of all model costs + - `processingTime`: Sum of all processing times + - `bytesSent`: Sum of all bytes sent + - `bytesReceived`: Sum of all bytes received + +--- + +## 5. Summary Flow Diagram + +``` +ai.process Action + │ + ├─→ Extract Parameters (aiPrompt, documentList, resultType) + │ + ├─→ Check contentParts + │ ├─→ If provided → Use directly + │ └─→ If not provided → Extract from documents + │ │ + │ ├─→ Convert documentList → ChatDocuments + │ │ + │ ├─→ For each document: + │ │ ├─→ Load document bytes from database + │ │ ├─→ Select extractor (PDF, DOCX, XLSX, etc.) + │ │ ├─→ Extract content → ContentParts + │ │ ├─→ Chunk if needed (size-based) + │ │ └─→ Attach metadata + │ │ + │ └─→ Combine all ContentParts + │ + ├─→ Determine operationType (DATA_GENERATE, IMAGE_GENERATE, etc.) + │ + ├─→ Determine generationIntent (document, code, image) + │ + └─→ Call AI Service (callAiContent) + │ + ├─→ Route by operationType + │ │ + │ ├─→ DATA_GENERATE + document → Document Generation + │ │ ├─→ Clarify document intents + │ │ ├─→ Extract/prepare content + │ │ ├─→ Generate structure (chapters, sections) + │ │ ├─→ Fill structure (generate content per section) + │ │ └─→ Render document (PDF, DOCX, etc.) + │ │ + │ ├─→ DATA_GENERATE + code → Code Generation + │ │ └─→ Generate code directly + │ │ + │ └─→ DATA_EXTRACT → Data Extraction + │ ├─→ Extract content from documents + │ └─→ Process with AI (simple text processing) + │ + └─→ Process ContentParts (if provided) + │ + ├─→ For each ContentPart: + │ ├─→ Check size vs model limits + │ ├─→ If too large → Chunk (model-aware) + │ ├─→ Call AI with chunk/part + │ ├─→ Handle model fallback if needed + │ └─→ Collect results + │ + └─→ Merge results + ├─→ Detect response format (elements, extraction, regular) + ├─→ Apply merging strategy + └─→ Return merged content +``` + +--- + +## 6. Key Data Structures + +### 6.1 ContentPart +```python +ContentPart( + id: str, # Unique identifier + parentId: Optional[str], # Parent part ID + label: str, # Human-readable label + typeGroup: str, # "text", "table", "image", "structure", "container", "binary" + mimeType: str, # MIME type + data: Union[str, bytes], # Content data + metadata: Dict[str, Any] # Metadata dictionary +) +``` + +### 6.2 ContentExtracted +```python +ContentExtracted( + id: str, # Document ID + parts: List[ContentPart] # Extracted content parts +) +``` + +### 6.3 AiCallOptions +```python +AiCallOptions( + resultFormat: str, # Output format ("txt", "json", "docx", etc.) + operationType: OperationTypeEnum, # Operation type + priority: PriorityEnum, # Quality vs speed + processingMode: ProcessingModeEnum, # Detailed vs fast + compressPrompt: bool, # Compress prompt + compressContext: bool # Compress context +) +``` + +### 6.4 AiCallResponse +```python +AiCallResponse( + content: str, # Generated/processed content + modelName: str, # Model used + priceUsd: float, # Cost in USD + processingTime: float, # Processing time in seconds + bytesSent: int, # Bytes sent to model + bytesReceived: int, # Bytes received from model + errorCount: int # Number of errors +) +``` + +--- + +## 7. Important Notes + +### 7.1 Content Extraction Separation +- **Extraction** (no AI): Pure document parsing and content extraction +- **AI Processing**: Content analysis, generation, transformation + +### 7.2 Model-Aware Chunking +- Chunking considers: + - Model context length + - Model max output tokens + - Prompt size + - System message overhead + - Conservative safety margins (60% of available tokens) + +### 7.3 Parallel Processing +- ContentParts are processed in parallel (max 5 concurrent) +- Improves performance for multiple documents/parts + +### 7.4 Intelligent Merging +- Merges content intelligently: + - Tables by headers + - Text blocks with separators + - Preserves document structure + - Token-aware optimization + +### 7.5 Metadata Preservation +- ContentPart metadata is preserved throughout the pipeline +- Includes document source, extraction prompt, usage hints +- Enables traceability and proper content assignment + +--- + +## 8. Debug Files Generated + +During processing, the following debug files may be generated: + +1. **Extraction Results**: `extraction_result_{filename}.txt` + - Contains extraction summary per document + - Includes part metadata and data previews + +2. **Text Parts**: `extraction_text_part_{N}_{filename}.txt` + - Contains full extracted text for each text part + +3. **Per-Part Extracted Data**: `content_extraction_per_part.txt` + - Contains per-part extracted content summary + +4. **Original Parts Extracted Data**: `content_extraction_original_parts.txt` + - Contains original parts with extracted content + +5. **Generation Prompts/Responses**: `generation_contentPart_{id}_{label}_{prompt|response}.txt` + - Contains prompts and responses for generation phase + +6. **Structure Generation**: `chapter_structure_generation_{prompt|response}.txt` + - Contains structure generation prompts and responses + +--- + +## 9. Recommendations and Next Steps + +This section documents architectural findings, recommendations, and planned improvements. Topics will be added step by step as analysis progresses. + +### 9.1 Architectural Inconsistency: contentParts + documentList Merging Behavior + +#### Problem Statement + +The `ai.process` action exhibits **inconsistent behavior** when both `contentParts` and `documentList` parameters are provided: + +**Current Behavior Across Code Paths:** + +1. **`ai.process` Action** (`process.py` lines 85-86): + - **Logic**: `if not contentParts and documentList.references:` + - **Behavior**: If both provided → Only `contentParts` used, `documentList` ignored + - **Issue**: `documentList` is not passed to `callAiContent()`, so it's completely ignored + +2. **Document Generation Path** (`documentPath.py` lines 109-119): + - **Logic**: Extracts from `documentList`, then merges with `contentParts` + - **Behavior**: If both provided → **MERGES** both + - **Code**: `preparedContentParts.extend(contentParts)` + +3. **Data Extraction Path** (`mainServiceAi.py` lines 727-733): + - **Logic**: Extracts from `documentList`, then merges with `contentParts` + - **Behavior**: If both provided → **MERGES** both + - **Code**: `preparedContentParts.extend(contentParts)` + +#### Analysis + +**Arguments FOR Current Behavior (Skip documentList):** +- Performance: Avoids redundant extraction if contentParts already provided +- Explicit Intent: If user provides contentParts, they may want only those +- Pre-extracted Content: contentParts might be pre-processed/filtered content +- Simplicity: Simpler logic, fewer edge cases + +**Arguments AGAINST Current Behavior (Should Merge):** +- **Inconsistency**: Other paths merge, creating confusion +- **User Intent**: If user provides both, they likely want both used +- **Flexibility**: Allows combining pre-extracted content with additional documents +- **Architectural Pattern**: Document generation path already handles this correctly +- **No Performance Issue**: Extraction is fast, merging is trivial + +#### Recommendation + +**The current behavior in `ai.process` does NOT make architectural sense** because: + +1. **Inconsistency**: The action routes to paths that DO merge, but the action itself doesn't +2. **Lost Functionality**: User cannot combine pre-extracted contentParts with additional documents +3. **Unexpected Behavior**: Users might expect both to be used (like in other paths) + +#### Proposed Fix + +Change `ai.process` to merge both with intelligent deduplication: + +**Logic Requirements:** +- Extract content parts from documents (without AI) **only if** that document is not already represented in the `contentParts` list +- Merge all contentParts +- Result: Complete list of contentParts for all provided documents (no duplicates) + +**Current Implementation** (lines 85-119): +```python +# If contentParts not provided but documentList is, extract content first +if not contentParts and documentList.references: + # Extract from documentList + extractedResults = self.services.extraction.extractContent(...) + contentParts = [] + for extracted in extractedResults: + if extracted.parts: + contentParts.extend(extracted.parts) +``` + +**Proposed Implementation**: +```python +# Step 1: Identify documents already represented in contentParts +documentsAlreadyExtracted = set() +if contentParts: + for part in contentParts: + documentId = part.metadata.get("documentId") + if documentId: + documentsAlreadyExtracted.add(documentId) + logger.info(f"Found {len(documentsAlreadyExtracted)} documents already represented in contentParts: {documentsAlreadyExtracted}") + +# Step 2: Extract from documentList only for documents NOT already in contentParts +extractedParts = [] +if documentList and documentList.references: + self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents") + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) + + if chatDocuments: + # Filter: Only extract documents not already represented + documentsToExtract = [ + doc for doc in chatDocuments + if doc.id not in documentsAlreadyExtracted + ] + + if documentsToExtract: + logger.info(f"Extracting content from {len(documentsToExtract)} new documents (skipping {len(chatDocuments) - len(documentsToExtract)} already represented)") + + # Prepare extraction options + extractionOptions = parameters.get("extractionOptions") + if not extractionOptions: + extractionOptions = ExtractionOptions( + prompt="Extract all content from the document", + mergeStrategy=MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ), + processDocumentsIndividually=True + ) + + # Extract content (without AI - pure extraction) + extractedResults = self.services.extraction.extractContent(documentsToExtract, extractionOptions) + + # Combine all ContentParts from extracted results + for extracted in extractedResults: + if extracted.parts: + extractedParts.extend(extracted.parts) + + logger.info(f"Extracted {len(extractedParts)} content parts from {len(extractedResults)} documents") + else: + logger.info(f"All documents from documentList are already represented in contentParts, skipping extraction") + +# Step 3: Merge all contentParts +if contentParts: + # Preserve pre-extracted content metadata + for part in contentParts: + if part.metadata.get("skipExtraction", False): + part.metadata.setdefault("contentFormat", "extracted") + part.metadata.setdefault("isPreExtracted", True) + + # Merge: extracted parts first, then provided contentParts + # This ensures extracted content comes before pre-extracted content + finalContentParts = extractedParts + contentParts + contentParts = finalContentParts + logger.info(f"Merged contentParts: {len(extractedParts)} extracted + {len(contentParts) - len(extractedParts)} provided = {len(contentParts)} total") +elif extractedParts: + contentParts = extractedParts +``` + +**Benefits:** +- Makes behavior consistent across all paths +- Allows users to combine pre-extracted content with documents +- Matches user expectations +- Follows the architectural pattern already established in document generation path + +#### Edge Cases Handled + +1. **Duplicate Documents**: Same document in both `contentParts` and `documentList` + - **Solution**: Check `documentId` in `contentParts` metadata before extracting + - **Implementation**: Build set of `documentsAlreadyExtracted` from `part.metadata.get("documentId")` + - **Result**: Only extract documents NOT already represented in `contentParts` + - **Benefit**: Avoids redundant extraction, prevents duplicate content + +2. **Different Extraction Options**: contentParts might have different extraction settings + - **Solution**: Preserve metadata, let AI handle differences + - **Note**: Each ContentPart retains its own metadata (extractionPrompt, etc.) + - **Behavior**: Documents extracted with current options, pre-extracted parts keep their original metadata + +3. **Ordering**: Which comes first - extracted or provided? + - **Solution**: Extracted parts first, then provided contentParts + - **Rationale**: Newly extracted content comes first, pre-extracted content follows + - **Implementation**: `finalContentParts = extractedParts + contentParts` + +4. **Performance**: Avoids unnecessary extraction + - **Solution**: Only extracts documents not already in `contentParts` + - **Benefit**: Skips extraction for documents already represented + - **Logging**: Logs which documents are skipped and why + +5. **Missing documentId in Metadata**: What if contentPart doesn't have documentId? + - **Solution**: Only documents with `documentId` in metadata are considered "already extracted" + - **Behavior**: If `documentId` missing, document will be extracted (safe default) + - **Note**: Extraction service always sets `documentId` in metadata, so this is rare + +#### Implementation Steps + +1. **Update `ai.process` action** (`process.py` lines 85-119): + - **Step 1**: Build set of `documentsAlreadyExtracted` from `contentParts` metadata + - **Step 2**: Filter `chatDocuments` to only include documents NOT in `documentsAlreadyExtracted` + - **Step 3**: Extract content only from filtered documents (pure extraction, no AI) + - **Step 4**: Merge extracted parts with provided `contentParts` (extracted first, then provided) + - **Step 5**: Preserve metadata for pre-extracted contentParts + - **Step 6**: Add logging for transparency (which documents skipped, counts, etc.) + +2. **Update Documentation**: + - Update action parameter documentation to clarify deduplication behavior + - Document that extraction only happens for documents not already in `contentParts` + - Add examples showing both parameters used together + - Explain how `documentId` metadata is used for deduplication + +3. **Testing**: + - **Test Case 1**: Both parameters provided, no overlap → Both extracted and merged + - **Test Case 2**: Both parameters provided, full overlap → Only contentParts used, no extraction + - **Test Case 3**: Both parameters provided, partial overlap → Extract only new documents, merge all + - **Test Case 4**: Only contentParts → Use as-is + - **Test Case 5**: Only documentList → Extract all documents + - **Test Case 6**: contentParts without documentId metadata → Extract all documents (safe default) + +4. **Migration**: + - No breaking changes expected (only adds functionality) + - Existing code using only one parameter continues to work + - New behavior: When both provided, intelligently deduplicates before merging + +### 9.2 Architectural Redundancy: Duplicate Extraction Logic + +#### Problem Statement + +**Current Architecture:** +- `ai.process` action extracts documents and creates `contentParts` (lines 86-119) +- Then passes only `contentParts` to `callAiContent()` (line 167) +- `callAiContent()` accepts both `contentParts` AND `documentList` (line 545) +- Document generation path has `extractAndPrepareContent()` logic (line 103 in `documentPath.py`) +- But this extraction logic is **never used** when called from `ai.process` (because `documentList` is not passed) + +**Question**: Why does `ai.process` extract documents when the AI service already has extraction logic? + +#### Analysis + +**Current Flow:** +``` +ai.process + ├─→ Extract documents → contentParts (lines 86-119) + ├─→ Pass contentParts to callAiContent() (line 167) + └─→ callAiContent() routes to document generation path + └─→ extractAndPrepareContent() exists but is SKIPPED (no documentList) +``` + +**Alternative Flow (More Logical):** +``` +ai.process + ├─→ Pass documentList to callAiContent() (line 167) + └─→ callAiContent() routes to document generation path + └─→ extractAndPrepareContent() handles extraction +``` + +#### Issues with Current Architecture + +1. **Code Duplication**: Extraction logic exists in both `ai.process` and document generation path +2. **Inconsistency**: Different extraction paths use different extraction options/logic +3. **Maintenance Burden**: Changes to extraction logic must be made in multiple places +4. **Unused Code**: `extractAndPrepareContent()` in document generation path is unused when called from `ai.process` +5. **Loss of Flexibility**: `ai.process` can't leverage document intent clarification and other features in `extractAndPrepareContent()` + +#### Why Current Architecture Exists (Possible Reasons) + +1. **Historical**: Extraction may have been added to `ai.process` before AI service had extraction +2. **Separation of Concerns**: `ai.process` might be intended as a simpler entry point +3. **Progress Tracking**: Early extraction allows better progress tracking at action level +4. **Performance**: Early extraction might allow parallel processing + +However, these don't justify the duplication and inconsistency. + +#### Recommendation + +**Option A: Remove Extraction from `ai.process` (Preferred)** +- `ai.process` should pass `documentList` to `callAiContent()` instead of extracting +- Let the AI service handle all extraction through `extractAndPrepareContent()` +- Benefits: + - Single source of truth for extraction logic + - Consistent extraction options and behavior + - Leverages document intent clarification + - Simpler `ai.process` action + - Better separation: action layer vs service layer + +**Option B: Keep Extraction in `ai.process` but Make it Optional** +- Add parameter to control whether extraction happens in `ai.process` or AI service +- Still creates complexity and potential inconsistency + +**Option C: Keep Current Architecture (Not Recommended)** +- Document the duplication and accept it +- Maintain extraction logic in both places +- Risk of divergence over time + +#### Proposed Refactoring (Option A) + +**Current Implementation** (`process.py` lines 85-119): +```python +# Extract in ai.process +if not contentParts and documentList.references: + extractedResults = self.services.extraction.extractContent(...) + contentParts = combineExtractedResults(extractedResults) + +# Pass only contentParts +aiResponse = await self.services.ai.callAiContent( + contentParts=contentParts, # documentList NOT passed + ... +) +``` + +**Proposed Implementation**: +```python +# Don't extract in ai.process - let AI service handle it +# Pass documentList to AI service +aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + documentList=documentList, # Pass documentList instead + contentParts=contentParts, # Still support pre-extracted contentParts + outputFormat=output_format, + parentOperationId=operationId, + generationIntent=generationIntent +) +``` + +**Benefits:** +- Single extraction path in AI service +- Consistent extraction behavior +- Leverages document intent clarification +- Simpler `ai.process` action +- Better architecture: action layer delegates to service layer + +**Migration Path:** +1. Update `ai.process` to pass `documentList` to `callAiContent()` +2. Remove extraction logic from `ai.process` (or make it optional) +3. Ensure `extractAndPrepareContent()` handles all extraction cases +4. Test that all existing workflows continue to work +5. Update documentation + +**Edge Cases:** +- Pre-extracted `contentParts` should still be supported (merge with extracted) +- Extraction options should be configurable via parameters +- Progress tracking should work at both levels + +### 9.3 Target State: Ideal Architecture and Flow + +#### Target Architecture Overview + +The target state addresses all architectural issues identified: +1. **Single extraction path** in AI service (no duplication in `ai.process`) +2. **Intelligent merging** of `contentParts` and `documentList` with deduplication +3. **Clear separation** of concerns: action layer delegates to service layer +4. **Consistent behavior** across all code paths + +#### Target Flow Diagram + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ ai.process Action │ +│ │ +│ 1. Extract Parameters │ +│ ├─→ aiPrompt │ +│ ├─→ documentList (optional) │ +│ ├─→ contentParts (optional) │ +│ ├─→ resultType │ +│ └─→ generationIntent │ +│ │ +│ 2. Determine Operation Type │ +│ ├─→ IMAGE_GENERATE → Route to image generation │ +│ ├─→ DATA_GENERATE → Route to document/code generation │ +│ └─→ DATA_EXTRACT → Route to data extraction │ +│ │ +│ 3. Pass Parameters to AI Service │ +│ └─→ callAiContent( │ +│ prompt=aiPrompt, │ +│ documentList=documentList, ← PASS documentList │ +│ contentParts=contentParts, ← PASS contentParts │ +│ options=options, │ +│ generationIntent=generationIntent │ +│ ) │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ AI Service: callAiContent() │ +│ │ +│ 1. Route by Operation Type │ +│ └─→ DATA_GENERATE → _handleDocumentGeneration() │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Document Generation Path: generateDocument() │ +│ │ +│ Phase 1: Document Intent Clarification │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ if documentList: │ │ +│ │ documents = getChatDocumentsFromDocumentList() │ │ +│ │ │ │ +│ │ # Step 1: Map pre-extracted JSONs to original docs │ │ +│ │ # (for intent analysis, analyze original docs, not JSON)│ │ +│ │ documentMapping = {} │ │ +│ │ resolvedDocuments = [] │ │ +│ │ for doc in documents: │ │ +│ │ preExtracted = resolvePreExtractedDocument(doc) │ │ +│ │ if preExtracted: │ │ +│ │ originalDocId = preExtracted["originalDocument"]["id"]│ +│ │ documentMapping[originalDocId] = doc.id │ │ +│ │ resolvedDocuments.append(originalDoc) │ │ +│ │ else: │ │ +│ │ resolvedDocuments.append(doc) │ │ +│ │ │ │ +│ │ # Step 2: AI analyzes document purposes │ │ +│ │ documentIntents = clarifyDocumentIntents( │ │ +│ │ resolvedDocuments, │ │ +│ │ userPrompt, │ │ +│ │ actionParameters │ │ +│ │ ) │ │ +│ │ │ │ +│ │ # Step 3: Map intents back to JSON doc IDs │ │ +│ │ # (if intent was for original doc, map to JSON doc) │ │ +│ │ for intent in documentIntents: │ │ +│ │ if intent.documentId in documentMapping: │ │ +│ │ intent.documentId = documentMapping[intent.documentId]│ +│ │ │ │ +│ │ # Result: List[DocumentIntent] with: │ │ +│ │ # - documentId: Document ID │ │ +│ │ # - intents: ["extract", "render", "reference"] │ │ +│ │ # - extractionPrompt: Prompt for extraction │ │ +│ │ # - reasoning: Why these intents were chosen │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ Phase 2: Content Extraction and Preparation │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Step 1: Identify Pre-Extracted JSON Documents │ │ +│ │ preExtractedDocs = [] │ │ +│ │ originalDocIdsCovered = set() │ │ +│ │ for doc in documents: │ │ +│ │ preExtracted = resolvePreExtractedDocument(doc) │ │ +│ │ if preExtracted: │ │ +│ │ preExtractedDocs.append(doc) │ │ +│ │ originalDocId = preExtracted["originalDocument"]["id"]│ +│ │ originalDocIdsCovered.add(originalDocId) │ │ +│ │ │ │ +│ │ Step 2: Filter Out Original Documents │ │ +│ │ # Remove original documents covered by pre-extracted │ │ +│ │ filteredDocuments = [ │ │ +│ │ doc for doc in documents │ │ +│ │ if doc.id not in originalDocIdsCovered │ │ +│ │ ] │ │ +│ │ │ │ +│ │ Step 3: Identify Already Extracted Documents │ │ +│ │ documentsAlreadyExtracted = set() │ │ +│ │ for part in contentParts: │ │ +│ │ if part.metadata.get("documentId"): │ │ +│ │ documentsAlreadyExtracted.add(documentId) │ │ +│ │ │ │ +│ │ Step 4: Filter Documents to Extract │ │ +│ │ documentsToExtract = [ │ │ +│ │ doc for doc in filteredDocuments │ │ +│ │ if doc.id not in documentsAlreadyExtracted │ │ +│ │ ] │ │ +│ │ │ │ +│ │ Step 5: Process Pre-Extracted JSON Documents │ │ +│ │ preExtractedParts = [] │ │ +│ │ for doc in preExtractedDocs: │ │ +│ │ preExtracted = resolvePreExtractedDocument(doc) │ │ +│ │ contentExtracted = preExtracted["contentExtracted"] │ │ +│ │ # Extract ContentParts from JSON (not regular JSON) │ │ +│ │ for part in contentExtracted.parts: │ │ +│ │ # Process nested parts if structure part │ │ +│ │ # Apply intents (extract, render, reference) │ │ +│ │ # Mark as pre-extracted │ │ +│ │ part.metadata["isPreExtracted"] = True │ │ +│ │ part.metadata["fromPreExtractedJson"] = True │ │ +│ │ preExtractedParts.append(part) │ │ +│ │ │ │ +│ │ Step 6: RAW Extraction (NO AI) for Regular Documents │ │ +│ │ if documentsToExtract: │ │ +│ │ extractedResults = extractContent( │ │ +│ │ documentsToExtract, │ │ +│ │ extractionOptions │ │ +│ │ ) │ │ +│ │ extractedParts = combineResults(extractedResults) │ │ +│ │ else: │ │ +│ │ extractedParts = [] │ │ +│ │ │ │ +│ │ Step 7: Merge All ContentParts │ │ +│ │ allParts = [] │ │ +│ │ allParts.extend(preExtractedParts) # Pre-extracted first│ +│ │ allParts.extend(extractedParts) # Then extracted │ │ +│ │ if contentParts: │ │ +│ │ # Preserve metadata │ │ +│ │ for part in contentParts: │ │ +│ │ part.metadata.setdefault("isPreExtracted", True) │ │ +│ │ allParts.extend(contentParts) # Then provided │ │ +│ │ │ │ +│ │ finalContentParts = allParts │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ Phase 3: Structure Generation │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ structure = generateStructure( │ │ +│ │ userPrompt, │ │ +│ │ finalContentParts, ← Uses ContentParts metadata │ │ +│ │ outputFormat │ │ +│ │ ) │ │ +│ │ │ │ +│ │ Result: JSON structure with chapters │ │ +│ │ - Each chapter has contentParts assignments │ │ +│ │ - Based on ContentPart metadata (documentId, etc.) │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ Phase 4: Structure Filling │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ filledStructure = fillStructure( │ │ +│ │ structure, │ │ +│ │ finalContentParts, │ │ +│ │ userPrompt │ │ +│ │ ) │ │ +│ │ │ │ +│ │ For each section: │ │ +│ │ 1. Check if ContentPart needsVisionExtraction │ │ +│ │ 2. If yes: Call Vision AI (Phase 2 extraction) │ │ +│ │ 3. Generate section content with AI │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ Phase 5: Document Rendering │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ renderedDocuments = renderDocuments( │ │ +│ │ filledStructure, │ │ +│ │ outputFormat │ │ +│ │ ) │ │ +│ └─────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +#### Key Differences from Current State + +**Current State Issues:** +1. ❌ `ai.process` extracts documents (duplication) +2. ❌ `ai.process` doesn't pass `documentList` to AI service +3. ❌ No deduplication when both `contentParts` and `documentList` provided +4. ❌ Inconsistent behavior across code paths +5. ❌ Pre-extracted JSON documents in `documentList` may not be properly identified + +**Target State Benefits:** +1. ✅ Single extraction path in AI service +2. ✅ `ai.process` passes both `documentList` and `contentParts` +3. ✅ Intelligent deduplication (extract only new documents) +4. ✅ Pre-extracted JSON documents identified and processed as ContentParts (not regular JSON) +5. ✅ Original documents filtered out if covered by pre-extracted JSON +6. ✅ Consistent behavior across all code paths +7. ✅ Better separation of concerns + +#### Document Intent Clarification Details + +**What Happens in Phase 1:** + +1. **Document Resolution**: + - Maps pre-extracted JSON documents to their original documents + - Creates `documentMapping` to track original → JSON document ID mapping + - Resolves documents for intent analysis (analyze original docs, not JSON) + +2. **AI Analysis** (`clarifyDocumentIntents`): + - **Input**: User prompt, resolved documents, action parameters (outputFormat, etc.) + - **Process**: Uses AI (`callAiPlanning()`) to analyze how each document should be used + - **Output**: List of `DocumentIntent` objects, one per document + - **AI Call**: Structured JSON response with intents and reasoning + +3. **Intent Determination**: + - **"extract"**: Content extraction needed (text, structure, OCR, etc.) + - Used for: PDFs, DOCX, images with text, tables, etc. + - Generates `extractionPrompt` for specific extraction needs + - Example: `"Extract all text content, preserving structure"` + - **"render"**: Image/binary should be rendered as-is (visual element) + - Used for: Images that should appear in final document + - No extraction prompt needed + - Example: Image that should be displayed in PDF/DOCX + - **"reference"**: Document reference/attachment (no extraction) + - Used for: Documents mentioned but not extracted + - No extraction prompt needed + - Example: Template document referenced but not included + +4. **Multiple Intents**: + - A document can have multiple intents (e.g., `["extract", "render"]`) + - Example: Image that needs text extraction AND visual rendering + - Each intent creates a separate ContentPart later in extraction phase + +5. **Extraction Prompt Generation**: + - AI generates specific extraction prompt for each document + - Based on user prompt, document type, and output format + - Examples: + - `"Extract all text content, preserving structure"` + - `"Extract text content from image using vision AI"` + - `"Extract tables and data, preserving formatting"` + - Stored in `DocumentIntent.extractionPrompt` for later use + +6. **Mapping Back**: + - If intent was for original document, map back to JSON document ID + - Ensures intents are associated with correct documents + - Pre-extracted JSON documents get intents mapped correctly + +**Example Flow**: +``` +Input: + documents = [ + ChatDocument(id="doc_1", fileName="report.pdf"), + ChatDocument(id="doc_2", fileName="image.jpg"), + ChatDocument(id="json_3", fileName="pre_extracted.json") # Pre-extracted + ] + userPrompt = "Create a report with the PDF content and show the image" + +Step 1: Map pre-extracted JSON + → json_3 maps to original_doc_3 + → resolvedDocuments = [doc_1, doc_2, original_doc_3] + +Step 2: AI Analysis + → Analyzes: "Create report with PDF content and show image" + → Determines: + - doc_1: ["extract"] (needs text extraction) + extractionPrompt: "Extract all text content, preserving structure" + - doc_2: ["render"] (needs visual rendering) + extractionPrompt: null + - original_doc_3: ["extract"] (needs extraction) + extractionPrompt: "Extract all text content, preserving structure" + +Step 3: Map back + → original_doc_3 intent mapped to json_3 + → Final intents: + - doc_1: ["extract"] + - doc_2: ["render"] + - json_3: ["extract"] +``` + +**Why This Matters**: +- Determines HOW each document should be processed (extract vs. render vs. reference) +- Generates appropriate extraction prompts for each document +- Handles pre-extracted JSON documents correctly (maps to original for analysis) +- Enables multiple intents per document (extract + render for images) +- Guides content extraction phase (Phase 2) on what to extract and how + +**Output Structure**: +```python +DocumentIntent( + documentId: str, # Document ID + intents: List[str], # ["extract", "render", "reference"] + extractionPrompt: Optional[str], # Prompt for extraction (if extract intent) + reasoning: str # Why these intents were chosen +) +``` + +#### Pre-Extracted JSON Documents Handling + +**Scenario**: ContentParts are already extracted and handed over as JSON documents in `documentList` + +**Target State Behavior**: + +1. **Identification** (Step 1 in Phase 2): + - Use `resolvePreExtractedDocument()` to identify JSON documents containing `ContentExtracted` structure + - These are NOT regular JSON documents - they contain pre-processed ContentParts + - Map back to original document ID to identify which original documents are covered + +2. **Filtering** (Step 2 in Phase 2): + - Keep pre-extracted JSON documents (will be processed as ContentParts) + - Remove original documents if covered by pre-extracted JSON (prevents duplicate extraction) + - Keep regular documents (not pre-extracted, not covered) + +3. **Processing** (Step 5 in Phase 2): + - Extract ContentParts from pre-extracted JSON (not treat as regular JSON) + - Process nested parts if structure parts contain nested ContentParts + - Apply intents (extract, render, reference) to each ContentPart + - Mark with metadata: + - `isPreExtracted: True` + - `fromPreExtractedJson: True` + - `originalFileName`: Original document filename + - `documentId`: Pre-extracted JSON document ID + +4. **Merging** (Step 7 in Phase 2): + - Merge order: pre-extracted parts → extracted parts → provided contentParts + - All ContentParts treated equally regardless of source + +**Example Flow**: +``` +documentList = [ + "doc:original_pdf_123", # Original PDF document + "doc:pre_extracted_json_456" # Pre-extracted JSON (contains ContentParts from original_pdf_123) +] + +Step 1: Identify pre-extracted JSON + → pre_extracted_json_456 is identified as pre-extracted + → Maps to original_pdf_123 + +Step 2: Filter documents + → Keep pre_extracted_json_456 (will extract ContentParts from JSON) + → Remove original_pdf_123 (covered by pre-extracted JSON) + +Step 5: Process pre-extracted JSON + → Extract ContentParts from pre_extracted_json_456 + → Mark as isPreExtracted=True, fromPreExtractedJson=True + +Step 6: Extract regular documents + → No documents to extract (all filtered out or pre-extracted) + +Step 7: Merge + → finalContentParts = [ContentParts from pre_extracted_json_456] +``` + +**Key Point**: Pre-extracted JSON documents are identified BEFORE deduplication and processed as ContentParts, NOT as regular JSON documents. This prevents treating them as regular JSON and ensures ContentParts are properly extracted and used. + +#### Migration Steps + +**Phase 1: Update `ai.process` Action** + +**Step 1.1: Remove Extraction Logic from `ai.process`** +- **File**: `gateway/modules/workflows/methods/methodAi/actions/process.py` +- **Lines**: 85-119 +- **Action**: Remove or comment out extraction logic +- **Code Change**: + ```python + # REMOVE THIS: + # if not contentParts and documentList.references: + # extractedResults = self.services.extraction.extractContent(...) + # contentParts = combineExtractedResults(extractedResults) + ``` + +**Step 1.2: Pass `documentList` to `callAiContent()`** +- **File**: `gateway/modules/workflows/methods/methodAi/actions/process.py` +- **Line**: 167 +- **Action**: Add `documentList` parameter +- **Code Change**: + ```python + # CURRENT: + aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + contentParts=contentParts, # Only contentParts + outputFormat=output_format, + parentOperationId=operationId, + generationIntent=generationIntent + ) + + # TARGET: + aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + documentList=documentList, # ADD documentList + contentParts=contentParts, # Keep contentParts + outputFormat=output_format, + parentOperationId=operationId, + generationIntent=generationIntent + ) + ``` + +**Step 1.3: Update Progress Tracking** +- **File**: `gateway/modules/workflows/methods/methodAi/actions/process.py` +- **Action**: Remove extraction progress tracking (moved to AI service) +- **Note**: Progress tracking will happen in `extractAndPrepareContent()` + +**Phase 2: Update Document Generation Path** + +**Step 2.1: Document Intent Clarification (Already Exists)** +- **File**: `gateway/modules/services/serviceAi/subDocumentIntents.py` +- **Lines**: 30-120 +- **Action**: Verify intent clarification works correctly with new flow +- **What it does**: + - **AI Analysis**: Uses AI to analyze user prompt and documents + - **Determines Intents**: For each document, determines how it should be used: + - `"extract"`: Content extraction needed (text, structure, OCR, etc.) + - `"render"`: Image/binary should be rendered as-is (visual element) + - `"reference"`: Document reference/attachment (no extraction, just reference) + - **Multiple Intents**: A document can have multiple intents (e.g., `["extract", "render"]` for images) + - **Extraction Prompt**: Generates specific extraction prompt for each document + - **Pre-Extracted JSON Handling**: Maps pre-extracted JSONs to original documents for analysis, then maps back +- **Example Output**: + ```python + [ + DocumentIntent( + documentId="doc_1", + intents=["extract"], + extractionPrompt="Extract all text content, preserving structure", + reasoning="User needs text content for document generation" + ), + DocumentIntent( + documentId="doc_2", + intents=["extract", "render"], # Both! + extractionPrompt="Extract text content from image using vision AI", + reasoning="Image contains text that needs extraction, but also should be rendered visually" + ) + ] + ``` +- **Note**: This step already exists and works correctly, just needs to be verified with new flow + +**Step 2.2: Identify Pre-Extracted JSON Documents** +- **File**: `gateway/modules/services/serviceGeneration/paths/documentPath.py` +- **Lines**: 62-87 (already exists, but needs to be integrated with deduplication) +- **Action**: Ensure pre-extracted JSON documents are identified BEFORE deduplication +- **Code Change**: + ```python + # Step 1: Identify pre-extracted JSON documents + preExtractedDocs = [] + originalDocIdsCoveredByPreExtracted = set() + for doc in documents: + preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc) + if preExtracted: + preExtractedDocs.append(doc) + originalDocId = preExtracted["originalDocument"]["id"] + originalDocIdsCoveredByPreExtracted.add(originalDocId) + logger.info(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}") + + # Step 2: Filter out original documents covered by pre-extracted JSONs + filteredDocuments = [] + for doc in documents: + preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc) + if preExtracted: + # Pre-extracted JSON - keep it (will be processed as ContentParts, not regular JSON) + filteredDocuments.append(doc) + elif doc.id in originalDocIdsCoveredByPreExtracted: + # Original document covered by pre-extracted JSON - skip it + logger.info(f"Skipping original document {doc.id} - already covered by pre-extracted JSON") + else: + # Regular document - keep it + filteredDocuments.append(doc) + + documents = filteredDocuments + ``` + +**Step 2.2: Add Deduplication Logic for Regular Documents** +- **File**: `gateway/modules/services/serviceGeneration/paths/documentPath.py` +- **Lines**: 101-119 +- **Action**: Add deduplication before extraction (after pre-extracted JSON handling) +- **Code Change**: + ```python + # Step 3: Identify already extracted documents (from contentParts) + documentsAlreadyExtracted = set() + if contentParts: + for part in contentParts: + documentId = part.metadata.get("documentId") + if documentId: + documentsAlreadyExtracted.add(documentId) + + # Step 4: Filter documents to extract (exclude pre-extracted JSONs and already extracted) + documentsToExtract = [ + doc for doc in documents + if doc.id not in documentsAlreadyExtracted + and not self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc) # Not pre-extracted JSON + ] + + # Step 5: Process pre-extracted JSON documents (handled in extractAndPrepareContent) + # Step 6: Extract regular documents + if documentsToExtract: + preparedContentParts = await extractAndPrepareContent( + documentsToExtract, # Only new documents (not pre-extracted, not already extracted) + documentIntents or [], + docOperationId + ) + + # Merge: pre-extracted parts + extracted parts + provided contentParts + if contentParts: + # Preserve metadata + for part in contentParts: + part.metadata.setdefault("isPreExtracted", True) + preparedContentParts.extend(contentParts) + + contentParts = preparedContentParts + elif contentParts: + # All documents already extracted or pre-extracted, use contentParts as-is + contentParts = contentParts + ``` + +**Step 2.4: Ensure Pre-Extracted JSON Processing** +- **File**: `gateway/modules/services/serviceAi/subContentExtraction.py` +- **Lines**: 75-253 +- **Action**: Ensure `extractAndPrepareContent()` properly handles pre-extracted JSON documents +- **Note**: This logic already exists (lines 75-253) but needs to be verified: + - Pre-extracted JSON documents are identified via `resolvePreExtractedDocument()` + - ContentParts are extracted from JSON (not treated as regular JSON) + - Original documents are skipped if covered by pre-extracted JSON + - Metadata is preserved (`isPreExtracted`, `fromPreExtractedJson`) + +**Step 2.5: Verify Pre-Extracted JSON Identification** +- **File**: `gateway/modules/services/serviceAi/subDocumentIntents.py` +- **Action**: Ensure `resolvePreExtractedDocument()` correctly identifies pre-extracted JSON documents +- **Requirements**: + - Must identify JSON documents containing `ContentExtracted` structure + - Must map back to original document ID + - Must extract ContentParts from JSON (not treat as regular JSON) + - Must preserve metadata (`isPreExtracted`, `fromPreExtractedJson`) + +**Step 2.6: Update Extraction Logic** +- **File**: `gateway/modules/services/serviceAi/subContentExtraction.py` +- **Action**: Ensure extraction handles deduplication gracefully +- **Note**: Extraction service already supports this, just need to pass filtered documents +- **Important**: Pre-extracted JSON documents should be processed BEFORE regular extraction + +**Phase 3: Testing and Validation** + +**Step 3.1: Unit Tests** +- Test `ai.process` with only `documentList` +- Test `ai.process` with only `contentParts` +- Test `ai.process` with both `documentList` and `contentParts` (no overlap) +- Test `ai.process` with both `documentList` and `contentParts` (full overlap) +- Test `ai.process` with both `documentList` and `contentParts` (partial overlap) + +**Step 3.2: Integration Tests** +- Test full document generation flow +- Test progress tracking at all levels +- Test error handling (missing documents, extraction failures) +- Test performance (no duplicate extraction) + +**Step 3.3: Regression Tests** +- Ensure existing workflows continue to work +- Test backward compatibility +- Test edge cases (empty lists, missing metadata, etc.) + +**Phase 4: Documentation Updates** + +**Step 4.1: Update Action Documentation** +- **File**: `gateway/modules/workflows/methods/methodAi/methodAi.py` +- **Action**: Update parameter descriptions to clarify merging behavior +- **Content**: Document that both parameters can be provided and will be merged intelligently + +**Step 4.2: Update API Documentation** +- Document new behavior in API docs +- Add examples showing both parameters used together +- Explain deduplication logic + +**Step 4.3: Update This Analysis Document** +- Mark current state sections as "Current State (Pre-Migration)" +- Add "Target State" sections (this chapter) +- Document migration progress + +**Phase 5: Rollout Strategy** + +**Step 5.1: Feature Flag (Optional)** +- Add feature flag to control new vs. old behavior +- Allows gradual rollout +- Easy rollback if issues found + +**Step 5.2: Gradual Migration** +- Migrate one workflow at a time +- Monitor for issues +- Collect feedback + +**Step 5.3: Full Migration** +- Remove old extraction logic from `ai.process` +- Remove feature flag +- Update all documentation + +#### Migration Checklist + +- [ ] **Phase 1: Update `ai.process` Action** + - [ ] Remove extraction logic from `ai.process` + - [ ] Pass `documentList` to `callAiContent()` + - [ ] Update progress tracking + - [ ] Test `ai.process` with new parameters + +- [ ] **Phase 2: Update Document Generation Path** + - [ ] Identify pre-extracted JSON documents (before deduplication) + - [ ] Filter out original documents covered by pre-extracted JSONs + - [ ] Add deduplication logic for regular documents + - [ ] Ensure pre-extracted JSON processing (extract ContentParts, not treat as JSON) + - [ ] Update extraction to handle filtered documents + - [ ] Test merging behavior (pre-extracted + extracted + provided) + - [ ] Test pre-extracted JSON identification + +- [ ] **Phase 3: Testing and Validation** + - [ ] Unit tests for all scenarios + - [ ] Integration tests for full flow + - [ ] Regression tests for existing workflows + - [ ] Performance tests (no duplicate extraction) + +- [ ] **Phase 4: Documentation Updates** + - [ ] Update action parameter documentation + - [ ] Update API documentation + - [ ] Update analysis document + +- [ ] **Phase 5: Rollout** + - [ ] Feature flag (if needed) + - [ ] Gradual migration + - [ ] Full migration + - [ ] Remove old code + +- [ ] **Phase 6: Security and Design Improvements** + - [ ] **CRITICAL: Fix unfenced user input** (Finding 1) + - [ ] Add fencing around `userPrompt` in intent analysis prompt + - [ ] Test with various user inputs (special chars, JSON, newlines) + - [ ] Verify AI still correctly parses user request + - [ ] **IMPROVEMENT: Per-document output format** (Finding 2) + - [ ] Add `outputFormat` field to `DocumentIntent` model (optional) + - [ ] Update intent analysis prompt to determine format per document + - [ ] Update structure generation to use per-document format + - [ ] Fallback to global format if not specified + +#### Expected Benefits After Migration + +1. **Architectural Improvements**: + - Single source of truth for extraction logic + - Consistent behavior across all code paths + - Better separation of concerns + +2. **Functional Improvements**: + - Users can combine pre-extracted content with documents + - Intelligent deduplication prevents redundant extraction + - More flexible and powerful API + +3. **Maintenance Improvements**: + - Less code duplication + - Easier to maintain and extend + - Clearer code organization + +4. **Performance Improvements**: + - No duplicate extraction + - Better resource utilization + - Faster processing for common cases + +### 9.4 Two-Phase Extraction: Why Extract Before Structure Generation? + +#### Problem Statement + +**Question**: Why do we extract content (Step 2) BEFORE structure generation (Step 3), when we need AI to fill sections (Step 4) anyway? Are we extracting twice? + +**Answer**: Yes, but it's intentional and necessary. There are TWO different types of extraction happening at different phases: + +1. **Phase 1 (Step 2)**: RAW extraction (parsing) - NO AI +2. **Phase 2 (Step 4)**: Vision AI extraction (for images only) - WITH AI + +#### Analysis + +**Phase 1: RAW Extraction (Step 2 - `extractAndPrepareContent`)** + +**What happens:** +- Uses `extractContent()` service for pure document parsing +- Parses PDF, DOCX, XLSX, etc. to extract structured content +- Creates ContentParts with raw extracted data +- **No AI involved** - just parsing/parsing + +**Prompt used:** +- `intent.extractionPrompt` or default `"Extract all content from the document"` +- **Important**: This prompt is stored in metadata but NOT used for AI extraction here +- It's only used later during section generation (Step 4) for Vision AI + +**ContentPart preparation:** +- **For Images**: + - Marks with `needsVisionExtraction: True` + - Stores `extractionPrompt` in metadata + - **Reason**: Vision AI extraction is expensive, so it's deferred to section generation +- **For Text**: + - Marks with `skipExtraction: True` (already extracted, no AI needed) + - Text is already extracted from document parsing +- **For Objects**: + - Creates object ContentParts for rendering (images, videos, etc.) + +**Why extract before structure generation?** +- ContentParts are needed BEFORE structure generation so AI can assign them to chapters +- Structure generation needs to know what content is available to assign to chapters +- The AI needs ContentPart metadata (documentId, typeGroup, etc.) to make intelligent assignments + +**Phase 2: Vision AI Extraction (Step 4 - `fillStructure`)** + +**What happens:** +- During section generation, checks for ContentParts with `needsVisionExtraction == True` +- Calls Vision AI with `extractionPrompt` from metadata (line 651 in `subStructureFilling.py`) +- Converts image ContentPart to text ContentPart with extracted text +- Then uses the text part for section generation + +**Prompt used:** +- `part.metadata.get("extractionPrompt")` or default `"Extract all text content from this image. Return only the extracted text, no additional formatting."` +- This is the actual AI extraction prompt + +**Why extract during section generation?** +- Vision AI extraction is expensive (costs tokens, takes time) +- Only needed when actually generating content for a section +- Not needed for structure generation (just needs to know images exist) +- Deferred extraction saves costs and improves performance + +#### Current Flow + +``` +Step 2: extractAndPrepareContent() + ├─→ RAW extraction (parsing PDF/DOCX/etc.) - NO AI + ├─→ Creates ContentParts with raw data + ├─→ For images: marks needsVisionExtraction=True, stores extractionPrompt + └─→ For text: marks skipExtraction=True (already extracted) + +Step 3: generateStructure() + ├─→ Uses ContentParts metadata to assign to chapters + └─→ Creates structure with contentPart assignments + +Step 4: fillStructure() + ├─→ For each section: + │ ├─→ Check if ContentPart needsVisionExtraction==True + │ ├─→ If yes: Call Vision AI with extractionPrompt (Phase 2 extraction) + │ ├─→ Convert image → text ContentPart + │ └─→ Generate section content with processed ContentParts + └─→ Text ContentParts: Used directly (skipExtraction=True) +``` + +#### Is This Optimal? + +**Arguments FOR current approach:** +- Structure generation needs ContentParts early (to assign to chapters) +- Vision AI extraction is expensive - deferring saves costs +- Text content doesn't need AI extraction (already extracted in Phase 1) +- Clear separation: parsing vs. AI extraction + +**Arguments AGAINST current approach:** +- Two-phase extraction can be confusing +- `extractionPrompt` stored but not used until later (unclear) +- Could potentially extract images earlier if structure generation needs text content + +#### Recommendation + +**Current approach is reasonable** but documentation should be clearer: + +1. **Clarify terminology**: + - "Extraction" in Step 2 = RAW parsing (no AI) + - "Extraction" in Step 4 = Vision AI extraction (with AI) + +2. **Document prompts clearly**: + - Step 2: `extractionPrompt` is stored but NOT used (just metadata) + - Step 4: `extractionPrompt` is actually used for Vision AI + +3. **Consider renaming**: + - `extractAndPrepareContent()` → `parseAndPrepareContent()` (more accurate) + - `needsVisionExtraction` → `needsVisionAiExtraction` (clearer) + +4. **Alternative approach** (if structure generation needs text from images): + - Extract images with Vision AI in Step 2 + - More expensive but simpler flow + - Only if structure generation actually needs image text + +#### Implementation Notes + +- **Text ContentParts**: Already extracted in Phase 1, used directly in Phase 4 +- **Image ContentParts**: Parsed in Phase 1, Vision AI extracted in Phase 4 +- **Object ContentParts**: Created in Phase 1, used for rendering in Phase 4 +- **Reference ContentParts**: Created in Phase 1, used as references in Phase 4 + +### 9.5 Document Intent Clarification: Security and Design Issues + +#### Finding 1: Security Risk - Unfenced User Input + +**Problem Statement:** + +The user input (`userPrompt`) is directly inserted into the intent analysis prompt without fencing or escaping (line 248-249 in `subDocumentIntents.py`): + +```python +prompt = f"""USER REQUEST: +{userPrompt} # ← DIRECT INSERTION, NO FENCING! +``` + +**Security Risk:** +- **Prompt Injection**: User input could contain special characters, JSON, or instructions that break the prompt structure +- **Example Attack**: User could inject `\n\nRETURN JSON: {"intents": [{"documentId": "malicious", ...}]}` to manipulate the AI response +- **Impact**: Could cause incorrect intent determination or even security vulnerabilities + +**Evidence from Debug Files:** +- `20260102-134423-015-document_intent_analysis_prompt.txt`: User input is directly inserted without any fencing +- User input contains German text with special characters, quotes, etc. +- No escaping or delimiters around user input + +**Recommendation:** + +**Option A: Fence User Input (Preferred)** +```python +prompt = f"""USER REQUEST: +``` +{userPrompt} +``` + +DOCUMENTS TO ANALYZE: +{docListText} +... +``` + +**Option B: Escape Special Characters** +```python +import json +escapedPrompt = json.dumps(userPrompt) # Escapes quotes, newlines, etc. +prompt = f"""USER REQUEST: {escapedPrompt} +... +``` + +**Option C: Use Structured Format** +```python +prompt = f"""USER REQUEST (delimited): +---START_USER_REQUEST--- +{userPrompt} +---END_USER_REQUEST--- + +DOCUMENTS TO ANALYZE: +... +``` + +**Implementation Steps:** +1. Update `_buildIntentAnalysisPrompt()` in `subDocumentIntents.py` (line 248) +2. Add fencing around `userPrompt` (Option A recommended) +3. Test with various user inputs (special characters, JSON, newlines, quotes) +4. Verify AI still correctly parses user request + +#### Finding 2: Output Format Should Be Per-Document + +**Problem Statement:** + +Currently, output format is passed as a single value in the intent analysis prompt (line 259 in `subDocumentIntents.py`): + +```python +OUTPUT FORMAT: {outputFormat} # Single format for all documents +``` + +**Issue:** +- Output format is global, but different documents might need different formats +- Similar to language handling: each document can have its own language +- Should be determined per document based on intention + +**Current Behavior:** +- Single `outputFormat` parameter (e.g., "docx") +- All documents analyzed with same output format in mind +- AI considers output format when determining intents (e.g., DOCX → images need "render") + +**Proposed Behavior:** +- Each `DocumentIntent` should have optional `outputFormat` field +- AI determines output format per document based on user intention +- If not specified, use global output format as fallback +- Similar to language: per-document with fallback to global + +**Example:** +```python +DocumentIntent( + documentId: str, + intents: List[str], + extractionPrompt: Optional[str], + reasoning: str, + outputFormat: Optional[str] = None # NEW: Per-document format +) +``` + +**Benefits:** +- More flexible: Different documents can have different output formats +- Better intention analysis: AI can determine format based on document purpose +- Consistent with language handling (per-document with fallback) + +**Migration Steps:** +1. Add `outputFormat` field to `DocumentIntent` model (optional) +2. Update intent analysis prompt to ask AI to determine format per document +3. Update prompt to show: "OUTPUT FORMAT (default: {outputFormat})" instead of "OUTPUT FORMAT: {outputFormat}" +4. Update structure generation to use per-document format if available +5. Fallback to global format if not specified per document + +**Updated Prompt Structure:** +```python +OUTPUT FORMAT (default: {outputFormat}): +- If not specified per document, use default format above +- Determine format per document based on user intention +- Examples: "docx", "pdf", "html", "json", etc. + +RETURN JSON: +{{ + "intents": [ + {{ + "documentId": "doc_1", + "intents": ["extract"], + "extractionPrompt": "...", + "outputFormat": "docx", # NEW: Per-document format + "reasoning": "..." + }} + ] +}} +``` + +#### Implementation Priority + +**High Priority:** +- Finding 1 (Security Risk): **CRITICAL** - Fix immediately + - Security vulnerability that could be exploited + - Easy to fix (add fencing) + - Low risk change + +**Medium Priority:** +- Finding 2 (Output Format): **IMPROVEMENT** - Plan for next iteration + - Architectural improvement + - Requires model changes + - More complex migration + +--- + +## 10. Implementation Plan: Target State Migration + +This section provides a detailed implementation plan for migrating to the target architecture described in Section 9.3. The plan focuses on documents/content handling, output formats, languages, and clear handover states between phases. + +### 10.1 Overview: Major Phases and Handover States + +#### Phase Flow Diagram + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ PHASE 1: Document Intent Clarification │ +│ ────────────────────────────────────────────────────────────────── │ +│ INPUT: │ +│ - userPrompt: str (fenced) │ +│ - documentList: DocumentReferenceList (optional) │ +│ - contentParts: List[ContentPart] (optional) │ +│ - actionParameters: Dict (outputFormat, language, etc.) │ +│ │ +│ THROUGHPUT: │ +│ 1. Resolve documents from documentList │ +│ 2. Map pre-extracted JSONs to original documents │ +│ 3. AI analyzes document purposes │ +│ 4. Map intents back to JSON doc IDs (if applicable) │ +│ │ +│ OUTPUT: │ +│ - documentIntents: List[DocumentIntent] │ +│ * documentId: str │ +│ * intents: List[str] (["extract", "render", "reference"]) │ +│ * extractionPrompt: str (optional) │ +│ * outputFormat: str (optional, per-document) ← NEW │ +│ * language: str (optional, per-document) ← NEW │ +│ * reasoning: str │ +│ │ +│ HANDOVER STATE: │ +│ - documentIntents: Complete intent analysis │ +│ - documents: Resolved ChatDocuments │ +│ - preExtractedMapping: Map[originalDocId, jsonDocId] │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ PHASE 2: Content Extraction and Preparation │ +│ ────────────────────────────────────────────────────────────────── │ +│ INPUT: │ +│ - documents: List[ChatDocument] │ +│ - documentIntents: List[DocumentIntent] │ +│ - contentParts: List[ContentPart] (optional, pre-extracted) │ +│ - preExtractedMapping: Map[originalDocId, jsonDocId] │ +│ │ +│ THROUGHPUT: │ +│ 1. Identify pre-extracted JSON documents │ +│ 2. Filter out original documents covered by pre-extracted │ +│ 3. Identify already extracted documents (from contentParts) │ +│ 4. Filter documents to extract (exclude duplicates) │ +│ 5. Process pre-extracted JSON documents → ContentParts │ +│ 6. RAW extraction (NO AI) for regular documents │ +│ 7. Merge: pre-extracted + extracted + provided contentParts │ +│ 8. Apply intents to ContentParts (extract, render, reference) │ +│ 9. Mark images for Vision AI extraction (deferred) │ +│ │ +│ OUTPUT: │ +│ - finalContentParts: List[ContentPart] │ +│ * id: str │ +│ * typeGroup: str │ +│ * mimeType: str │ +│ * data: Union[str, bytes] │ +│ * metadata: Dict │ +│ - documentId: str │ +│ - contentFormat: str ("extracted", "object", "reference") │ +│ - intent: str │ +│ - needsVisionExtraction: bool (for images) │ +│ - extractionPrompt: str (for Vision AI) │ +│ - originalFileName: str │ +│ - isPreExtracted: bool │ +│ - outputFormat: str (from DocumentIntent) ← NEW │ +│ - language: str (from DocumentIntent) ← NEW │ +│ │ +│ HANDOVER STATE: │ +│ - finalContentParts: Complete, ready for structure generation │ +│ - All documents processed (extracted or pre-extracted) │ +│ - Vision AI extraction deferred to Phase 4 │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ PHASE 3: Structure Generation │ +│ ────────────────────────────────────────────────────────────────── │ +│ INPUT: │ +│ - userPrompt: str │ +│ - finalContentParts: List[ContentPart] │ +│ - globalOutputFormat: str (fallback) │ +│ - globalLanguage: str (fallback) │ +│ │ +│ THROUGHPUT: │ +│ 1. Group ContentParts by documentId │ +│ 2. Determine per-document outputFormat (from ContentPart.metadata│ +│ or global fallback) │ +│ 3. Determine per-document language (from ContentPart.metadata │ +│ or global fallback) │ +│ 4. AI generates structure with chapters │ +│ 5. Assign ContentParts to chapters │ +│ │ +│ OUTPUT: │ +│ - chapterStructure: Dict │ +│ * documents: List[Dict] │ +│ - id: str │ +│ - title: str │ +│ - outputFormat: str (per-document) ← NEW │ +│ - language: str (per-document) ← NEW │ +│ - chapters: List[Dict] │ +│ * id: str │ +│ * level: int │ +│ * title: str │ +│ * generationHint: str │ +│ * contentParts: List[str] (ContentPart IDs) │ +│ │ +│ HANDOVER STATE: │ +│ - chapterStructure: Complete structure with ContentPart │ +│ assignments │ +│ - Per-document format/language determined │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ PHASE 4: Structure Filling │ +│ ────────────────────────────────────────────────────────────────── │ +│ INPUT: │ +│ - chapterStructure: Dict │ +│ - finalContentParts: List[ContentPart] │ +│ - userPrompt: str │ +│ │ +│ THROUGHPUT: │ +│ For each chapter: │ +│ 1. Generate sections structure (parallel) │ +│ 2. For each section: │ +│ a. Check if ContentParts need Vision AI extraction │ +│ b. If yes: Call Vision AI (Phase 2 deferred extraction) │ +│ c. Determine prompt type: │ +│ - WITH CONTENT: If contentParts assigned │ +│ → Use aggregation prompt (isAggregation=True) │ +│ → ContentParts passed as parameters │ +│ - WITHOUT CONTENT: If no contentParts │ +│ → Use generation prompt (isAggregation=False) │ +│ → Only generationHint in prompt │ +│ d. Generate section content with AI │ +│ │ +│ OUTPUT: │ +│ - filledStructure: Dict │ +│ * documents: List[Dict] │ +│ - chapters: List[Dict] │ +│ * sections: List[Dict] │ +│ - id: str │ +│ - content_type: str │ +│ - elements: List[Dict] │ +│ * type: str │ +│ * content: str (or base64 for images) │ +│ │ +│ HANDOVER STATE: │ +│ - filledStructure: Complete content, ready for rendering │ +│ - All Vision AI extractions completed │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ PHASE 5: Document Rendering │ +│ ────────────────────────────────────────────────────────────────── │ +│ INPUT: │ +│ - filledStructure: Dict │ +│ - per-document outputFormat (from Phase 3) │ +│ - per-document language (from Phase 3) │ +│ │ +│ THROUGHPUT: │ +│ 1. Group sections by document (from structure) │ +│ 2. For each document: │ +│ a. Use per-document outputFormat │ +│ b. Use per-document language │ +│ c. Render document in specified format │ +│ │ +│ OUTPUT: │ +│ - renderedDocuments: List[DocumentData] │ +│ * documentName: str │ +│ * documentData: bytes │ +│ * mimeType: str │ +│ │ +│ HANDOVER STATE: │ +│ - renderedDocuments: Final output ready for user │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### 10.2 Detailed Implementation Steps + +#### Step 1: Update DocumentIntent Model + +**File**: `gateway/modules/datamodels/datamodelExtraction.py` + +**Changes**: +```python +class DocumentIntent(BaseModel): + documentId: str + intents: List[str] # ["extract", "render", "reference"] + extractionPrompt: Optional[str] = None + outputFormat: Optional[str] = None # ← NEW: Per-document format + language: Optional[str] = None # ← NEW: Per-document language + reasoning: str +``` + +**Rationale**: +- Enables per-document output format and language determination +- Aligns with existing language handling pattern +- Allows AI to determine format/language based on document purpose + +#### Step 2: Update Intent Analysis Prompt + +**File**: `gateway/modules/services/serviceAi/subDocumentIntents.py` + +**Changes**: + +1. **Add fencing around userPrompt** (Security Fix): +```python +def _buildIntentAnalysisPrompt( + self, + userPrompt: str, + documents: List[ChatDocument], + actionParameters: Dict[str, Any] +) -> str: + # FENCE user input to prevent prompt injection + fencedUserPrompt = f"""```user_request +{userPrompt} +```""" + + prompt = f"""USER REQUEST: +{fencedUserPrompt} + +DOCUMENTS TO ANALYZE: +{docListText} + +TASK: For each document, determine: +1. Intents (can be multiple): "extract", "render", "reference" +2. Output format (optional): If document should be rendered in specific format +3. Language (optional): If document content should be in specific language + +OUTPUT FORMAT: {outputFormat} (global fallback) + +RETURN JSON: +{{ + "intents": [ + {{ + "documentId": "doc_1", + "intents": ["extract"], + "extractionPrompt": "Extract all text content", + "outputFormat": "pdf", // ← NEW: Optional, per-document + "language": "de", // ← NEW: Optional, per-document + "reasoning": "..." + }} + ] +}} +""" +``` + +2. **Remove global outputFormat from prompt** (or keep as fallback only): + - Output format should be determined per document based on intent + - Global format remains as fallback if not specified per document + +#### Step 3: Update ContentPart Metadata Propagation + +**File**: `gateway/modules/services/serviceAi/subContentExtraction.py` + +**Changes**: +```python +async def extractAndPrepareContent( + self, + documents: List[ChatDocument], + documentIntents: List[DocumentIntent], + parentOperationId: str, + getIntentForDocument: callable +) -> List[ContentPart]: + # ... existing extraction logic ... + + # When creating ContentParts, propagate outputFormat and language from DocumentIntent + for part in allContentParts: + intent = getIntentForDocument(part.metadata.get("documentId"), documentIntents) + if intent: + # Propagate per-document format and language to ContentPart + if intent.outputFormat: + part.metadata["outputFormat"] = intent.outputFormat + if intent.language: + part.metadata["language"] = intent.language +``` + +**Rationale**: +- ContentParts carry format/language information through pipeline +- Enables per-document rendering in Phase 5 + +#### Step 4: Update Structure Generation + +**File**: `gateway/modules/services/serviceAi/subStructureGeneration.py` + +**Changes**: + +1. **Determine per-document format/language from ContentParts**: +```python +def generateStructure( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str, # Global fallback + language: str, # Global fallback + parentOperationId: str +) -> Dict[str, Any]: + # Group ContentParts by documentId + partsByDocument = {} + for part in contentParts: + docId = part.metadata.get("documentId", "default") + if docId not in partsByDocument: + partsByDocument[docId] = [] + partsByDocument[docId].append(part) + + # Determine per-document format and language + documentFormats = {} + documentLanguages = {} + for docId, parts in partsByDocument.items(): + # Get format from first ContentPart (all parts from same doc should have same format) + docFormat = parts[0].metadata.get("outputFormat") or outputFormat + docLanguage = parts[0].metadata.get("language") or language + documentFormats[docId] = docFormat + documentLanguages[docId] = docLanguage + + # Update prompt to include per-document format/language + prompt = self._buildStructureGenerationPrompt( + userPrompt=userPrompt, + contentParts=contentParts, + documentFormats=documentFormats, # ← NEW + documentLanguages=documentLanguages, # ← NEW + globalOutputFormat=outputFormat, # Fallback + globalLanguage=language # Fallback + ) +``` + +2. **Update prompt to include per-document format/language**: +```python +def _buildStructureGenerationPrompt( + self, + userPrompt: str, + contentParts: List[ContentPart], + documentFormats: Dict[str, str], # ← NEW + documentLanguages: Dict[str, str], # ← NEW + globalOutputFormat: str, + globalLanguage: str +) -> str: + # ... existing prompt building ... + + # Add per-document format/language information + formatLanguageInfo = "\n## PER-DOCUMENT OUTPUT FORMATS AND LANGUAGES\n" + for docId, docFormat in documentFormats.items(): + docLanguage = documentLanguages.get(docId, globalLanguage) + formatLanguageInfo += f"- Document {docId}: Format={docFormat}, Language={docLanguage}\n" + + prompt += formatLanguageInfo + + prompt += """ +## DOCUMENT LANGUAGE +- Each document can have its own language (ISO 639-1 code: "de", "en", "fr", etc.) +- Per-document languages are listed above +- If not specified, use global language: "{globalLanguage}" + +## OUTPUT FORMAT +- Each document can have its own output format +- Per-document formats are listed above +- If not specified, use global format: "{globalOutputFormat}" +""" +``` + +#### Step 5: Update Structure Filling - Two Prompt Types + +**File**: `gateway/modules/services/serviceAi/subStructureFilling.py` + +**Changes**: + +1. **Ensure two prompt types are used** (already implemented, verify): +```python +async def _fillSingleSection( + self, + section: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + generationHint: str, + # ... other params ... +) -> List[Dict[str, Any]]: + contentPartIds = section.get("contentPartIds", []) + hasContentParts = len(contentPartIds) > 0 + + if hasContentParts: + # PROMPT TYPE 1: WITH CONTENT (Aggregation) + # ContentParts passed as parameters, not in prompt text + isAggregation = True + relevantParts = [p for p in contentParts if p.id in contentPartIds] + + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=relevantParts, # Passed as parameters + userPrompt=userPrompt, + generationHint=generationHint, + isAggregation=True, # ← Key flag + language=language + ) + else: + # PROMPT TYPE 2: WITHOUT CONTENT (Generation) + # Only generationHint in prompt, no ContentParts + isAggregation = False + + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[], # Empty + userPrompt=userPrompt, + generationHint=generationHint, + isAggregation=False, # ← Key flag + language=language + ) +``` + +2. **Verify `_buildSectionGenerationPrompt` handles both cases**: +```python +def _buildSectionGenerationPrompt( + self, + section: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + generationHint: str, + isAggregation: bool, # ← Determines prompt type + language: str +) -> str: + if isAggregation: + # TYPE 1: WITH CONTENT + # ContentParts are passed as parameters to AI call + # Don't include full content in prompt text (token efficiency) + prompt = f"""Generate content for section based on provided ContentParts. + +Section: {sectionTitle} +Generation Hint: {generationHint} +Language: {language} + +ContentParts are provided as parameters (not shown in prompt for efficiency). +Use the ContentParts data to generate the section content. +""" + else: + # TYPE 2: WITHOUT CONTENT + # Only generationHint, no ContentParts + prompt = f"""Generate content for section based on generation hint. + +Section: {sectionTitle} +Generation Hint: {generationHint} +Language: {language} + +Generate content based on the generation hint without referencing external content. +""" +``` + +**Rationale**: +- **Type 1 (with content)**: Efficient for large content (ContentParts as parameters) +- **Type 2 (without content)**: Simple generation based on hint only +- Already implemented via `isAggregation` flag, verify it's used correctly + +#### Step 6: Update Document Rendering + +**File**: `gateway/modules/services/serviceGeneration/paths/documentPath.py` + +**Changes**: +```python +async def renderDocuments( + self, + filledStructure: Dict[str, Any], + outputFormat: str, # Global fallback + language: str # Global fallback +) -> List[DocumentData]: + renderedDocuments = [] + + for doc in filledStructure.get("documents", []): + docId = doc.get("id") + docFormat = doc.get("outputFormat") or outputFormat # ← Use per-document format + docLanguage = doc.get("language") or language # ← Use per-document language + + # Render document with per-document format and language + renderedDoc = await self._renderSingleDocument( + doc=doc, + outputFormat=docFormat, + language=docLanguage + ) + renderedDocuments.append(renderedDoc) + + return renderedDocuments +``` + +#### Step 7: Update ai.process to Pass documentList + +**File**: `gateway/modules/workflows/methods/methodAi/actions/process.py` + +**Changes**: +```python +# Phase 7.3: Pass both documentList and contentParts to AI service +# (Remove extraction logic from here - handled by AI service) + +# Use unified callAiContent method with BOTH parameters +aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + documentList=documentList, # ← PASS documentList (was missing) + contentParts=contentParts, # ← PASS contentParts + outputFormat=output_format, + parentOperationId=operationId, + generationIntent=generationIntent +) +``` + +**Rationale**: +- Centralizes extraction logic in AI service +- Enables intelligent merging with deduplication +- Consistent behavior across all code paths + +### 10.3 Handover State Definitions + +#### State 1: After Intent Clarification +```python +class IntentClarificationState: + documentIntents: List[DocumentIntent] # Complete intent analysis + documents: List[ChatDocument] # Resolved documents + preExtractedMapping: Dict[str, str] # Map[originalDocId, jsonDocId] + + # Validation + assert len(documentIntents) == len(documents) # One intent per document + assert all(intent.documentId in [d.id for d in documents] for intent in documentIntents) +``` + +#### State 2: After Content Extraction +```python +class ContentExtractionState: + finalContentParts: List[ContentPart] # All content parts ready + + # Validation + assert all(part.metadata.get("documentId") for part in finalContentParts) + assert all(part.metadata.get("contentFormat") in ["extracted", "object", "reference"] + for part in finalContentParts) + # All documents either extracted or pre-extracted + assert len(set(p.metadata.get("documentId") for p in finalContentParts)) == len(documents) +``` + +#### State 3: After Structure Generation +```python +class StructureGenerationState: + chapterStructure: Dict[str, Any] # Complete structure + + # Validation + assert "documents" in chapterStructure + for doc in chapterStructure["documents"]: + assert "outputFormat" in doc # Per-document format + assert "language" in doc # Per-document language + assert "chapters" in doc + for chapter in doc["chapters"]: + assert "contentParts" in chapter # ContentPart assignments +``` + +#### State 4: After Structure Filling +```python +class StructureFillingState: + filledStructure: Dict[str, Any] # Complete content + + # Validation + assert "documents" in filledStructure + for doc in filledStructure["documents"]: + for chapter in doc.get("chapters", []): + for section in chapter.get("sections", []): + assert "elements" in section # Generated elements + # All Vision AI extractions completed + assert not any(p.metadata.get("needsVisionExtraction") + for p in contentParts) +``` + +#### State 5: After Document Rendering +```python +class DocumentRenderingState: + renderedDocuments: List[DocumentData] # Final output + + # Validation + assert len(renderedDocuments) > 0 + for doc in renderedDocuments: + assert doc.documentData # Non-empty + assert doc.mimeType # Valid MIME type +``` + +### 10.4 Migration Checklist + +#### Phase 1: Model Updates +- [ ] Add `outputFormat` and `language` to `DocumentIntent` model +- [ ] Update intent analysis prompt parser to handle new fields +- [ ] Add validation for new fields + +#### Phase 2: Intent Analysis Updates +- [ ] **CRITICAL**: Add fencing around `userPrompt` in intent analysis prompt +- [ ] Update prompt to ask for per-document format/language +- [ ] Update prompt to remove global outputFormat dependency (or keep as fallback) +- [ ] Test with various user inputs (special chars, JSON, newlines) + +#### Phase 3: Content Extraction Updates +- [ ] Propagate `outputFormat` and `language` from `DocumentIntent` to `ContentPart.metadata` +- [ ] Verify pre-extracted JSON handling preserves format/language +- [ ] Test merging logic with format/language propagation + +#### Phase 4: Structure Generation Updates +- [ ] Group ContentParts by documentId +- [ ] Determine per-document format/language from ContentPart metadata +- [ ] Update structure generation prompt to include per-document info +- [ ] Update structure output to include per-document format/language + +#### Phase 5: Structure Filling Verification +- [ ] Verify two prompt types are correctly used: + - [ ] `isAggregation=True`: ContentParts as parameters + - [ ] `isAggregation=False`: Only generationHint +- [ ] Test both prompt types with various scenarios +- [ ] Verify Vision AI extraction happens during filling phase + +#### Phase 6: Document Rendering Updates +- [ ] Use per-document format from structure +- [ ] Use per-document language from structure +- [ ] Fallback to global format/language if not specified +- [ ] Test multi-document rendering with different formats/languages + +#### Phase 7: ai.process Refactoring +- [ ] Remove extraction logic from `ai.process` +- [ ] Pass `documentList` to `callAiContent()` +- [ ] Pass `contentParts` to `callAiContent()` +- [ ] Verify intelligent merging in AI service works correctly + +#### Phase 8: Testing +- [ ] Test with pre-extracted JSON documents +- [ ] Test with mixed `documentList` + `contentParts` +- [ ] Test per-document format/language determination +- [ ] Test two prompt types in structure filling +- [ ] Test multi-document output with different formats/languages +- [ ] Test security: prompt injection attempts with fenced input + +#### Phase 9: Documentation +- [ ] Update API documentation +- [ ] Update developer documentation +- [ ] Update user documentation (if applicable) + +--- + +## End of Analysis + +This document provides a comprehensive overview of the content extraction and processing logic in the `ai.process` action. For implementation details, refer to the source files referenced throughout this document. + +**Note**: The "Recommendations and Next Steps" section (Section 9) will be expanded with additional findings and improvements as analysis continues. diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 65bae155..a07aa441 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -14,10 +14,6 @@ from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, from modules.datamodels.datamodelDocument import RenderedDocument from modules.interfaces.interfaceAiObjects import AiObjects from modules.shared.jsonUtils import ( - extractJsonString, - repairBrokenJson, - extractSectionsFromDocument, - buildContinuationContext, parseJsonWithModel ) from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler @@ -209,7 +205,7 @@ Respond with ONLY a JSON object in this exact format: processingMode=ProcessingModeEnum.BASIC ) - async def _callAiWithLooping( + async def callAiWithLooping( self, prompt: str, options: AiCallOptions, @@ -218,11 +214,12 @@ Respond with ONLY a JSON object in this exact format: promptArgs: Optional[Dict[str, Any]] = None, operationId: Optional[str] = None, userPrompt: Optional[str] = None, - contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content + contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content + useCaseId: Optional[str] = None # REQUIRED: Explicit use case ID for generic looping system ) -> str: - """Delegate to AiCallLooper.""" + """Public method: Delegate to AiCallLooper for AI calls with looping support.""" return await self.aiCallLooper.callAiWithLooping( - prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts + prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts, useCaseId ) async def _defineKpisFromPrompt( @@ -341,49 +338,21 @@ Respond with ONLY a JSON object in this exact format: prompt: str, options: AiCallOptions, title: Optional[str], - aiOperationId: str + parentOperationId: Optional[str] ) -> AiResponse: - """Handle IMAGE_GENERATE operation type.""" - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") + """Handle IMAGE_GENERATE operation type using image generation path.""" + from modules.services.serviceGeneration.paths.imagePath import ImageGenerationPath - request = AiCallRequest( - prompt=prompt, - context="", - options=options - ) + imagePath = ImageGenerationPath(self.services) - response = await self.callAi(request) + # Extract format from options + format = options.resultFormat or "png" - if not response.content: - errorMsg = f"No image data returned: {response.content}" - logger.error(f"Error in AI image generation: {errorMsg}") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(errorMsg) - - imageDoc = DocumentData( - documentName="generated_image.png", - documentData=response.content, - mimeType="image/png" - ) - - metadata = AiResponseMetadata( - title=title or "Generated Image", - operationType=options.operationType.value - ) - - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - "ai.generate.image" - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=response.content, - metadata=metadata, - documents=[imageDoc] + return await imagePath.generateImages( + userPrompt=prompt, + format=format, + title=title, + parentOperationId=parentOperationId ) async def _handleWebOperation( @@ -393,7 +362,7 @@ Respond with ONLY a JSON object in this exact format: opType: OperationTypeEnum, aiOperationId: str ) -> AiResponse: - """Handle WEB_SEARCH and WEB_CRAWL operation types.""" + """Handle WEB_SEARCH_DATA and WEB_CRAWL operation types.""" self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}") request = AiCallRequest( @@ -441,57 +410,58 @@ Respond with ONLY a JSON object in this exact format: return intent return None - async def _clarifyDocumentIntents( + async def clarifyDocumentIntents( self, documents: List[ChatDocument], userPrompt: str, actionParameters: Dict[str, Any], parentOperationId: str ) -> List[DocumentIntent]: - """Delegate to DocumentIntentAnalyzer.""" + """Public method: Delegate to DocumentIntentAnalyzer.""" return await self.intentAnalyzer.clarifyDocumentIntents( documents, userPrompt, actionParameters, parentOperationId ) - async def _extractAndPrepareContent( + async def extractAndPrepareContent( self, documents: List[ChatDocument], documentIntents: List[DocumentIntent], parentOperationId: str ) -> List[ContentPart]: - """Delegate to ContentExtractor.""" + """Public method: Delegate to ContentExtractor.""" return await self.contentExtractor.extractAndPrepareContent( documents, documentIntents, parentOperationId, self._getIntentForDocument ) - async def _generateStructure( + async def generateStructure( self, userPrompt: str, contentParts: List[ContentPart], - outputFormat: str, - parentOperationId: str + outputFormat: Optional[str] = None, + parentOperationId: str = None ) -> Dict[str, Any]: - """Delegate to StructureGenerator.""" + """Public method: Delegate to StructureGenerator.""" return await self.structureGenerator.generateStructure( userPrompt, contentParts, outputFormat, parentOperationId ) - async def _fillStructure( + async def fillStructure( self, structure: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str, parentOperationId: str ) -> Dict[str, Any]: - """Delegate to StructureFiller.""" + """Public method: Delegate to StructureFiller.""" return await self.structureFiller.fillStructure( structure, contentParts, userPrompt, parentOperationId ) - async def _renderResult( + async def renderResult( self, filledStructure: Dict[str, Any], outputFormat: str, + language: str, title: str, userPrompt: str, parentOperationId: str @@ -500,9 +470,15 @@ Respond with ONLY a JSON object in this exact format: Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben. + Render filled structure to documents. + Per-document format and language are extracted from structure (validated in State 3). + The outputFormat and language parameters are only used as global fallbacks. + Multiple documents can have different formats and languages. + Args: filledStructure: Gefüllte Struktur mit elements - outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet + outputFormat: Ziel-Format (pdf, docx, html, etc.) - Global fallback + language: Language (global fallback) - Per-document language extracted from structure title: Dokument-Titel userPrompt: User-Anfrage parentOperationId: Parent Operation-ID für ChatLog-Hierarchie @@ -511,6 +487,11 @@ Respond with ONLY a JSON object in this exact format: List of RenderedDocument objects. Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei) """ + # Language comes from structure (per-document), validated in State 3 + # This parameter is only used as global fallback if structure validation fails + # Use validated currentUserLanguage as fallback (always valid) + if not language: + language = self._getUserLanguage() if hasattr(self, '_getUserLanguage') else (self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') else 'en') # Erstelle Operation-ID für Rendering renderOperationId = f"{parentOperationId}_rendering" @@ -533,6 +514,7 @@ Respond with ONLY a JSON object in this exact format: renderedDocuments = await generationService.renderReport( filledStructure, outputFormat, + language, # Pass language (global fallback, per-document extracted in renderReport) title, userPrompt, self, @@ -577,13 +559,14 @@ Respond with ONLY a JSON object in this exact format: documentIntents: Optional[List[DocumentIntent]] = None, outputFormat: Optional[str] = None, title: Optional[str] = None, - parentOperationId: Optional[str] = None + parentOperationId: Optional[str] = None, + generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection) ) -> AiResponse: """ - Einheitliche AI-Content-Verarbeitung - Single Entry Point für alle AI-Actions. + Unified AI content generation with explicit intent requirement. - Alle AI-Actions (ai.process, ai.generateDocument, etc.) routen hier durch. - Sie unterscheiden sich nur in Parametern, nicht in Logik. + All AI-Actions (ai.process, ai.generateDocument, etc.) route through here. + They differ only in parameters, not in logic. Args: prompt: The main prompt for the AI call @@ -594,6 +577,8 @@ Respond with ONLY a JSON object in this exact format: outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx') title: Optional title for generated documents parentOperationId: Optional parent operation ID for hierarchical logging + generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action. + NO auto-detection - actions must explicitly specify intent. Returns: AiResponse with content, metadata, and optional documents @@ -605,18 +590,18 @@ Respond with ONLY a JSON object in this exact format: aiOperationId = f"ai_content_{workflowId}_{int(time.time())}" # Starte Progress-Tracking mit Parent-Referenz + formatDisplay = outputFormat if outputFormat else "auto-determined" self.services.chat.progressLogStart( aiOperationId, "AI content processing", "Content Processing", - f"Format: {outputFormat or 'text'}", + f"Format: {formatDisplay}", parentOperationId=parentOperationId ) try: - # Initialisiere Defaults - if not outputFormat: - outputFormat = "txt" + # outputFormat is optional - if None, formats determined from prompt by AI + # No default fallback here - let AI service handle it opType = getattr(options, "operationType", None) if not opType: @@ -625,118 +610,257 @@ Respond with ONLY a JSON object in this exact format: # Route zu Operation-spezifischen Handlern if opType == OperationTypeEnum.IMAGE_GENERATE: - return await self._handleImageGeneration(prompt, options, title, aiOperationId) + # Image generation - route to image path + return await self._handleImageGeneration(prompt, options, title, parentOperationId) - if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL: + if opType == OperationTypeEnum.WEB_SEARCH_DATA or opType == OperationTypeEnum.WEB_CRAWL: return await self._handleWebOperation(prompt, options, opType, aiOperationId) - # Dokument-Generierungs-Pfad - options.compressPrompt = False - options.compressContext = False + # Data generation - REQUIRES explicit generationIntent + if opType == OperationTypeEnum.DATA_GENERATE: + if not generationIntent: + errorMsg = ( + "generationIntent is required for DATA_GENERATE operation. " + "Actions must explicitly specify 'document' or 'code' intent. " + "No auto-detection - use qualified actions (ai.generateDocument, ai.generateCode)." + ) + logger.error(errorMsg) + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(errorMsg) + + # Route based on explicit intent (no auto-detection, no fallback) + if generationIntent == "code": + # Route to code generation path + return await self._handleCodeGeneration( + prompt=prompt, + options=options, + contentParts=contentParts, + outputFormat=outputFormat, + title=title, + parentOperationId=parentOperationId + ) + else: + # Route to document generation path (existing behavior) + return await self._handleDocumentGeneration( + prompt=prompt, + options=options, + documentList=documentList, + documentIntents=documentIntents, + contentParts=contentParts, + outputFormat=outputFormat, + title=title, + parentOperationId=parentOperationId + ) - # Schritt 5A: Kläre Dokument-Intents + # DATA_EXTRACT: Extract content from documents and process with AI (no structure generation) + if opType == OperationTypeEnum.DATA_EXTRACT: + return await self._handleDataExtraction( + prompt=prompt, + options=options, + documentList=documentList, + documentIntents=documentIntents, + contentParts=contentParts, + outputFormat=outputFormat, + title=title, + parentOperationId=parentOperationId + ) + + # Other operation types (DATA_ANALYSE, etc.) - not supported + errorMsg = f"Unsupported operation type: {opType}. Supported types: IMAGE_GENERATE, DATA_GENERATE, DATA_EXTRACT" + logger.error(errorMsg) + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(errorMsg) + + except Exception as e: + logger.error(f"Error in callAiContent: {str(e)}") + self.services.chat.progressLogFinish(aiOperationId, False) + raise + + async def _handleDataExtraction( + self, + prompt: str, + options: AiCallOptions, + documentList: Optional[Any], + documentIntents: Optional[List[DocumentIntent]], + contentParts: Optional[List[ContentPart]], + outputFormat: str, + title: str, + parentOperationId: Optional[str] + ) -> AiResponse: + """ + Handle DATA_EXTRACT: Extract content from documents (no AI), then process with AI. + This is the original flow: extract all documents first, then process contentParts with AI. + """ + import time + + # Create operation ID + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + extractOperationId = f"data_extract_{workflowId}_{int(time.time())}" + + # Start progress tracking + self.services.chat.progressLogStart( + extractOperationId, + "Data Extraction", + "Extraction", + f"Format: {outputFormat}", + parentOperationId=parentOperationId + ) + + try: + # Step 1: Get documents from documentList documents = [] if documentList: documents = self.services.chat.getChatDocumentsFromDocumentList(documentList) + # Filter: Remove original documents if already covered by pre-extracted JSONs + # (to prevent duplicate ContentParts - pre-extracted JSONs contain already extracted ContentParts) + if documents: + # Step 1: Identify all original document IDs covered by pre-extracted JSONs + originalDocIdsCoveredByPreExtracted = set() + for doc in documents: + preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc) + if preExtracted: + originalDocId = preExtracted["originalDocument"]["id"] + originalDocIdsCoveredByPreExtracted.add(originalDocId) + logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}") + + # Step 2: Filter documents - remove originals covered by pre-extracted JSONs + filteredDocuments = [] + for doc in documents: + preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc) + if preExtracted: + filteredDocuments.append(doc) # Keep pre-extracted JSON + elif doc.id in originalDocIdsCoveredByPreExtracted: + logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON") + else: + filteredDocuments.append(doc) # Keep regular document + + documents = filteredDocuments # Use filtered list + + # Step 2: Clarify document intents (if not provided) - REQUIRED for all documents if not documentIntents and documents: - documentIntents = await self._clarifyDocumentIntents( + documentIntents = await self.clarifyDocumentIntents( documents, prompt, {"outputFormat": outputFormat}, - aiOperationId + extractOperationId ) - # Schritt 5B: Extrahiere und bereite Content vor + # Step 3: Extract and prepare content (NO AI - pure extraction) - REQUIRED for all documents if documents: - preparedContentParts = await self._extractAndPrepareContent( + preparedContentParts = await self.extractAndPrepareContent( documents, documentIntents or [], - aiOperationId + extractOperationId ) - # Merge mit bereitgestellten contentParts (falls vorhanden) + # Merge with provided contentParts (if any) if contentParts: - # Prüfe auf pre-extracted Content for part in contentParts: if part.metadata.get("skipExtraction", False): - # Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig part.metadata.setdefault("contentFormat", "extracted") part.metadata.setdefault("isPreExtracted", True) preparedContentParts.extend(contentParts) contentParts = preparedContentParts - # Schritt 5C: Generiere Struktur - structure = await self._generateStructure( - prompt, - contentParts or [], - outputFormat, - aiOperationId + # Step 4: Process extracted contentParts with AI (simple text processing, no structure generation) + if not contentParts: + raise ValueError("No content extracted from documents") + + # Use simple AI call to process extracted content + # Prepare content for AI processing + contentText = "\n\n".join([ + f"[Document: {part.metadata.get('documentName', 'Unknown')}]\n{part.data}" + for part in contentParts + if part.data + ]) + + # Call AI with extracted content + aiRequest = AiCallRequest( + prompt=f"{prompt}\n\nExtracted Content:\n{contentText}", + context="", + options=options ) - # Schritt 5D: Fülle Struktur - # Language will be extracted from services (user intention analysis) in fillStructure - filledStructure = await self._fillStructure( - structure, - contentParts or [], - prompt, - aiOperationId + aiResponse = await self.callAi(aiRequest) + + # Create response document + resultDocument = DocumentData( + documentName=f"{title or 'extracted_data'}.{outputFormat}", + documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content, + mimeType=f"text/{outputFormat}" if outputFormat in ["txt", "json", "csv"] else "application/octet-stream" ) - # Schritt 5E: Rendere Resultat - # Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder) - renderedDocuments = await self._renderResult( - filledStructure, - outputFormat, - title or "Generated Document", - prompt, - aiOperationId - ) - - # Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData - documentDataList = [] - for renderedDoc in renderedDocuments: - try: - # Erstelle DocumentData für jedes gerenderte Dokument - docDataObj = DocumentData( - documentName=renderedDoc.filename, - documentData=renderedDoc.documentData, - mimeType=renderedDoc.mimeType, - sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument - ) - documentDataList.append(docDataObj) - logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})") - except Exception as e: - logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}") - - if not documentDataList: - raise ValueError("No documents were rendered") - metadata = AiResponseMetadata( - title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"), - operationType=opType.value + title=title or "Extracted Data", + operationType=OperationTypeEnum.DATA_EXTRACT.value ) - # Debug-Log (harmonisiert) - self.services.utils.writeDebugFile( - json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str), - "document_generation_response" - ) - - self.services.chat.progressLogFinish(aiOperationId, True) + self.services.chat.progressLogFinish(extractOperationId, True) return AiResponse( - content=json.dumps(filledStructure), + content=aiResponse.content if isinstance(aiResponse.content, str) else aiResponse.content.decode('utf-8', errors='replace'), metadata=metadata, - documents=documentDataList + documents=[resultDocument] ) except Exception as e: - logger.error(f"Error in callAiContent: {str(e)}") - self.services.chat.progressLogFinish(aiOperationId, False) + logger.error(f"Error in data extraction: {str(e)}") + self.services.chat.progressLogFinish(extractOperationId, False) raise + async def _handleCodeGeneration( + self, + prompt: str, + options: AiCallOptions, + contentParts: Optional[List[ContentPart]], + outputFormat: str, + title: str, + parentOperationId: Optional[str] + ) -> AiResponse: + """Handle code generation using code generation path.""" + from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath + + codePath = CodeGenerationPath(self.services) + return await codePath.generateCode( + userPrompt=prompt, + outputFormat=outputFormat, + contentParts=contentParts, + title=title or "Generated Code", + parentOperationId=parentOperationId + ) + + async def _handleDocumentGeneration( + self, + prompt: str, + options: AiCallOptions, + documentList: Optional[Any], + documentIntents: Optional[List[DocumentIntent]], + contentParts: Optional[List[ContentPart]], + outputFormat: str, + title: str, + parentOperationId: Optional[str] + ) -> AiResponse: + """Handle document generation using document generation path.""" + from modules.services.serviceGeneration.paths.documentPath import DocumentGenerationPath + + # Set compression options for document generation + options.compressPrompt = False + options.compressContext = False + + documentPath = DocumentGenerationPath(self.services) + return await documentPath.generateDocument( + userPrompt=prompt, + documentList=documentList, + documentIntents=documentIntents, + contentParts=contentParts, + outputFormat=outputFormat, + title=title or "Generated Document", + parentOperationId=parentOperationId + ) + + def _determineDocumentName( self, filledStructure: Dict[str, Any], diff --git a/modules/services/serviceAi/subAiCallLooping.py b/modules/services/serviceAi/subAiCallLooping.py index 6e2c90b5..2b71520b 100644 --- a/modules/services/serviceAi/subAiCallLooping.py +++ b/modules/services/serviceAi/subAiCallLooping.py @@ -16,6 +16,8 @@ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, Operati from modules.datamodels.datamodelExtraction import ContentPart from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler +from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry +from modules.workflows.processing.shared.stateTools import checkWorkflowStopped logger = logging.getLogger(__name__) @@ -28,6 +30,7 @@ class AiCallLooper: self.services = services self.aiService = aiService self.responseParser = responseParser + self.useCaseRegistry = LoopingUseCaseRegistry() # Initialize use case registry async def callAiWithLooping( self, @@ -38,7 +41,8 @@ class AiCallLooper: promptArgs: Optional[Dict[str, Any]] = None, operationId: Optional[str] = None, userPrompt: Optional[str] = None, - contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content + contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content + useCaseId: str = None # REQUIRED: Explicit use case ID - no auto-detection, no fallback ) -> str: """ Shared core function for AI calls with repair-based looping system. @@ -53,16 +57,38 @@ class AiCallLooper: operationId: Optional operation ID for progress tracking userPrompt: Optional user prompt for KPI definition contentParts: Optional content parts for first iteration + useCaseId: REQUIRED: Explicit use case ID - no auto-detection, no fallback Returns: Complete AI response after all iterations """ + # REQUIRED: useCaseId must be provided - no auto-detection, no fallback + if not useCaseId: + errorMsg = ( + "useCaseId is REQUIRED for callAiWithLooping. " + "No auto-detection - must explicitly specify use case ID. " + f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}" + ) + logger.error(errorMsg) + raise ValueError(errorMsg) + + # Validate use case exists + useCase = self.useCaseRegistry.get(useCaseId) + if not useCase: + errorMsg = ( + f"Use case '{useCaseId}' not found in registry. " + f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}" + ) + logger.error(errorMsg) + raise ValueError(errorMsg) + maxIterations = 50 # Prevent infinite loops iteration = 0 allSections = [] # Accumulate all sections across iterations lastRawResponse = None # Store last raw JSON response for continuation documentMetadata = None # Store document metadata (title, filename) from first iteration accumulationState = None # Track accumulation state for string accumulation + accumulatedDirectJson = [] # Accumulate JSON strings for direct return use cases (chapter_structure, code_structure) # Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID) parentOperationId = operationId # Use the parent's operationId directly @@ -91,24 +117,31 @@ class AiCallLooper: if not lastRawResponse: logger.warning(f"Iteration {iteration}: No previous response available for continuation!") - # Filter promptArgs to only include parameters that buildGenerationPrompt accepts - # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services - filteredPromptArgs = { - k: v for k, v in promptArgs.items() - if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services'] - } - # Always include services if available - if not filteredPromptArgs.get('services') and hasattr(self, 'services'): - filteredPromptArgs['services'] = self.services - - # Rebuild prompt with continuation context using the provided prompt builder - iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext) + # For section_content, pass all promptArgs (it uses buildSectionPromptWithContinuation which needs all args) + # For other use cases (chapter_structure, code_structure), filter to only accepted parameters + if useCaseId == "section_content": + # Pass all promptArgs plus continuationContext for section_content + iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext) + else: + # Filter promptArgs to only include parameters that buildGenerationPrompt accepts + # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services + filteredPromptArgs = { + k: v for k, v in promptArgs.items() + if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services'] + } + # Always include services if available + if not filteredPromptArgs.get('services') and hasattr(self, 'services'): + filteredPromptArgs['services'] = self.services + + # Rebuild prompt with continuation context using the provided prompt builder + iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext) else: # First iteration - use original prompt iterationPrompt = prompt # Make AI call try: + checkWorkflowStopped(self.services) if iterationOperationId: self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model") # ARCHITECTURE: Pass ContentParts directly to AiCallRequest @@ -199,36 +232,88 @@ class AiCallLooper: # Store raw response for continuation (even if broken) lastRawResponse = result - # Check if this is section content generation (has "elements" not "sections") - # Section content generation returns JSON with "elements" array, not document structure with "sections" - isSectionContentGeneration = False - parsedJsonForSection = None - extractedJsonForSection = None + # Parse JSON for use case handling + parsedJsonForUseCase = None + extractedJsonForUseCase = None + try: - extractedJsonForSection = extractJsonString(result) - parsedJson, parseError, _ = tryParseJson(extractedJsonForSection) + extractedJsonForUseCase = extractJsonString(result) + parsedJson, parseError, _ = tryParseJson(extractedJsonForUseCase) if parseError is None and parsedJson: - parsedJsonForSection = parsedJson - # Check if JSON has "elements" (section content) or "sections" (document structure) - if isinstance(parsedJson, dict): - if "elements" in parsedJson: - isSectionContentGeneration = True - elif isinstance(parsedJson, list) and len(parsedJson) > 0: - # Check if it's a list of elements (section content format) - if isinstance(parsedJson[0], dict) and "type" in parsedJson[0]: - isSectionContentGeneration = True + parsedJsonForUseCase = parsedJson except Exception: pass - if isSectionContentGeneration: - # This is section content generation - return the JSON directly - # No need to extract sections, just return the complete JSON string - logger.info(f"Iteration {iteration}: Section content generation detected (elements found), returning JSON directly") + # Handle use cases that return JSON directly (no section extraction needed) + directReturnUseCases = ["section_content", "chapter_structure", "code_structure", "code_content", "image_batch"] + if useCaseId in directReturnUseCases: + # For chapter_structure, code_structure, and section_content, check completeness and support looping + loopingUseCases = ["chapter_structure", "code_structure", "section_content"] + if useCaseId in loopingUseCases: + # If parsing failed (e.g., invalid JSON with comments or truncated JSON), continue looping to get valid JSON + if not parsedJsonForUseCase: + logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON parsing failed (likely incomplete/truncated), continuing iteration to complete") + # Accumulate response for merging in next iteration + accumulatedDirectJson.append(result) + + # Continue to next iteration - continuation prompt builder will handle the rest + if iterationOperationId: + self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation") + self.services.chat.progressLogFinish(iterationOperationId, True) + continue + + # Check completeness if we have parsed JSON + isComplete = JsonResponseHandler.isJsonComplete(parsedJsonForUseCase) + + if not isComplete: + logger.warning(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is incomplete, continuing for continuation") + # Accumulate response for merging in next iteration + accumulatedDirectJson.append(result) + + # Continue to next iteration - continuation prompt builder will handle the rest + if iterationOperationId: + self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation") + self.services.chat.progressLogFinish(iterationOperationId, True) + continue + else: + # JSON is complete - merge accumulated responses if any + if accumulatedDirectJson: + logger.info(f"Iteration {iteration}: Merging {len(accumulatedDirectJson) + 1} accumulated responses") + # Merge accumulated JSON strings with current response + mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result + for prevJson in accumulatedDirectJson[1:]: + mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson) + # Finally merge with current response + mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result) + + # Re-parse merged JSON + try: + extractedMerged = extractJsonString(mergedJsonString) + parsedMerged, parseError, _ = tryParseJson(extractedMerged) + if parseError is None and parsedMerged: + parsedJsonForUseCase = parsedMerged + result = mergedJsonString + logger.info(f"Successfully merged and parsed {len(accumulatedDirectJson) + 1} JSON fragments") + except Exception as e: + logger.warning(f"Failed to parse merged JSON, using last response: {e}") + + logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is complete") + + logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - returning JSON directly") if iterationOperationId: self.services.chat.progressLogFinish(iterationOperationId, True) - # Note: Debug files (_prompt and _response) are already written above for iteration 1 - # No need to write _final_result as it's redundant with _response - final_json = json.dumps(parsedJsonForSection, indent=2, ensure_ascii=False) if parsedJsonForSection else (extractedJsonForSection or result) + + # For section_content, return raw result to allow merging of multiple JSON blocks + # The merging logic in subStructureFilling.py will handle extraction and merging + if useCaseId == "section_content": + final_json = result # Return raw response to preserve all JSON blocks + else: + final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result) + + # Write final result for chapter structure and code structure (section_content skips it) + if useCaseId in ["chapter_structure", "code_structure"]: + self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result") + return final_json # Extract sections from response (handles both valid and broken JSON) @@ -558,6 +643,7 @@ If no trackable items can be identified, return: {{"kpis": []}} # Write KPI definition prompt to debug file self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt") + checkWorkflowStopped(self.services) response = await self.aiService.callAi(request) # Write KPI definition response to debug file diff --git a/modules/services/serviceAi/subContentExtraction.py b/modules/services/serviceAi/subContentExtraction.py index 229587f8..71c90879 100644 --- a/modules/services/serviceAi/subContentExtraction.py +++ b/modules/services/serviceAi/subContentExtraction.py @@ -16,6 +16,7 @@ from typing import Dict, Any, List, Optional from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent +from modules.workflows.processing.shared.stateTools import checkWorkflowStopped logger = logging.getLogger(__name__) @@ -70,6 +71,7 @@ class ContentExtractor: allContentParts = [] for document in documents: + checkWorkflowStopped(self.services) # Check if document is already a ContentExtracted document (pre-extracted JSON) logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content") preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(document) @@ -92,12 +94,28 @@ class ContentExtractor: logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}") if contentExtracted.parts: + # CRITICAL: Process pre-extracted parts - analyze structure parts for nested content + processedParts = [] for part in contentExtracted.parts: # Überspringe leere Parts (Container ohne Daten) if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): if part.typeGroup == "container": continue # Überspringe leere Container + # CRITICAL: Check if structure part contains nested parts (e.g., JSON with documentData.parts) + if part.typeGroup == "structure" and part.mimeType == "application/json" and part.data: + nestedParts = self._extractNestedPartsFromStructure(part, document, preExtracted, intent) + if nestedParts: + # Replace structure part with extracted nested parts + processedParts.extend(nestedParts) + logger.info(f"✅ Extracted {len(nestedParts)} nested parts from structure part {part.id}") + continue # Skip original structure part + + # Keep original part if no nested parts found + processedParts.append(part) + + # Use processed parts (with nested parts extracted) + for part in processedParts: if not part.metadata: part.metadata = {} @@ -180,177 +198,41 @@ class ContentExtractor: elif hasRenderIntent and not hasPartData: logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part") - # 3. Extract Intent: Erstelle Extracted ContentPart (möglicherweise mit zusätzlicher Verarbeitung) + # 3. Extract Intent: Erstelle Extracted ContentPart (NO AI processing here - happens during section generation) if hasExtractIntent: - # Spezielle Behandlung für Images: Vision AI für Text-Extraktion + # For images: Keep as image part with extract intent - Vision AI extraction happens during section generation if part.typeGroup == "image" and hasPartData: - logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)") - try: - extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. Return only the extracted text, no additional formatting." - extractedText = await self.extractTextFromImage(part, extractionPrompt) - if extractedText: - # Prüfe ob es ein Error-Message ist - isError = extractedText.startswith("[ERROR:") - - # Erstelle neuen Text-Part mit extrahiertem Text oder Error-Message - textPart = ContentPart( - id=f"extracted_{document.id}_{part.id}", - label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}", - typeGroup="text", - mimeType="text/plain", - data=extractedText, - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, - "extractionPrompt": extractionPrompt, - "extractionMethod": "vision", - "isError": isError - } - ) - allContentParts.append(textPart) - if isError: - logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}") - else: - logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars") - else: - # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) - errorMsg = f"Vision AI extraction failed: Unexpected empty response for image {part.id}" - logger.error(errorMsg) - errorPart = ContentPart( - id=f"extracted_{document.id}_{part.id}", - label=f"Error extracting from {part.label or 'Image'}", - typeGroup="text", - mimeType="text/plain", - data=f"[ERROR: {errorMsg}]", - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "extractionPrompt": extractionPrompt, - "extractionMethod": "vision", - "isError": True - } - ) - allContentParts.append(errorPart) - except Exception as e: - logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}") - import traceback - logger.debug(f"Traceback: {traceback.format_exc()}") - # Kein Fallback: Wenn render Intent vorhanden, haben wir bereits object Part - # Wenn nur extract Intent: Original Part ist kein Text, daher nicht als extracted hinzufügen - if not hasRenderIntent: - logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available") + logger.info(f"📷 Image {part.id} with extract intent - will be processed with Vision AI during section generation") + # Keep image part as-is, mark with extract intent + part.metadata.update({ + "contentFormat": "extracted", # Marked for extraction, but not yet extracted + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image.", + "needsVisionExtraction": True # Flag to indicate Vision AI extraction needed + }) + allContentParts.append(part) + originalPartAdded = True else: - # Für alle anderen Content-Typen: Prüfe ob AI-Verarbeitung benötigt wird - # WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Content - # (z.B. Text aus PDF-Text-Layer, Tabellen, etc.). Wenn "extract" Intent vorhanden ist, - # muss dieser Content mit AI verarbeitet werden basierend auf extractionPrompt. - - # Prüfe ob Part Text-Content hat (kann mit AI verarbeitet werden) - isTextContent = ( - part.typeGroup == "text" or - part.typeGroup == "table" or - (part.data and isinstance(part.data, str) and len(part.data.strip()) > 0) - ) - - if isTextContent and intent and intent.extractionPrompt: - # Text-Content mit extractionPrompt: Verarbeite mit AI - logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)") - try: - extractionPrompt = intent.extractionPrompt - processedText = await self.processTextContentWithAi(part, extractionPrompt) - if processedText: - # Prüfe ob es ein Error-Message ist - isError = processedText.startswith("[ERROR:") - - # Erstelle neuen Text-Part mit AI-verarbeitetem Text oder Error-Message - processedPart = ContentPart( - id=f"extracted_{document.id}_{part.id}", - label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}", - typeGroup="text", - mimeType="text/plain", - data=processedText, - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, - "extractionPrompt": extractionPrompt, - "extractionMethod": "ai", - "sourcePartId": part.id, - "fromExtractContent": True, - "isError": isError - } - ) - allContentParts.append(processedPart) - originalPartAdded = True - if isError: - logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}") - else: - logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars") - else: - # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) - errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}" - logger.error(errorMsg) - errorPart = ContentPart( - id=f"extracted_{document.id}_{part.id}", - label=f"Error processing {part.label or 'Content'}", - typeGroup="text", - mimeType="text/plain", - data=f"[ERROR: {errorMsg}]", - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "extractionPrompt": extractionPrompt, - "extractionMethod": "ai", - "sourcePartId": part.id, - "isError": True - } - ) - allContentParts.append(errorPart) - originalPartAdded = True - except Exception as e: - logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}") - import traceback - logger.debug(f"Traceback: {traceback.format_exc()}") - # Fallback: Verwende Original-Part - if not originalPartAdded: - part.metadata.update({ - "contentFormat": "extracted", - "intent": "extract", - "fromExtractContent": True, - "skipExtraction": True, - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None - }) - allContentParts.append(part) - originalPartAdded = True - else: - # Kein extractionPrompt oder kein Text-Content: Verwende Part direkt als extracted - # (Content ist bereits extrahiert von context.extractContent, keine weitere AI-Verarbeitung nötig) - # WICHTIG: Nur hinzufügen wenn noch nicht hinzugefügt (z.B. durch render Intent) - if not originalPartAdded: - part.metadata.update({ - "contentFormat": "extracted", - "intent": "extract", - "fromExtractContent": True, - "skipExtraction": True, # Bereits extrahiert - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None - }) - # Stelle sicher dass contentFormat gesetzt ist - if "contentFormat" not in part.metadata: - part.metadata["contentFormat"] = "extracted" - allContentParts.append(part) - originalPartAdded = True - logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)") + # For text/table content: Use directly as extracted (no AI processing here) + # AI processing with extractionPrompt happens during section generation + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, # Already extracted (raw extraction) + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": intent.extractionPrompt if intent and intent.extractionPrompt else None + }) + # Stelle sicher dass contentFormat gesetzt ist + if "contentFormat" not in part.metadata: + part.metadata["contentFormat"] = "extracted" + allContentParts.append(part) + originalPartAdded = True + logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)") # 4. Fallback: Wenn kein Intent vorhanden oder Part wurde noch nicht hinzugefügt # (sollte normalerweise nicht vorkommen, da default "extract" ist) @@ -488,6 +370,7 @@ class ContentExtractor: ) # extractContent ist nicht async - keine await nötig + checkWorkflowStopped(self.services) extractedResults = self.services.extraction.extractContent( [document], extractionOptions, @@ -508,6 +391,12 @@ class ContentExtractor: # Verknüpfung zu object Part (falls vorhanden) "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None }) + + # For images: Mark that Vision AI extraction is needed during section generation + if part.typeGroup == "image": + part.metadata["needsVisionExtraction"] = True + logger.info(f"📷 Image part {part.id} marked for Vision AI extraction during section generation") + # Stelle sicher, dass ID eindeutig ist (falls object Part existiert) if "render" in intent.intents: part.id = f"ext_{document.id}_{part.id}" @@ -519,10 +408,28 @@ class ContentExtractor: "content_extraction_result" ) + # State 2 Validation: Validate and auto-fix ContentParts + validatedParts = [] + for part in allContentParts: + # Validation 2.1: Skip ContentParts without documentId + if not part.metadata.get("documentId"): + logger.warning(f"Skipping ContentPart {part.id} - missing documentId in metadata") + continue + + # Validation 2.2: Skip ContentParts with invalid contentFormat + contentFormat = part.metadata.get("contentFormat") + if contentFormat not in ["extracted", "object", "reference"]: + logger.warning( + f"Skipping ContentPart {part.id} - invalid contentFormat: {contentFormat}" + ) + continue + + validatedParts.append(part) + # ChatLog abschließen self.services.chat.progressLogFinish(extractionOperationId, True) - return allContentParts + return validatedParts except Exception as e: self.services.chat.progressLogFinish(extractionOperationId, False) @@ -561,6 +468,7 @@ class ContentExtractor: ) # Verwende AI-Service für Vision AI-Verarbeitung + checkWorkflowStopped(self.services) response = await self.aiService.callAi(request) # Debug-Log für Response (harmonisiert) @@ -634,6 +542,7 @@ class ContentExtractor: ) # Verwende AI-Service für Text-Verarbeitung + checkWorkflowStopped(self.services) response = await self.aiService.callAi(request) # Debug-Log für Response (harmonisiert) @@ -667,4 +576,84 @@ class ContentExtractor: "application/x-zip-compressed" ] return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/") + + def _extractNestedPartsFromStructure( + self, + structurePart: ContentPart, + document: ChatDocument, + preExtracted: Dict[str, Any], + intent: Optional[Any] + ) -> List[ContentPart]: + """ + Extract nested parts from a structure ContentPart (e.g., JSON with documentData.parts). + + This is a generic function that analyzes pre-processed ContentParts and extracts + any nested parts that are embedded in structure data (typically JSON). + + Works with standard ContentExtracted format: documentData.parts array. + Each nested part is extracted as a separate ContentPart with proper metadata. + + Args: + structurePart: ContentPart with typeGroup="structure" containing nested parts + document: The document this part belongs to + preExtracted: Pre-extracted document metadata + intent: Document intent for nested parts + + Returns: + List of extracted ContentParts, empty if no nested parts found + """ + nestedParts = [] + + try: + # Parse JSON structure + jsonData = json.loads(structurePart.data) + + # Check for standard ContentExtracted format: documentData.parts + if isinstance(jsonData, dict): + documentData = jsonData.get("documentData") + if isinstance(documentData, dict): + parts = documentData.get("parts", []) + if isinstance(parts, list) and len(parts) > 0: + # Extract each nested part + for nestedPartData in parts: + if not isinstance(nestedPartData, dict): + continue + + nestedPartId = nestedPartData.get("id") or f"nested_{len(nestedParts)}" + nestedTypeGroup = nestedPartData.get("typeGroup", "text") + nestedMimeType = nestedPartData.get("mimeType", "text/plain") + nestedLabel = nestedPartData.get("label", structurePart.label) + nestedData = nestedPartData.get("data", "") + nestedMetadata = nestedPartData.get("metadata", {}) + + # Create ContentPart for nested part + nestedPart = ContentPart( + id=f"{structurePart.id}_{nestedPartId}", + parentId=structurePart.id, + label=nestedLabel, + typeGroup=nestedTypeGroup, + mimeType=nestedMimeType, + data=nestedData, + metadata={ + **nestedMetadata, + "documentId": document.id, + "fromNestedStructure": True, + "parentStructurePartId": structurePart.id, + "originalFileName": preExtracted["originalDocument"]["fileName"] + } + ) + + nestedParts.append(nestedPart) + logger.debug(f"✅ Extracted nested part: {nestedPart.id} (typeGroup={nestedTypeGroup}, mimeType={nestedMimeType})") + + # If no nested parts found, return empty list (original part will be kept) + if not nestedParts: + logger.debug(f"No nested parts found in structure part {structurePart.id}") + + except json.JSONDecodeError as e: + logger.warning(f"Could not parse structure part {structurePart.id} as JSON: {str(e)}") + except Exception as e: + logger.error(f"Error extracting nested parts from structure part {structurePart.id}: {str(e)}") + + return nestedParts diff --git a/modules/services/serviceAi/subDocumentIntents.py b/modules/services/serviceAi/subDocumentIntents.py index c1faba39..e78ed11c 100644 --- a/modules/services/serviceAi/subDocumentIntents.py +++ b/modules/services/serviceAi/subDocumentIntents.py @@ -14,6 +14,7 @@ from typing import Dict, Any, List, Optional from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelExtraction import DocumentIntent +from modules.workflows.processing.shared.stateTools import checkWorkflowStopped logger = logging.getLogger(__name__) @@ -86,6 +87,7 @@ class DocumentIntentAnalyzer: # AI-Call (verwende callAiPlanning für einfache JSON-Responses) # Debug-Logs werden bereits von callAiPlanning geschrieben + checkWorkflowStopped(self.services) aiResponse = await self.aiService.callAiPlanning( prompt=intentPrompt, debugType="document_intent_analysis" @@ -107,6 +109,21 @@ class DocumentIntentAnalyzer: "document_intent_analysis_result" ) + # State 1 Validation: Validate and auto-fix document intents + documentIds = {d.id for d in documents} + validatedIntents = [] + + for intent in documentIntents: + # Validation 1.2: Skip intents for unknown documents + if intent.documentId not in documentIds: + logger.warning(f"Skipping intent for unknown document: {intent.documentId}") + continue + validatedIntents.append(intent) + + # Validation 1.1: Documents without intents are OK (not needed) + # Intents for non-existing documents are already filtered above + documentIntents = validatedIntents + # ChatLog abschließen self.services.chat.progressLogFinish(intentOperationId, True) @@ -243,8 +260,13 @@ class DocumentIntentAnalyzer: outputFormat = actionParameters.get("outputFormat", "txt") - prompt = f"""USER REQUEST: + # FENCE user input to prevent prompt injection + fencedUserPrompt = f"""```user_request {userPrompt} +```""" + + prompt = f"""USER REQUEST: +{fencedUserPrompt} DOCUMENTS TO ANALYZE: {docListText} @@ -254,20 +276,25 @@ TASK: For each document, determine its intents (can be multiple): - "render": Image/binary should be rendered as-is (visual element) - "reference": Document reference/attachment (no extraction, just reference) -OUTPUT FORMAT: {outputFormat} +TASK: For each document, determine: +1. Intents (can be multiple): "extract", "render", "reference" +Note: Output format and language are NOT determined here - they will be + determined during structure generation (Phase 3) in the chapter structure JSON + +OUTPUT FORMAT: {outputFormat} (global fallback - for reference only) RETURN JSON: {{ "intents": [ {{ "documentId": "doc_1", - "intents": ["extract"], # Array - can contain multiple! + "intents": ["extract"], "extractionPrompt": "Extract all text content, preserving structure", "reasoning": "User needs text content for document generation" }}, {{ "documentId": "doc_2", - "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering + "intents": ["extract", "render"], "extractionPrompt": "Extract text content from image using vision AI", "reasoning": "Image contains text that needs extraction, but also should be rendered visually" }}, diff --git a/modules/services/serviceAi/subLoopingUseCases.py b/modules/services/serviceAi/subLoopingUseCases.py new file mode 100644 index 00000000..c52ed1bc --- /dev/null +++ b/modules/services/serviceAi/subLoopingUseCases.py @@ -0,0 +1,231 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Generic Looping Use Case System + +Provides parametrized looping infrastructure supporting different JSON formats and use cases. +""" + +import logging +from dataclasses import dataclass, field +from typing import Dict, Any, List, Optional, Callable + +logger = logging.getLogger(__name__) + + +@dataclass +class LoopingUseCase: + """Configuration for a specific looping use case.""" + + # Identification + useCaseId: str # "section_content", "chapter_structure", "document_structure", "code_structure", "code_content", "image_batch" + + # JSON Format Detection + jsonTemplate: Dict[str, Any] # Expected JSON structure template + detectionKeys: List[str] # Keys to check for format detection (e.g., ["elements"], ["chapters"], ["files"]) + detectionPath: str # JSONPath to check (e.g., "documents[0].chapters", "files[0].content") + + # Prompt Building + initialPromptBuilder: Optional[Callable] = None # Function to build initial prompt + continuationPromptBuilder: Optional[Callable] = None # Function to build continuation prompt + + # Accumulation & Merging + accumulator: Optional[Callable] = None # Function to accumulate fragments + merger: Optional[Callable] = None # Function to merge accumulated data + + # Continuation Context + continuationContextBuilder: Optional[Callable] = None # Build continuation context for this format + + # Result Building + resultBuilder: Optional[Callable] = None # Build final result from accumulated data + + # Metadata + supportsAccumulation: bool = True # Whether this use case supports accumulation + requiresExtraction: bool = False # Whether this requires extraction (like sections) + + +class LoopingUseCaseRegistry: + """Registry of all looping use cases.""" + + def __init__(self): + self.useCases: Dict[str, LoopingUseCase] = {} + self._registerDefaultUseCases() + + def register(self, useCase: LoopingUseCase): + """Register a new use case.""" + self.useCases[useCase.useCaseId] = useCase + logger.debug(f"Registered looping use case: {useCase.useCaseId}") + + def get(self, useCaseId: str) -> Optional[LoopingUseCase]: + """Get use case by ID.""" + return self.useCases.get(useCaseId) + + def detectUseCase(self, parsedJson: Dict[str, Any]) -> Optional[str]: + """Detect which use case matches the JSON structure.""" + for useCaseId, useCase in self.useCases.items(): + if self._matchesFormat(parsedJson, useCase): + return useCaseId + return None + + def _matchesFormat(self, json: Dict[str, Any], useCase: LoopingUseCase) -> bool: + """Check if JSON matches use case format.""" + # Check top-level keys + for key in useCase.detectionKeys: + if key in json: + return True + + # Check nested path using simple dictionary traversal (no jsonpath_ng needed) + if useCase.detectionPath: + try: + # Simple path matching without jsonpath_ng + # Format: "documents[0].chapters" or "files[0].content" + pathParts = useCase.detectionPath.split(".") + current = json + + for part in pathParts: + # Handle array indices like "documents[0]" + if "[" in part and "]" in part: + key = part.split("[")[0] + index = int(part.split("[")[1].split("]")[0]) + if isinstance(current, dict) and key in current: + if isinstance(current[key], list) and 0 <= index < len(current[key]): + current = current[key][index] + else: + return False + else: + return False + else: + # Regular key access + if isinstance(current, dict) and part in current: + current = current[part] + else: + return False + + # If we successfully traversed the path, it matches + return True + except Exception as e: + logger.debug(f"Path matching failed for {useCase.useCaseId}: {e}") + + return False + + def _registerDefaultUseCases(self): + """Register default use cases.""" + + # Use Case 1: Section Content Generation + # Returns JSON with "elements" array directly + self.register(LoopingUseCase( + useCaseId="section_content", + jsonTemplate={"elements": []}, + detectionKeys=["elements"], + detectionPath="", + initialPromptBuilder=None, # Will use default prompt builder + continuationPromptBuilder=None, # Will use default continuation builder + accumulator=None, # Direct return, no accumulation + merger=None, + continuationContextBuilder=None, # Will use default continuation context + resultBuilder=None, # Return JSON directly + supportsAccumulation=False, + requiresExtraction=False + )) + + # Use Case 2: Chapter Structure Generation + # Returns JSON with "documents[0].chapters" structure + self.register(LoopingUseCase( + useCaseId="chapter_structure", + jsonTemplate={"documents": [{"chapters": []}]}, + detectionKeys=["chapters"], + detectionPath="documents[0].chapters", + initialPromptBuilder=None, + continuationPromptBuilder=None, + accumulator=None, # Direct return, no accumulation + merger=None, + continuationContextBuilder=None, + resultBuilder=None, # Return JSON directly + supportsAccumulation=False, + requiresExtraction=False + )) + + # Use Case 3: Document Structure Generation + # Returns JSON with "documents[0].sections" structure, requires extraction and accumulation + self.register(LoopingUseCase( + useCaseId="document_structure", + jsonTemplate={"documents": [{"sections": []}]}, + detectionKeys=["sections"], + detectionPath="documents[0].sections", + initialPromptBuilder=None, + continuationPromptBuilder=None, + accumulator=None, # Will use default accumulator + merger=None, # Will use default merger + continuationContextBuilder=None, + resultBuilder=None, # Will use default result builder + supportsAccumulation=True, + requiresExtraction=True + )) + + # Use Case 4: Code Structure Generation (NEW) + self.register(LoopingUseCase( + useCaseId="code_structure", + jsonTemplate={ + "metadata": { + "language": "", + "projectType": "single_file|multi_file", + "projectName": "" + }, + "files": [ + { + "id": "", + "filename": "", + "fileType": "", + "dependencies": [], + "imports": [], + "functions": [], + "classes": [] + } + ] + }, + detectionKeys=["files"], + detectionPath="files", + initialPromptBuilder=None, + continuationPromptBuilder=None, + accumulator=None, # Direct return + merger=None, + continuationContextBuilder=None, + resultBuilder=None, + supportsAccumulation=False, + requiresExtraction=False + )) + + # Use Case 5: Code Content Generation (NEW) + self.register(LoopingUseCase( + useCaseId="code_content", + jsonTemplate={"files": [{"content": "", "functions": []}]}, + detectionKeys=["content", "functions"], + detectionPath="files[0].content", + initialPromptBuilder=None, + continuationPromptBuilder=None, + accumulator=None, # Will use default accumulator + merger=None, # Will use default merger + continuationContextBuilder=None, + resultBuilder=None, # Will use default result builder + supportsAccumulation=True, + requiresExtraction=False + )) + + # Use Case 6: Image Batch Generation (NEW) + self.register(LoopingUseCase( + useCaseId="image_batch", + jsonTemplate={"images": []}, + detectionKeys=["images"], + detectionPath="images", + initialPromptBuilder=None, + continuationPromptBuilder=None, + accumulator=None, # Direct return + merger=None, + continuationContextBuilder=None, + resultBuilder=None, + supportsAccumulation=False, + requiresExtraction=False + )) + + logger.info(f"Registered {len(self.useCases)} default looping use cases") + diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index 138f6572..3d687398 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -12,10 +12,11 @@ import json import logging import copy import asyncio -from typing import Dict, Any, List, Optional +from typing import Dict, Any, List, Optional, Tuple from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum +from modules.workflows.processing.shared.stateTools import checkWorkflowStopped logger = logging.getLogger(__name__) @@ -23,11 +24,20 @@ logger = logging.getLogger(__name__) class StructureFiller: """Handles filling document structure with content.""" + # Default concurrency limit for parallel generation (chapters/sections) + DEFAULT_MAX_CONCURRENT_GENERATION = 16 + def __init__(self, services, aiService): """Initialize StructureFiller with service center and AI service access.""" self.services = services self.aiService = aiService + def _getMaxConcurrentGeneration(self, options: Optional[AiCallOptions] = None) -> int: + """Get max concurrent generation limit, configurable via options.""" + if options and hasattr(options, 'maxConcurrentGeneration'): + return options.maxConcurrentGeneration + return self.DEFAULT_MAX_CONCURRENT_GENERATION + def _getUserLanguage(self) -> str: """Get user language for document generation""" try: @@ -42,6 +52,72 @@ class StructureFiller: pass return 'en' # Default fallback + def _getDocumentLanguage(self, structure: Dict[str, Any], documentId: str) -> str: + """ + Get language for a specific document from structure. + Falls back to user language if not specified. + + Args: + structure: The document structure with documents array + documentId: The ID of the document to get language for + + Returns: + ISO 639-1 language code (e.g., "de", "en", "fr") + """ + # Try to find document in structure + for doc in structure.get("documents", []): + if doc.get("id") == documentId: + docLanguage = doc.get("language") + if docLanguage: + return docLanguage + + # Fallback to metadata language + metadataLanguage = structure.get("metadata", {}).get("language") + if metadataLanguage: + return metadataLanguage + + # Fallback to user language + return self._getUserLanguage() + + def _extractContentPartInfo(self, chapter: Dict[str, Any]) -> Tuple[List[str], Dict[str, Any]]: + """ + Extract contentPartIds and contentPartInstructions from chapter's contentParts structure. + + Returns: + tuple: (contentPartIds list, contentPartInstructions dict) + """ + contentParts = chapter.get("contentParts", {}) + contentPartIds = list(contentParts.keys()) + # Extract instructions (entries with "instruction" field) and captions (entries with "caption" field) + contentPartInstructions = {} + for partId, partInfo in contentParts.items(): + if isinstance(partInfo, dict): + if "instruction" in partInfo: + contentPartInstructions[partId] = {"instruction": partInfo["instruction"]} + elif "caption" in partInfo: + # For entries with only caption (no instruction), still add to dict so it's available + contentPartInstructions[partId] = {"caption": partInfo["caption"]} + return contentPartIds, contentPartInstructions + + def _getContentPartCaption(self, chapter: Dict[str, Any], partId: str) -> Optional[str]: + """ + Get caption for a contentPart from chapter's contentParts structure. + Returns None if no caption is available. + + Args: + chapter: Chapter dict + partId: ContentPart ID + + Returns: + Caption string or None + """ + if "contentParts" in chapter: + contentParts = chapter.get("contentParts", {}) + partInfo = contentParts.get(partId) + if isinstance(partInfo, dict) and "caption" in partInfo: + return partInfo["caption"] + return None + async def fillStructure( self, structure: Dict[str, Any], @@ -101,14 +177,19 @@ class StructureFiller: try: filledStructure = copy.deepcopy(structure) + # Get options from AI service if available (for concurrency control) + # Default concurrency limit (16) will be used if options is None + options = None + # Note: Options can be passed via fillStructure if needed in the future + # Phase 5D.1: Sections-Struktur für jedes Chapter generieren filledStructure = await self._generateChapterSectionsStructure( - filledStructure, contentParts, userPrompt, fillOperationId, language + filledStructure, contentParts, userPrompt, fillOperationId, language, options ) # Phase 5D.2: Sections mit ContentParts füllen filledStructure = await self._fillChapterSections( - filledStructure, contentParts, userPrompt, fillOperationId, language + filledStructure, contentParts, userPrompt, fillOperationId, language, options ) # Flattening: Chapters zu Sections konvertieren @@ -117,6 +198,31 @@ class StructureFiller: # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung) flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts) + # State 4 Validation: Validate and auto-fix filled structure + # Validation 4.1: Filled structure missing 'documents' field + if "documents" not in flattenedStructure: + raise ValueError("Filled structure missing 'documents' field - cannot auto-fix") + + for doc in flattenedStructure["documents"]: + # Validation 4.4: Verify language is preserved from input structure + # Language MUST be preserved from Phase 3 structure (validated in State 3) + if "language" not in doc: + raise ValueError(f"Document {doc.get('id')} missing language in filled structure - should have been preserved from Phase 3") + + # Validate language format + if not isinstance(doc["language"], str) or len(doc["language"]) != 2: + raise ValueError(f"Document {doc.get('id')} has invalid language format in filled structure: {doc['language']} - should be 2-character ISO 639-1 code") + + for chapter in doc.get("chapters", []): + for section in chapter.get("sections", []): + # Validation 4.2: Section missing 'elements' field + if "elements" not in section: + section["elements"] = [] + logger.info(f"Section {section.get('id')} missing 'elements' - created empty list") + + # Validation 4.3: Section has empty elements list - ALLOW (intentionally empty is OK) + # No action needed - empty elements are allowed + # ChatLog abschließen self.services.chat.progressLogFinish(fillOperationId, True) @@ -170,6 +276,7 @@ class StructureFiller: # AI-Call für Chapter-Struktur-Generierung # Note: Debug logging is handled by callAiPlanning + checkWorkflowStopped(self.services) aiResponse = await self.aiService.callAiPlanning( prompt=chapterPrompt, debugType=f"chapter_structure_{chapterId}" @@ -243,7 +350,8 @@ class StructureFiller: contentParts: List[ContentPart], userPrompt: str, parentOperationId: str, - language: str + language: str, + options: Optional[AiCallOptions] = None ) -> Dict[str, Any]: """ Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content) in parallel. @@ -252,39 +360,53 @@ class StructureFiller: # Count total chapters for progress tracking totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", [])) + # Get concurrency limit + maxConcurrent = self._getMaxConcurrentGeneration(options) + semaphore = asyncio.Semaphore(maxConcurrent) + # Collect all chapters with their indices for parallel processing chapterTasks = [] chapterIndex = 0 for doc in chapterStructure.get("documents", []): + docId = doc.get("id", "unknown") + # Get language for this specific document + docLanguage = self._getDocumentLanguage(chapterStructure, docId) + for chapter in doc.get("chapters", []): chapterIndex += 1 chapterId = chapter.get("id", "unknown") chapterLevel = chapter.get("level", 1) chapterTitle = chapter.get("title", "Untitled Chapter") generationHint = chapter.get("generationHint", "") - contentPartIds = chapter.get("contentPartIds", []) - contentPartInstructions = chapter.get("contentPartInstructions", {}) + contentPartIds, contentPartInstructions = self._extractContentPartInfo(chapter) - # Create task for parallel processing - task = self._generateSingleChapterSectionsStructure( - chapter=chapter, - chapterIndex=chapterIndex, - chapterId=chapterId, - chapterLevel=chapterLevel, - chapterTitle=chapterTitle, - generationHint=generationHint, - contentPartIds=contentPartIds, - contentPartInstructions=contentPartInstructions, - contentParts=contentParts, - userPrompt=userPrompt, - language=language, - parentOperationId=parentOperationId, - totalChapters=totalChapters + # Create task for parallel processing with semaphore + async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage): + checkWorkflowStopped(self.services) + async with semaphore: + return await self._generateSingleChapterSectionsStructure( + chapter=chapter, + chapterIndex=chapterIndex, + chapterId=chapterId, + chapterLevel=chapterLevel, + chapterTitle=chapterTitle, + generationHint=generationHint, + contentPartIds=contentPartIds, + contentPartInstructions=contentPartInstructions, + contentParts=contentParts, + userPrompt=userPrompt, + language=docLanguage, # Use document-specific language + parentOperationId=parentOperationId, + totalChapters=totalChapters + ) + + task = processChapterWithSemaphore( + chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage ) chapterTasks.append((chapterIndex, chapter, task)) - # Execute all chapter tasks in parallel + # Execute all chapter tasks in parallel with concurrency control if chapterTasks: # Create list of tasks (without indices for gather) tasks = [task for _, _, task in chapterTasks] @@ -308,7 +430,8 @@ class StructureFiller: operationType: OperationTypeEnum, sectionId: str, generationHint: str, - generatedElements: List[Dict[str, Any]] + generatedElements: List[Dict[str, Any]], + section: Dict[str, Any] ) -> List[Dict[str, Any]]: """ Helper method to process AI response and extract elements. @@ -365,13 +488,16 @@ class StructureFiller: # Image already processed as JSON, skip pass elif base64Data: + # Get caption from section if available + caption = section.get("caption") or section.get("metadata", {}).get("caption") or "" elements.append({ "type": "image", "content": { "base64Data": base64Data, "altText": generationHint or "Generated image", - "caption": "" - } + "caption": caption # Use caption from section if available + }, + "caption": caption # Also at element level for compatibility }) logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") else: @@ -386,11 +512,25 @@ class StructureFiller: if generatedElements: elements.extend(generatedElements) else: - # Fallback: Try to parse JSON response directly + # Fallback: Try to parse JSON response directly with repair logic try: - fallbackElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) + from modules.shared.jsonUtils import tryParseJson, repairBrokenJson + + # Use tryParseJson which handles extraction and basic parsing + fallbackElements, parseError, cleanedStr = tryParseJson(aiResponse.content) + + # If parsing failed, try repair + if parseError and isinstance(aiResponse.content, str): + logger.warning(f"Initial JSON parse failed for section {sectionId}, attempting repair: {str(parseError)}") + repairedJson = repairBrokenJson(aiResponse.content) + if repairedJson: + fallbackElements = repairedJson + parseError = None + logger.info(f"Successfully repaired JSON for section {sectionId}") + + if parseError: + raise parseError + if isinstance(fallbackElements, list): elements.extend(fallbackElements) elif isinstance(fallbackElements, dict) and "elements" in fallbackElements: @@ -493,14 +633,26 @@ class StructureFiller: }) elif contentFormat == "object": if part.typeGroup == "image": - elements.append({ - "type": "image", - "content": { - "base64Data": part.data, - "altText": part.metadata.get("usageHint", part.label), - "caption": part.metadata.get("caption", "") - } - }) + # Validate that image data exists + if not part.data: + logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (object format). Skipping image element.") + elements.append({ + "type": "error", + "message": f"Image ContentPart {part.id} has no data", + "sectionId": sectionId + }) + else: + # Get caption from section (priority: section.caption > part.metadata.caption) + caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "") + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": caption # Use caption from section + }, + "caption": caption # Also at element level for compatibility + }) else: elements.append({ "type": part.typeGroup, @@ -511,13 +663,93 @@ class StructureFiller: } }) - # Aggregiere extracted Parts mit AI - if extractedParts: - logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI") + # Extract images with Vision AI if needed (before aggregation) + processedExtractedParts = [] + for part in extractedParts: + # Check if this is an image that needs Vision AI extraction + if (part.typeGroup == "image" and + part.metadata.get("needsVisionExtraction") == True and + part.metadata.get("intent") == "extract"): + + logger.info(f"Section {sectionId}: Extracting text from image {part.id} using Vision AI") + try: + extractionPrompt = part.metadata.get("extractionPrompt") or "Extract all text content from this image. Return only the extracted text, no additional formatting." + + # Write debug file for image extraction prompt + if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): + try: + partId = part.id[:8] if part.id else "unknown" + partLabelSafe = (part.label or "image").replace(" ", "_").replace("/", "_").replace("\\", "_")[:30] + debugPrefix = f"extraction_image_{partId}_{partLabelSafe}" + self.services.utils.writeDebugFile(extractionPrompt, f"{debugPrefix}_prompt") + logger.debug(f"Wrote image extraction prompt debug file: {debugPrefix}_prompt") + except Exception as debugError: + logger.warning(f"Failed to write image extraction debug file: {str(debugError)}") + + # Call Vision AI to extract text from image + visionRequest = AiCallRequest( + prompt=extractionPrompt, + context="", + options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE), + contentParts=[part] + ) + + checkWorkflowStopped(self.services) + visionResponse = await self.aiService.callAi(visionRequest) + + # Write debug file for image extraction response + if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): + try: + partId = part.id[:8] if part.id else "unknown" + partLabelSafe = (part.label or "image").replace(" ", "_").replace("/", "_").replace("\\", "_")[:30] + debugPrefix = f"extraction_image_{partId}_{partLabelSafe}" + responseContent = visionResponse.content if visionResponse and visionResponse.content else "" + self.services.utils.writeDebugFile(responseContent, f"{debugPrefix}_response") + logger.debug(f"Wrote image extraction response debug file: {debugPrefix}_response") + except Exception as debugError: + logger.warning(f"Failed to write image extraction response debug file: {str(debugError)}") + + if visionResponse and visionResponse.content: + # Create text part with extracted content + textPart = ContentPart( + id=f"vision_extracted_{part.id}", + label=f"Extracted text from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=visionResponse.content.strip(), + metadata={ + **part.metadata, + "contentFormat": "extracted", + "extractionMethod": "vision", + "sourceImagePartId": part.id, + "needsVisionExtraction": False # Already extracted + } + ) + processedExtractedParts.append(textPart) + logger.info(f"✅ Extracted text from image {part.id}: {len(visionResponse.content)} chars") + else: + logger.warning(f"⚠️ Vision AI extraction returned no content for image {part.id}") + # Keep original image part, but mark extraction as attempted + part.metadata["needsVisionExtraction"] = False + part.metadata["visionExtractionFailed"] = True + processedExtractedParts.append(part) + except Exception as e: + logger.error(f"❌ Vision AI extraction failed for image {part.id}: {str(e)}") + # Keep original image part, but mark extraction as attempted + part.metadata["needsVisionExtraction"] = False + part.metadata["visionExtractionFailed"] = True + processedExtractedParts.append(part) + else: + # Not an image needing extraction, or already processed + processedExtractedParts.append(part) + + # Aggregiere extracted Parts mit AI (now with Vision-extracted text parts) + if processedExtractedParts: + logger.debug(f"Section {sectionId}: Aggregating {len(processedExtractedParts)} extracted parts with AI") isAggregation = True generationPrompt = self._buildSectionGenerationPrompt( section=section, - contentParts=extractedParts, + contentParts=processedExtractedParts, userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, @@ -563,6 +795,7 @@ class StructureFiller: processingMode=ProcessingModeEnum.DETAILED ) ) + checkWorkflowStopped(self.services) aiResponse = await self.aiService.callAi(request) generatedElements = [] @@ -621,7 +854,8 @@ The JSON should be a fragment that can be merged with the previous response.""" processingMode=ProcessingModeEnum.DETAILED ) - aiResponseJson = await self.aiService._callAiWithLooping( + checkWorkflowStopped(self.services) + aiResponseJson = await self.aiService.callAiWithLooping( prompt=generationPrompt, options=options, debugPrefix=f"{chapterId}_section_{sectionId}", @@ -638,25 +872,48 @@ The JSON should be a fragment that can be merged with the previous response.""" }, operationId=sectionOperationId, userPrompt=userPrompt, - contentParts=extractedParts + contentParts=extractedParts, + useCaseId="section_content" # REQUIRED: Explicit use case ID ) try: - parsedResponse = json.loads(self.services.utils.jsonExtractString(aiResponseJson)) - if isinstance(parsedResponse, list): - generatedElements = parsedResponse - elif isinstance(parsedResponse, dict): - if "elements" in parsedResponse: - generatedElements = parsedResponse["elements"] - elif "sections" in parsedResponse and len(parsedResponse["sections"]) > 0: - firstSection = parsedResponse["sections"][0] - generatedElements = firstSection.get("elements", []) - elif parsedResponse.get("type"): - generatedElements = [parsedResponse] + # Use tryParseJson which handles extraction and basic parsing + from modules.shared.jsonUtils import tryParseJson, repairBrokenJson + + # Check if response contains multiple JSON blocks (separated by --- or multiple ```json blocks) + # This can happen when AI returns multiple complete responses + if isinstance(aiResponseJson, str) and ("---" in aiResponseJson or aiResponseJson.count("```json") > 1): + logger.info(f"Section {sectionId}: Detected multiple JSON blocks in response, attempting to merge") + generatedElements = self._extractAndMergeMultipleJsonBlocks(aiResponseJson, contentType, sectionId) + else: + parsedResponse, parseError, cleanedStr = tryParseJson(aiResponseJson) + + # If parsing failed, try repair + if parseError and isinstance(aiResponseJson, str): + logger.warning(f"Initial JSON parse failed for section {sectionId}, attempting repair: {str(parseError)}") + repairedJson = repairBrokenJson(aiResponseJson) + if repairedJson: + parsedResponse = repairedJson + parseError = None + logger.info(f"Successfully repaired JSON for section {sectionId}") + + if parseError: + raise parseError + + if isinstance(parsedResponse, list): + generatedElements = parsedResponse + elif isinstance(parsedResponse, dict): + if "elements" in parsedResponse: + generatedElements = parsedResponse["elements"] + elif "sections" in parsedResponse and len(parsedResponse["sections"]) > 0: + firstSection = parsedResponse["sections"][0] + generatedElements = firstSection.get("elements", []) + elif parsedResponse.get("type"): + generatedElements = [parsedResponse] + else: + generatedElements = [] else: generatedElements = [] - else: - generatedElements = [] class AiResponse: def __init__(self, content): @@ -683,7 +940,8 @@ The JSON should be a fragment that can be merged with the previous response.""" operationType=operationType, sectionId=sectionId, generationHint=generationHint, - generatedElements=generatedElements + generatedElements=generatedElements, + section=section ) elements.extend(responseElements) @@ -824,7 +1082,7 @@ The JSON should be a fragment that can be merged with the previous response.""" processingMode=ProcessingModeEnum.DETAILED ) - aiResponseJson = await self.aiService._callAiWithLooping( + aiResponseJson = await self.aiService.callAiWithLooping( prompt=generationPrompt, options=options, debugPrefix=f"{chapterId}_section_{sectionId}", @@ -841,7 +1099,8 @@ The JSON should be a fragment that can be merged with the previous response.""" }, operationId=sectionOperationId, userPrompt=userPrompt, - contentParts=[] + contentParts=[], + useCaseId="section_content" # REQUIRED: Explicit use case ID ) try: @@ -885,7 +1144,8 @@ The JSON should be a fragment that can be merged with the previous response.""" operationType=operationType, sectionId=sectionId, generationHint=generationHint, - generatedElements=generatedElements + generatedElements=generatedElements, + section=section ) elements.extend(responseElements) @@ -930,14 +1190,26 @@ The JSON should be a fragment that can be merged with the previous response.""" elif contentFormat == "object": if part.typeGroup == "image": - elements.append({ - "type": "image", - "content": { - "base64Data": part.data, - "altText": part.metadata.get("usageHint", part.label), - "caption": part.metadata.get("caption", "") - } - }) + # Validate that image data exists + if not part.data: + logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (object format). Skipping image element.") + elements.append({ + "type": "error", + "message": f"Image ContentPart {part.id} has no data", + "sectionId": sectionId + }) + else: + # Get caption from section (priority: section.caption > part.metadata.caption) + caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "") + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": caption # Use caption from section + }, + "caption": caption # Also at element level for compatibility + }) else: elements.append({ "type": part.typeGroup, @@ -949,8 +1221,61 @@ The JSON should be a fragment that can be merged with the previous response.""" }) elif contentFormat == "extracted": + # CRITICAL: If useAiCall is true, extracted parts are used as input for AI generation + # and should NOT be added as elements. Only add extracted text as element if useAiCall is false. + if useAiCall: + # Extracted part will be used as input for AI call - skip adding as element + logger.debug(f"Section {sectionId}: Skipping extracted part {part.id} as element (useAiCall=true, will be used as AI input)") + # Continue to process this part for AI call, but don't add as element yet + # Check if this is an image that needs Vision AI extraction + originalPartId = part.id + if (part.typeGroup == "image" and + part.metadata.get("needsVisionExtraction") == True and + part.metadata.get("intent") == "extract"): + + logger.info(f"Section {sectionId}: Extracting text from single image {part.id} using Vision AI") + try: + extractionPrompt = part.metadata.get("extractionPrompt") or "Extract all text content from this image. Return only the extracted text, no additional formatting." + + # Call Vision AI to extract text from image + visionRequest = AiCallRequest( + prompt=extractionPrompt, + context="", + options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE), + contentParts=[part] + ) + + checkWorkflowStopped(self.services) + visionResponse = await self.aiService.callAi(visionRequest) + + if visionResponse and visionResponse.content: + # Replace image part with text part for further processing + part = ContentPart( + id=f"vision_extracted_{originalPartId}", + label=f"Extracted text from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=visionResponse.content.strip(), + metadata={ + **part.metadata, + "contentFormat": "extracted", + "extractionMethod": "vision", + "sourceImagePartId": originalPartId, + "needsVisionExtraction": False # Already extracted + } + ) + logger.info(f"✅ Extracted text from image {originalPartId}: {len(visionResponse.content)} chars") + else: + logger.warning(f"⚠️ Vision AI extraction returned no content for image {originalPartId}") + part.metadata["needsVisionExtraction"] = False + part.metadata["visionExtractionFailed"] = True + except Exception as e: + logger.error(f"❌ Vision AI extraction failed for image {originalPartId}: {str(e)}") + part.metadata["needsVisionExtraction"] = False + part.metadata["visionExtractionFailed"] = True + if useAiCall and generationHint: - # AI-Call mit einzelnen ContentPart + # AI-Call mit einzelnen ContentPart (now may be text part after Vision extraction) logger.debug(f"Processing section {sectionId}: Single extracted part with AI call") generationPrompt = self._buildSectionGenerationPrompt( section=section, @@ -1060,7 +1385,7 @@ The JSON should be a fragment that can be merged with the previous response.""" processingMode=ProcessingModeEnum.DETAILED ) - aiResponseJson = await self.aiService._callAiWithLooping( + aiResponseJson = await self.aiService.callAiWithLooping( prompt=generationPrompt, options=options, debugPrefix=f"{chapterId}_section_{sectionId}", @@ -1077,7 +1402,8 @@ The JSON should be a fragment that can be merged with the previous response.""" }, operationId=sectionOperationId, userPrompt=userPrompt, - contentParts=[part] + contentParts=[part], + useCaseId="section_content" # REQUIRED: Explicit use case ID ) try: @@ -1121,7 +1447,8 @@ The JSON should be a fragment that can be merged with the previous response.""" operationType=operationType, sectionId=sectionId, generationHint=generationHint, - generatedElements=generatedElements + generatedElements=generatedElements, + section=section ) elements.extend(responseElements) @@ -1150,24 +1477,114 @@ The JSON should be a fragment that can be merged with the previous response.""" ) else: # Füge extrahierten Content direkt hinzu (kein AI-Call) - if part.typeGroup == "image": - logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call") - elements.append({ - "type": "image", - "content": { - "base64Data": part.data, - "altText": part.metadata.get("usageHint", part.label), - "caption": part.metadata.get("caption", "") - } - }) + # CRITICAL: If content_type is "image", we must render an image, not extracted text + if contentType == "image": + # Section wants to display an image - find the image part + if part.typeGroup == "image": + # Direct image part - use it + logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call") + # Validate that image data exists + if not part.data: + logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (extracted format without AI call). Skipping image element.") + elements.append({ + "type": "error", + "message": f"Image ContentPart {part.id} has no data", + "sectionId": sectionId + }) + else: + # Get caption from section (priority: section.caption > part.metadata.caption) + caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "") + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": caption # Use caption from section + }, + "caption": caption # Also at element level for compatibility + }) + elif part.typeGroup == "text" and part.metadata.get("sourceImagePartId"): + # This is a vision-extracted text part - find the original image object part + sourceImagePartId = part.metadata.get("sourceImagePartId") + logger.debug(f"Processing section {sectionId}: Found vision-extracted text part, looking for original image object part: {sourceImagePartId}") + + # Try to find the object part (format: "obj_...") + objectPartId = part.metadata.get("relatedObjectPartId") + objectPart = None + + if objectPartId: + objectPart = self._findContentPartById(objectPartId, contentParts) + + # If not found via metadata, search through all contentParts for object part + if not objectPart: + # Search for object part that references the source image part ID + for candidatePart in contentParts: + if (candidatePart.metadata.get("contentFormat") == "object" and + candidatePart.typeGroup == "image" and + sourceImagePartId in candidatePart.id): + objectPart = candidatePart + objectPartId = candidatePart.id + logger.debug(f"Section {sectionId}: Found object part {objectPartId} by searching all contentParts") + break + + if objectPart and objectPart.typeGroup == "image" and objectPart.data: + logger.info(f"Section {sectionId}: Found object part {objectPartId} for image rendering") + caption = section.get("caption") or section.get("metadata", {}).get("caption") or objectPart.metadata.get("caption", "") + elements.append({ + "type": "image", + "content": { + "base64Data": objectPart.data, + "altText": objectPart.metadata.get("usageHint", objectPart.label), + "caption": caption + }, + "caption": caption + }) + else: + logger.warning(f"Section {sectionId}: No object part found for vision-extracted text part {part.id} (sourceImagePartId={sourceImagePartId}), cannot render image") + elements.append({ + "type": "error", + "message": f"Cannot render image: no object part found for extracted text part (sourceImagePartId={sourceImagePartId})", + "sectionId": sectionId + }) + else: + logger.warning(f"Section {sectionId}: ContentPart {part.id} is not an image (typeGroup={part.typeGroup}), but section content_type is 'image'. Cannot render image.") + elements.append({ + "type": "error", + "message": f"Cannot render image: ContentPart is not an image type", + "sectionId": sectionId + }) else: - logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call") - elements.append({ - "type": "extracted_text", - "content": part.data, - "source": part.metadata.get("documentId"), - "extractionPrompt": part.metadata.get("extractionPrompt") - }) + # content_type is not "image" - add extracted text as normal + if part.typeGroup == "image": + logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call") + # Validate that image data exists + if not part.data: + logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (extracted format without AI call). Skipping image element.") + elements.append({ + "type": "error", + "message": f"Image ContentPart {part.id} has no data", + "sectionId": sectionId + }) + else: + # Get caption from section (priority: section.caption > part.metadata.caption) + caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "") + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": caption # Use caption from section + }, + "caption": caption # Also at element level for compatibility + }) + else: + logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call") + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) # Update progress after section completion chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 @@ -1200,7 +1617,8 @@ The JSON should be a fragment that can be merged with the previous response.""" contentParts: List[ContentPart], userPrompt: str, parentOperationId: str, - language: str + language: str, + options: Optional[AiCallOptions] = None ) -> Dict[str, Any]: """ Phase 5D.2: Füllt Sections mit ContentParts. @@ -1217,6 +1635,10 @@ The JSON should be a fragment that can be merged with the previous response.""" totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", [])) fillOperationId = parentOperationId + # Get concurrency limit for sections + maxConcurrent = self._getMaxConcurrentGeneration(options) + sectionSemaphore = asyncio.Semaphore(maxConcurrent) + # Helper function to calculate overall progress def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections): """Calculate overall progress: 0.0 to 1.0""" @@ -1234,6 +1656,10 @@ The JSON should be a fragment that can be merged with the previous response.""" # Process chapters sequentially with chapter-level progress chapterIndex = 0 for doc in chapterStructure.get("documents", []): + docId = doc.get("id", "unknown") + # Get language for this specific document + docLanguage = self._getDocumentLanguage(chapterStructure, docId) + for chapter in doc.get("chapters", []): chapterIndex += 1 chapterId = chapter.get("id", "unknown") @@ -1251,28 +1677,35 @@ The JSON should be a fragment that can be merged with the previous response.""" parentOperationId=fillOperationId ) - # Process sections within chapter in parallel + # Process sections within chapter in parallel with concurrency control sectionTasks = [] for sectionIndex, section in enumerate(sections): - # Create task for parallel processing - task = self._processSingleSection( - section=section, - sectionIndex=sectionIndex, - totalSections=totalSections, - chapterIndex=chapterIndex, - totalChapters=totalChapters, - chapterId=chapterId, - chapterOperationId=chapterOperationId, - fillOperationId=fillOperationId, - contentParts=contentParts, - userPrompt=userPrompt, - all_sections_list=all_sections_list, - language=language, - calculateOverallProgress=calculateOverallProgress + # Create task wrapper with semaphore for parallel processing + async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress): + checkWorkflowStopped(self.services) + async with sectionSemaphore: + return await self._processSingleSection( + section=section, + sectionIndex=sectionIndex, + totalSections=totalSections, + chapterIndex=chapterIndex, + totalChapters=totalChapters, + chapterId=chapterId, + chapterOperationId=chapterOperationId, + fillOperationId=fillOperationId, + contentParts=contentParts, + userPrompt=userPrompt, + all_sections_list=all_sections_list, + language=docLanguage, # Use document-specific language + calculateOverallProgress=calculateOverallProgress + ) + + task = processSectionWithSemaphore( + section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress ) sectionTasks.append((sectionIndex, section, task)) - # Execute all section tasks in parallel + # Execute all section tasks in parallel with concurrency control if sectionTasks: # Create list of tasks (without indices for gather) tasks = [task for _, _, task in sectionTasks] @@ -1336,7 +1769,7 @@ The JSON should be a fragment that can be merged with the previous response.""" if "chapters" in doc: for chapter in doc.get("chapters", []): # Füge Metadaten zu Chapter-Level contentPartIds hinzu - chapterContentPartIds = chapter.get("contentPartIds", []) + chapterContentPartIds, _ = self._extractContentPartInfo(chapter) if chapterContentPartIds: chapter["contentPartsMetadata"] = [] for partId in chapterContentPartIds: @@ -1362,7 +1795,7 @@ The JSON should be a fragment that can be merged with the previous response.""" Flattening: Konvertiert Chapters zu finaler Section-Struktur. Jedes Chapter wird zu einer Heading-Section (Level 1) + dessen Sections. - IMPORTANT: Chapters are the main structure elements (heading level 1). + Chapters are the main structure elements (heading level 1). All section headings with level < 2 are adjusted to level 2. """ result = { @@ -1375,6 +1808,8 @@ The JSON should be a fragment that can be merged with the previous response.""" "id": doc.get("id"), "title": doc.get("title"), "filename": doc.get("filename"), + "outputFormat": doc.get("outputFormat"), # Preserve from Phase 3 + "language": doc.get("language"), # Preserve from Phase 3 "sections": [] } @@ -1441,15 +1876,30 @@ The JSON should be a fragment that can be merged with the previous response.""" for partId in contentPartIds: part = self._findContentPartById(partId, contentParts) if not part: + # Part not found - try to show info from chapter structure + partInfo = contentPartInstructions.get(partId, {}) + if partInfo: + logger.warning(f"Chapter {chapterId}: ContentPart {partId} not found in contentParts list, but has chapter structure info.") + contentPartsIndex += f"\n- ContentPart ID: {partId}\n" + if "instruction" in partInfo: + contentPartsIndex += f" Instruction: {partInfo['instruction']}\n" + if "caption" in partInfo: + contentPartsIndex += f" Caption: {partInfo['caption']}\n" + contentPartsIndex += f" Note: ContentPart not found in contentParts list (ID may be from nested structure)\n" continue contentFormat = part.metadata.get("contentFormat", "unknown") - instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed") + partInfo = contentPartInstructions.get(partId, {}) + instruction = partInfo.get("instruction", "Use content as needed") + caption = partInfo.get("caption") contentPartsIndex += f"\n- ContentPart ID: {partId}\n" contentPartsIndex += f" Format: {contentFormat}\n" contentPartsIndex += f" Type: {part.typeGroup}\n" - contentPartsIndex += f" Instruction: {instruction}\n" + if instruction and instruction != "Use content as needed": + contentPartsIndex += f" Instruction: {instruction}\n" + if caption: + contentPartsIndex += f" Caption: {caption}\n" if not contentPartsIndex: contentPartsIndex = "\n(No content parts specified for this chapter)" @@ -1461,9 +1911,11 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles, CHAPTER: {chapterTitle} (Level {chapterLevel}, ID: {chapterId}) GENERATION HINT: {generationHint} +**CRITICAL**: The chapter's generationHint above describes what content this chapter should generate. If the generationHint references documents/images/data, then EACH section that generates content for this chapter MUST assign the relevant ContentParts from AVAILABLE CONTENT PARTS below. + NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title. -IMPORTANT - SECTION INDEPENDENCE: +## SECTION INDEPENDENCE - Each section is independent and self-contained - One section does NOT have information about another section - Each section must provide its own context and be understandable alone @@ -1471,13 +1923,24 @@ IMPORTANT - SECTION INDEPENDENCE: AVAILABLE CONTENT PARTS: {contentPartsIndex} -CONTENT TYPES: table, bullet_list, heading, paragraph, code_block, image +## CONTENT ASSIGNMENT RULE - CRITICAL +If AVAILABLE CONTENT PARTS are listed above, then EVERY section that generates content related to those ContentParts MUST assign them explicitly. + +**Assignment logic:** +- If section generates text content ABOUT a ContentPart → assign "extracted" format ContentPart with appropriate instruction +- If section DISPLAYS a ContentPart → assign "object" format ContentPart +- If section's generationHint or purpose relates to a ContentPart listed above → it MUST have contentPartIds assigned +- If chapter's generationHint references documents/images/data AND section generates content for that chapter → section MUST assign relevant ContentParts +- Empty contentPartIds [] are only allowed if section generates content WITHOUT referencing any available ContentParts AND WITHOUT relating to chapter's generationHint + +## CONTENT TYPES +Available content types for sections: table, bullet_list, heading, paragraph, code_block, image useAiCall RULES: - useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed - useAiCall: false if Format is "object" or "reference" (direct insertion) - useAiCall: false if Format is "extracted" AND simple "include full text" instruction -- useAiCall: true if NO ContentPartIds provided (content must be generated from scratch); Sections without ContentParts MUST have a clear, detailed generationHint explaining what content to generate +- useAiCall: true if no ContentPartIds provided (content must be generated from scratch); Sections without ContentParts must have a clear, detailed generationHint explaining what content to generate RETURN JSON: {{ @@ -1488,22 +1951,23 @@ RETURN JSON: "contentPartIds": ["extracted_part_1"], "generationHint": "Include full text", "useAiCall": false, + "caption": "optional, only for image sections", "elements": [] }} ] }} -EXAMPLES (all content types): -- paragraph: {{"id": "s1", "content_type": "paragraph", "contentPartIds": ["extracted_1"], "generationHint": "Include full text", "useAiCall": false, "elements": []}} -- bullet_list: {{"id": "s2", "content_type": "bullet_list", "contentPartIds": ["extracted_1"], "generationHint": "Create bullet list", "useAiCall": true, "elements": []}} -- table: {{"id": "s3", "content_type": "table", "contentPartIds": ["extracted_1", "extracted_2"], "generationHint": "Create table", "useAiCall": true, "elements": []}} -- heading: {{"id": "s4", "content_type": "heading", "contentPartIds": ["extracted_1"], "generationHint": "Extract heading", "useAiCall": true, "elements": []}} -- code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}} -- image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "useAiCall": false, "elements": []}} -- reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}} -- NO CONTENT PARTS (generate from scratch): {{"id": "s8", "content_type": "paragraph", "contentPartIds": [], "generationHint": "Write a detailed professional paragraph explaining [specific topic or purpose]. Include [key points to cover]. Address [important aspects]. Conclude with [summary or recommendations].", "useAiCall": true, "elements": []}} +**MANDATORY CONTENT ASSIGNMENT CHECK:** +For each section, verify: +1. Are ContentParts listed in AVAILABLE CONTENT PARTS above? +2. Does this section's generationHint or purpose relate to those ContentParts? +3. If YES to both → section MUST have contentPartIds assigned (cannot be empty []) +4. Assign ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above -CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +IMAGE SECTIONS: +- For image sections, always provide a "caption" field with a descriptive caption for the image. + +Return only valid JSON. Do not include any explanatory text outside the JSON. """ return prompt @@ -1541,19 +2005,9 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th # Baue ContentParts-Beschreibung contentPartsText = "" if isAggregation: - # Aggregation: Zeige nur Metadaten, nicht Previews - contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n" - contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n" - contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n" - contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n" - contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n" - contentPartsText += f"ContentPart IDs:\n" - for part in validParts: - contentFormat = part.metadata.get("contentFormat", "unknown") - contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}" - if part.metadata.get("originalFileName"): - contentPartsText += f", Source: {part.metadata.get('originalFileName')}" - contentPartsText += ")\n" + # Aggregation: ContentParts werden als Parameter übergeben, keine IDs im Prompt nötig + # Keine ContentPart-Beschreibung nötig - Daten sind bereits im Context verfügbar + contentPartsText = "" else: # Einzelverarbeitung: Zeige Previews for part in validParts: @@ -1565,13 +2019,40 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n" if contentFormat == "extracted": - # Zeige Preview von extrahiertem Text (länger für besseren Kontext) - previewLength = 1000 - if part.data: - preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data - contentPartsText += f" Content preview:\n```\n{preview}\n```\n" + # CRITICAL: Check if this is binary/image data - NEVER include in text prompt! + isBinaryOrImage = ( + part.typeGroup == "image" or + part.typeGroup == "binary" or + (part.mimeType and ( + part.mimeType.startswith("image/") or + part.mimeType.startswith("video/") or + part.mimeType.startswith("audio/") or + self._isBinaryMimeType(part.mimeType) + )) or + # Heuristic check: if data looks like base64 (long string with base64 chars) + (part.data and isinstance(part.data, str) and + len(part.data) > 100 and + self._looksLikeBase64(part.data)) + ) + + if isBinaryOrImage: + # NEVER include binary/base64 data in text prompt - security risk and token explosion! + dataLength = len(part.data) if part.data else 0 + contentPartsText += f" Type: {part.typeGroup}\n" + contentPartsText += f" MIME type: {part.mimeType or 'unknown'}\n" + contentPartsText += f" Data size: {dataLength} chars (binary/base64 - not shown in prompt)\n" + if part.metadata.get("needsVisionExtraction"): + contentPartsText += f" Note: Will be processed with Vision AI\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" else: - contentPartsText += f" Content: (empty)\n" + # Only for text data: Show preview + previewLength = 1000 + if part.data: + preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data + contentPartsText += f" Content preview:\n```\n{preview}\n```\n" + else: + contentPartsText += f" Content: (empty)\n" elif contentFormat == "reference": contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" if part.metadata.get("usageHint"): @@ -1627,6 +2108,9 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th if isAggregation: prompt = f"""# TASK: Generate Section Content (Aggregation) +Return only valid JSON. No explanatory text, no comments, no markdown formatting outside JSON. +If ContentParts have no data, return: {{"elements": [{{"type": "{contentType}", "content": {{"headers": [], "rows": []}}}}]}} + LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}. ## SECTION METADATA @@ -1634,22 +2118,14 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles, - Content Type: {contentType} - Generation Hint: {generationHint} -## AVAILABLE CONTENT FOR THIS SECTION -{contentPartsText if contentPartsText else "(No content parts specified for this section)"} - ## INSTRUCTIONS -1. Generate content for section "{sectionId}" based on the generation hint above -2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) -3. For table content_type: Create a single table with headers and rows from all ContentParts -4. For bullet_list content_type: Create a single list with items from all ContentParts -5. Format appropriately based on content_type ({contentType}) -6. Ensure the generated content is self-contained and understandable independently -7. Return ONLY a JSON object with an "elements" array -8. Each element should match the content_type: {contentType} -9. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer. -10. For paragraphs: Return plain text only, no HTML tags like
, or style attributes -11. For headings: Return plain text only, no HTML tags or styling -12. For images: Do NOT include base64 data in JSON - images are handled separately +1. Extract all data from the context provided. Do not skip or omit any data. +2. Extract data only from the provided context. Never invent, create, or generate data that is not in the context. +3. If the context contains no data, return empty structures (empty rows array for tables). +4. Aggregate all data into one element (e.g., one table). +5. For table: Extract all rows from the context. Return {{"headers": [...], "rows": []}} only if no data exists. +6. Format based on content_type ({contentType}). +7. No HTML/styling: Plain text only, no markup. ## OUTPUT FORMAT Return a JSON object with this structure: @@ -1663,19 +2139,27 @@ Return a JSON object with this structure: ] }} -CRITICAL: -- "content" MUST always be an object (never a string) -- For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup -- Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +Output requirements: +- "content" must be an object (never a string) +- Return only valid JSON - no text before, no text after, no comments, no explanations +- No invented data: Return empty structures if ContentParts have no data +- Extract all data: Process every ContentPart completely and include all extracted data -## CONTEXT (for reference only) -{contextText if contextText else ""} +## USER REQUEST (for context) ``` {userPrompt} ``` + +## CONTEXT +{contextText if contextText else ""} """ else: - prompt = f"""# TASK: Generate Section Content + # Determine if we have ContentParts or need to generate from scratch + hasContentParts = len(validParts) > 0 + + if hasContentParts: + # EXTRACT MODE: Extract data from provided ContentParts + prompt = f"""# TASK: Extract Section Content from Provided Data LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}. @@ -1685,19 +2169,14 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles, - Generation Hint: {generationHint} ## AVAILABLE CONTENT FOR THIS SECTION -{contentPartsText if contentPartsText else "(No content parts specified for this section)"} +{contentPartsText} ## INSTRUCTIONS -1. Generate content for section "{sectionId}" based on the generation hint above -2. Use the available content parts to populate this section -3. For extracted text: Format appropriately based on content_type ({contentType}) -4. Ensure the generated content is self-contained and understandable independently -5. Return ONLY a JSON object with an "elements" array -6. Each element should match the content_type: {contentType} -7. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer. -8. For paragraphs: Return plain text only, no HTML tags like
, or style attributes
-9. For headings: Return plain text only, no HTML tags or styling
-10. For images: If you need to reference an image, describe it in altText. Do NOT include base64 data - images are handled separately
+1. Extract data only from provided ContentParts. Never invent or generate data.
+2. If ContentParts contain no data, return empty structures (empty rows array for tables).
+3. Format based on content_type ({contentType}).
+4. Return only valid JSON with "elements" array.
+5. No HTML/styling: Plain text only, no markup.
## OUTPUT FORMAT
Return a JSON object with this structure:
@@ -1711,19 +2190,328 @@ Return a JSON object with this structure:
]
}}
-CRITICAL:
-- "content" MUST always be an object (never a string)
-- For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup
-- Return ONLY valid JSON. Do not include any explanatory text outside the JSON
+Output requirements:
+- "content" must be an object (never a string)
+- Return only valid JSON - no text before, no text after, no comments, no explanations, no markdown code fences
+- Start with {{ and end with }} - return ONLY the JSON object itself
+- No invented data: Return empty structures if ContentParts have no data
-## CONTEXT (for reference only)
-{contextText if contextText else ""}
+## USER REQUEST
```
{userPrompt}
```
+
+## CONTEXT
+{contextText if contextText else ""}
+"""
+ else:
+ # GENERATE MODE: Generate content from scratch based on generationHint
+ prompt = f"""# TASK: Generate Section Content
+
+LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
+
+## SECTION METADATA
+- Section ID: {sectionId}
+- Content Type: {contentType}
+- Generation Hint: {generationHint}
+
+## INSTRUCTIONS
+1. Generate content based on the Generation Hint above.
+2. Create appropriate content that matches the content_type ({contentType}).
+3. The content should be relevant to the USER REQUEST and fit the context of surrounding sections.
+4. Return only valid JSON with "elements" array.
+5. No HTML/styling: Plain text only, no markup.
+
+## OUTPUT FORMAT
+Return a JSON object with this structure:
+
+{{
+ "elements": [
+ {{
+ "type": "{contentType}",
+ "content": {contentStructureExample}
+ }}
+ ]
+}}
+
+Output requirements:
+- "content" must be an object (never a string)
+- Return only valid JSON - no text before, no text after, no comments, no explanations, no markdown code fences
+- Start with {{ and end with }} - return ONLY the JSON object itself
+- Generate meaningful content based on the Generation Hint
+
+## USER REQUEST
+```
+{userPrompt}
+```
+
+## CONTEXT
+{contextText if contextText else ""}
"""
return prompt
+ def _extractAndMergeMultipleJsonBlocks(self, responseText: str, contentType: str, sectionId: str) -> List[Dict[str, Any]]:
+ """
+ Extract multiple JSON blocks from response and merge them appropriately.
+ For tables: Merge all rows into a single table.
+ For other types: Combine elements.
+ """
+ from modules.shared.jsonUtils import tryParseJson, stripCodeFences, normalizeJsonText, extractFirstBalancedJson
+
+ # Extract all JSON blocks, handling both --- separators and multiple ```json blocks
+ blocks = []
+
+ # Strategy: Extract all ```json blocks first (most reliable), then fall back to other methods
+ # This handles cases where --- separators and ```json blocks are mixed
+ if "```json" in responseText:
+ # Extract all ```json blocks regardless of --- separators
+ jsonParts = responseText.split("```json")
+ for jsonPart in jsonParts[1:]: # Skip first empty part
+ jsonPart = "```json" + jsonPart
+ # Extract just the JSON block (until closing ```)
+ closingFence = jsonPart.find("```", 7) # Find closing ``` after "```json"
+ if closingFence != -1:
+ jsonPart = jsonPart[:closingFence + 3]
+ jsonPart = jsonPart.strip()
+ if jsonPart:
+ blocks.append(jsonPart)
+
+ # If no ```json blocks found, try splitting by --- and extracting JSON
+ if not blocks and "---" in responseText:
+ parts = responseText.split("---")
+ for part in parts:
+ part = part.strip()
+ if not part:
+ continue
+
+ # Try to extract JSON directly from this part
+ normalized = normalizeJsonText(part)
+ normalized = stripCodeFences(normalized)
+ jsonBlock = extractFirstBalancedJson(normalized)
+ if jsonBlock:
+ blocks.append(jsonBlock)
+ elif responseText.count("```json") > 1:
+ # Split by ```json markers (no --- separator)
+ parts = responseText.split("```json")
+ for part in parts[1:]: # Skip first empty part
+ part = "```json" + part
+ part = part.strip()
+ if part:
+ blocks.append(part)
+ else:
+ # Try to find multiple JSON objects/arrays directly
+ normalized = normalizeJsonText(responseText)
+ normalized = stripCodeFences(normalized)
+
+ # Find all JSON blocks
+ start = 0
+ while start < len(normalized):
+ # Find next JSON start
+ brace = normalized.find('{', start)
+ bracket = normalized.find('[', start)
+ jsonStart = -1
+ if brace != -1 and (bracket == -1 or brace < bracket):
+ jsonStart = brace
+ elif bracket != -1:
+ jsonStart = bracket
+
+ if jsonStart == -1:
+ break
+
+ # Extract balanced JSON
+ jsonBlock = extractFirstBalancedJson(normalized[jsonStart:])
+ if jsonBlock:
+ blocks.append(jsonBlock)
+ start = jsonStart + len(jsonBlock)
+ else:
+ break
+
+ if not blocks:
+ logger.warning(f"Section {sectionId}: Could not extract multiple JSON blocks")
+ return []
+
+ logger.info(f"Section {sectionId}: Extracted {len(blocks)} JSON blocks, merging for contentType={contentType}")
+
+ # Parse all blocks
+ allElements = []
+ for i, block in enumerate(blocks):
+ parsed, parseError, _ = tryParseJson(block)
+ if parseError:
+ logger.warning(f"Section {sectionId}: Failed to parse JSON block {i+1}: {str(parseError)}")
+ continue
+
+ elementsFromBlock = []
+ if isinstance(parsed, dict):
+ if "elements" in parsed:
+ elementsFromBlock = parsed["elements"]
+ allElements.extend(elementsFromBlock)
+ elif parsed.get("type"):
+ elementsFromBlock = [parsed]
+ allElements.append(parsed)
+ elif isinstance(parsed, list):
+ elementsFromBlock = parsed
+ allElements.extend(parsed)
+
+ # Log row count for table elements
+ if contentType == "table":
+ tableCount = sum(1 for e in elementsFromBlock if isinstance(e, dict) and e.get("type") == "table")
+ rowCount = sum(
+ len(e.get("content", {}).get("rows", []))
+ for e in elementsFromBlock
+ if isinstance(e, dict) and e.get("type") == "table"
+ )
+ if tableCount > 0:
+ logger.info(f"Section {sectionId}: JSON block {i+1}: {tableCount} table(s) with {rowCount} total rows")
+
+ # Merge elements based on contentType
+ if contentType == "table" and len(allElements) > 1:
+ # Find all table elements
+ tableElements = [e for e in allElements if isinstance(e, dict) and e.get("type") == "table"]
+ if len(tableElements) > 1:
+ # Check if tables can be merged (same column counts)
+ canMerge = self._canMergeTables(tableElements)
+ if canMerge:
+ logger.info(f"Section {sectionId}: Merging {len(tableElements)} tables into one")
+ mergedTable = self._mergeTableElements(tableElements)
+ # Replace all table elements with merged one
+ nonTableElements = [e for e in allElements if not (isinstance(e, dict) and e.get("type") == "table")]
+ return [mergedTable] + nonTableElements
+ else:
+ logger.warning(f"Section {sectionId}: Cannot merge {len(tableElements)} tables (incompatible headers/columns). Keeping tables separate.")
+ # Return all elements as-is (tables remain separate)
+ return allElements
+
+ return allElements
+
+ def _canMergeTables(self, tableElements: List[Dict[str, Any]]) -> bool:
+ """Check if tables can be safely merged (same column counts)."""
+ if len(tableElements) <= 1:
+ return True
+
+ # Extract column counts from all tables
+ columnCounts = []
+ for table in tableElements:
+ headers = []
+ if isinstance(table.get("content"), dict):
+ headers = table["content"].get("headers", [])
+ elif isinstance(table.get("content"), list):
+ # Old format: content is list of rows
+ if table["content"] and isinstance(table["content"][0], list):
+ headers = table["content"][0]
+ columnCounts.append(len(headers))
+
+ # Check if all tables have the same column count
+ firstCount = columnCounts[0] if columnCounts else 0
+ return all(count == firstCount for count in columnCounts)
+
+ def _mergeTableElements(self, tableElements: List[Dict[str, Any]]) -> Dict[str, Any]:
+ """Merge multiple table elements into a single table.
+ Assumes tables have compatible column counts (checked by _canMergeTables).
+ """
+ if not tableElements:
+ return {"type": "table", "content": {"headers": [], "rows": []}}
+
+ if len(tableElements) == 1:
+ return tableElements[0]
+
+ # Extract headers from all tables
+ allHeaders = []
+ for table in tableElements:
+ headers = []
+ if isinstance(table.get("content"), dict):
+ headers = table["content"].get("headers", [])
+ elif isinstance(table.get("content"), list):
+ # Old format: content is list of rows
+ if table["content"] and isinstance(table["content"][0], list):
+ headers = table["content"][0]
+ allHeaders.append(headers)
+
+ # Check header compatibility (same headers or just same column count)
+ firstHeaders = allHeaders[0]
+ headersCompatible = all(headers == firstHeaders for headers in allHeaders)
+
+ # If headers differ but column counts match, use first table's headers and log warning
+ if not headersCompatible:
+ logger.warning(f"Merging {len(tableElements)} tables with different headers but same column count. Using headers from first table.")
+
+ # Use headers from first table
+ headers = firstHeaders
+
+ # Collect all rows from all tables, validating column count
+ allRows = []
+ for tableIdx, table in enumerate(tableElements):
+ rows = []
+ if isinstance(table.get("content"), dict):
+ rows = table["content"].get("rows", [])
+ elif isinstance(table.get("content"), list):
+ # Old format: content is list of rows
+ if table["content"] and isinstance(table["content"][0], list):
+ rows = table["content"][1:] if len(table["content"]) > 1 else []
+
+ # Validate row column count matches header count
+ expectedColCount = len(headers)
+ validRows = []
+ for rowIdx, row in enumerate(rows):
+ if isinstance(row, list):
+ if len(row) == expectedColCount:
+ validRows.append(row)
+ else:
+ logger.warning(f"Table {tableIdx+1}, row {rowIdx+1}: column count mismatch ({len(row)} vs {expectedColCount}), skipping row")
+ elif isinstance(row, dict):
+ # Convert dict row to list based on header order
+ rowList = [row.get(h, "") for h in headers]
+ validRows.append(rowList)
+ else:
+ logger.warning(f"Table {tableIdx+1}, row {rowIdx+1}: invalid row format, skipping")
+
+ allRows.extend(validRows)
+
+ # Keep all rows, including duplicates (duplicates may be intentional)
+ logger.info(f"Merged {len(tableElements)} tables: {len(allRows)} total rows (duplicates preserved)")
+
+ return {
+ "type": "table",
+ "content": {
+ "headers": headers,
+ "rows": allRows
+ }
+ }
+
+ def _isBinaryMimeType(self, mimeType: str) -> bool:
+ """Check if MIME type is binary."""
+ binaryTypes = [
+ "application/octet-stream",
+ "application/pdf",
+ "application/zip",
+ "application/x-zip-compressed"
+ ]
+ return mimeType in binaryTypes
+
+ def _looksLikeBase64(self, data: str) -> bool:
+ """
+ Heuristic check if string looks like base64-encoded data.
+
+ Base64 contains only: A-Z, a-z, 0-9, +, /, =, and whitespace.
+ If >95% of characters are base64 chars and no normal text patterns, likely base64.
+ """
+ if not data or len(data) < 100:
+ return False
+
+ base64Chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t ")
+ sample = data[:500] # Check first 500 chars
+ if not sample:
+ return False
+
+ base64Ratio = sum(1 for c in sample if c in base64Chars) / len(sample)
+
+ # If >95% base64 chars and no normal text patterns (like spaces between words) → likely base64
+ # Base64 typically has very long strings without spaces or punctuation
+ hasNormalTextPatterns = any(
+ c in sample[:200] for c in ".,!?;:()[]{}\"'"
+ ) or " " in sample[:200] # Double spaces suggest text
+
+ return base64Ratio > 0.95 and not hasNormalTextPatterns
+
def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]:
"""Finde ContentPart nach ID."""
for part in contentParts:
diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py
index bee83706..c6774fc3 100644
--- a/modules/services/serviceAi/subStructureGeneration.py
+++ b/modules/services/serviceAi/subStructureGeneration.py
@@ -9,9 +9,11 @@ Handles document structure generation, including:
"""
import json
import logging
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelExtraction import ContentPart
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
+from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@@ -42,35 +44,47 @@ class StructureGenerator:
self,
userPrompt: str,
contentParts: List[ContentPart],
- outputFormat: str,
- parentOperationId: str
+ outputFormat: Optional[str] = None,
+ parentOperationId: str = None
) -> Dict[str, Any]:
"""
Phase 5C: Generiert Chapter-Struktur (Table of Contents).
Definiert für jedes Chapter:
- Level, Title
- - contentPartIds
- - contentPartInstructions
+ - contentParts (unified object with instruction and/or caption per part)
- generationHint
+ Generate document structure with per-document format determination.
+ Multiple documents can be produced with different formats (e.g., one PDF, one HTML).
+ AI determines formats per-document from user prompt. The outputFormat parameter is
+ only a validation fallback - used if AI doesn't return format per document.
+
Args:
userPrompt: User-Anfrage
contentParts: Alle vorbereiteten ContentParts mit Metadaten
- outputFormat: Ziel-Format (html, docx, pdf, etc.)
+ outputFormat: Optional global format fallback. If omitted, formats are determined
+ from user prompt by AI. Used as validation fallback if AI doesn't
+ return format per document. Defaults to "txt" if not provided.
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns:
Struktur-Dict mit documents und chapters (nicht sections!)
"""
+ # If outputFormat not provided, use "txt" as fallback for validation
+ # AI will determine formats per document from user prompt
+ if not outputFormat:
+ outputFormat = "txt"
+ logger.debug("outputFormat not provided - using 'txt' as validation fallback, formats determined from prompt")
# Erstelle Operation-ID für Struktur-Generierung
structureOperationId = f"{parentOperationId}_structure_generation"
# Starte ChatLog mit Parent-Referenz
+ formatDisplay = outputFormat if outputFormat else "auto-determined"
self.services.chat.progressLogStart(
structureOperationId,
"Chapter Structure Generation",
"Structure",
- f"Generating chapter structure for {outputFormat}",
+ f"Generating chapter structure (format: {formatDisplay})",
parentOperationId=parentOperationId
)
@@ -82,28 +96,93 @@ class StructureGenerator:
outputFormat=outputFormat
)
- # AI-Call für Chapter-Struktur-Generierung
- # Note: Debug logging is handled by callAiPlanning
- aiResponse = await self.aiService.callAiPlanning(
- prompt=structurePrompt,
- debugType="chapter_structure_generation"
+ # AI-Call für Chapter-Struktur-Generierung mit Looping-Unterstützung
+ # Use _callAiWithLooping instead of callAiPlanning to support continuation if response is cut
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_GENERATE,
+ priority=PriorityEnum.QUALITY,
+ processingMode=ProcessingModeEnum.DETAILED,
+ compressPrompt=False,
+ compressContext=False,
+ resultFormat="json"
)
- # Parse Struktur
- # Use tryParseJson which handles malformed JSON and unterminated strings
- extractedJson = self.services.utils.jsonExtractString(aiResponse)
+ # Create prompt builder for continuation support
+ async def buildChapterStructurePromptWithContinuation(
+ continuationContext: Optional[Dict[str, Any]] = None,
+ **kwargs
+ ) -> str:
+ """Build chapter structure prompt with optional continuation context."""
+ basePrompt = self._buildChapterStructurePrompt(
+ userPrompt=userPrompt,
+ contentParts=contentParts,
+ outputFormat=outputFormat
+ )
+
+ if continuationContext:
+ # Add continuation instructions
+ deliveredSummary = continuationContext.get("delivered_summary", "")
+ elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
+ cutOffElement = continuationContext.get("cut_off_element", "")
+
+ continuationText = f"{deliveredSummary}\n\n"
+ continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
+
+ if elementBeforeCutoff:
+ continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
+ continuationText += f"{elementBeforeCutoff}\n\n"
+
+ if cutOffElement:
+ continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
+ continuationText += f"{cutOffElement}\n\n"
+
+ continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
+ continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
+ continuationText += "Start directly with the next chapter that should follow.\n\n"
+
+ return f"""{basePrompt}
+
+{continuationText}
+
+Continue generating the remaining chapters now.
+"""
+ else:
+ return basePrompt
+
+ # Call AI with looping support
+ # NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
+ # The contentParts metadata is already included in the prompt (contentPartsIndex)
+ # Actual content extraction happens later during section generation
+ checkWorkflowStopped(self.services)
+ aiResponseJson = await self.aiService.callAiWithLooping(
+ prompt=structurePrompt,
+ options=options,
+ debugPrefix="chapter_structure_generation",
+ promptBuilder=buildChapterStructurePromptWithContinuation,
+ promptArgs={
+ "userPrompt": userPrompt,
+ "outputFormat": outputFormat,
+ "services": self.services
+ },
+ useCaseId="chapter_structure", # REQUIRED: Explicit use case ID
+ operationId=structureOperationId,
+ userPrompt=userPrompt,
+ contentParts=None # Do not pass ContentParts - only metadata needed, not content extraction
+ )
+
+ # Parse the complete JSON response (looping system already handles completion)
+ extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
if parseError is not None:
- # Try to repair broken JSON (handles unterminated strings, incomplete structures, etc.)
- logger.warning(f"Initial JSON parsing failed: {str(parseError)}. Attempting repair...")
+ # Even with looping, try repair as fallback
+ logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
from modules.shared import jsonUtils
repairedJson = jsonUtils.repairBrokenJson(extractedJson)
if repairedJson:
- # Try parsing repaired JSON
parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
if parseError is None:
- logger.info("Successfully repaired and parsed JSON structure")
+ logger.info("Successfully repaired and parsed JSON structure after looping")
structure = parsedJson
else:
logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
@@ -115,6 +194,72 @@ class StructureGenerator:
else:
structure = parsedJson
+ # State 3 Validation: Validate and auto-fix structure
+ # Validation 3.1: Structure missing 'documents' field
+ if "documents" not in structure:
+ raise ValueError("Structure missing 'documents' field - cannot auto-fix")
+
+ documents = structure["documents"]
+
+ # Validation 3.2: Structure has no documents
+ if not isinstance(documents, list) or len(documents) == 0:
+ raise ValueError("Structure has no documents - cannot generate without documents")
+
+ # Import renderer registry for format validation (existing infrastructure)
+ from modules.services.serviceGeneration.renderers.registry import getRenderer
+
+ # Validate and fix each document
+ for doc in documents:
+ # Validation 3.3 & 3.4: Document outputFormat
+ # outputFormat parameter is optional - if omitted, formats determined from prompt by AI
+ # Use as fallback only if AI doesn't return format per document
+ # Multiple documents can have different formats (e.g., one PDF, one HTML)
+ globalFormatFallback = outputFormat or "txt" # Fallback for validation
+
+ if "outputFormat" not in doc or not doc["outputFormat"]:
+ # AI didn't return format or returned empty - use global fallback
+ doc["outputFormat"] = globalFormatFallback
+ logger.warning(f"Document {doc.get('id')} missing outputFormat - using fallback: {doc['outputFormat']}")
+ else:
+ # AI returned format - validate using existing renderer registry
+ formatName = str(doc["outputFormat"]).lower().strip()
+ renderer = getRenderer(formatName) # Uses existing infrastructure
+
+ if not renderer:
+ # Format doesn't match any renderer - use txt (simple approach)
+ logger.warning(f"Document {doc.get('id')} has format without renderer: {formatName}, using 'txt'")
+ doc["outputFormat"] = "txt"
+ else:
+ # Valid format with renderer - normalize and keep AI result
+ doc["outputFormat"] = formatName
+ logger.debug(f"Document {doc.get('id')} using AI-determined format: {formatName}")
+
+ # Validation 3.5 & 3.6: Document language
+ # Use validated currentUserLanguage (always valid, validated during user intention analysis)
+ # Access via _getUserLanguage() which uses self.services.currentUserLanguage
+ userPromptLanguage = self._getUserLanguage() # Uses validated currentUserLanguage infrastructure
+
+ if "language" not in doc or not isinstance(doc["language"], str) or len(doc["language"]) != 2:
+ # AI didn't return language or invalid format - use validated currentUserLanguage
+ doc["language"] = userPromptLanguage
+ if "language" not in doc:
+ logger.warning(f"Document {doc.get('id')} missing language - using currentUserLanguage: {userPromptLanguage}")
+ else:
+ logger.warning(f"Document {doc.get('id')} has invalid language format from AI: {doc['language']}, using currentUserLanguage")
+ else:
+ # AI returned valid language format - normalize
+ doc["language"] = doc["language"].lower().strip()[:2]
+ logger.debug(f"Document {doc.get('id')} using AI-determined language: {doc['language']}")
+
+ # Validation 3.7: Document missing 'chapters' field
+ if "chapters" not in doc:
+ raise ValueError(f"Document {doc.get('id')} missing 'chapters' field - cannot auto-fix")
+
+ # Validation 3.8: Chapter missing 'contentParts' field
+ for chapter in doc["chapters"]:
+ if "contentParts" not in chapter:
+ raise ValueError(f"Chapter {chapter.get('id')} missing 'contentParts' field - cannot auto-fix")
+
# ChatLog abschließen
self.services.chat.progressLogFinish(structureOperationId, True)
@@ -186,59 +331,79 @@ class StructureGenerator:
language = self._getUserLanguage()
logger.debug(f"Using language from services (user intention analysis) for structure generation: {language}")
- prompt = f"""USER REQUEST (for context):
+ prompt = f"""# TASK: Generate Chapter Structure
+
+This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.
+
+## USER REQUEST (for context)
```
{userPrompt}
```
-LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
-
-AVAILABLE CONTENT PARTS:
+## AVAILABLE CONTENT PARTS
{contentPartsIndex}
-TASK: Generate Chapter Structure for the documents to be generated.
+## CONTENT ASSIGNMENT RULE
+If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.
-IMPORTANT - CHAPTER INDEPENDENCE:
-- Each chapter is independent and self-contained
-- One chapter does NOT have information about another chapter
-- Each chapter must provide its own context and be understandable alone
+Assignment logic:
+- If chapter DISPLAYS a document/image → assign "object" format ContentPart with "caption"
+- If chapter generates text content ABOUT a document/image/data → assign ContentPart with "instruction":
+ - Prefer "extracted" format if available (contains analyzed/extracted content)
+ - If only "object" format is available, use "object" format with "instruction" (to write ABOUT the image/document)
+- If chapter's generationHint or purpose relates to a document/image/data mentioned in user request → it MUST have ContentParts assigned
+- Multiple chapters might assign the same ContentPart (e.g., one chapter displays image, another writes about it)
+- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
+- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request
-CRITICAL - CONTENT ASSIGNMENT TO CHAPTERS:
-- You MUST assign available ContentParts to chapters using contentPartIds
-- Based on the user request, determine which content should be used in which chapter
-- If the user request mentions specific content, assign the corresponding ContentPart to the appropriate chapter
-- Chapters WITHOUT contentPartIds can only generate generic content, NOT document-specific analysis
-- To include document content analysis, chapters MUST have contentPartIds assigned
-- Review the user request carefully to match ContentParts to chapters based on context and purpose
+CRITICAL RULE: If the user request mentions BOTH:
+ a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
+ b) Generic content types (article text, main content, body text, etc.)
+Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.
-CRITICAL - CHAPTERS WITHOUT CONTENT PARTS:
-- If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
-- Include: what to generate, what information to include, purpose, specific details
-- Without content parts, AI relies ENTIRELY on generationHint and CANNOT analyze document content
+## CHAPTER STRUCTURE REQUIREMENTS
+- Generate chapters based on USER REQUEST - analyze what structure the user wants
+- Each chapter needs: id, level (1, 2, 3, etc.), title
+- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
+- The "instruction" field for each ContentPart MUST contain ALL relevant details from the USER REQUEST that apply to content extraction for this specific chapter. Include all formatting rules, data requirements, constraints, and specifications mentioned in the user request that are relevant for processing this ContentPart in this chapter.
+- generationHint: Description of what content to generate for this chapter
+ The generationHint MUST contain ALL relevant details from the USER REQUEST that apply to this specific chapter. Include all formatting rules, data requirements, constraints, column specifications, validation rules, and any other specifications mentioned in the user request that are relevant for generating content for this chapter. Do NOT use generic descriptions - include specific details from the user request.
+- The number of chapters depends on the user request - create only what is requested
-IMPORTANT - FORMATTING:
-- Formatting (fonts, colors, layouts, styles) is handled AUTOMATICALLY by the renderer
-- Do NOT specify formatting details in generationHint unless it's content-specific (e.g., "pie chart with 3 segments")
-- Focus on CONTENT and STRUCTURE, not visual formatting
-- The renderer will apply appropriate styling based on the output format ({outputFormat})
+## DOCUMENT OUTPUT FORMAT
+For each document, determine the output format by analyzing the USER REQUEST:
+- Look for explicit format mentions
+- Infer from document purpose
+- Infer from content type
+- If format cannot be determined from the prompt, use: "{outputFormat}"
+- Include "outputFormat" field in each document in the JSON structure
+- Multiple documents can have different formats
-For each chapter:
-- chapter id
-- level (1, 2, 3, etc.)
-- title
-- contentPartIds: [List of ContentPart IDs] - ASSIGN content based on user request and chapter purpose
-- contentPartInstructions: {{
- "partId": {{
- "instruction": "How content should be structured"
- }}
-}}
-- generationHint: Description of the content (must be self-contained with all necessary context)
- * If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
- * Focus on content and structure, NOT formatting details
+## DOCUMENT LANGUAGE
+For each document, determine the language by analyzing the USER REQUEST:
+- Look for explicit language mentions
+- Map language names to ISO 639-1 codes
+- If language cannot be determined from the prompt, use: "{language}"
+- Include "language" field in each document in the JSON structure
+- Multiple documents can have different languages
-OUTPUT FORMAT: {outputFormat}
+## JSON STRUCTURE REQUIREMENTS
+- metadata: {{"title": "...", "language": "..."}}
+- documents: Array of document objects, each with:
+ - id: Unique document identifier (e.g., "doc_1")
+ - title: Document title
+ - filename: Output filename with extension (e.g., "document.docx")
+ - outputFormat: Format code (e.g., "docx", "pdf", "html", "xlsx", "pptx", "txt")
+ - language: ISO 639-1 language code (e.g., "de", "en", "fr", "it")
+ - chapters: Array of chapter objects, each with:
+ - id: Unique chapter identifier (e.g., "chapter_1")
+ - level: Heading level (1, 2, 3, etc.)
+ - title: Chapter title
+ - contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}}
+ - generationHint: Description of what content to generate
+ - sections: Empty array []
-RETURN JSON:
+EXAMPLE STRUCTURE (for reference only - adapt to user request):
{{
"metadata": {{
"title": "Document Title",
@@ -248,34 +413,43 @@ RETURN JSON:
"id": "doc_1",
"title": "Document Title",
"filename": "document.{outputFormat}",
+ "outputFormat": "{outputFormat}",
+ "language": "{language}",
"chapters": [
{{
"id": "chapter_1",
"level": 1,
- "title": "Introduction",
- "contentPartIds": ["part_ext_1"],
- "contentPartInstructions": {{
- "part_ext_1": {{
- "instruction": "Use full extracted text"
+ "title": "Chapter Title",
+ "contentParts": {{
+ "extracted_part_id": {{
+ "instruction": "Use extracted content with ALL relevant details from user request"
}}
}},
- "generationHint": "Create introduction section",
- "sections": []
- }},
- {{
- "id": "chapter_2",
- "level": 1,
- "title": "Main Title",
- "contentPartIds": [],
- "contentPartInstructions": {{}},
- "generationHint": "Create [specific content description] with [formatting details]. Include [required information]. Purpose: [explanation of what this chapter provides].",
+ "generationHint": "Detailed description including ALL relevant details from user request for this chapter",
"sections": []
}}
]
}}]
}}
-Return ONLY valid JSON following the structure above.
+CRITICAL INSTRUCTIONS:
+- Generate chapters based on USER REQUEST, NOT based on the example above
+- The example shows the JSON structure format, NOT the required chapters
+- Create only the chapters that match the user's request
+- Adapt chapter titles and structure to match the user's specific request
+- Determine outputFormat and language for each document by analyzing the USER REQUEST above
+- The example shows placeholders "{outputFormat}" and "{language}" - YOU MUST REPLACE THESE with actual values determined from the USER REQUEST
+
+MANDATORY CONTENT ASSIGNMENT CHECK:
+For each chapter, verify:
+1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
+2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?
+ - Examples: "article about the photo", "text describing the image", "analysis of the document", "content based on the data"
+ - Even if chapter doesn't explicitly say "about the image", if user request mentions both the image AND this chapter's content type → relate them
+3. If YES to both → chapter MUST have contentParts assigned (cannot be empty {{}})
+4. If ContentPart is "object" format and chapter needs to write ABOUT it → assign with "instruction" field, not just "caption"
+
+OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
"""
return prompt
diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py
index 06877968..be38de05 100644
--- a/modules/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/services/serviceExtraction/mainServiceExtraction.py
@@ -15,6 +15,7 @@ from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import modelSelector
+from modules.shared.jsonUtils import stripCodeFences
logger = logging.getLogger(__name__)
@@ -164,6 +165,29 @@ class ExtractionService:
if "sourceAction" not in p.metadata:
p.metadata["sourceAction"] = "extraction.extractContent"
+ # Write debug file for each text part extracted (without AI)
+ for j, part in enumerate(ec.parts):
+ if part.typeGroup == "text" and part.data and self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
+ try:
+ debug_content = {
+ "partIndex": j + 1,
+ "partId": part.id,
+ "typeGroup": part.typeGroup,
+ "mimeType": part.mimeType or "text/plain",
+ "label": part.label,
+ "dataLength": len(part.data),
+ "metadata": part.metadata.copy() if part.metadata else {},
+ "data": part.data # Full extracted text
+ }
+ debug_json = json.dumps(debug_content, indent=2, ensure_ascii=False)
+ # Use document name and part index for filename
+ doc_name_safe = documentData["fileName"].replace(" ", "_").replace("/", "_").replace("\\", "_")[:50]
+ debug_filename = f"extraction_text_part_{j+1}_{doc_name_safe}.txt"
+ self.services.utils.writeDebugFile(debug_json, debug_filename)
+ logger.info(f"Wrote debug file for extracted text part {j+1}/{len(ec.parts)}: {debug_filename}")
+ except Exception as e:
+ logger.warning(f"Failed to write debug file for text part {j+1}: {str(e)}")
+
# Log chunking information
chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
if chunkedParts:
@@ -263,256 +287,6 @@ class ExtractionService:
return results
- def mergeAiResults(
- self,
- extractedContent: List[ContentExtracted],
- aiResults: List[str],
- strategy: MergeStrategy
- ) -> ContentExtracted:
- """
- Merge AI results from chunked content back into a single ContentExtracted.
-
- Args:
- extractedContent: List of ContentExtracted objects that were processed
- aiResults: List of AI response strings, one per chunk
- strategy: Merge strategy configuration (dict or MergeStrategy object)
-
- Returns:
- Single ContentExtracted with merged AI results
- """
- logger.debug(f"=== MERGING AI RESULTS ===")
- logger.debug(f"Extracted content: {len(extractedContent)} documents")
- logger.debug(f"AI results: {len(aiResults)} responses")
- logger.debug(f"Merge strategy: {strategy.mergeType}")
-
- mergeStrategy = strategy
-
- # Collect all parts from all extracted content
- allParts: List[ContentPart] = []
- for ec in extractedContent:
- allParts.extend(ec.parts)
-
- logger.debug(f"Total original parts: {len(allParts)}")
-
- # Create AI result parts
- aiResultParts: List[ContentPart] = []
- for i, aiResult in enumerate(aiResults):
- aiPart = ContentPart(
- id=f"ai_result_{i}",
- parentId=None, # Will be set based on strategy
- label="ai_result",
- typeGroup="text",
- mimeType="text/plain",
- data=aiResult,
- metadata={
- "aiResult": True,
- "order": i,
- "size": len(aiResult.encode('utf-8'))
- }
- )
- aiResultParts.append(aiPart)
-
- logger.debug(f"Created {len(aiResultParts)} AI result parts")
-
- # Apply merging strategy
- if mergeStrategy.mergeType == "concatenate":
- mergedParts = self._mergeConcatenate(allParts, aiResultParts, mergeStrategy)
- elif mergeStrategy.mergeType == "hierarchical":
- mergedParts = self._mergeHierarchical(allParts, aiResultParts, mergeStrategy)
- elif mergeStrategy.mergeType == "intelligent":
- mergedParts = self._mergeIntelligent(allParts, aiResultParts, mergeStrategy)
- else:
- # Default to concatenate
- mergedParts = self._mergeConcatenate(allParts, aiResultParts, mergeStrategy)
-
- # Create final ContentExtracted
- mergedContent = ContentExtracted(
- id=f"merged_{uuid.uuid4()}",
- parts=mergedParts
- )
-
- logger.debug(f"=== MERGE COMPLETED ===")
- logger.debug(f"Final merged parts: {len(mergedParts)}")
- logger.debug(f"Merged content ID: {mergedContent.id}")
-
- return mergedContent
-
- def _mergeConcatenate(
- self,
- originalParts: List[ContentPart],
- aiResultParts: List[ContentPart],
- strategy: MergeStrategy
- ) -> List[ContentPart]:
- """Merge parts by simple concatenation."""
- mergedParts = []
-
- # Add original parts (filtered if needed)
- for part in originalParts:
- if strategy.preserveChunks or not part.metadata.get("chunk", False):
- mergedParts.append(part)
-
- # Add AI results
- if aiResultParts:
- # Group AI results by parentId if available
- aiResultsByParent = {}
- for aiPart in aiResultParts:
- parentId = aiPart.parentId or "root"
- if parentId not in aiResultsByParent:
- aiResultsByParent[parentId] = []
- aiResultsByParent[parentId].append(aiPart)
-
- # Merge AI results for each parent
- for parentId, aiParts in aiResultsByParent.items():
- if len(aiParts) == 1:
- mergedParts.append(aiParts[0])
- else:
- # Concatenate multiple AI results for same parent
- combinedData = strategy.chunkSeparator.join([p.data for p in aiParts])
- combinedPart = ContentPart(
- id=f"merged_ai_{parentId}",
- parentId=parentId if parentId != "root" else None,
- label="merged_ai_result",
- typeGroup="text",
- mimeType="text/plain",
- data=combinedData,
- metadata={
- "aiResult": True,
- "merged": True,
- "sourceCount": len(aiParts),
- "size": len(combinedData.encode('utf-8'))
- }
- )
- mergedParts.append(combinedPart)
-
- return mergedParts
-
- def _mergeHierarchical(
- self,
- originalParts: List[ContentPart],
- aiResultParts: List[ContentPart],
- strategy: MergeStrategy
- ) -> List[ContentPart]:
- """Merge parts hierarchically based on parentId relationships."""
- # Group parts by parentId
- partsByParent = {}
- for part in originalParts:
- parentId = part.parentId or "root"
- if parentId not in partsByParent:
- partsByParent[parentId] = []
- partsByParent[parentId].append(part)
-
- # Group AI results by parentId
- aiResultsByParent = {}
- for aiPart in aiResultParts:
- parentId = aiPart.parentId or "root"
- if parentId not in aiResultsByParent:
- aiResultsByParent[parentId] = []
- aiResultsByParent[parentId].append(aiPart)
-
- mergedParts = []
-
- # Process each parent group
- for parentId in set(list(partsByParent.keys()) + list(aiResultsByParent.keys())):
- originalGroup = partsByParent.get(parentId, [])
- aiGroup = aiResultsByParent.get(parentId, [])
-
- # Add original parts
- mergedParts.extend(originalGroup)
-
- # Add AI results for this parent
- if aiGroup:
- if len(aiGroup) == 1:
- mergedParts.append(aiGroup[0])
- else:
- # Merge multiple AI results
- combinedData = strategy.chunkSeparator.join([p.data for p in aiGroup])
- combinedPart = ContentPart(
- id=f"hierarchical_ai_{parentId}",
- parentId=parentId if parentId != "root" else None,
- label="hierarchical_ai_result",
- typeGroup="text",
- mimeType="text/plain",
- data=combinedData,
- metadata={
- "aiResult": True,
- "hierarchical": True,
- "sourceCount": len(aiGroup),
- "size": len(combinedData.encode('utf-8'))
- }
- )
- mergedParts.append(combinedPart)
-
- return mergedParts
-
- def _mergeIntelligent(
- self,
- originalParts: List[ContentPart],
- aiResultParts: List[ContentPart],
- strategy: MergeStrategy
- ) -> List[ContentPart]:
- """Merge parts using intelligent strategies based on content type."""
- mergedParts = []
-
- # Group by typeGroup for intelligent merging
- partsByType = {}
- for part in originalParts:
- typeGroup = part.typeGroup
- if typeGroup not in partsByType:
- partsByType[typeGroup] = []
- partsByType[typeGroup].append(part)
-
- # Process each type group
- for typeGroup, parts in partsByType.items():
- if typeGroup == "text":
- mergedParts.extend(self._mergeTextIntelligent(parts, aiResultParts, strategy))
- elif typeGroup == "table":
- mergedParts.extend(self._mergeTableIntelligent(parts, aiResultParts, strategy))
- elif typeGroup == "structure":
- mergedParts.extend(self._mergeStructureIntelligent(parts, aiResultParts, strategy))
- else:
- # Default handling for other types
- mergedParts.extend(parts)
-
- # Add any remaining AI results that weren't merged
- for aiPart in aiResultParts:
- if not any(p.id == aiPart.id for p in mergedParts):
- mergedParts.append(aiPart)
-
- return mergedParts
-
- def _mergeTextIntelligent(
- self,
- textParts: List[ContentPart],
- aiResultParts: List[ContentPart],
- strategy: MergeStrategy
- ) -> List[ContentPart]:
- """Intelligent merging for text content."""
- # For now, use concatenate strategy
- # This could be enhanced with semantic analysis, summarization, etc.
- return self._mergeConcatenate(textParts, aiResultParts, strategy)
-
- def _mergeTableIntelligent(
- self,
- tableParts: List[ContentPart],
- aiResultParts: List[ContentPart],
- strategy: MergeStrategy
- ) -> List[ContentPart]:
- """Intelligent merging for table content."""
- # For now, use concatenate strategy
- # This could be enhanced with table merging logic
- return self._mergeConcatenate(tableParts, aiResultParts, strategy)
-
- def _mergeStructureIntelligent(
- self,
- structureParts: List[ContentPart],
- aiResultParts: List[ContentPart],
- strategy: MergeStrategy
- ) -> List[ContentPart]:
- """Intelligent merging for structured content."""
- # For now, use concatenate strategy
- # This could be enhanced with structure-aware merging
- return self._mergeConcatenate(structureParts, aiResultParts, strategy)
-
async def processDocumentsPerChunk(
self,
documents: List[ChatDocument],
@@ -756,11 +530,15 @@ class ExtractionService:
return processedResults
def _convertToContentParts(
- self, partResults: Union[List[PartResult], List[AiCallResponse]]
+ self, partResults: Union[List[PartResult], List[AiCallResponse]], originalContentParts: Optional[List[ContentPart]] = None
) -> List[ContentPart]:
"""Convert part results to ContentParts (internal helper for consolidation).
Handles both PartResult (from extraction workflow) and AiCallResponse (from content parts processing).
+
+ Args:
+ partResults: List of PartResult or AiCallResponse objects
+ originalContentParts: Optional list of original ContentPart objects to preserve typeGroup and metadata
"""
content_parts = []
@@ -794,14 +572,30 @@ class ExtractionService:
elif isinstance(partResults[0], AiCallResponse):
# Logic from interfaceAiObjects (from content parts processing)
# Phase 7: Add originalIndex for explicit ordering
+ # REQUIRED: originalContentParts must be provided for AiCallResponse path to preserve typeGroup
+ if not originalContentParts:
+ raise ValueError("originalContentParts is required when merging AiCallResponse objects. All callers must provide the original ContentPart objects to preserve typeGroup.")
+
for i, result in enumerate(partResults):
if result.content:
+ # Handle one-to-many relationships (e.g., chunking: 1 contentPart -> N chunkResults)
+ # If we have fewer originalContentParts than partResults, use the first one for all
+ if i < len(originalContentParts):
+ originalPart = originalContentParts[i]
+ else:
+ # One-to-many: use first originalContentPart for remaining results
+ originalPart = originalContentParts[0]
+
+ originalTypeGroup = originalPart.typeGroup or "text"
+ originalMimeType = originalPart.mimeType or "text/plain"
+ originalLabel = originalPart.label or f"ai_result_{i}"
+
content_part = ContentPart(
id=str(uuid.uuid4()),
parentId=None,
- label=f"ai_result_{i}",
- typeGroup="text", # Default to text for AI results
- mimeType="text/plain",
+ label=originalLabel,
+ typeGroup=originalTypeGroup, # Preserve original typeGroup from originalContentParts
+ mimeType=originalMimeType,
data=result.content,
metadata={
"aiResult": True,
@@ -821,17 +615,23 @@ class ExtractionService:
def mergePartResults(
self,
partResults: Union[List[PartResult], List[AiCallResponse]],
- options: Optional[AiCallOptions] = None
+ options: Optional[AiCallOptions] = None,
+ originalContentParts: Optional[List[ContentPart]] = None
) -> str:
"""Unified merge for both PartResult and AiCallResponse.
Consolidated from both interfaceAiObjects.py and existing serviceExtraction method.
+
+ Args:
+ partResults: List of PartResult or AiCallResponse objects to merge
+ options: Optional AiCallOptions for merge strategy
+ originalContentParts: Optional list of original ContentPart objects to preserve typeGroup
"""
if not partResults:
return ""
- # Convert to ContentParts using unified helper
- content_parts = self._convertToContentParts(partResults)
+ # Convert to ContentParts using unified helper, preserving original typeGroup
+ content_parts = self._convertToContentParts(partResults, originalContentParts)
# Determine merge strategy based on input type
if isinstance(partResults[0], PartResult):
@@ -852,7 +652,31 @@ class ExtractionService:
mergeType="concatenate"
)
- # Apply merging
+ # Check if this is an elements response format (elements array structure)
+ # This is used for section content generation where multiple ContentParts are processed
+ isElementsResponse = self._isElementsResponse(content_parts)
+
+ if isElementsResponse:
+ # Merge JSON elements responses intelligently (merge tables, combine elements)
+ logger.info(f"Detected 'elements' JSON response format - merging {len(content_parts)} JSON responses")
+ merged_json = self._mergeElementsResponses(content_parts)
+ merged_json_str = json.dumps(merged_json, indent=2, ensure_ascii=False)
+ logger.info(f"Successfully merged 'elements' JSON responses into single unified JSON ({len(merged_json_str)} chars)")
+ return merged_json_str
+
+ # Check if this is a JSON extraction response format (extracted_content structure)
+ # If so, merge JSON structures intelligently before applying regular merging
+ isJsonExtractionResponse = self._isJsonExtractionResponse(content_parts)
+
+ if isJsonExtractionResponse:
+ # Merge JSON extraction responses intelligently
+ logger.info(f"Detected JSON extraction response format - merging {len(content_parts)} JSON responses")
+ merged_json = self._mergeJsonExtractionResponses(content_parts, originalContentParts)
+ merged_json_str = json.dumps(merged_json, indent=2, ensure_ascii=False)
+ logger.info(f"Successfully merged JSON extraction responses into single unified JSON ({len(merged_json_str)} chars)")
+ return merged_json_str
+
+ # Apply regular merging for non-JSON extraction responses
merged_parts = applyMerging(content_parts, merge_strategy)
# Phase 6: Enhanced format with metadata preservation
@@ -897,6 +721,428 @@ class ExtractionService:
logger.info(f"Merged {len(partResults)} parts using unified merging system with metadata preservation (generationResponse={isGenerationResponse})")
return final_content.strip()
+ def _isJsonExtractionResponse(self, content_parts: List[ContentPart]) -> bool:
+ """Check if contentParts contain JSON extraction responses (extracted_content format)."""
+ if not content_parts:
+ return False
+
+ # Check first part to see if it's JSON extraction response format
+ firstPartData = content_parts[0].data if content_parts[0].data else ""
+ if not isinstance(firstPartData, str):
+ return False
+
+ # Strip markdown code fences (```json ... ```) before checking
+ strippedData = stripCodeFences(firstPartData.strip())
+
+ # Check if it starts with JSON object/array
+ if not strippedData.startswith(('{', '[')):
+ return False
+
+ try:
+ parsed = json.loads(strippedData)
+ # Check if it has the extraction response structure: {"extracted_content": {...}}
+ if isinstance(parsed, dict) and "extracted_content" in parsed:
+ return True
+ except:
+ pass
+
+ return False
+
+ def _isElementsResponse(self, content_parts: List[ContentPart]) -> bool:
+ """Check if contentParts contain JSON responses with an 'elements' array (e.g., section content)."""
+ if not content_parts:
+ return False
+
+ firstPartData = content_parts[0].data if content_parts[0].data else ""
+ if not isinstance(firstPartData, str):
+ return False
+
+ strippedData = stripCodeFences(firstPartData.strip())
+ if not strippedData.startswith(('{', '[')):
+ return False
+
+ try:
+ parsed = json.loads(strippedData)
+ if isinstance(parsed, dict) and "elements" in parsed and isinstance(parsed["elements"], list):
+ return True
+ except:
+ pass
+
+ return False
+
+ def _mergeElementsResponses(self, content_parts: List[ContentPart]) -> Dict[str, Any]:
+ """Merge multiple JSON responses with an 'elements' array into one unified response.
+ Specifically designed to merge tables within the 'elements' array.
+ Empty tables (no rows) are ignored if a table with the same headers already has data.
+ """
+ merged_elements = []
+ table_headers_map: Dict[str, List[Dict[str, Any]]] = {} # headers_tuple -> [table_contents]
+
+ for part in content_parts:
+ if not part.data:
+ continue
+
+ # Handle multiple JSON blocks in a single response (separated by ---)
+ partDataBlocks = part.data.split('---')
+
+ for blockData in partDataBlocks:
+ if not blockData.strip():
+ continue
+
+ try:
+ strippedData = stripCodeFences(blockData.strip())
+ if not strippedData:
+ continue
+
+ parsed = json.loads(strippedData)
+ if isinstance(parsed, dict) and "elements" in parsed and isinstance(parsed["elements"], list):
+ for element in parsed["elements"]:
+ if isinstance(element, dict) and element.get("type") == "table" and "content" in element:
+ table_content = element["content"]
+ headers = table_content.get("headers", [])
+ rows = table_content.get("rows", [])
+
+ if headers:
+ headers_key = tuple(headers)
+
+ # If table has no rows, only add it if no table with these headers exists yet
+ if not rows:
+ if headers_key not in table_headers_map:
+ # No table with these headers exists - keep empty table for now
+ table_headers_map[headers_key] = []
+ # If a table with these headers already exists (with or without data), skip empty table
+ continue
+
+ # Table has rows - add to merge map
+ if headers_key not in table_headers_map:
+ table_headers_map[headers_key] = []
+ table_headers_map[headers_key].append(table_content)
+ else:
+ # Keep non-table elements as is, but avoid duplicates if possible
+ if element not in merged_elements:
+ merged_elements.append(element)
+ except Exception as e:
+ logger.warning(f"Failed to parse JSON elements response from part {part.id}: {str(e)}")
+ continue
+
+ # Merge tables by headers - combine rows from tables with same headers
+ for headers_key, tables in table_headers_map.items():
+ if not tables:
+ # Only empty tables with these headers - skip them
+ continue
+
+ all_rows = []
+
+ for table_content in tables:
+ rows = table_content.get("rows", [])
+ all_rows.extend(rows)
+
+ # Only add table if it has rows
+ if all_rows:
+ merged_elements.append({
+ "type": "table",
+ "content": {
+ "headers": list(headers_key),
+ "rows": all_rows
+ }
+ })
+
+ return {"elements": merged_elements}
+
+ def _mergeJsonExtractionResponses(self, content_parts: List[ContentPart], originalContentParts: Optional[List[ContentPart]] = None) -> Dict[str, Any]:
+ """Merge multiple JSON extraction responses into one unified response.
+
+ Merges:
+ - Tables: Combines all table rows, preserves headers (duplicates preserved)
+ - Text: Combines all text blocks
+ - Headings: Combines all headings arrays
+ - Lists: Combines all list items
+ - Images: Combines all image descriptions
+ """
+ merged = {
+ "extracted_content": {
+ "text": "",
+ "tables": [],
+ "headings": [],
+ "lists": [],
+ "images": []
+ }
+ }
+
+ # Track table headers to merge tables with same structure
+ table_headers_map: Dict[str, List[Dict[str, Any]]] = {} # headers_tuple -> [tables]
+ all_text_parts = []
+ all_headings = []
+ all_lists = []
+ all_images = []
+
+ # Collect per-part extracted data for debug file
+ per_part_extracted_data = []
+ # Track original parts and their extracted data
+ original_parts_extracted_data = []
+
+ for part_idx, part in enumerate(content_parts, 1):
+ logger.info(f"=== Processing ContentPart {part_idx}/{len(content_parts)}: id={part.id}, label={part.label}, typeGroup={part.typeGroup} ===")
+
+ if not part.data:
+ logger.warning(f"ContentPart {part.id} has no data, skipping")
+ continue
+
+ # Handle multiple JSON blocks in a single response (separated by ---)
+ # Split by --- to handle multiple JSON blocks per ContentPart
+ partDataBlocks = part.data.split('---')
+ logger.debug(f"ContentPart {part.id}: Found {len(partDataBlocks)} JSON block(s) (split by ---)")
+
+ for block_idx, blockData in enumerate(partDataBlocks, 1):
+ if not blockData.strip():
+ continue
+
+ try:
+ # Strip markdown code fences before parsing
+ strippedData = stripCodeFences(blockData.strip())
+ if not strippedData:
+ logger.debug(f"ContentPart {part.id}, Block {block_idx}: Empty after stripping code fences")
+ continue
+
+ parsed = json.loads(strippedData)
+ if not isinstance(parsed, dict) or "extracted_content" not in parsed:
+ logger.debug(f"ContentPart {part.id}, Block {block_idx}: Not a valid extraction response format")
+ continue
+
+ extracted = parsed["extracted_content"]
+
+ # Find corresponding original part (if available)
+ original_part = None
+ if originalContentParts and part_idx <= len(originalContentParts):
+ original_part = originalContentParts[part_idx - 1]
+ elif originalContentParts and len(originalContentParts) > 0:
+ # Handle one-to-many (chunking) - use first original part
+ original_part = originalContentParts[0]
+
+ # Store extracted data for this part/block for debug file
+ part_extracted = {
+ "contentPartId": part.id,
+ "contentPartLabel": part.label,
+ "contentPartTypeGroup": part.typeGroup,
+ "blockIndex": block_idx,
+ "extracted_content": extracted.copy() # Store full extracted content
+ }
+ per_part_extracted_data.append(part_extracted)
+
+ # Store original part extracted data
+ if original_part:
+ # Extract text from extracted_content for display
+ extracted_text = extracted.get("text", "") if isinstance(extracted.get("text"), str) else ""
+ if not extracted_text and extracted.get("tables"):
+ # If no text but has tables, create a text representation
+ table_texts = []
+ for table in extracted.get("tables", []):
+ if isinstance(table, dict):
+ headers = table.get("headers", [])
+ rows = table.get("rows", [])
+ if headers and rows:
+ table_texts.append(f"Table: {', '.join(headers)}\nRows: {len(rows)}")
+ extracted_text = "\n".join(table_texts) if table_texts else ""
+
+ original_part_data = {
+ "id": original_part.id,
+ "typeGroup": original_part.typeGroup,
+ "mimeType": original_part.mimeType or "text/plain",
+ "label": original_part.label,
+ "dataLength": len(extracted_text),
+ "metadata": {
+ "documentId": original_part.metadata.get("documentId") if original_part.metadata else None,
+ "documentMimeType": original_part.metadata.get("documentMimeType") if original_part.metadata else None,
+ "originalFileName": original_part.metadata.get("originalFileName") if original_part.metadata else None,
+ },
+ "data": extracted_text, # Full extracted text
+ "extracted_content": extracted.copy() # Full extracted content structure
+ }
+ original_parts_extracted_data.append(original_part_data)
+
+ # Log extracted content summary
+ extracted_summary = {
+ "text": len(extracted.get("text", "")) if extracted.get("text") else 0,
+ "tables": len(extracted.get("tables", [])) if isinstance(extracted.get("tables"), list) else 0,
+ "headings": len(extracted.get("headings", [])) if isinstance(extracted.get("headings"), list) else 0,
+ "lists": len(extracted.get("lists", [])) if isinstance(extracted.get("lists"), list) else 0,
+ "images": len(extracted.get("images", [])) if isinstance(extracted.get("images"), list) else 0,
+ }
+ logger.info(f"ContentPart {part.id}, Block {block_idx} extracted: text={extracted_summary['text']} chars, tables={extracted_summary['tables']}, headings={extracted_summary['headings']}, lists={extracted_summary['lists']}, images={extracted_summary['images']}")
+
+ # Log table details
+ if extracted_summary['tables'] > 0:
+ for table_idx, table in enumerate(extracted.get("tables", []), 1):
+ if isinstance(table, dict):
+ headers = table.get("headers", [])
+ rows = table.get("rows", [])
+ logger.info(f" Table {table_idx}: headers={headers}, rows={len(rows) if isinstance(rows, list) else 0}")
+
+ # Log list details
+ if extracted_summary['lists'] > 0:
+ for list_idx, list_item in enumerate(extracted.get("lists", []), 1):
+ if isinstance(list_item, dict):
+ list_type = list_item.get("type", "unknown")
+ items = list_item.get("items", [])
+ logger.info(f" List {list_idx}: type={list_type}, items={len(items) if isinstance(items, list) else 0}")
+
+ # Merge text
+ if "text" in extracted and extracted["text"]:
+ text_content = extracted["text"].strip()
+ if text_content:
+ all_text_parts.append(text_content)
+
+ # Merge tables - group by headers to merge compatible tables
+ if "tables" in extracted and isinstance(extracted["tables"], list):
+ for table in extracted["tables"]:
+ if not isinstance(table, dict) or "headers" not in table or "rows" not in table:
+ continue
+
+ headers = table["headers"]
+ rows = table["rows"]
+
+ if not headers or not rows:
+ continue
+
+ # Use headers as key for grouping
+ headers_key = tuple(headers)
+ if headers_key not in table_headers_map:
+ table_headers_map[headers_key] = []
+ table_headers_map[headers_key].append(table)
+
+ # Merge headings
+ if "headings" in extracted and isinstance(extracted["headings"], list):
+ for heading in extracted["headings"]:
+ if isinstance(heading, dict) and "text" in heading:
+ all_headings.append(heading)
+
+ # Merge lists
+ if "lists" in extracted and isinstance(extracted["lists"], list):
+ for list_item in extracted["lists"]:
+ if isinstance(list_item, dict) and "items" in list_item:
+ all_lists.append(list_item)
+
+ # Merge images
+ if "images" in extracted and isinstance(extracted["images"], list):
+ for image in extracted["images"]:
+ if isinstance(image, dict) and "description" in image:
+ all_images.append(image)
+
+ except Exception as e:
+ logger.warning(f"Failed to parse JSON extraction response block from part {part.id}: {str(e)}")
+ continue
+
+ # Combine text parts
+ if all_text_parts:
+ merged["extracted_content"]["text"] = "\n\n".join(all_text_parts)
+
+ # Merge tables by headers - combine rows from tables with same headers
+ for headers_key, tables in table_headers_map.items():
+ # Collect all rows from tables with same headers
+ all_rows = []
+
+ for table in tables:
+ rows = table.get("rows", [])
+ all_rows.extend(rows)
+
+ # Create merged table
+ if all_rows:
+ merged["extracted_content"]["tables"].append({
+ "headers": list(headers_key),
+ "rows": all_rows
+ })
+
+ # Add headings
+ if all_headings:
+ merged["extracted_content"]["headings"] = all_headings
+
+ # Add lists - keep them separate (like headings) to preserve document structure
+ if all_lists:
+ merged["extracted_content"]["lists"] = all_lists
+
+ # Add images
+ if all_images:
+ merged["extracted_content"]["images"] = all_images
+
+ logger.info(f"=== Merging Summary ===")
+ logger.info(f"Total ContentParts processed: {len(content_parts)}")
+ logger.info(f"Text parts collected: {len(all_text_parts)}")
+ logger.info(f"Table groups (by headers): {len(table_headers_map)}")
+ logger.info(f"Headings collected: {len(all_headings)}")
+ logger.info(f"Lists collected: {len(all_lists)}")
+ logger.info(f"Images collected: {len(all_images)}")
+
+ # Log table merging details
+ for headers_key, tables in table_headers_map.items():
+ total_rows = sum(len(table.get("rows", [])) for table in tables)
+ logger.info(f" Table group with headers {list(headers_key)}: {len(tables)} table(s), {total_rows} total rows")
+
+ logger.info(f"Merged JSON extraction responses: {len(table_headers_map)} table groups, {len(all_text_parts)} text parts, {len(all_headings)} headings, {len(all_lists)} lists, {len(all_images)} images")
+
+ # Write per-part extracted data to debug file
+ if per_part_extracted_data and self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
+ try:
+ debug_content = {
+ "summary": {
+ "totalContentParts": len(content_parts),
+ "totalExtractedBlocks": len(per_part_extracted_data),
+ "mergedResult": {
+ "textParts": len(all_text_parts),
+ "tableGroups": len(table_headers_map),
+ "headings": len(all_headings),
+ "lists": len(all_lists),
+ "images": len(all_images)
+ }
+ },
+ "perPartExtractedData": per_part_extracted_data
+ }
+ debug_json = json.dumps(debug_content, indent=2, ensure_ascii=False)
+ self.services.utils.writeDebugFile(debug_json, "content_extraction_per_part")
+ logger.info(f"Wrote per-part extracted data to debug file: {len(per_part_extracted_data)} blocks from {len(content_parts)} content parts")
+ except Exception as e:
+ logger.warning(f"Failed to write per-part extracted data to debug file: {str(e)}")
+
+ # Write original parts extracted data in extraction_result format
+ if original_parts_extracted_data and self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
+ try:
+ # Get document info from first original part if available
+ document_name = None
+ document_mime_type = None
+ if originalContentParts and len(originalContentParts) > 0:
+ first_part = originalContentParts[0]
+ if first_part.metadata:
+ document_name = first_part.metadata.get("originalFileName")
+ document_mime_type = first_part.metadata.get("documentMimeType")
+
+ # Format similar to extraction_result file
+ extraction_result_format = {
+ "documentName": document_name or "Unknown",
+ "documentMimeType": document_mime_type or "application/octet-stream",
+ "partsCount": len(original_parts_extracted_data),
+ "parts": []
+ }
+
+ for part_data in original_parts_extracted_data:
+ # Format each part similar to extraction_result format
+ formatted_part = {
+ "typeGroup": part_data["typeGroup"],
+ "mimeType": part_data["mimeType"],
+ "label": part_data["label"],
+ "dataLength": part_data["dataLength"],
+ "metadata": part_data["metadata"],
+ "data": part_data["data"], # Full extracted text
+ "extracted_content": part_data["extracted_content"] # Full structure
+ }
+ extraction_result_format["parts"].append(formatted_part)
+
+ result_json = json.dumps(extraction_result_format, indent=2, ensure_ascii=False)
+ self.services.utils.writeDebugFile(result_json, "content_extraction_original_parts")
+ logger.info(f"Wrote original parts extracted data to debug file: {len(original_parts_extracted_data)} original parts")
+ except Exception as e:
+ logger.warning(f"Failed to write original parts extracted data to debug file: {str(e)}")
+
+ return merged
+
async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
"""Chunk a content part based on model capabilities, accounting for prompt, system message overhead, and maxTokens output.
@@ -1162,7 +1408,8 @@ class ExtractionService:
if not chunkResults:
raise ValueError(f"All chunks failed for content part")
- mergedContent = self.mergePartResults(chunkResults, options)
+ # Pass original contentPart to preserve typeGroup for all chunks (one-to-many: 1 part -> N chunks)
+ mergedContent = self.mergePartResults(chunkResults, options, [contentPart])
return AiCallResponse(
content=mergedContent,
modelName=model.name,
@@ -1208,7 +1455,8 @@ class ExtractionService:
raise
# Merge chunk results using unified mergePartResults
- mergedContent = self.mergePartResults(chunkResults, options)
+ # Pass original contentPart to preserve typeGroup for all chunks (one-to-many: 1 part -> N chunks)
+ mergedContent = self.mergePartResults(chunkResults, options, [contentPart])
logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
return AiCallResponse(
@@ -1254,9 +1502,13 @@ class ExtractionService:
aiObjects, # Pass interface for AI calls
progressCallback=None
) -> AiCallResponse:
- """Process content parts with model-aware chunking and AI calls.
+ """Process content parts with model-aware chunking and AI calls in parallel.
Moved from interfaceAiObjects.callWithContentParts() - entry point for content parts processing.
+ Uses parallel processing similar to section generation for better performance.
+
+ SPECIAL CASE: For DATA_EXTRACT operations, processes all contentParts together in ONE call
+ to enable proper merging (e.g., merging tables from multiple PDFs into one table).
"""
prompt = request.prompt
options = request.options
@@ -1269,16 +1521,100 @@ class ExtractionService:
if not failoverModelList:
return self._createErrorResponse("No suitable models found", 0, 0)
- # Process each content part
- allResults = []
- for contentPart in contentParts:
- partResult = await self.processContentPartWithFallback(
- contentPart, prompt, options, failoverModelList, aiObjects, progressCallback
- )
- allResults.append(partResult)
+ totalParts = len(contentParts)
+ if totalParts == 0:
+ return self._createErrorResponse("No content parts to process", 0, 0)
- # Merge all results using unified mergePartResults
- mergedContent = self.mergePartResults(allResults)
+ # NOTE: For DATA_EXTRACT operations, the extraction prompt explicitly asks the AI to merge
+ # all contentParts into ONE unified JSON response. Even though we process parts separately,
+ # each response should contain merged content. The mergePartResults will concatenate responses,
+ # but the new prompt format (flat extracted_content structure) is designed for easier merging.
+
+ # DEFAULT: Process parts in parallel
+ # Thread-safe counter for progress tracking
+ completedCount = [0] # Use list to allow modification in nested function
+
+ # Process parts in parallel with concurrency control
+ maxConcurrent = 5
+ if options and hasattr(options, 'maxConcurrentParts'):
+ maxConcurrent = options.maxConcurrentParts
+
+ semaphore = asyncio.Semaphore(maxConcurrent)
+
+ async def processSinglePart(contentPart, partIndex: int) -> AiCallResponse:
+ """Process a single content part with progress logging."""
+ async with semaphore:
+ partLabel = contentPart.label or f"Part {partIndex+1}"
+ partType = contentPart.typeGroup or "unknown"
+
+ # Log start of processing
+ if progressCallback:
+ progressCallback(0.1 + (partIndex / totalParts) * 0.8, f"Processing {partLabel} ({partType}) - {partIndex+1}/{totalParts}")
+
+ try:
+ # Process the part
+ partResult = await self.processContentPartWithFallback(
+ contentPart, prompt, options, failoverModelList, aiObjects, None # Don't pass progressCallback to avoid double logging
+ )
+
+ # Write debug files for generation phase (section content generation)
+ # Check for DATA_GENERATE or DATA_ANALYSE (used for section generation)
+ isGenerationPhase = False
+ if options and hasattr(options, 'operationType'):
+ isGenerationPhase = (options.operationType == OperationTypeEnum.DATA_GENERATE or
+ options.operationType == OperationTypeEnum.DATA_ANALYSE)
+
+ if isGenerationPhase:
+ if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
+ try:
+ # Create debug filename with contentPart ID or label
+ partId = contentPart.id[:8] if contentPart.id else f"part_{partIndex+1}"
+ partLabelSafe = (contentPart.label or f"part_{partIndex+1}").replace(" ", "_").replace("/", "_").replace("\\", "_")[:30]
+ debugPrefix = f"generation_contentPart_{partId}_{partLabelSafe}"
+
+ # Write prompt
+ self.services.utils.writeDebugFile(prompt, f"{debugPrefix}_prompt")
+
+ # Write response
+ responseContent = partResult.content if partResult.content else ""
+ self.services.utils.writeDebugFile(responseContent, f"{debugPrefix}_response")
+
+ logger.debug(f"Wrote debug files for contentPart {partId} (generation): {debugPrefix}_prompt, {debugPrefix}_response")
+ except Exception as debugError:
+ logger.warning(f"Failed to write debug file for contentPart {contentPart.id}: {str(debugError)}")
+
+ # Update completed count and log progress
+ completedCount[0] += 1
+ if progressCallback:
+ progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Completed {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
+
+ return partResult
+ except Exception as e:
+ # Update completed count even on error
+ completedCount[0] += 1
+ logger.error(f"Error processing part {partIndex+1} ({partLabel}): {str(e)}")
+ if progressCallback:
+ progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Error processing {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
+ # Return error response
+ return self._createErrorResponse(f"Error processing part: {str(e)}", 0, 0)
+
+ # Create tasks for all parts
+ tasks = [processSinglePart(contentPart, i) for i, contentPart in enumerate(contentParts)]
+
+ # Execute all tasks in parallel with error handling
+ results = await asyncio.gather(*tasks, return_exceptions=True)
+
+ # Process results and handle exceptions
+ allResults = []
+ for i, result in enumerate(results):
+ if isinstance(result, Exception):
+ logger.error(f"Exception processing part {i+1}: {str(result)}")
+ allResults.append(self._createErrorResponse(f"Exception: {str(result)}", 0, 0))
+ elif result is not None:
+ allResults.append(result)
+
+ # Merge all results using unified mergePartResults, preserving original typeGroup
+ mergedContent = self.mergePartResults(allResults, options, contentParts)
return AiCallResponse(
content=mergedContent,
diff --git a/modules/services/serviceExtraction/subPromptBuilderExtraction.py b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
index 7e065e22..b24bed13 100644
--- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py
+++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
@@ -45,58 +45,51 @@ async def buildExtractionPrompt(
Complete extraction prompt string
"""
- # Unified multi-file example (single doc = multi with n=1)
+ # Flat extraction format - returns extracted content as structured data, not documents/sections
+ # This format allows merging multiple contentParts into one response
json_example = {
- "metadata": {
- "title": "Multi-Document Example",
- "split_strategy": "by_section",
- "source_documents": ["doc_001"],
- "extraction_method": "ai_extraction"
- },
- "documents": [
- {
- "id": "doc_section_1",
- "title": "Section 1 Title",
- "filename": "section_1.xlsx",
- "sections": [
- {
- "id": "section_1",
- "content_type": "heading",
- "elements": [
- {
- "level": 1,
- "text": "1. SECTION TITLE"
- }
- ],
- "order": 1
- },
- {
- "id": "section_2",
- "content_type": "paragraph",
- "elements": [
- {
- "text": "This is the actual content that should be extracted from the document."
- }
- ],
- "order": 2
- },
- {
- "id": "section_3",
- "content_type": "table",
- "elements": [
- {
- "headers": ["Column 1", "Column 2"],
- "rows": [["Value 1", "Value 2"]]
- }
- ],
- "order": 3
- }
- ]
- }
- ]
+ "extracted_content": {
+ "text": "Extracted text content from the document...",
+ "tables": [
+ {
+ "headers": ["Column 1", "Column 2"],
+ "rows": [
+ ["Value 1", "Value 2"],
+ ["Value 3", "Value 4"]
+ ]
+ }
+ ],
+ "headings": [
+ {
+ "level": 1,
+ "text": "Main Heading"
+ },
+ {
+ "level": 2,
+ "text": "Subheading"
+ }
+ ],
+ "lists": [
+ {
+ "type": "bullet",
+ "items": ["Item 1", "Item 2", "Item 3"]
+ }
+ ],
+ "images": [
+ {
+ "description": "Description of image content, including all visible text, tables, and visual elements"
+ }
+ ]
+ }
}
- structure_instruction = "CRITICAL: You MUST return a JSON structure with a \"documents\" array. For single documents, create one document entry with all sections."
+ structure_instruction = """CRITICAL EXTRACTION REQUIREMENTS:
+1. Extract content from the provided ContentPart(s) - process what is provided in this call
+2. If this ContentPart contains tables, extract them with proper structure (headers and rows)
+3. If this ContentPart contains text, extract it as structured text
+4. Return ONE JSON object with extracted content from this ContentPart
+5. Preserve all original data - do not summarize or interpret
+6. The system will merge results from multiple ContentParts automatically - focus on extracting this ContentPart's content accurately"""
# Parse extraction intent if AI service is available
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
@@ -124,30 +117,25 @@ USER REQUEST / USER PROMPT:
END OF USER REQUEST / USER PROMPT
{'='*80}
-You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
+You are a document processing assistant that extracts content from documents. Your task is to analyze the provided ContentPart(s) and extract their content into a structured JSON format.
-TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.
+TASK: Extract content from the provided ContentPart(s). Extract all tables, text, headings, lists, and other content types accurately. The system processes ContentParts individually and merges results automatically.
LANGUAGE REQUIREMENT: All extracted content must be in the language '{userLanguage}'. Extract and preserve content in this language.
{extraction_intent}
-REQUIREMENTS:
-1. Analyze the document content provided in the context below
-2. Identify distinct sections in the document (by headings, topics, or logical breaks)
-3. Create one or more JSON document entries based on the content structure
-4. Extract the real content from each section (headings, paragraphs, lists, etc.)
-5. Generate appropriate filenames for each document
-
{structure_instruction}
OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(json_example, indent=2)}
-Requirements:
+CRITICAL EXTRACTION RULES:
+- Extract only content that is ACTUALLY PRESENT in the ContentPart - never create fake or placeholder data
+- Return empty arrays [] or empty strings "" when content is missing - this is normal and expected
+- Extract all tables, text, headings, lists accurately with proper structure
- Preserve all original data - do not summarize or interpret
-- Use the exact JSON format shown above
-- Maintain data integrity and structure
+- Return ONE JSON object per ContentPart (the system merges multiple ContentParts automatically)
Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
@@ -166,7 +154,7 @@ Image Analysis Requirements:
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
-Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
+Extract only actual content from the ContentPart. Return empty arrays/strings when content is missing - never create fake data.
""".strip()
# Add renderer-specific guidelines if provided
diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py
index 828f1033..a49b78c7 100644
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@@ -346,16 +346,19 @@ class GenerationService:
'workflowId': 'unknown'
}
- async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
+ async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
"""
Render extracted JSON content to the specified output format.
Processes EACH document separately and calls renderer for each.
Each renderer can return 1..n documents (e.g., HTML + images).
+ Per-document format and language are extracted from structure (validated in State 3).
+ Multiple documents can have different formats and languages.
+
Args:
extractedContent: Structured JSON document with documents array
- outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
- In future, each document can have its own format
+ outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx) - Global fallback
+ language: Language (global fallback) - Per-document language extracted from structure
title: Report title
userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation
@@ -392,9 +395,17 @@ class GenerationService:
continue
# Determine format for this document
- # TODO: In future, each document can have its own format field
- # For now, use the global outputFormat
- docFormat = doc.get("format", outputFormat)
+ # Check outputFormat field first (per-document), then format field (legacy), then global fallback
+ docFormat = doc.get("outputFormat") or doc.get("format") or outputFormat
+
+ # Determine language for this document
+ # Extract per-document language from structure (validated in State 3), fallback to global
+ docLanguage = doc.get("language") or language
+
+ # Validate language format (should be 2-character ISO code, validated in State 3)
+ if not isinstance(docLanguage, str) or len(docLanguage) != 2:
+ logger.warning(f"Document {doc.get('id')} has invalid language format: {docLanguage}, using fallback")
+ docLanguage = language # Use global fallback
# Get renderer for this document's format
renderer = self._getFormatRenderer(docFormat)
@@ -402,9 +413,19 @@ class GenerationService:
logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
continue
+ # Check output style classification (code/document/image/etc.) from renderer
+ from modules.services.serviceGeneration.renderers.registry import getOutputStyle
+ outputStyle = getOutputStyle(docFormat)
+ if outputStyle:
+ logger.debug(f"Document {doc.get('id', docIndex)} format '{docFormat}' classified as '{outputStyle}' style")
+ # Store style in document metadata for potential use in processing paths
+ if "metadata" not in doc:
+ doc["metadata"] = {}
+ doc["metadata"]["outputStyle"] = outputStyle
+
# Create JSON structure with single document (preserving metadata)
singleDocContent = {
- "metadata": metadata,
+ "metadata": {**metadata, "language": docLanguage}, # Add per-document language to metadata
"documents": [doc] # Only this document
}
diff --git a/modules/services/serviceGeneration/paths/ARCHITECTURE_ANALYSIS.md b/modules/services/serviceGeneration/paths/ARCHITECTURE_ANALYSIS.md
new file mode 100644
index 00000000..5ba586a7
--- /dev/null
+++ b/modules/services/serviceGeneration/paths/ARCHITECTURE_ANALYSIS.md
@@ -0,0 +1,114 @@
+# Document Generation Architecture Analysis
+
+## Current Flow
+
+### 1. Document Input → ContentParts (`extractAndPrepareContent`)
+
+**Location**: `gateway/modules/services/serviceAi/subContentExtraction.py`
+
+**Flow**:
+- Regular documents → Calls `extractContent()` (NON-AI extraction) → Creates contentParts with raw extracted text
+- **BUT THEN**:
+ - Images with "extract" intent → Calls Vision AI (line 190) → AI extraction
+ - Text with "extract" intent + extractionPrompt → Calls AI processing (line 265) → AI extraction
+- Pre-extracted JSON → Uses contentParts directly (no AI)
+
+**Result**: ContentParts may already be AI-processed before structure generation
+
+### 2. Structure Generation
+
+**Location**: `gateway/modules/services/serviceAi/subStructureGeneration.py`
+
+**Flow**:
+- Uses contentParts (may already be AI-processed)
+- Generates document structure (chapters, sections)
+
+### 3. Section Generation (`_processSingleSection`)
+
+**Location**: `gateway/modules/services/serviceAi/subStructureFilling.py`
+
+**Flow**:
+- Uses contentParts (which may already be AI-processed)
+- Aggregates "extracted" contentParts with AI (line 554-682)
+- Generates section content using `callAiWithLooping` with `useCaseId="section_content"`
+
+## Issues Identified
+
+### Issue 1: Duplicate AI Processing
+- AI extraction happens in `extractAndPrepareContent` (for images/text)
+- AI generation happens again in section generation
+- This is redundant and inefficient
+
+### Issue 2: Architecture Inconsistency
+- Pre-extracted JSON files → contentParts directly (no AI)
+- Regular documents → contentParts + AI extraction (inconsistent)
+- User wants: Documents → contentParts (like pre-extracted JSON) → AI only in section generation
+
+### Issue 3: Image Processing
+- Images need Vision AI to extract text
+- Currently happens in `extractAndPrepareContent`
+- Question: Should this happen during section generation instead?
+
+## Proposed Architecture
+
+### Option A: Remove All AI from `extractAndPrepareContent`
+- Documents → `extractContent()` → Raw contentParts (text, tables, etc.)
+- Images → Keep as image contentParts (no Vision AI extraction)
+- Section generation → Handle images with Vision AI when needed
+
+**Pros**:
+- Consistent with pre-extracted JSON flow
+- Single point of AI processing (section generation)
+- Clear separation of concerns
+
+**Cons**:
+- Images won't have extracted text until section generation
+- May need to handle images differently in section generation
+
+### Option B: Keep Vision AI for Images Only
+- Documents → `extractContent()` → Raw contentParts
+- Images → Vision AI extraction → Text contentParts
+- Section generation → Uses text contentParts (no additional AI extraction)
+
+**Pros**:
+- Images get text extracted early
+- Section generation can use text directly
+
+**Cons**:
+- Still has AI extraction before structure generation
+- Inconsistent with user's request
+
+## Recommendation
+
+**Follow Option A** - Remove all AI extraction from `extractAndPrepareContent`:
+
+1. **Documents → ContentParts** (like pre-extracted JSON):
+ - Call `extractContent()` (NON-AI)
+ - Create contentParts with raw extracted content
+ - Images remain as image contentParts (no Vision AI)
+
+2. **Section Generation**:
+ - Handle images with Vision AI when needed
+ - Aggregate all contentParts with AI
+ - Single point of AI processing
+
+**Benefits**:
+- Clear architecture: Documents = raw contentParts
+- Consistent with pre-extracted JSON flow
+- AI processing only where needed (section generation)
+- Easier to understand and maintain
+
+## Questions to Resolve
+
+1. **Image handling**: How should images be processed during section generation?
+ - Option 1: Vision AI extraction happens automatically when image contentParts are used
+ - Option 2: Images are passed to AI with Vision models during section generation
+ - Option 3: Images remain as binary and are rendered directly (no text extraction)
+
+2. **Text with extractionPrompt**: Should text contentParts with extractionPrompt be processed differently?
+ - Currently: AI processing in `extractAndPrepareContent`
+ - Proposed: Raw text → AI processing during section generation
+
+3. **Performance**: Will deferring image extraction to section generation cause performance issues?
+ - Need to test with multiple images
+
diff --git a/modules/services/serviceGeneration/paths/ARCHITECTURE_CHANGES.md b/modules/services/serviceGeneration/paths/ARCHITECTURE_CHANGES.md
new file mode 100644
index 00000000..3af38ef4
--- /dev/null
+++ b/modules/services/serviceGeneration/paths/ARCHITECTURE_CHANGES.md
@@ -0,0 +1,77 @@
+# Architecture Changes Summary
+
+## Problem Identified
+
+The architecture had AI extraction happening in TWO places:
+1. **`extractAndPrepareContent`**: Vision AI for images, AI processing for text with extractionPrompt
+2. **Section generation**: AI aggregation of contentParts
+
+This was:
+- Redundant (double AI processing)
+- Inconsistent (pre-extracted JSON had no AI, regular documents had AI)
+- Against the desired architecture (documents should become contentParts like pre-extracted JSON)
+
+## Solution Implemented
+
+### 1. Removed AI Extraction from `extractAndPrepareContent`
+
+**File**: `gateway/modules/services/serviceAi/subContentExtraction.py`
+
+**Changes**:
+- **Removed**: Vision AI extraction for images (lines 186-246)
+- **Removed**: AI text processing with extractionPrompt (lines 260-334)
+- **Updated**: Images with extract intent are now marked with `needsVisionExtraction=True` flag
+- **Updated**: Regular documents mark images with `needsVisionExtraction=True` when extract intent is present
+
+**Result**: Documents → contentParts (raw extraction only, no AI)
+
+### 2. Added Vision AI Extraction in Section Generation
+
+**File**: `gateway/modules/services/serviceAi/subStructureFilling.py`
+
+**Changes**:
+- **Added**: Vision AI extraction logic before aggregation (lines 553-610)
+- **Added**: Vision AI extraction logic for single-part processing (lines 1074-1115)
+- **Logic**:
+ - Checks if `part.typeGroup == "image"` AND `needsVisionExtraction == True` AND `intent == "extract"`
+ - Extracts text using Vision AI (`IMAGE_ANALYSE` operation)
+ - Replaces image part with text part for further processing
+ - Images with `contentFormat == "object"` (render intent) are rendered directly (no extraction)
+
+**Result**: AI extraction happens ONLY during section generation
+
+## Architecture Flow (After Changes)
+
+### Document Input → ContentParts
+1. **Regular documents**: `extractContent()` (NON-AI) → Raw contentParts
+ - Images with extract intent: `contentFormat="extracted"`, `needsVisionExtraction=True`
+ - Images with render intent: `contentFormat="object"` (rendered directly)
+ - Text: `contentFormat="extracted"` (raw text, no AI processing)
+
+2. **Pre-extracted JSON**: Direct contentParts (no changes)
+
+### Section Generation → AI Processing
+1. **Images with extract intent**: Vision AI extraction → Text part → AI aggregation
+2. **Images with render intent**: Rendered directly (no extraction)
+3. **Text contentParts**: AI aggregation with extractionPrompt (if provided)
+
+## Key Benefits
+
+1. **Consistent Architecture**: Documents = raw contentParts (like pre-extracted JSON)
+2. **Single Point of AI Processing**: Only in section generation
+3. **Clear Separation**: Extraction vs Generation
+4. **Intent-Based Logic**:
+ - `intent == "extract"` → Vision AI extraction during section generation
+ - `intent == "render"` → Direct rendering (no extraction)
+ - `contentFormat == "object"` → Embedded/referenced images (no extraction)
+
+## Testing Checklist
+
+- [ ] Regular documents create contentParts without AI extraction
+- [ ] Images with extract intent are marked with `needsVisionExtraction=True`
+- [ ] Images with render intent are marked with `contentFormat="object"`
+- [ ] Section generation extracts images with Vision AI when needed
+- [ ] Section generation renders images with object format directly
+- [ ] Text contentParts are processed with AI during section generation
+- [ ] Pre-extracted JSON flow still works correctly
+
diff --git a/modules/services/serviceGeneration/paths/codePath.py b/modules/services/serviceGeneration/paths/codePath.py
new file mode 100644
index 00000000..5beb1867
--- /dev/null
+++ b/modules/services/serviceGeneration/paths/codePath.py
@@ -0,0 +1,584 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Code Generation Path
+
+Handles code generation with multi-file project support, dependency handling,
+and proper cross-file references.
+"""
+
+import json
+import logging
+import time
+import re
+from typing import Dict, Any, List, Optional
+from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
+from modules.datamodels.datamodelExtraction import ContentPart
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
+
+logger = logging.getLogger(__name__)
+
+
+class CodeGenerationPath:
+ """Code generation path."""
+
+ def __init__(self, services):
+ self.services = services
+
+ async def generateCode(
+ self,
+ userPrompt: str,
+ outputFormat: str = None,
+ contentParts: Optional[List[ContentPart]] = None,
+ title: str = "Generated Code",
+ parentOperationId: Optional[str] = None
+ ) -> AiResponse:
+ """
+ Generate code files with multi-file project support.
+
+ Returns: AiResponse with code files as documents
+ """
+ # Create operation ID
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ codeOperationId = f"code_gen_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ codeOperationId,
+ "Code Generation",
+ "Code Generation",
+ f"Format: {outputFormat or 'txt'}",
+ parentOperationId=parentOperationId
+ )
+
+ try:
+ # Detect language and project type from prompt or outputFormat
+ language, projectType = self._detectLanguageAndProjectType(userPrompt, outputFormat)
+
+ # Phase 1: Code structure generation (with looping)
+ self.services.chat.progressLogUpdate(codeOperationId, 0.2, "Generating code structure")
+ codeStructure = await self._generateCodeStructure(
+ userPrompt=userPrompt,
+ language=language,
+ outputFormat=outputFormat,
+ contentParts=contentParts
+ )
+
+ # Phase 2: Code content generation (with dependency handling)
+ self.services.chat.progressLogUpdate(codeOperationId, 0.5, "Generating code content")
+ codeFiles = await self._generateCodeContent(codeStructure, codeOperationId)
+
+ # Phase 3: Code formatting & validation
+ self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files")
+ formattedFiles = await self._formatAndValidateCode(codeFiles)
+
+ # Convert to unified document format
+ documents = []
+ for file in formattedFiles:
+ mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt"))
+ content = file.get("content", "")
+ if isinstance(content, str):
+ contentBytes = content.encode('utf-8')
+ else:
+ contentBytes = content
+
+ documents.append(DocumentData(
+ documentName=file.get("filename", "generated.txt"),
+ documentData=contentBytes,
+ mimeType=mimeType,
+ sourceJson=file
+ ))
+
+ metadata = AiResponseMetadata(
+ title=title,
+ operationType=OperationTypeEnum.DATA_GENERATE.value
+ )
+
+ self.services.chat.progressLogFinish(codeOperationId, True)
+
+ return AiResponse(
+ documents=documents,
+ content=None,
+ metadata=metadata
+ )
+
+ except Exception as e:
+ logger.error(f"Error in code generation: {str(e)}")
+ self.services.chat.progressLogFinish(codeOperationId, False)
+ raise
+
+ def _detectLanguageAndProjectType(self, userPrompt: str, outputFormat: Optional[str]) -> tuple:
+ """Detect programming language and project type from prompt or format."""
+ promptLower = userPrompt.lower()
+
+ # Detect language
+ language = None
+ if outputFormat:
+ if outputFormat == "py":
+ language = "python"
+ elif outputFormat in ["js", "ts"]:
+ language = outputFormat
+ elif outputFormat == "html":
+ language = "html"
+
+ if not language:
+ if "python" in promptLower or ".py" in promptLower:
+ language = "python"
+ elif "javascript" in promptLower or ".js" in promptLower:
+ language = "javascript"
+ elif "typescript" in promptLower or ".ts" in promptLower:
+ language = "typescript"
+ elif "html" in promptLower:
+ language = "html"
+ else:
+ language = "python" # Default
+
+ # Detect project type
+ projectType = "single_file"
+ if "multi" in promptLower or "multiple files" in promptLower or "project" in promptLower:
+ projectType = "multi_file"
+
+ return language, projectType
+
+ async def _generateCodeStructure(
+ self,
+ userPrompt: str,
+ language: str,
+ outputFormat: Optional[str],
+ contentParts: Optional[List[ContentPart]]
+ ) -> Dict[str, Any]:
+ """Generate code structure using looping system."""
+
+ # Build structure generation prompt
+ structurePrompt = f"""Analyze the following code generation request and create a project structure.
+
+Request: {userPrompt}
+
+Language: {language}
+
+Create a JSON structure with:
+1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
+2. files: Array of file structures, each with:
+ - id: Unique identifier
+ - filename: File name (e.g., "main.py", "utils.py")
+ - fileType: File extension (e.g., "py", "js")
+ - dependencies: List of file IDs this file depends on (for multi-file projects)
+ - imports: List of import statements (for dependency extraction)
+ - functions: Array of function signatures {{"name": "...", "signature": "..."}}
+ - classes: Array of class definitions {{"name": "...", "signature": "..."}}
+
+For single-file projects, return one file. For multi-file projects, break down into logical modules.
+
+Return ONLY valid JSON in this format:
+{{
+ "metadata": {{
+ "language": "{language}",
+ "projectType": "single_file",
+ "projectName": "generated-project"
+ }},
+ "files": [
+ {{
+ "id": "file_1",
+ "filename": "main.py",
+ "fileType": "py",
+ "dependencies": [],
+ "imports": [],
+ "functions": [],
+ "classes": []
+ }}
+ ]
+}}
+"""
+
+ # Use generic looping system with code_structure use case
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_GENERATE,
+ resultFormat="json"
+ )
+
+ structureJson = await self.services.ai.callAiWithLooping(
+ prompt=structurePrompt,
+ options=options,
+ useCaseId="code_structure",
+ debugPrefix="code_structure_generation",
+ contentParts=contentParts
+ )
+
+ parsed = json.loads(structureJson)
+ return parsed
+
+ async def _generateCodeContent(
+ self,
+ codeStructure: Dict[str, Any],
+ parentOperationId: str
+ ) -> List[Dict[str, Any]]:
+ """Generate code content for each file with dependency handling."""
+ files = codeStructure.get("files", [])
+ metadata = codeStructure.get("metadata", {})
+
+ if not files:
+ raise ValueError("No files found in code structure")
+
+ # Step 1: Resolve dependency order
+ orderedFiles = self._resolveDependencyOrder(files)
+
+ # Step 2: Generate dependency files first (requirements.txt, package.json, etc.)
+ dependencyFiles = await self._generateDependencyFiles(metadata, orderedFiles)
+
+ # Step 3: Generate code files in dependency order (not fully parallel)
+ codeFiles = []
+ generatedFileContext = {} # Track what's been generated for cross-file references
+
+ for idx, fileStructure in enumerate(orderedFiles):
+ # Update progress
+ progress = 0.5 + (0.4 * (idx / len(orderedFiles)))
+ self.services.chat.progressLogUpdate(
+ parentOperationId,
+ progress,
+ f"Generating {fileStructure.get('filename', 'file')}"
+ )
+
+ # Provide context about already-generated files for proper imports
+ fileContext = self._buildFileContext(generatedFileContext, fileStructure)
+
+ # Generate this file with context
+ fileContent = await self._generateSingleFileContent(
+ fileStructure,
+ fileContext=fileContext,
+ allFilesStructure=orderedFiles,
+ metadata=metadata
+ )
+
+ codeFiles.append(fileContent)
+
+ # Update context with generated file info (for next files)
+ generatedFileContext[fileStructure["id"]] = {
+ "filename": fileContent.get("filename", fileStructure.get("filename")),
+ "functions": fileContent.get("functions", []),
+ "classes": fileContent.get("classes", []),
+ "exports": fileContent.get("exports", [])
+ }
+
+ # Combine dependency files and code files
+ return dependencyFiles + codeFiles
+
+ def _resolveDependencyOrder(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """Resolve file generation order based on dependencies using topological sort."""
+ # Build dependency graph
+ fileMap = {f["id"]: f for f in files}
+ dependencies = {}
+
+ for file in files:
+ fileId = file["id"]
+ deps = file.get("dependencies", []) # List of file IDs this file depends on
+ dependencies[fileId] = deps
+
+ # Topological sort
+ ordered = []
+ visited = set()
+ tempMark = set()
+
+ def visit(fileId: str):
+ if fileId in tempMark:
+ # Circular dependency detected - break it
+ logger.warning(f"Circular dependency detected involving {fileId}")
+ return
+ if fileId in visited:
+ return
+
+ tempMark.add(fileId)
+ for depId in dependencies.get(fileId, []):
+ if depId in fileMap:
+ visit(depId)
+ tempMark.remove(fileId)
+ visited.add(fileId)
+ ordered.append(fileMap[fileId])
+
+ for file in files:
+ if file["id"] not in visited:
+ visit(file["id"])
+
+ return ordered
+
+ async def _generateDependencyFiles(
+ self,
+ metadata: Dict[str, Any],
+ files: List[Dict[str, Any]]
+ ) -> List[Dict[str, Any]]:
+ """Generate dependency files (requirements.txt, package.json, etc.)."""
+ language = metadata.get("language", "").lower()
+ dependencyFiles = []
+
+ # Generate requirements.txt for Python
+ if language in ["python", "py"]:
+ requirementsContent = await self._generateRequirementsTxt(files)
+ if requirementsContent:
+ dependencyFiles.append({
+ "filename": "requirements.txt",
+ "content": requirementsContent,
+ "fileType": "txt",
+ "id": "requirements_txt"
+ })
+
+ # Generate package.json for JavaScript/TypeScript
+ elif language in ["javascript", "typescript", "js", "ts"]:
+ packageJson = await self._generatePackageJson(files, metadata)
+ if packageJson:
+ dependencyFiles.append({
+ "filename": "package.json",
+ "content": json.dumps(packageJson, indent=2),
+ "fileType": "json",
+ "id": "package_json"
+ })
+
+ return dependencyFiles
+
+ async def _generateRequirementsTxt(
+ self,
+ files: List[Dict[str, Any]]
+ ) -> Optional[str]:
+ """Generate requirements.txt content from Python imports."""
+ pythonPackages = set()
+
+ for file in files:
+ imports = file.get("imports", [])
+ if isinstance(imports, list):
+ for imp in imports:
+ if isinstance(imp, str):
+ # Extract package name from import
+ # Handle: "from flask import", "import flask", "from flask import Flask"
+ imp = imp.strip()
+ if "import" in imp:
+ if "from" in imp:
+ # "from package import ..."
+ parts = imp.split("from")
+ if len(parts) > 1:
+ package = parts[1].split("import")[0].strip()
+ if package and not package.startswith("."):
+ pythonPackages.add(package.split(".")[0]) # Get root package
+ else:
+ # "import package" or "import package.module"
+ parts = imp.split("import")
+ if len(parts) > 1:
+ package = parts[1].strip().split(".")[0].strip()
+ if package and not package.startswith("."):
+ pythonPackages.add(package)
+
+ if pythonPackages:
+ return "\n".join(sorted(pythonPackages))
+ return None
+
+ async def _generatePackageJson(
+ self,
+ files: List[Dict[str, Any]],
+ metadata: Dict[str, Any]
+ ) -> Optional[Dict[str, Any]]:
+ """Generate package.json content from JavaScript/TypeScript imports."""
+ npmPackages = {}
+
+ for file in files:
+ imports = file.get("imports", [])
+ if isinstance(imports, list):
+ for imp in imports:
+ if isinstance(imp, str):
+ # Extract npm package from import
+ # Handle: "import express from 'express'", "const express = require('express')"
+ imp = imp.strip()
+ if "from" in imp:
+ # ES6 import: "import ... from 'package'"
+ parts = imp.split("from")
+ if len(parts) > 1:
+ package = parts[1].strip().strip("'\"")
+ if package and not package.startswith(".") and not package.startswith("/"):
+ npmPackages[package] = "*"
+ elif "require" in imp:
+ # CommonJS: "require('package')"
+ match = re.search(r"require\(['\"]([^'\"]+)['\"]\)", imp)
+ if match:
+ package = match.group(1)
+ if not package.startswith(".") and not package.startswith("/"):
+ npmPackages[package] = "*"
+
+ if npmPackages:
+ return {
+ "name": metadata.get("projectName", "generated-project"),
+ "version": "1.0.0",
+ "dependencies": npmPackages
+ }
+ return None
+
+ def _buildFileContext(
+ self,
+ generatedFileContext: Dict[str, Dict[str, Any]],
+ currentFile: Dict[str, Any]
+ ) -> Dict[str, Any]:
+ """Build context about other files for proper imports/references."""
+ context = {
+ "availableFiles": [],
+ "availableFunctions": {},
+ "availableClasses": {}
+ }
+
+ # Add info about already-generated files
+ for fileId, fileInfo in generatedFileContext.items():
+ context["availableFiles"].append({
+ "id": fileId,
+ "filename": fileInfo["filename"],
+ "functions": fileInfo.get("functions", []),
+ "classes": fileInfo.get("classes", []),
+ "exports": fileInfo.get("exports", [])
+ })
+
+ # Build function/class maps for easy lookup
+ for func in fileInfo.get("functions", []):
+ funcName = func.get("name", "")
+ if funcName:
+ context["availableFunctions"][funcName] = {
+ "file": fileInfo["filename"],
+ "signature": func.get("signature", "")
+ }
+
+ for cls in fileInfo.get("classes", []):
+ className = cls.get("name", "")
+ if className:
+ context["availableClasses"][className] = {
+ "file": fileInfo["filename"]
+ }
+
+ return context
+
+ async def _generateSingleFileContent(
+ self,
+ fileStructure: Dict[str, Any],
+ fileContext: Dict[str, Any] = None,
+ allFilesStructure: List[Dict[str, Any]] = None,
+ metadata: Dict[str, Any] = None
+ ) -> Dict[str, Any]:
+ """Generate code content for a single file with context about other files."""
+
+ # Build prompt with context about other files for proper imports
+ filename = fileStructure.get("filename", "generated.py")
+ fileType = fileStructure.get("fileType", "py")
+ dependencies = fileStructure.get("dependencies", [])
+ functions = fileStructure.get("functions", [])
+ classes = fileStructure.get("classes", [])
+
+ contextInfo = ""
+ if fileContext and fileContext.get("availableFiles"):
+ contextInfo = "\n\nAvailable files and their exports:\n"
+ for fileInfo in fileContext["availableFiles"]:
+ contextInfo += f"- {fileInfo['filename']}: "
+ funcs = [f.get("name", "") for f in fileInfo.get("functions", [])]
+ cls = [c.get("name", "") for c in fileInfo.get("classes", [])]
+ exports = []
+ if funcs:
+ exports.extend(funcs)
+ if cls:
+ exports.extend(cls)
+ if exports:
+ contextInfo += ", ".join(exports)
+ contextInfo += "\n"
+
+ contentPrompt = f"""Generate complete, executable code for the file: {filename}
+
+File Type: {fileType}
+Language: {metadata.get('language', 'python') if metadata else 'python'}
+
+Required functions:
+{json.dumps(functions, indent=2) if functions else 'None specified'}
+
+Required classes:
+{json.dumps(classes, indent=2) if classes else 'None specified'}
+
+Dependencies on other files: {', '.join(dependencies) if dependencies else 'None'}
+{contextInfo}
+
+Generate complete, production-ready code with:
+1. Proper imports (including imports from other files in the project if dependencies exist)
+2. All required functions and classes
+3. Error handling
+4. Documentation/docstrings
+5. Type hints where appropriate
+
+Return ONLY valid JSON in this format:
+{{
+ "files": [
+ {{
+ "filename": "{filename}",
+ "content": "// Complete code here",
+ "functions": {json.dumps(functions, indent=2) if functions else '[]'},
+ "classes": {json.dumps(classes, indent=2) if classes else '[]'}
+ }}
+ ]
+}}
+"""
+
+ # Use generic looping system with code_content use case
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_GENERATE,
+ resultFormat="json"
+ )
+
+ contentJson = await self.services.ai.callAiWithLooping(
+ prompt=contentPrompt,
+ options=options,
+ useCaseId="code_content",
+ debugPrefix=f"code_content_{fileStructure.get('id', 'file')}",
+ )
+
+ parsed = json.loads(contentJson)
+
+ # Extract file content and metadata
+ files = parsed.get("files", [])
+ if files and len(files) > 0:
+ fileData = files[0]
+ return {
+ "filename": fileData.get("filename", filename),
+ "content": fileData.get("content", ""),
+ "fileType": fileType,
+ "functions": fileData.get("functions", functions),
+ "classes": fileData.get("classes", classes),
+ "id": fileStructure.get("id")
+ }
+
+ # Fallback if structure is different
+ return {
+ "filename": filename,
+ "content": parsed.get("content", ""),
+ "fileType": fileType,
+ "functions": functions,
+ "classes": classes,
+ "id": fileStructure.get("id")
+ }
+
+ async def _formatAndValidateCode(self, codeFiles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """Format and validate generated code files."""
+ # For now, just return files as-is
+ # TODO: Add code formatting (black, prettier, etc.) and validation
+ formatted = []
+ for file in codeFiles:
+ content = file.get("content", "")
+ # Basic cleanup: remove markdown code fences if present
+ if isinstance(content, str):
+ content = re.sub(r'^```[\w]*\n', '', content, flags=re.MULTILINE)
+ content = re.sub(r'\n```$', '', content, flags=re.MULTILINE)
+ file["content"] = content.strip()
+ formatted.append(file)
+ return formatted
+
+ def _getMimeType(self, fileType: str) -> str:
+ """Get MIME type for file type."""
+ mimeTypes = {
+ "py": "text/x-python",
+ "js": "application/javascript",
+ "ts": "application/typescript",
+ "html": "text/html",
+ "css": "text/css",
+ "json": "application/json",
+ "txt": "text/plain",
+ "md": "text/markdown",
+ "java": "text/x-java-source",
+ "cpp": "text/x-c++src",
+ "c": "text/x-csrc"
+ }
+ return mimeTypes.get(fileType.lower(), "text/plain")
diff --git a/modules/services/serviceGeneration/paths/documentPath.py b/modules/services/serviceGeneration/paths/documentPath.py
new file mode 100644
index 00000000..94c4fc41
--- /dev/null
+++ b/modules/services/serviceGeneration/paths/documentPath.py
@@ -0,0 +1,207 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Document Generation Path
+
+Handles document generation using existing chapter/section model.
+"""
+
+import json
+import logging
+import time
+from typing import Dict, Any, List, Optional
+from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
+from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
+from modules.datamodels.datamodelDocument import RenderedDocument
+from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
+
+logger = logging.getLogger(__name__)
+
+
+class DocumentGenerationPath:
+ """Document generation path (existing functionality, refactored)."""
+
+ def __init__(self, services):
+ self.services = services
+
+ async def generateDocument(
+ self,
+ userPrompt: str,
+ documentList: Optional[Any] = None, # DocumentReferenceList
+ documentIntents: Optional[List[DocumentIntent]] = None,
+ contentParts: Optional[List[ContentPart]] = None,
+ outputFormat: str = "txt",
+ title: Optional[str] = None,
+ parentOperationId: Optional[str] = None
+ ) -> AiResponse:
+ """
+ Generate document using existing chapter/section model.
+
+ Returns: AiResponse with documents list
+ """
+ # Create operation ID
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ docOperationId = f"doc_gen_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ docOperationId,
+ "Document Generation",
+ "Document Generation",
+ f"Format: {outputFormat}",
+ parentOperationId=parentOperationId
+ )
+
+ try:
+ # Schritt 5A: Kläre Dokument-Intents
+ documents = []
+ if documentList:
+ documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
+
+ # Filter: Entferne Original-Dokumente, wenn bereits Pre-Extracted JSONs existieren
+ # (um Duplikate zu vermeiden - Pre-Extracted JSONs enthalten bereits die ContentParts)
+ # Schritt 1: Identifiziere alle Original-Dokument-IDs, die durch Pre-Extracted JSONs abgedeckt werden
+ originalDocIdsCoveredByPreExtracted = set()
+ for doc in documents:
+ preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc)
+ if preExtracted:
+ originalDocId = preExtracted["originalDocument"]["id"]
+ originalDocIdsCoveredByPreExtracted.add(originalDocId)
+ logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}")
+
+ # Schritt 2: Filtere Dokumente - entferne Original-Dokumente, die bereits durch Pre-Extracted JSONs abgedeckt werden
+ filteredDocuments = []
+ for doc in documents:
+ preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc)
+ if preExtracted:
+ # Pre-Extracted JSON behalten
+ filteredDocuments.append(doc)
+ elif doc.id in originalDocIdsCoveredByPreExtracted:
+ # Original-Dokument, das bereits durch Pre-Extracted JSON abgedeckt wird - entfernen
+ logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON")
+ else:
+ # Normales Dokument ohne Pre-Extracted JSON - behalten
+ filteredDocuments.append(doc)
+
+ documents = filteredDocuments
+
+ checkWorkflowStopped(self.services)
+
+ if not documentIntents and documents:
+ documentIntents = await self.services.ai.clarifyDocumentIntents(
+ documents,
+ userPrompt,
+ {"outputFormat": outputFormat},
+ docOperationId
+ )
+
+ checkWorkflowStopped(self.services)
+
+ # Schritt 5B: Extrahiere und bereite Content vor
+ if documents:
+ preparedContentParts = await self.services.ai.extractAndPrepareContent(
+ documents,
+ documentIntents or [],
+ docOperationId
+ )
+
+ # Merge mit bereitgestellten contentParts (falls vorhanden)
+ if contentParts:
+ # Prüfe auf pre-extracted Content
+ for part in contentParts:
+ if part.metadata.get("skipExtraction", False):
+ # Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig
+ part.metadata.setdefault("contentFormat", "extracted")
+ part.metadata.setdefault("isPreExtracted", True)
+ preparedContentParts.extend(contentParts)
+
+ contentParts = preparedContentParts
+
+ # Schritt 5B.5: Documents are converted to contentParts (like pre-processed JSON files)
+ # No AI extraction here - AI extraction happens during section generation
+ if contentParts:
+ logger.info(f"Using {len(contentParts)} content parts for generation (no AI extraction at this stage)")
+
+ checkWorkflowStopped(self.services)
+
+ # Schritt 5C: Generiere Struktur
+ structure = await self.services.ai.generateStructure(
+ userPrompt,
+ contentParts or [],
+ outputFormat,
+ docOperationId
+ )
+
+ checkWorkflowStopped(self.services)
+
+ # Schritt 5D: Fülle Struktur
+ # Language will be extracted from services (user intention analysis) in fillStructure
+ filledStructure = await self.services.ai.fillStructure(
+ structure,
+ contentParts or [],
+ userPrompt,
+ docOperationId
+ )
+
+ checkWorkflowStopped(self.services)
+
+ # Schritt 5E: Rendere Resultat
+ # Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
+ # Language is already validated in structure (State 3) and preserved in filled structure (State 4)
+ # Per-document language will be extracted in renderReport() from filledStructure
+ # Use validated currentUserLanguage as global fallback (always valid infrastructure)
+ language = self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage else "en"
+
+ renderedDocuments = await self.services.ai.renderResult(
+ filledStructure,
+ outputFormat,
+ language, # Global fallback (per-document language extracted from structure in renderReport)
+ title or "Generated Document",
+ userPrompt,
+ docOperationId
+ )
+
+ # Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
+ documentDataList = []
+ for renderedDoc in renderedDocuments:
+ try:
+ # Erstelle DocumentData für jedes gerenderte Dokument
+ docDataObj = DocumentData(
+ documentName=renderedDoc.filename,
+ documentData=renderedDoc.documentData,
+ mimeType=renderedDoc.mimeType,
+ sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
+ )
+ documentDataList.append(docDataObj)
+ logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
+ except Exception as e:
+ logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
+
+ if not documentDataList:
+ raise ValueError("No documents were rendered")
+
+ metadata = AiResponseMetadata(
+ title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
+ operationType=OperationTypeEnum.DATA_GENERATE.value
+ )
+
+ # Debug-Log (harmonisiert)
+ self.services.utils.writeDebugFile(
+ json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
+ "document_generation_response"
+ )
+
+ self.services.chat.progressLogFinish(docOperationId, True)
+
+ return AiResponse(
+ content=json.dumps(filledStructure),
+ metadata=metadata,
+ documents=documentDataList
+ )
+
+ except Exception as e:
+ logger.error(f"Error in document generation: {str(e)}")
+ self.services.chat.progressLogFinish(docOperationId, False)
+ raise
+
diff --git a/modules/services/serviceGeneration/paths/imagePath.py b/modules/services/serviceGeneration/paths/imagePath.py
new file mode 100644
index 00000000..1247494f
--- /dev/null
+++ b/modules/services/serviceGeneration/paths/imagePath.py
@@ -0,0 +1,132 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Image Generation Path
+
+Handles image generation with support for single and batch generation.
+"""
+
+import logging
+import time
+from typing import List, Optional
+from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallRequest
+
+logger = logging.getLogger(__name__)
+
+
+class ImageGenerationPath:
+ """Image generation path."""
+
+ def __init__(self, services):
+ self.services = services
+
+ async def generateImages(
+ self,
+ userPrompt: str,
+ count: int = 1,
+ style: Optional[str] = None,
+ format: str = "png",
+ title: Optional[str] = None,
+ parentOperationId: Optional[str] = None
+ ) -> AiResponse:
+ """
+ Generate image files.
+
+ Returns: AiResponse with image files as documents
+ """
+ # Create operation ID
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ imageOperationId = f"image_gen_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ self.services.chat.progressLogStart(
+ imageOperationId,
+ "Image Generation",
+ "Image Generation",
+ f"Format: {format}",
+ parentOperationId=parentOperationId
+ )
+
+ try:
+ self.services.chat.progressLogUpdate(imageOperationId, 0.4, "Calling AI for image generation")
+
+ # Build prompt with style if provided
+ imagePrompt = userPrompt
+ if style:
+ imagePrompt = f"{userPrompt}\n\nStyle: {style}"
+
+ # Use IMAGE_GENERATE operation
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.IMAGE_GENERATE,
+ resultFormat=format
+ )
+
+ request = AiCallRequest(
+ prompt=imagePrompt,
+ context="",
+ options=options
+ )
+
+ response = await self.services.ai.callAi(request)
+
+ if not response.content:
+ errorMsg = f"No image data returned: {response.content}"
+ logger.error(f"Error in AI image generation: {errorMsg}")
+ self.services.chat.progressLogFinish(imageOperationId, False)
+ raise ValueError(errorMsg)
+
+ # Handle response content (could be base64 string or bytes)
+ imageData = response.content
+ if isinstance(imageData, str):
+ # Assume base64 encoded string
+ import base64
+ try:
+ imageData = base64.b64decode(imageData)
+ except Exception:
+ # If not base64, try encoding as bytes
+ imageData = imageData.encode('utf-8')
+ elif not isinstance(imageData, bytes):
+ imageData = bytes(imageData)
+
+ # Create document
+ imageDoc = DocumentData(
+ documentName=f"generated_image.{format}",
+ documentData=imageData,
+ mimeType=f"image/{format}"
+ )
+
+ metadata = AiResponseMetadata(
+ title=title or "Generated Image",
+ operationType=OperationTypeEnum.IMAGE_GENERATE.value
+ )
+
+ self.services.chat.storeWorkflowStat(
+ self.services.workflow,
+ response,
+ "ai.generate.image"
+ )
+
+ self.services.chat.progressLogUpdate(imageOperationId, 0.9, "Image generated")
+ self.services.chat.progressLogFinish(imageOperationId, True)
+
+ # Create content string describing the image generation
+ import json
+ contentJson = json.dumps({
+ "type": "image",
+ "format": format,
+ "prompt": userPrompt,
+ "filename": imageDoc.documentName
+ }, ensure_ascii=False)
+
+ return AiResponse(
+ content=contentJson, # JSON string describing the image generation
+ metadata=metadata,
+ documents=[imageDoc]
+ )
+
+ except Exception as e:
+ logger.error(f"Error in image generation: {str(e)}")
+ self.services.chat.progressLogFinish(imageOperationId, False)
+ raise
+
diff --git a/modules/services/serviceGeneration/renderers/registry.py b/modules/services/serviceGeneration/renderers/registry.py
index 04ac520f..fdaba913 100644
--- a/modules/services/serviceGeneration/renderers/registry.py
+++ b/modules/services/serviceGeneration/renderers/registry.py
@@ -139,6 +139,32 @@ class RendererRegistry:
}
return info
+
+ def getOutputStyle(self, outputFormat: str) -> Optional[str]:
+ """
+ Get the output style classification for a given format.
+ Returns: 'code', 'document', 'image', or other (e.g., 'video' for future use)
+ """
+ if not self._discovered:
+ self.discoverRenderers()
+
+ # Normalize format name
+ formatName = outputFormat.lower().strip()
+
+ # Check for aliases first
+ if formatName in self._format_mappings:
+ formatName = self._format_mappings[formatName]
+
+ # Get renderer class and call getOutputStyle (all renderers have same signature)
+ rendererClass = self._renderers.get(formatName)
+ try:
+ return rendererClass.getOutputStyle(formatName)
+ except (AttributeError, TypeError) as e:
+ logger.warning(f"No renderer found for format: {outputFormat}, cannot determine output style")
+ return None
+ except Exception as e:
+ logger.warning(f"Error getting output style for {outputFormat}: {str(e)}")
+ return None
# Global registry instance
_registry = RendererRegistry()
@@ -154,3 +180,7 @@ def getSupportedFormats() -> List[str]:
def getRendererInfo() -> Dict[str, Dict[str, str]]:
"""Get information about all registered renderers."""
return _registry.getRendererInfo()
+
+def getOutputStyle(outputFormat: str) -> Optional[str]:
+ """Get the output style classification for a given format."""
+ return _registry.getOutputStyle(outputFormat)
diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
index efe53eaa..0c72bd24 100644
--- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
+++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
@@ -5,7 +5,7 @@ Base renderer class for all format renderers.
"""
from abc import ABC, abstractmethod
-from typing import Dict, Any, List, Tuple
+from typing import Dict, Any, List, Tuple, Optional
from modules.datamodels.datamodelJson import supportedSectionTypes
from modules.datamodels.datamodelDocument import RenderedDocument
import json
@@ -50,6 +50,19 @@ class BaseRenderer(ABC):
"""
return 0
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """
+ Return the output style classification for this renderer.
+ Returns: 'code', 'document', 'image', or other (e.g., 'video' for future use)
+ Override this method in subclasses to specify the output style.
+
+ Args:
+ formatName: Optional format name (e.g., 'txt', 'js', 'csv') - useful for renderers
+ that handle multiple formats with different styles (e.g., RendererText)
+ """
+ return 'document' # Default to document style
+
@abstractmethod
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py
index 83ca41c1..eb00a610 100644
--- a/modules/services/serviceGeneration/renderers/rendererCsv.py
+++ b/modules/services/serviceGeneration/renderers/rendererCsv.py
@@ -6,7 +6,7 @@ CSV renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
class RendererCsv(BaseRenderer):
"""Renders content to CSV format with format-specific extraction."""
@@ -26,6 +26,11 @@ class RendererCsv(BaseRenderer):
"""Return priority for CSV renderer."""
return 70
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: CSV requires specific structure (header, then data rows)."""
+ return 'code'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to CSV format."""
try:
@@ -71,8 +76,9 @@ class RendererCsv(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
- # Use title from JSON metadata if available, otherwise use provided title
- documentTitle = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ documentTitle = title if title else metadata.get("title", "Generated Document")
# Generate CSV content
csvRows = []
diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py
index c7363918..6a714c3f 100644
--- a/modules/services/serviceGeneration/renderers/rendererDocx.py
+++ b/modules/services/serviceGeneration/renderers/rendererDocx.py
@@ -6,7 +6,7 @@ DOCX renderer for report generation using python-docx.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
import io
import base64
import re
@@ -39,6 +39,11 @@ class RendererDocx(BaseRenderer):
"""Return priority for DOCX renderer."""
return 115
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: Word documents are formatted documents."""
+ return 'document'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
@@ -121,8 +126,9 @@ class RendererDocx(BaseRenderer):
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
- # Use title from JSON metadata if available, otherwise use provided title
- document_title = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ document_title = title if title else metadata.get("title", "Generated Document")
# Add document title using Title style
if document_title:
@@ -655,10 +661,12 @@ class RendererDocx(BaseRenderer):
content = image_data.get("content", {})
base64_data = ""
alt_text = "Image"
+ caption = ""
if isinstance(content, dict):
base64_data = content.get("base64Data", "")
alt_text = content.get("altText", "Image")
+ caption = content.get("caption", "")
elif isinstance(content, str):
# Content might be base64 string directly (shouldn't happen, but handle it)
self.logger.warning("Image content is a string, not a dict. This should not happen.")
@@ -669,6 +677,8 @@ class RendererDocx(BaseRenderer):
base64_data = image_data.get("base64Data", "")
if not alt_text or alt_text == "Image":
alt_text = image_data.get("altText", "Image")
+ if not caption:
+ caption = image_data.get("caption", "")
# CRITICAL: Ensure we don't render base64 data as text
# If base64_data looks like it might be rendered elsewhere, skip it
@@ -712,8 +722,26 @@ class RendererDocx(BaseRenderer):
image_stream.seek(0)
doc.add_picture(image_stream, width=Inches(6.0))
- if alt_text and alt_text != "Image":
- caption_para = doc.add_paragraph(f"Figure: {alt_text}")
+ # Use caption from section if available, otherwise use alt_text
+ if caption:
+ caption_text = caption
+ elif alt_text and alt_text != "Image":
+ # Only use alt_text if it doesn't look like a usageHint
+ if "Render as visual element:" in alt_text:
+ # Extract filename from usageHint if possible
+ parts = alt_text.split("Render as visual element:")
+ if len(parts) > 1:
+ filename = parts[1].strip()
+ caption_text = f"Figure: {filename}"
+ else:
+ caption_text = alt_text
+ else:
+ caption_text = f"Figure: {alt_text}"
+ else:
+ caption_text = None
+
+ if caption_text:
+ caption_para = doc.add_paragraph(caption_text)
caption_para.runs[0].italic = True
except Exception as embedError:
# Image decoding or embedding failed
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index 1f013e50..34017e67 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -6,7 +6,7 @@ HTML renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction."""
@@ -26,6 +26,11 @@ class RendererHtml(BaseRenderer):
"""Return priority for HTML renderer."""
return 100
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: HTML web pages are rendered documents."""
+ return 'document'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render HTML document with images as separate files.
@@ -107,8 +112,9 @@ class RendererHtml(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
- # Use title from JSON metadata if available, otherwise use provided title
- documentTitle = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ documentTitle = title if title else metadata.get("title", "Generated Document")
# Build HTML document
htmlParts = []
diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py
index 479881df..02d991fe 100644
--- a/modules/services/serviceGeneration/renderers/rendererImage.py
+++ b/modules/services/serviceGeneration/renderers/rendererImage.py
@@ -6,7 +6,7 @@ Image renderer for report generation using AI image generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
import logging
import base64
@@ -30,6 +30,11 @@ class RendererImage(BaseRenderer):
"""Return priority for image renderer."""
return 90
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: Images are visual media."""
+ return 'image'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to image format using AI image generation."""
try:
@@ -86,8 +91,9 @@ class RendererImage(BaseRenderer):
# Extract metadata from standardized schema
metadata = self._extractMetadata(extractedContent)
- # Use title from JSON metadata if available, otherwise use provided title
- documentTitle = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ documentTitle = title if title else metadata.get("title", "Generated Document")
# Create AI prompt for image generation
imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)
diff --git a/modules/services/serviceGeneration/renderers/rendererJson.py b/modules/services/serviceGeneration/renderers/rendererJson.py
index 91e8342d..10aa63d5 100644
--- a/modules/services/serviceGeneration/renderers/rendererJson.py
+++ b/modules/services/serviceGeneration/renderers/rendererJson.py
@@ -6,7 +6,7 @@ JSON renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
import json
class RendererJson(BaseRenderer):
@@ -27,6 +27,11 @@ class RendererJson(BaseRenderer):
"""Return priority for JSON renderer."""
return 80
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: JSON is structured data format."""
+ return 'code'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to JSON format."""
try:
diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py
index 84644485..e76046b0 100644
--- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py
+++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py
@@ -6,7 +6,7 @@ Markdown renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
class RendererMarkdown(BaseRenderer):
"""Renders content to Markdown format with format-specific extraction."""
@@ -26,6 +26,11 @@ class RendererMarkdown(BaseRenderer):
"""Return priority for markdown renderer."""
return 95
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: Markdown documents are formatted documents."""
+ return 'document'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Markdown format."""
try:
@@ -82,8 +87,9 @@ class RendererMarkdown(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
- # Use title from JSON metadata if available, otherwise use provided title
- documentTitle = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ documentTitle = title if title else metadata.get("title", "Generated Document")
# Build markdown content
markdownParts = []
diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py
index f1c3f7fa..50ec9222 100644
--- a/modules/services/serviceGeneration/renderers/rendererPdf.py
+++ b/modules/services/serviceGeneration/renderers/rendererPdf.py
@@ -6,7 +6,7 @@ PDF renderer for report generation using reportlab.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
import io
import base64
@@ -39,6 +39,11 @@ class RendererPdf(BaseRenderer):
"""Return priority for PDF renderer."""
return 120
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: PDF documents are formatted documents."""
+ return 'document'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
@@ -110,8 +115,9 @@ class RendererPdf(BaseRenderer):
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
- # Use title from JSON metadata if available, otherwise use provided title
- document_title = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ document_title = title if title else metadata.get("title", "Generated Document")
# Make title shorter to prevent wrapping/overlapping
if len(document_title) > 40:
@@ -895,11 +901,21 @@ class RendererPdf(BaseRenderer):
captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
elements.append(Paragraph(f"{caption}", captionStyle))
elif alt_text and alt_text != "Image":
- # Use alt text as caption if no caption provided
+ # Use alt text as caption if no caption provided, but avoid usageHint format
+ if "Render as visual element:" in alt_text:
+ # Extract filename from usageHint if possible
+ parts = alt_text.split("Render as visual element:")
+ if len(parts) > 1:
+ filename = parts[1].strip()
+ caption_text = f"Figure: {filename}"
+ else:
+ caption_text = alt_text
+ else:
+ caption_text = f"Figure: {alt_text}"
captionStyle = self._createNormalStyle(styles)
captionStyle.fontSize = 10
captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
- elements.append(Paragraph(f"Figure: {alt_text}", captionStyle))
+ elements.append(Paragraph(f"{caption_text}", captionStyle))
return elements
diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py
index 5525ae89..bb43d8be 100644
--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
@@ -26,6 +26,21 @@ class RendererPptx(BaseRenderer):
"""Get list of supported output formats."""
return ["pptx", "ppt"]
+ @classmethod
+ def getFormatAliases(cls) -> List[str]:
+ """Return format aliases."""
+ return []
+
+ @classmethod
+ def getPriority(cls) -> int:
+ """Return priority for PowerPoint renderer."""
+ return 105
+
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: PowerPoint presentations are formatted documents."""
+ return 'document'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render content as PowerPoint presentation from JSON data.
@@ -601,8 +616,9 @@ JSON ONLY. NO OTHER TEXT."""
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
- # Use title from JSON metadata if available, otherwise use provided title
- document_title = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ document_title = title if title else metadata.get("title", "Generated Document")
# Create title slide
slides.append({
diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py
index 116d73f4..fd15e50d 100644
--- a/modules/services/serviceGeneration/renderers/rendererText.py
+++ b/modules/services/serviceGeneration/renderers/rendererText.py
@@ -6,7 +6,7 @@ Text renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
class RendererText(BaseRenderer):
"""Renders content to plain text format with format-specific extraction."""
@@ -48,6 +48,21 @@ class RendererText(BaseRenderer):
"""Return priority for text renderer."""
return 90
+ @classmethod
+ def getOutputStyle(cls, formatName: str = None) -> str:
+ """
+ Return output style classification based on format.
+ For txt/text/plain: 'document' (unstructured text)
+ For all other formats: 'code' (structured formats with rules/syntax)
+
+ Note: formatName parameter is provided by registry when calling this method.
+ """
+ # Plain text formats are document style
+ if formatName and formatName.lower() in ['txt', 'text', 'plain']:
+ return 'document'
+ # All other formats handled by RendererText are code style
+ return 'code'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to plain text format."""
try:
@@ -104,8 +119,9 @@ class RendererText(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
- # Use title from JSON metadata if available, otherwise use provided title
- documentTitle = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ documentTitle = title if title else metadata.get("title", "Generated Document")
# Build text content
textParts = []
diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py
index 24c620d2..14f8a71a 100644
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@@ -6,10 +6,15 @@ Excel renderer for report generation using openpyxl.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
import io
import base64
-from datetime import datetime, UTC
+from datetime import datetime, UTC, date
+try:
+ from dateutil import parser as date_parser
+ DATEUTIL_AVAILABLE = True
+except ImportError:
+ DATEUTIL_AVAILABLE = False
try:
from openpyxl import Workbook
@@ -38,6 +43,11 @@ class RendererXlsx(BaseRenderer):
"""Return priority for Excel renderer."""
return 110
+ @classmethod
+ def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
+ """Return output style classification: Excel spreadsheets are formatted documents."""
+ return 'document'
+
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
try:
@@ -285,8 +295,9 @@ class RendererXlsx(BaseRenderer):
# Extract metadata from standardized schema
metadata = self._extractMetadata(jsonContent)
- # Use title from JSON metadata if available, otherwise use provided title
- document_title = metadata.get("title", title)
+ # Use provided title (which comes from documents[].title) as primary source
+ # Fallback to metadata.title only if title parameter is empty
+ document_title = title if title else metadata.get("title", "Generated Document")
# Create workbook
wb = Workbook()
@@ -684,7 +695,12 @@ class RendererXlsx(BaseRenderer):
# If no level 1 headings found, use document title
if not sheetNames:
- documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
+ # Use documents[].title as primary source, fallback to metadata.title
+ documents = jsonContent.get("documents", [])
+ if documents and isinstance(documents[0], dict) and documents[0].get("title"):
+ documentTitle = documents[0].get("title")
+ else:
+ documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(self._sanitizeSheetName(documentTitle))
return sheetNames
@@ -787,6 +803,7 @@ class RendererXlsx(BaseRenderer):
# Add rows - handle both array format and cells object format
cell_style = styles.get("table_cell", {})
+ header_count = len(headers)
for row_idx, row_data in enumerate(rows, 4):
# Handle different row formats
if isinstance(row_data, list):
@@ -799,6 +816,14 @@ class RendererXlsx(BaseRenderer):
# Unknown format, skip
continue
+ # Validate row column count matches headers - pad or truncate if needed
+ if len(cell_values) < header_count:
+ # Pad with empty strings if row has fewer columns
+ cell_values.extend([""] * (header_count - len(cell_values)))
+ elif len(cell_values) > header_count:
+ # Truncate if row has more columns than headers
+ cell_values = cell_values[:header_count]
+
for col_idx, cell_value in enumerate(cell_values, 1):
# Extract value if it's a dict with "value" key
if isinstance(cell_value, dict):
@@ -820,8 +845,12 @@ class RendererXlsx(BaseRenderer):
def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
"""Populate the main sheet with document overview and all content."""
try:
- # Document title
- documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
+ # Document title - use documents[].title as primary source, fallback to metadata.title
+ documents = jsonContent.get("documents", [])
+ if documents and isinstance(documents[0], dict) and documents[0].get("title"):
+ documentTitle = documents[0].get("title")
+ else:
+ documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
sheet['A1'] = documentTitle
# Safety check for title style
@@ -976,8 +1005,49 @@ class RendererXlsx(BaseRenderer):
self.logger.warning(f"Could not add section to sheet: {str(e)}")
return startRow + 1
+ def _parseDateString(self, text: str) -> Any:
+ """Try to parse a string as a date/datetime. Returns datetime object if successful, None otherwise."""
+ if not text or not isinstance(text, str):
+ return None
+
+ text = text.strip()
+ if not text:
+ return None
+
+ # Common date formats to try (in order of likelihood)
+ date_formats = [
+ "%Y-%m-%d", # 2025-01-01
+ "%d.%m.%Y", # 01.01.2025
+ "%d/%m/%Y", # 01/01/2025
+ "%m/%d/%Y", # 01/01/2025 (US format)
+ "%Y-%m-%d %H:%M:%S", # 2025-01-01 12:00:00
+ "%d.%m.%Y %H:%M:%S", # 01.01.2025 12:00:00
+ "%d/%m/%Y %H:%M:%S", # 01/01/2025 12:00:00
+ "%Y-%m-%d %H:%M", # 2025-01-01 12:00
+ "%d.%m.%Y %H:%M", # 01.01.2025 12:00
+ "%d/%m/%Y %H:%M", # 01/01/2025 12:00
+ ]
+
+ # Try parsing with common formats first
+ for date_format in date_formats:
+ try:
+ parsed_date = datetime.strptime(text, date_format)
+ return parsed_date
+ except ValueError:
+ continue
+
+ # If dateutil is available, use it for more flexible parsing
+ if DATEUTIL_AVAILABLE:
+ try:
+ parsed_date = date_parser.parse(text, dayfirst=True, yearfirst=False)
+ return parsed_date
+ except (ValueError, TypeError):
+ pass
+
+ return None
+
def _sanitizeCellValue(self, value: Any) -> Any:
- """Sanitize cell value: remove markdown, convert to string, handle None, limit length."""
+ """Sanitize cell value: remove markdown, convert to string, handle None, limit length. Preserve numbers as numbers."""
if value is None:
return ""
if isinstance(value, dict):
@@ -994,6 +1064,45 @@ class RendererXlsx(BaseRenderer):
# Remove other markdown
text = text.replace("__", "").replace("_", "")
text = text.strip()
+
+ # Try to convert numeric strings to actual numbers
+ # This ensures Excel treats them as numbers, not strings
+ if text:
+ # Clean text for number conversion: remove common formatting characters
+ # but preserve the original for fallback
+ cleaned_for_number = text.replace("'", "").replace(",", "").replace(" ", "").strip()
+
+ # Only attempt conversion if cleaned text looks like a number
+ # (starts with digit, +, -, or . followed by digit)
+ if cleaned_for_number and (cleaned_for_number[0].isdigit() or cleaned_for_number[0] in '+-.'):
+ # Try integer first (more restrictive)
+ try:
+ # Check if it's a valid integer (no decimal point, no scientific notation)
+ if '.' not in cleaned_for_number and 'e' not in cleaned_for_number.lower() and 'E' not in cleaned_for_number:
+ int_value = int(cleaned_for_number)
+ return int_value
+ except (ValueError, OverflowError):
+ pass
+
+ # Try float if integer conversion failed
+ try:
+ float_value = float(cleaned_for_number)
+ # Only return as float if it's actually a number representation
+ # Avoid converting things like "NaN", "inf" which are valid floats but not useful
+ if cleaned_for_number.lower() not in ['nan', 'inf', '-inf', 'infinity', '-infinity']:
+ # Check for reasonable float values (not too large/small)
+ if abs(float_value) < 1e308: # Avoid overflow
+ return float_value
+ except (ValueError, OverflowError):
+ pass
+
+ # Try to convert date strings to datetime objects
+ # This ensures Excel treats them as dates, not strings
+ # Use original text (not cleaned) for date parsing
+ date_value = self._parseDateString(text)
+ if date_value is not None:
+ return date_value
+
# Excel cell value limit is 32,767 characters - truncate if necessary
if len(text) > 32767:
text = text[:32764] + "..."
@@ -1083,6 +1192,7 @@ class RendererXlsx(BaseRenderer):
# Add rows with formatting
cell_style = styles.get("table_cell", {})
+ header_count = len(headers)
for row_data in rows:
# Handle different row formats
if isinstance(row_data, list):
@@ -1092,6 +1202,14 @@ class RendererXlsx(BaseRenderer):
else:
continue
+ # Validate row column count matches headers - pad or truncate if needed
+ if len(cell_values) < header_count:
+ # Pad with empty strings if row has fewer columns
+ cell_values.extend([""] * (header_count - len(cell_values)))
+ elif len(cell_values) > header_count:
+ # Truncate if row has more columns than headers
+ cell_values = cell_values[:header_count]
+
for col, cell_value in enumerate(cell_values, 1):
sanitized_value = self._sanitizeCellValue(cell_value)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py
index 681a5923..86464ef6 100644
--- a/modules/services/serviceGeneration/subContentGenerator.py
+++ b/modules/services/serviceGeneration/subContentGenerator.py
@@ -13,6 +13,7 @@ import re
import traceback
from typing import Dict, Any, Optional, List, Callable
from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator
+from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@@ -167,6 +168,7 @@ class ContentGenerator:
contentPartsMap[partId] = part
for idx, section in enumerate(sections):
+ checkWorkflowStopped(self.services)
try:
contentType = section.get("content_type", "content")
sectionId = section.get("id", f"section_{idx}")
@@ -229,7 +231,8 @@ class ContentGenerator:
sections: List[Dict[str, Any]],
cachedContent: Optional[Dict[str, Any]],
userPrompt: str,
- documentMetadata: Dict[str, Any],
+ contentParts: Optional[List[Any]] = None,
+ documentMetadata: Dict[str, Any] = {},
progressCallback: Optional[Callable] = None,
batchSize: int = 10
) -> List[Dict[str, Any]]:
@@ -240,6 +243,7 @@ class ContentGenerator:
sections: List of sections to generate
cachedContent: Extracted content cache
userPrompt: Original user prompt
+ contentParts: List of all available ContentParts (for mapping by contentPartIds)
documentMetadata: Document metadata
progressCallback: Progress callback function
batchSize: Number of sections to process in parallel per batch
@@ -253,6 +257,14 @@ class ContentGenerator:
if totalSections == 0:
return []
+ # Create ContentParts lookup map by ID
+ contentPartsMap = {}
+ if contentParts:
+ for part in contentParts:
+ partId = part.id if hasattr(part, 'id') else part.get('id', '')
+ if partId:
+ contentPartsMap[partId] = part
+
# Adjust batch size based on section types (images take longer)
imageCount = sum(1 for s in sections if s.get("content_type") == "image")
if imageCount > 0:
@@ -277,6 +289,7 @@ class ContentGenerator:
)
async def generateWithProgress(section: Dict[str, Any], globalIndex: int, localIndex: int, batchPreviousSections: List[Dict[str, Any]]):
+ checkWorkflowStopped(self.services)
try:
contentType = section.get("content_type", "content")
sectionId = section.get("id", f"section_{globalIndex}")
@@ -422,6 +435,7 @@ class ContentGenerator:
resultFormat="json"
)
+ checkWorkflowStopped(self.services)
aiResponse = await self.services.ai.callAiContent(
prompt=sectionPrompt,
options=options,
@@ -603,6 +617,59 @@ class ContentGenerator:
) -> Dict[str, Any]:
"""Generate image for image section or include existing image"""
try:
+ # First, check if section has image ContentParts to integrate directly
+ sectionContentParts = context.get("sectionContentParts", [])
+ if sectionContentParts:
+ # Look for image ContentParts
+ for part in sectionContentParts:
+ partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '')
+ partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '')
+ isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/"))
+
+ if isImage:
+ # Extract image data from ContentPart
+ partData = part.data if hasattr(part, 'data') else part.get('data', '')
+ partId = part.id if hasattr(part, 'id') else part.get('id', '')
+
+ # Get base64 data
+ base64Data = None
+ if isinstance(partData, str):
+ # Check if it's already base64 or needs extraction
+ if partData.startswith("data:image"):
+ # Extract base64 from data URL
+ base64Data = partData.split(",", 1)[1] if "," in partData else partData
+ elif len(partData) > 100: # Likely base64 string
+ base64Data = partData
+ elif isinstance(partData, bytes):
+ import base64
+ base64Data = base64.b64encode(partData).decode('utf-8')
+
+ if base64Data:
+ # Get caption from section (priority: section.caption > metadata.caption)
+ caption = section.get("caption") or section.get("metadata", {}).get("caption")
+
+ # Get alt text from ContentPart metadata or section
+ altText = part.metadata.get("altText") if hasattr(part, 'metadata') else part.get('metadata', {}).get('altText')
+ if not altText:
+ altText = section.get("generation_hint", "Image")
+
+ # Get mime type
+ mimeType = partMimeType or "image/png"
+
+ # Create image element with caption
+ section["elements"] = [{
+ "type": "image",
+ "content": {
+ "base64Data": base64Data,
+ "altText": altText,
+ "caption": caption # Include caption from section
+ },
+ "caption": caption # Also at element level for compatibility
+ }]
+
+ logger.info(f"Successfully integrated image from ContentPart {partId} for section {section.get('id')} with caption: {caption}")
+ return section
+
# Check if this is an existing image to include or render
imageSource = section.get("image_source", "generate")
@@ -623,12 +690,17 @@ class ContentGenerator:
# Create image element from existing/render image
altText = imageDoc.get("altText", section.get("generation_hint", "Image"))
mimeType = imageDoc.get("mimeType", "image/png")
+ caption = section.get("caption") or section.get("metadata", {}).get("caption")
+ # Use nested content structure for consistency with renderers
section["elements"] = [{
- "base64Data": imageDoc.get("base64Data"),
- "altText": altText,
- "mimeType": mimeType,
- "caption": section.get("metadata", {}).get("caption")
+ "type": "image",
+ "content": {
+ "base64Data": imageDoc.get("base64Data"),
+ "altText": altText,
+ "caption": caption # Include caption in content structure
+ },
+ "caption": caption # Also at element level for compatibility
}]
logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})")
@@ -666,6 +738,7 @@ class ContentGenerator:
logger.info(f"Starting image generation for section {section.get('id')}: {imagePrompt[:100]}...")
# Call AI for image generation
+ checkWorkflowStopped(self.services)
aiResponse = await self.services.ai.callAiContent(
prompt=promptJson,
options=options,
@@ -702,13 +775,17 @@ class ContentGenerator:
# Use image_prompt as alt text if generation_hint is generic
altText = section.get("image_prompt", "Image")[:100] # Limit length
- caption = section.get("metadata", {}).get("caption")
+ caption = section.get("caption") or section.get("metadata", {}).get("caption")
+ # Use nested content structure for consistency with renderers
section["elements"] = [{
- "url": f"data:image/png;base64,{base64Data}",
- "base64Data": base64Data,
- "altText": altText,
- "caption": caption
+ "type": "image",
+ "content": {
+ "base64Data": base64Data,
+ "altText": altText,
+ "caption": caption # Include caption in content structure
+ },
+ "caption": caption # Also at element level for compatibility
}]
logger.info(f"Successfully generated image for section {section.get('id')}")
diff --git a/modules/services/serviceWeb/mainServiceWeb.py b/modules/services/serviceWeb/mainServiceWeb.py
index 50f7a84c..469ca6ae 100644
--- a/modules/services/serviceWeb/mainServiceWeb.py
+++ b/modules/services/serviceWeb/mainServiceWeb.py
@@ -2,7 +2,7 @@
# All rights reserved.
"""
Web crawl service for handling web research operations.
-Manages the two-step process: WEB_SEARCH then WEB_CRAWL.
+Manages the two-step process: WEB_SEARCH_DATA then WEB_CRAWL.
"""
import json
@@ -35,7 +35,7 @@ class WebService:
"""
Perform web research in two steps:
1. Use AI to analyze prompt and extract parameters + URLs
- 2. Call WEB_SEARCH to get URLs (if needed)
+ 2. Call WEB_SEARCH_DATA to get URLs (if needed)
3. Combine URLs and filter to maxNumberPages
4. Call WEB_CRAWL for each URL
5. Return consolidated result
@@ -337,9 +337,9 @@ Return ONLY valid JSON, no additional text:
# Debug: persist search prompt
self.services.utils.writeDebugFile(searchPrompt, "websearch_prompt")
- # Call AI with WEB_SEARCH operation
+ # Call AI with WEB_SEARCH_DATA operation
searchOptions = AiCallOptions(
- operationType=OperationTypeEnum.WEB_SEARCH,
+ operationType=OperationTypeEnum.WEB_SEARCH_DATA,
resultFormat="json"
)
diff --git a/modules/shared/frontendTypes.py b/modules/shared/frontendTypes.py
index 1d1c4682..06a81570 100644
--- a/modules/shared/frontendTypes.py
+++ b/modules/shared/frontendTypes.py
@@ -42,6 +42,7 @@ class FrontendType(str, Enum):
JSON = "json"
MULTILINGUAL = "multilingual"
FILE = "file"
+ HIDDEN = "hidden"
# Custom Types for Actions
USER_CONNECTION = "userConnection"
diff --git a/modules/workflows/methods/methodAi.py.old b/modules/workflows/methods/methodAi.py.old
deleted file mode 100644
index fedaa0ef..00000000
--- a/modules/workflows/methods/methodAi.py.old
+++ /dev/null
@@ -1,742 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-AI processing method module.
-Handles direct AI calls for any type of task.
-"""
-
-import time
-import logging
-from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
-from modules.datamodels.datamodelWorkflow import ExtractContentParameters
-from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
-
-logger = logging.getLogger(__name__)
-
-class MethodAi(MethodBase):
- """AI processing methods."""
-
- def __init__(self, services):
- super().__init__(services)
- self.name = "ai"
- self.description = "AI processing methods"
-
- def _format_timestamp_for_filename(self) -> str:
- """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
- return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
-
-
- @action
- async def process(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
- - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
- - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
- - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
-
- Parameters:
- - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
- - documentList (list, optional): Document reference(s) in any format to use as input/context.
- - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
- """
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"ai_process_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Generate",
- "AI Processing",
- f"Format: {parameters.get('resultType', 'txt')}",
- parentOperationId=parentOperationId
- )
-
- aiPrompt = parameters.get("aiPrompt")
- logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")
-
- # Update progress - preparing parameters
- self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
-
- from modules.datamodels.datamodelDocref import DocumentReferenceList
-
- documentListParam = parameters.get("documentList")
- # Convert to DocumentReferenceList if needed
- if documentListParam is None:
- documentList = DocumentReferenceList(references=[])
- elif isinstance(documentListParam, DocumentReferenceList):
- documentList = documentListParam
- elif isinstance(documentListParam, str):
- documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
- documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
- logger.error(f"Invalid documentList type: {type(documentListParam)}")
- documentList = DocumentReferenceList(references=[])
-
- resultType = parameters.get("resultType", "txt")
-
-
- if not aiPrompt:
- logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
- return ActionResult.isFailure(
- error="AI prompt is required"
- )
-
- # Determine output extension and default MIME type without duplicating service logic
- normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
- output_extension = f".{normalized_result_type}"
- output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
- logger.info(f"Using result type: {resultType} -> {output_extension}")
-
- # Phase 7.3: Extract content first if documents provided, then use contentParts
- # Check if contentParts are already provided (preferred path)
- contentParts: Optional[List[ContentPart]] = None
- if "contentParts" in parameters:
- contentParts = parameters.get("contentParts")
- if contentParts and not isinstance(contentParts, list):
- # Try to extract from ContentExtracted if it's an ActionDocument
- if hasattr(contentParts, 'parts'):
- contentParts = contentParts.parts
- else:
- logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
- contentParts = None
-
- # If contentParts not provided but documentList is, extract content first
- if not contentParts and documentList.references:
- self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
-
- # Get ChatDocuments
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
- if not chatDocuments:
- logger.warning("No documents found in documentList")
- else:
- logger.info(f"Extracting content from {len(chatDocuments)} documents")
-
- # Prepare extraction options (use defaults if not provided)
- extractionOptions = parameters.get("extractionOptions")
- if not extractionOptions:
- extractionOptions = ExtractionOptions(
- prompt="Extract all content from the document",
- mergeStrategy=MergeStrategy(
- mergeType="concatenate",
- groupBy="typeGroup",
- orderBy="id"
- ),
- processDocumentsIndividually=True
- )
-
- # Extract content using extraction service with hierarchical progress logging
- # Pass operationId for per-document progress tracking
- extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
-
- # Combine all ContentParts from all extracted results
- contentParts = []
- for extracted in extractedResults:
- if extracted.parts:
- contentParts.extend(extracted.parts)
-
- logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
-
- # Update progress - preparing AI call
- self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
-
- # Build options with only resultFormat - let service layer handle all other parameters
- output_format = output_extension.replace('.', '') or 'txt'
- options = AiCallOptions(
- resultFormat=output_format
- # Removed all model parameters - service layer will analyze prompt and determine optimal parameters
- )
-
- # Update progress - calling AI
- self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
-
- # Use unified callAiContent method with contentParts (extraction is now separate)
- aiResponse = await self.services.ai.callAiContent(
- prompt=aiPrompt,
- options=options,
- contentParts=contentParts, # Already extracted (or None if no documents)
- outputFormat=output_format,
- parentOperationId=operationId
- )
-
- # Update progress - processing result
- self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
-
- from modules.datamodels.datamodelChat import ActionDocument
-
- # Extract documents from AiResponse
- if aiResponse.documents and len(aiResponse.documents) > 0:
- action_documents = []
- for doc in aiResponse.documents:
- validationMetadata = {
- "actionType": "ai.process",
- "resultType": normalized_result_type,
- "outputFormat": output_format,
- "hasDocuments": True,
- "documentCount": len(aiResponse.documents)
- }
- action_documents.append(ActionDocument(
- documentName=doc.documentName,
- documentData=doc.documentData,
- mimeType=doc.mimeType or output_mime_type,
- sourceJson=getattr(doc, 'sourceJson', None), # Preserve source JSON for structure validation
- validationMetadata=validationMetadata
- ))
-
- final_documents = action_documents
- else:
- # Text response - create document from content
- extension = output_extension.lstrip('.')
- meaningful_name = self._generateMeaningfulFileName(
- base_name="ai",
- extension=extension,
- action_name="result"
- )
- validationMetadata = {
- "actionType": "ai.process",
- "resultType": normalized_result_type,
- "outputFormat": output_format,
- "hasDocuments": False,
- "contentType": "text"
- }
- action_document = ActionDocument(
- documentName=meaningful_name,
- documentData=aiResponse.content,
- mimeType=output_mime_type,
- validationMetadata=validationMetadata
- )
- final_documents = [action_document]
-
- # Complete progress tracking
- self.services.chat.progressLogFinish(operationId, True)
-
- return ActionResult.isSuccess(documents=final_documents)
-
- except Exception as e:
- logger.error(f"Error in AI processing: {str(e)}")
-
- # Complete progress tracking with failure
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass # Don't fail on progress logging errors
-
- return ActionResult.isFailure(
- error=str(e)
- )
-
-
- @action
- async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Web research with two-step process: search for URLs, then crawl content.
- - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
- - Output format: JSON with research results including URLs and content.
-
- Parameters:
- - prompt (str, required): Natural language research instruction.
- - urlList (list, optional): Specific URLs to crawl, if needed.
- - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
- - language (str, optional): Language code (lowercase, e.g., de, en, fr).
- - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
- """
- try:
- prompt = parameters.get("prompt")
- if not prompt:
- return ActionResult.isFailure(error="Research prompt is required")
-
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"web_research_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Web Research",
- "Searching and Crawling",
- "Extracting URLs and Content",
- parentOperationId=parentOperationId
- )
-
- # Call webcrawl service - service handles all AI intention analysis and processing
- result = await self.services.web.performWebResearch(
- prompt=prompt,
- urls=parameters.get("urlList", []),
- country=parameters.get("country"),
- language=parameters.get("language"),
- researchDepth=parameters.get("researchDepth", "general"),
- operationId=operationId
- )
-
- # Complete progress tracking
- self.services.chat.progressLogFinish(operationId, True)
-
- # Get meaningful filename from research result (generated by intent analyzer)
- suggestedFilename = result.get("suggested_filename")
- if suggestedFilename:
- # Clean and validate filename
- import re
- cleaned = suggestedFilename.strip().strip('"\'')
- cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
- # Ensure it doesn't already have extension
- if cleaned.lower().endswith('.json'):
- cleaned = cleaned[:-5]
- # Validate: should be reasonable length and contain only safe characters
- if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
- meaningfulName = f"{cleaned}.json"
- else:
- # Fallback to generic meaningful filename
- meaningfulName = self._generateMeaningfulFileName(
- base_name="web_research",
- extension="json",
- action_name="research"
- )
- else:
- # Fallback to generic meaningful filename
- meaningfulName = self._generateMeaningfulFileName(
- base_name="web_research",
- extension="json",
- action_name="research"
- )
-
- from modules.datamodels.datamodelChat import ActionDocument
- validationMetadata = {
- "actionType": "ai.webResearch",
- "prompt": prompt,
- "urlList": parameters.get("urlList", []),
- "country": parameters.get("country"),
- "language": parameters.get("language"),
- "researchDepth": parameters.get("researchDepth", "general"),
- "resultFormat": "json"
- }
- actionDocument = ActionDocument(
- documentName=meaningfulName,
- documentData=result,
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[actionDocument])
-
- except Exception as e:
- logger.error(f"Error in web research: {str(e)}")
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass
- return ActionResult.isFailure(error=str(e))
-
-
- # ============================================================================
- # Document Transformation Wrappers
- # ============================================================================
-
- @action
- async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Summarize one or more documents, extracting key points and main ideas.
- - Input requirements: documentList (required); optional summaryLength, focus.
- - Output format: Text document with summary (default: txt, can be overridden with resultType).
-
- Parameters:
- - documentList (list, required): Document reference(s) to summarize.
- - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
- - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
- - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- summaryLength = parameters.get("summaryLength", "medium")
- focus = parameters.get("focus")
- resultType = parameters.get("resultType", "txt")
-
- lengthInstructions = {
- "brief": "Create a brief summary (2-3 paragraphs)",
- "medium": "Create a medium-length summary (comprehensive but concise)",
- "detailed": "Create a detailed summary covering all major points"
- }
- lengthInstruction = lengthInstructions.get(summaryLength.lower(), lengthInstructions["medium"])
-
- aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
- if focus:
- aiPrompt += f" Focus specifically on: {focus}."
- aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
-
-
- @action
- async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Translate documents to a target language while preserving formatting and structure.
- - Input requirements: documentList (required); targetLanguage (required).
- - Output format: Translated document in same format as input (default) or specified resultType.
-
- Parameters:
- - documentList (list, required): Document reference(s) to translate.
- - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
- - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
- - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
- - resultType (str, optional): Output file extension. If not specified, uses same format as input.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- targetLanguage = parameters.get("targetLanguage")
- if not targetLanguage:
- return ActionResult.isFailure(error="targetLanguage is required")
-
- sourceLanguage = parameters.get("sourceLanguage")
- preserveFormatting = parameters.get("preserveFormatting", True)
- resultType = parameters.get("resultType")
-
- aiPrompt = f"Translate the provided document(s) to {targetLanguage}."
- if sourceLanguage:
- aiPrompt += f" The source language is {sourceLanguage}."
- if preserveFormatting:
- aiPrompt += " Preserve all formatting, structure, tables, and layout exactly as they appear in the original document."
- else:
- aiPrompt += " Focus on accurate translation of content."
- aiPrompt += " Maintain the same document structure, headings, and organization."
-
- processParams = {
- "aiPrompt": aiPrompt,
- "documentList": documentList
- }
- if resultType:
- processParams["resultType"] = resultType
-
- return await self.process(processParams)
-
-
- @action
- async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
- - Input requirements: documentList (required); inputFormat and outputFormat (required).
- - Output format: Document in target format with specified formatting options.
- - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).
-
- Parameters:
- - documentList (list, required): Document reference(s) to convert.
- - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
- - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
- - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
- - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
- - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
- - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- inputFormat = parameters.get("inputFormat")
- outputFormat = parameters.get("outputFormat")
- if not inputFormat or not outputFormat:
- return ActionResult.isFailure(error="inputFormat and outputFormat are required")
-
- # Normalize formats (remove leading dot if present)
- normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
- normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()
-
- # Get documents
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- else:
- docRefList = DocumentReferenceList.from_string_list([documentList])
-
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
- if not chatDocuments:
- return ActionResult.isFailure(error="No documents found in documentList")
-
- # Check if input is standardized JSON format - if so, use direct rendering
- if normalizedInputFormat == "json" and len(chatDocuments) == 1:
- try:
- import json
- doc = chatDocuments[0]
- # ChatDocument doesn't have documentData - need to load file content using fileId
- docBytes = self.services.chat.getFileData(doc.fileId)
- if not docBytes:
- raise ValueError(f"No file data found for fileId={doc.fileId}")
-
- # Decode bytes to string
- docData = docBytes.decode('utf-8')
-
- # Try to parse as JSON
- if isinstance(docData, str):
- jsonData = json.loads(docData)
- elif isinstance(docData, dict):
- jsonData = docData
- else:
- jsonData = None
-
- # Check if it's standardized JSON format (has "documents" or "sections")
- if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)):
- # Use direct rendering - no AI call needed!
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generationService = GenerationService(self.services)
-
- # Ensure format is "documents" array
- if "documents" not in jsonData:
- jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}
-
- # Get title
- title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document")
-
- # Render with options
- renderOptions = {}
- if normalizedOutputFormat == "csv":
- renderOptions["delimiter"] = parameters.get("delimiter", ",")
- renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
- renderOptions["includeHeader"] = parameters.get("includeHeader", True)
-
- rendered_content, mime_type = await generationService.renderReport(
- jsonData, normalizedOutputFormat, title, None, None
- )
-
- # Apply CSV options if needed (renderer will handle them)
- if normalizedOutputFormat == "csv" and renderOptions:
- rendered_content = self._applyCsvOptions(rendered_content, renderOptions)
-
- from modules.datamodels.datamodelChat import ActionDocument
- validationMetadata = {
- "actionType": "ai.convert",
- "inputFormat": normalizedInputFormat,
- "outputFormat": normalizedOutputFormat,
- "hasSourceJson": True,
- "conversionType": "direct_rendering"
- }
- actionDoc = ActionDocument(
- documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
- documentData=rendered_content,
- mimeType=mime_type,
- sourceJson=jsonData, # Preserve source JSON for structure validation
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[actionDoc])
-
- except Exception as e:
- logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")
- # Fall through to AI-based conversion
-
- # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
- columnsPerRow = parameters.get("columnsPerRow")
- delimiter = parameters.get("delimiter", ",")
- includeHeader = parameters.get("includeHeader", True)
- language = parameters.get("language", "en")
-
- aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."
-
- if normalizedOutputFormat == "csv":
- aiPrompt += f" Use '{delimiter}' as the delimiter character."
- if columnsPerRow:
- aiPrompt += f" Format the output with {columnsPerRow} columns per row."
- if not includeHeader:
- aiPrompt += " Do not include a header row."
- else:
- aiPrompt += " Include a header row with column names."
-
- if language and language != "en":
- aiPrompt += f" Use language: {language}."
-
- aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": normalizedOutputFormat
- })
-
- def _applyCsvOptions(self, csvContent: str, options: Dict[str, Any]) -> str:
- """Apply CSV formatting options to rendered CSV content."""
- delimiter = options.get("delimiter", ",")
- columnsPerRow = options.get("columnsPerRow")
- includeHeader = options.get("includeHeader", True)
-
- # Check if any options need to be applied
- needsProcessing = (delimiter != ",") or (columnsPerRow is not None) or (not includeHeader)
-
- if not needsProcessing:
- return csvContent
-
- import csv
- import io
- # Re-read CSV with comma, write with new delimiter
- reader = csv.reader(io.StringIO(csvContent))
- output = io.StringIO()
- writer = csv.writer(output, delimiter=delimiter)
-
- rows = list(reader)
-
- # Handle header
- if not includeHeader and rows:
- rows = rows[1:] # Skip header
-
- # Handle columnsPerRow
- if columnsPerRow:
- newRows = []
- for row in rows:
- # Split row into chunks of columnsPerRow
- for i in range(0, len(row), columnsPerRow):
- chunk = row[i:i+columnsPerRow]
- # Pad to columnsPerRow if needed
- while len(chunk) < columnsPerRow:
- chunk.append("")
- newRows.append(chunk)
- rows = newRows
-
- for row in rows:
- writer.writerow(row)
-
- return output.getvalue()
-
-
- @action
- async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
- - Input requirements: documentList (required); targetFormat (required).
- - Output format: Document in target format.
-
- Parameters:
- - documentList (list, required): Document reference(s) to convert.
- - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
- - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- targetFormat = parameters.get("targetFormat")
- if not targetFormat:
- return ActionResult.isFailure(error="targetFormat is required")
-
- preserveStructure = parameters.get("preserveStructure", True)
-
- # Normalize format (remove leading dot if present)
- normalizedFormat = targetFormat.strip().lstrip('.').lower()
-
- aiPrompt = f"Convert the provided document(s) to {normalizedFormat.upper()} format."
- if preserveStructure:
- aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout."
- aiPrompt += " Ensure the converted document maintains the same content and information as the original."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": normalizedFormat
- })
-
-
- @action
- async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
- - Input requirements: documentList (required); optional dataStructure, fields.
- - Output format: JSON by default, or specified resultType.
-
- Parameters:
- - documentList (list, required): Document reference(s) to extract data from.
- - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
- - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
- - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
- """
- documentList = parameters.get("documentList", [])
- if not documentList:
- return ActionResult.isFailure(error="documentList is required")
-
- dataStructure = parameters.get("dataStructure", "nested")
- fields = parameters.get("fields", [])
- resultType = parameters.get("resultType", "json")
-
- aiPrompt = "Extract structured data from the provided document(s)."
- if fields:
- fieldsStr = ", ".join(fields)
- aiPrompt += f" Extract the following specific fields: {fieldsStr}."
- else:
- aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."
-
- structureInstructions = {
- "flat": "Use a flat key-value structure with simple properties.",
- "nested": "Use a nested JSON structure with logical grouping of related data.",
- "list": "Structure the data as a list/array of objects, one per entity or record."
- }
- aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}"
-
- aiPrompt += " Ensure all extracted data is accurate and complete."
-
- return await self.process({
- "aiPrompt": aiPrompt,
- "documentList": documentList,
- "resultType": resultType
- })
-
-
- # ============================================================================
- # Content Generation Wrapper
- # ============================================================================
-
- @action
- async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate documents from scratch or based on templates/inputs.
- - Input requirements: prompt or description (required); optional documentList (for templates/references).
- - Output format: Document in specified format (default: docx).
-
- Parameters:
- - prompt (str, required): Description of the document to generate.
- - documentList (list, optional): Template documents or reference documents to use as a guide.
- - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
- - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
- """
- prompt = parameters.get("prompt")
- if not prompt:
- return ActionResult.isFailure(error="prompt is required")
-
- documentList = parameters.get("documentList", [])
- documentType = parameters.get("documentType")
- resultType = parameters.get("resultType", "docx")
-
- aiPrompt = f"Generate a document based on the following requirements: {prompt}"
- if documentType:
- aiPrompt += f" Document type: {documentType}."
- if documentList:
- aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style."
- aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization."
-
- processParams = {
- "aiPrompt": aiPrompt,
- "resultType": resultType
- }
- if documentList:
- processParams["documentList"] = documentList
-
- return await self.process(processParams)
diff --git a/modules/workflows/methods/methodAi/actions/__init__.py b/modules/workflows/methods/methodAi/actions/__init__.py
index 8ebe6679..f0f18286 100644
--- a/modules/workflows/methods/methodAi/actions/__init__.py
+++ b/modules/workflows/methods/methodAi/actions/__init__.py
@@ -10,6 +10,7 @@ from .summarizeDocument import summarizeDocument
from .translateDocument import translateDocument
from .convertDocument import convertDocument
from .generateDocument import generateDocument
+from .generateCode import generateCode
__all__ = [
'process',
@@ -18,5 +19,6 @@ __all__ = [
'translateDocument',
'convertDocument',
'generateDocument',
+ 'generateCode',
]
diff --git a/modules/workflows/methods/methodAi/actions/convertDocument.py b/modules/workflows/methods/methodAi/actions/convertDocument.py
index e86b1d5a..9a7522ba 100644
--- a/modules/workflows/methods/methodAi/actions/convertDocument.py
+++ b/modules/workflows/methods/methodAi/actions/convertDocument.py
@@ -1,31 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Convert Document action for AI operations.
-Converts documents between different formats (PDF→Word, Excel→CSV, etc.).
-"""
-
import logging
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
-@action
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
- - Input requirements: documentList (required); targetFormat (required).
- - Output format: Document in target format.
-
- Parameters:
- - documentList (list, required): Document reference(s) to convert.
- - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
- - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
- """
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
diff --git a/modules/workflows/methods/methodAi/actions/generateCode.py b/modules/workflows/methods/methodAi/actions/generateCode.py
new file mode 100644
index 00000000..4f9bbd21
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/generateCode.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+import logging
+import time
+from typing import Dict, Any, Optional, List
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.datamodels.datamodelExtraction import ContentPart
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
+from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
+
+logger = logging.getLogger(__name__)
+
+async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
+ prompt = parameters.get("prompt")
+ if not prompt:
+ return ActionResult.isFailure(error="prompt is required")
+
+ documentList = parameters.get("documentList", [])
+ # Optional: if omitted, formats determined from prompt by AI
+ resultType = parameters.get("resultType")
+
+ if not resultType:
+ logger.debug("resultType not provided - formats will be determined from prompt by AI")
+
+ # Create operation ID for progress tracking
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"code_gen_{workflowId}_{int(time.time())}"
+ parentOperationId = parameters.get('parentOperationId')
+
+ try:
+ # Convert documentList to DocumentReferenceList if needed
+ docRefList = None
+ if documentList:
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+
+ if isinstance(documentList, DocumentReferenceList):
+ docRefList = documentList
+ elif isinstance(documentList, str):
+ docRefList = DocumentReferenceList.from_string_list([documentList])
+ elif isinstance(documentList, list):
+ docRefList = DocumentReferenceList.from_string_list(documentList)
+ else:
+ docRefList = DocumentReferenceList(references=[])
+
+ # Prepare title
+ title = "Generated Code"
+
+ # Call AI service with explicit code intent
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_GENERATE,
+ priority=PriorityEnum.BALANCED,
+ processingMode=ProcessingModeEnum.DETAILED
+ )
+
+ # outputFormat: Optional - if None, formats determined from prompt by AI
+ aiResponse: AiResponse = await self.services.ai.callAiContent(
+ prompt=prompt,
+ options=options,
+ documentList=docRefList,
+ outputFormat=resultType, # Can be None - AI determines from prompt
+ title=title,
+ parentOperationId=parentOperationId,
+ generationIntent="code" # Explicit intent, skips detection
+ )
+
+ # Convert AiResponse to ActionResult
+ documents = []
+
+ # Convert DocumentData to ActionDocument
+ if aiResponse.documents:
+ for docData in aiResponse.documents:
+ documents.append(ActionDocument(
+ documentName=docData.documentName,
+ documentData=docData.documentData,
+ mimeType=docData.mimeType,
+ sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None
+ ))
+
+ # If no documents but content exists, create a document from content
+ if not documents and aiResponse.content:
+ # Determine document name from metadata
+ resultTypeFallback = resultType or "txt" # Fallback for file naming
+ docName = f"code.{resultTypeFallback}"
+ if aiResponse.metadata and aiResponse.metadata.filename:
+ docName = aiResponse.metadata.filename
+ elif aiResponse.metadata and aiResponse.metadata.title:
+ import re
+ sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
+ sanitized = re.sub(r"_+", "_", sanitized).strip("_")
+ if sanitized:
+ if not sanitized.lower().endswith(f".{resultTypeFallback}"):
+ docName = f"{sanitized}.{resultTypeFallback}"
+ else:
+ docName = sanitized
+
+ # Determine mime type
+ mimeType = "text/plain"
+ if resultType == "html":
+ mimeType = "text/html"
+ elif resultType == "js":
+ mimeType = "application/javascript"
+ elif resultType == "py":
+ mimeType = "text/x-python"
+ elif resultType == "ts":
+ mimeType = "application/typescript"
+ elif resultType == "java":
+ mimeType = "text/x-java-source"
+ elif resultType == "cpp":
+ mimeType = "text/x-c++src"
+
+ documents.append(ActionDocument(
+ documentName=docName,
+ documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
+ mimeType=mimeType
+ ))
+
+ return ActionResult.isSuccess(documents=documents)
+
+ except Exception as e:
+ logger.error(f"Error in code generation: {str(e)}")
+ return ActionResult.isFailure(error=str(e))
+
diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py
index 6569ddab..65e95a32 100644
--- a/modules/workflows/methods/methodAi/actions/generateDocument.py
+++ b/modules/workflows/methods/methodAi/actions/generateDocument.py
@@ -1,15 +1,9 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Generate Document action for AI operations.
-Wrapper around AI service callAiContent method.
-"""
-
import logging
import time
from typing import Dict, Any, Optional, List
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
@@ -17,46 +11,18 @@ from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
logger = logging.getLogger(__name__)
-@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
- - Input requirements: prompt or description (required); optional documentList (for templates/references).
- - Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt).
-
- Parameters:
- - prompt (str, required): Description of the document to generate.
- - documentList (list, optional): Template documents or reference documents to use as a guide.
- - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
- - resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
- - maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
- - parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
- - progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
- """
prompt = parameters.get("prompt")
if not prompt:
return ActionResult.isFailure(error="prompt is required")
documentList = parameters.get("documentList", [])
documentType = parameters.get("documentType")
- resultType = parameters.get("resultType", "txt")
+ # Optional: if omitted, formats determined from prompt by AI
+ resultType = parameters.get("resultType")
- # Auto-detect format from prompt if not explicitly provided
- if resultType == "txt" and prompt:
- promptLower = prompt.lower()
- if "html" in promptLower or "html5" in promptLower:
- resultType = "html"
- logger.info(f"Auto-detected HTML format from prompt")
- elif "pdf" in promptLower:
- resultType = "pdf"
- logger.info(f"Auto-detected PDF format from prompt")
- elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"):
- resultType = "md"
- logger.info(f"Auto-detected Markdown format from prompt")
- elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower:
- resultType = "txt"
- logger.info(f"Auto-detected Text format from prompt")
+ if not resultType:
+ logger.debug("resultType not provided - formats will be determined from prompt by AI")
# Create operation ID for progress tracking
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
@@ -91,13 +57,15 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
compressContext=False
)
+ # outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt,
options=options,
documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E
- outputFormat=resultType,
+ outputFormat=resultType, # Can be None - AI determines from prompt
title=title,
- parentOperationId=parentOperationId
+ parentOperationId=parentOperationId,
+ generationIntent="document" # NEW: Explicit intent, skips detection
)
# Convert AiResponse to ActionResult
@@ -116,7 +84,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
# If no documents but content exists, create a document from content
if not documents and aiResponse.content:
# Determine document name from metadata
- docName = f"document.{resultType}"
+ resultTypeFallback = resultType or "txt" # Fallback for file naming
+ docName = f"document.{resultTypeFallback}"
if aiResponse.metadata and aiResponse.metadata.filename:
docName = aiResponse.metadata.filename
elif aiResponse.metadata and aiResponse.metadata.title:
@@ -124,8 +93,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
- if not sanitized.lower().endswith(f".{resultType}"):
- docName = f"{sanitized}.{resultType}"
+ if not sanitized.lower().endswith(f".{resultTypeFallback}"):
+ docName = f"{sanitized}.{resultTypeFallback}"
else:
docName = sanitized
diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py
index 807c1a64..0dd37ce3 100644
--- a/modules/workflows/methods/methodAi/actions/process.py
+++ b/modules/workflows/methods/methodAi/actions/process.py
@@ -1,36 +1,17 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Process action for AI operations.
-Universal AI document processing action.
-"""
-
import logging
import time
import json
from typing import Dict, Any, List, Optional
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions
from modules.datamodels.datamodelExtraction import ContentPart
logger = logging.getLogger(__name__)
-@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
- - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
- - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
- - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
-
- Parameters:
- - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
- - documentList (list, optional): Document reference(s) in any format to use as input/context.
- - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
- """
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
@@ -73,8 +54,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
logger.error(f"Invalid documentList type: {type(documentListParam)}")
documentList = DocumentReferenceList(references=[])
- resultType = parameters.get("resultType", "txt")
-
+ # Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
+ resultType = parameters.get("resultType")
if not aiPrompt:
logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
@@ -82,60 +63,71 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
error="AI prompt is required"
)
- # Determine output extension and default MIME type without duplicating service logic
- normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
- output_extension = f".{normalized_result_type}"
- output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
- logger.info(f"Using result type: {resultType} -> {output_extension}")
+ # Handle optional resultType: if None, formats determined from prompt by AI
+ if resultType:
+ normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
+ output_extension = f".{normalized_result_type}"
+ output_format = output_extension.replace('.', '') or 'txt'
+ logger.info(f"Using result type: {resultType} -> {output_extension}")
+ else:
+ # No format specified - AI will determine formats from prompt
+ normalized_result_type = None
+ output_extension = None
+ output_format = None
+ logger.debug("resultType not provided - formats will be determined from prompt by AI")
- # Check if contentParts are already provided (from context.extractContent or other sources)
+ output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
+
+ # Phase 7.3: Pass both documentList and contentParts to AI service
+ # (Extraction logic removed - handled by AI service)
contentParts: Optional[List[ContentPart]] = None
if "contentParts" in parameters:
- contentParts = parameters.get("contentParts")
- if contentParts and not isinstance(contentParts, list):
- # Try to extract from ContentExtracted if it's an ActionDocument
- if hasattr(contentParts, 'parts'):
- contentParts = contentParts.parts
+ contentPartsParam = parameters.get("contentParts")
+ if contentPartsParam:
+ if isinstance(contentPartsParam, list):
+ contentParts = contentPartsParam
+ elif hasattr(contentPartsParam, 'parts'):
+ # Extract from ContentExtracted if it's an ActionDocument
+ contentParts = contentPartsParam.parts
else:
- logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
+ logger.warning(f"Invalid contentParts type: {type(contentPartsParam)}, treating as empty")
contentParts = None
# Update progress - preparing AI call
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
- # Build options
- output_format = output_extension.replace('.', '') or 'txt'
+ # Detect image generation from resultType (if provided)
+ imageFormats = ["png", "jpg", "jpeg", "gif", "webp"]
+ isImageGeneration = normalized_result_type in imageFormats if normalized_result_type else False
+
+ # Build options with correct operationType
+ from modules.datamodels.datamodelAi import OperationTypeEnum
+ # resultFormat in options can be None - formats will be determined by AI if not provided
options = AiCallOptions(
- resultFormat=output_format
+ resultFormat=output_format, # Can be None - formats determined by AI
+ operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
)
+
+ # Get generationIntent from parameters (required for DATA_GENERATE)
+ # Default to "document" if not provided (most common use case)
+ # For code generation, use ai.generateCode action or explicitly pass generationIntent="code"
+ generationIntent = parameters.get("generationIntent", "document")
# Update progress - calling AI
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
- # Use unified callAiContent method
- # If contentParts provided (pre-extracted), use them directly
- # Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
- # Note: ContentExtracted documents (from context.extractContent) are now handled
- # automatically in _extractAndPrepareContent() (Phase 5B)
- if contentParts:
- # Pre-extracted ContentParts - use them directly
- aiResponse = await self.services.ai.callAiContent(
- prompt=aiPrompt,
- options=options,
- contentParts=contentParts, # Pre-extracted ContentParts
- outputFormat=output_format,
- parentOperationId=operationId
- )
- else:
- # Pass documentList - callAiContent handles Phases 5A-5E internally
- # This includes automatic detection of ContentExtracted documents
- aiResponse = await self.services.ai.callAiContent(
- prompt=aiPrompt,
- options=options,
- documentList=documentList, # callAiContent macht Phasen 5A-5E
- outputFormat=output_format,
- parentOperationId=operationId
- )
+ # Use unified callAiContent method with BOTH documentList and contentParts
+ # Extraction is handled by AI service - no extraction here
+ # outputFormat: Optional - if None, formats determined from prompt by AI
+ aiResponse = await self.services.ai.callAiContent(
+ prompt=aiPrompt,
+ options=options,
+ documentList=documentList, # Pass documentList - AI service handles extraction
+ contentParts=contentParts, # Pass contentParts if provided (or None)
+ outputFormat=output_format, # Can be None - AI determines from prompt
+ parentOperationId=operationId,
+ generationIntent=generationIntent # REQUIRED for DATA_GENERATE
+ )
# Update progress - processing result
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
@@ -162,7 +154,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
final_documents = action_documents
else:
# Text response - create document from content
- extension = output_extension.lstrip('.')
+ # If no extension provided, use "txt" (required for filename)
+ extension = output_extension.lstrip('.') if output_extension else "txt"
meaningful_name = self._generateMeaningfulFileName(
base_name="ai",
extension=extension,
@@ -170,8 +163,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
)
validationMetadata = {
"actionType": "ai.process",
- "resultType": normalized_result_type,
- "outputFormat": output_format,
+ "resultType": normalized_result_type if normalized_result_type else None,
+ "outputFormat": output_format if output_format else None,
"hasDocuments": False,
"contentType": "text"
}
diff --git a/modules/workflows/methods/methodAi/actions/summarizeDocument.py b/modules/workflows/methods/methodAi/actions/summarizeDocument.py
index 80588712..619e80c2 100644
--- a/modules/workflows/methods/methodAi/actions/summarizeDocument.py
+++ b/modules/workflows/methods/methodAi/actions/summarizeDocument.py
@@ -1,32 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Summarize Document action for AI operations.
-Summarizes one or more documents, extracting key points and main ideas.
-"""
-
import logging
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
-@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Summarize one or more documents, extracting key points and main ideas.
- - Input requirements: documentList (required); optional summaryLength, focus.
- - Output format: Text document with summary (default: txt, can be overridden with resultType).
-
- Parameters:
- - documentList (list, required): Document reference(s) to summarize.
- - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
- - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
- - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
- """
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
@@ -50,6 +31,7 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
- "resultType": resultType
+ "resultType": resultType,
+ "generationIntent": "document" # NEW: Explicit intent
})
diff --git a/modules/workflows/methods/methodAi/actions/translateDocument.py b/modules/workflows/methods/methodAi/actions/translateDocument.py
index 12264e39..7388dcc5 100644
--- a/modules/workflows/methods/methodAi/actions/translateDocument.py
+++ b/modules/workflows/methods/methodAi/actions/translateDocument.py
@@ -1,33 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Translate Document action for AI operations.
-Translates documents to a target language while preserving formatting and structure.
-"""
-
import logging
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
-@action
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Translate documents to a target language while preserving formatting and structure.
- - Input requirements: documentList (required); targetLanguage (required).
- - Output format: Translated document in same format as input (default) or specified resultType.
-
- Parameters:
- - documentList (list, required): Document reference(s) to translate.
- - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
- - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
- - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
- - resultType (str, optional): Output file extension. If not specified, uses same format as input.
- """
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
@@ -51,7 +31,8 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
processParams = {
"aiPrompt": aiPrompt,
- "documentList": documentList
+ "documentList": documentList,
+ "generationIntent": "document" # NEW: Explicit intent
}
if resultType:
processParams["resultType"] = resultType
diff --git a/modules/workflows/methods/methodAi/actions/webResearch.py b/modules/workflows/methods/methodAi/actions/webResearch.py
index 2bd5c3dd..62b43bce 100644
--- a/modules/workflows/methods/methodAi/actions/webResearch.py
+++ b/modules/workflows/methods/methodAi/actions/webResearch.py
@@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Web Research action for AI operations.
-Web research with two-step process: search for URLs, then crawl content.
-"""
-
import logging
import time
import re
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Web research with two-step process: search for URLs, then crawl content.
- - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
- - Output format: JSON with research results including URLs and content.
-
- Parameters:
- - prompt (str, required): Natural language research instruction.
- - urlList (list, optional): Specific URLs to crawl, if needed.
- - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
- - language (str, optional): Language code (lowercase, e.g., de, en, fr).
- - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
- """
try:
prompt = parameters.get("prompt")
if not prompt:
diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py
index 881b007d..234d573b 100644
--- a/modules/workflows/methods/methodAi/methodAi.py
+++ b/modules/workflows/methods/methodAi/methodAi.py
@@ -17,6 +17,7 @@ from .actions.summarizeDocument import summarizeDocument
from .actions.translateDocument import translateDocument
from .actions.convertDocument import convertDocument
from .actions.generateDocument import generateDocument
+from .actions.generateCode import generateCode
logger = logging.getLogger(__name__)
@@ -35,7 +36,8 @@ class MethodAi(MethodBase):
self._actions = {
"process": WorkflowActionDefinition(
actionId="ai.process",
- description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt",
+ description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt. If the prompt specifies document formats to deliver, include them in the prompt",
+ dynamicMode=True,
parameters={
"aiPrompt": WorkflowActionParameter(
name="aiPrompt",
@@ -58,14 +60,31 @@ class MethodAi(MethodBase):
frontendOptions=["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"],
required=False,
default="txt",
- description="Output file extension. All output documents will use this format"
- )
+ description="Output file extension. Optional: if omitted, formats are determined from prompt by AI. Default \"txt\" is validation fallback only. With per-document format determination, AI can determine different formats for different documents based on prompt."
+ ),
+ "generationIntent": WorkflowActionParameter(
+ name="generationIntent",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["document", "code", "image"],
+ required=False,
+ default="document",
+ description="Explicit generation intent (\"document\" | \"code\" | \"image\"). Required for DATA_GENERATE operations. Defaults to \"document\" if not provided. For code generation, use ai.generateCode action or explicitly pass generationIntent=\"code\". For IMAGE_GENERATE operations, this parameter is ignored."
+ ),
+ "contentParts": WorkflowActionParameter(
+ name="contentParts",
+ type="List[ContentPart]",
+ frontendType=FrontendType.HIDDEN,
+ required=False,
+ description="Pre-extracted content parts (internal parameter, typically passed between actions). If provided, these will be used instead of extracting from documentList. Can be a list of ContentPart objects or an object with a 'parts' attribute."
+ ),
},
execute=process.__get__(self, self.__class__)
),
"webResearch": WorkflowActionDefinition(
actionId="ai.webResearch",
description="Web research with two-step process: search for URLs, then crawl content",
+ dynamicMode=True,
parameters={
"prompt": WorkflowActionParameter(
name="prompt",
@@ -110,7 +129,8 @@ class MethodAi(MethodBase):
),
"summarizeDocument": WorkflowActionDefinition(
actionId="ai.summarizeDocument",
- description="Summarize one or more documents, extracting key points and main ideas",
+ description="Summarize one or more documents, extracting key points and main ideas. If the prompt specifies document formats to deliver, include them in the prompt",
+ dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
@@ -150,6 +170,7 @@ class MethodAi(MethodBase):
"translateDocument": WorkflowActionDefinition(
actionId="ai.translateDocument",
description="Translate documents to a target language while preserving formatting and structure",
+ dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
@@ -193,6 +214,7 @@ class MethodAi(MethodBase):
"convertDocument": WorkflowActionDefinition(
actionId="ai.convertDocument",
description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)",
+ dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
@@ -222,7 +244,8 @@ class MethodAi(MethodBase):
),
"generateDocument": WorkflowActionDefinition(
actionId="ai.generateDocument",
- description="Generate documents from scratch or based on templates/inputs",
+ description="Generate documents from scratch or based on templates/inputs. If the prompt specifies document formats to deliver, include them in the prompt",
+ dynamicMode=True,
parameters={
"prompt": WorkflowActionParameter(
name="prompt",
@@ -252,10 +275,40 @@ class MethodAi(MethodBase):
frontendType=FrontendType.TEXT,
required=False,
default="txt",
- description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Any format supported by renderers is acceptable. Default: txt"
+ description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Optional: if omitted, formats are determined from prompt by AI. Default \"txt\" is validation fallback only. With per-document format determination, AI can determine different formats for different documents based on prompt."
)
},
execute=generateDocument.__get__(self, self.__class__)
+ ),
+ "generateCode": WorkflowActionDefinition(
+ actionId="ai.generateCode",
+ description="Generate code files - explicitly sets intent to 'code'. If the prompt specifies file formats to deliver, include them in the prompt",
+ dynamicMode=True,
+ parameters={
+ "prompt": WorkflowActionParameter(
+ name="prompt",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=True,
+ description="Description of code to generate"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=False,
+ description="Reference documents"
+ ),
+ "resultType": WorkflowActionParameter(
+ name="resultType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
+ required=False,
+ description="Output format (html, js, py, etc.). Optional: if omitted, formats are determined from prompt by AI. With per-document format determination, AI can determine different formats for different documents based on prompt."
+ )
+ },
+ execute=generateCode.__get__(self, self.__class__)
)
}
@@ -269,6 +322,7 @@ class MethodAi(MethodBase):
self.translateDocument = translateDocument.__get__(self, self.__class__)
self.convertDocument = convertDocument.__get__(self, self.__class__)
self.generateDocument = generateDocument.__get__(self, self.__class__)
+ self.generateCode = generateCode.__get__(self, self.__class__)
def _format_timestamp_for_filename(self) -> str:
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
diff --git a/modules/workflows/methods/methodContext.py.old b/modules/workflows/methods/methodContext.py.old
deleted file mode 100644
index 0c7e1cae..00000000
--- a/modules/workflows/methods/methodContext.py.old
+++ /dev/null
@@ -1,460 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-Context and workflow information method module.
-Handles workflow context queries and document indexing.
-"""
-
-import time
-import json
-import logging
-import aiohttp
-from typing import Dict, Any, List
-from datetime import datetime, UTC
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
-from modules.shared.configuration import APP_CONFIG
-
-logger = logging.getLogger(__name__)
-
-class MethodContext(MethodBase):
- """Context and workflow information methods."""
-
- def __init__(self, services):
- super().__init__(services)
- self.name = "context"
- self.description = "Context and workflow information methods"
-
- @action
- async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
- - Input requirements: No input documents required. Optional resultType parameter.
- - Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
-
- Parameters:
- - resultType (str, optional): Output format (json, txt, md). Default: json.
- """
- try:
- workflow = self.services.workflow
- if not workflow:
- return ActionResult.isFailure(
- error="No workflow available"
- )
-
- resultType = parameters.get("resultType", "json").lower().strip().lstrip('.')
-
- # Get available documents index from chat service
- documentsIndex = self.services.chat.getAvailableDocuments(workflow)
-
- if not documentsIndex or documentsIndex == "No documents available" or documentsIndex == "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.":
- # Return empty index structure
- if resultType == "json":
- indexData = {
- "workflowId": getattr(workflow, 'id', 'unknown'),
- "totalDocuments": 0,
- "rounds": [],
- "documentReferences": []
- }
- indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
- else:
- indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
- else:
- # Parse the document index string to extract structured information
- indexData = self._parseDocumentIndex(documentsIndex, workflow)
-
- if resultType == "json":
- indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
- elif resultType == "md":
- indexContent = self._formatAsMarkdown(indexData)
- else: # txt
- indexContent = self._formatAsText(indexData, documentsIndex)
-
- # Generate meaningful filename
- workflowContext = self.services.chat.getWorkflowContext()
- filename = self._generateMeaningfulFileName(
- "document_index",
- resultType if resultType in ["json", "txt", "md"] else "json",
- workflowContext,
- "getDocumentIndex"
- )
-
- validationMetadata = {
- "actionType": "context.getDocumentIndex",
- "resultType": resultType,
- "workflowId": getattr(workflow, 'id', 'unknown'),
- "totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
- }
-
- # Create ActionDocument
- document = ActionDocument(
- documentName=filename,
- documentData=indexContent,
- mimeType="application/json" if resultType == "json" else "text/plain",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- logger.error(f"Error generating document index: {str(e)}")
- return ActionResult.isFailure(
- error=f"Failed to generate document index: {str(e)}"
- )
-
- def _parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]:
- """Parse the document index string into structured data."""
- try:
- indexData = {
- "workflowId": getattr(workflow, 'id', 'unknown'),
- "generatedAt": datetime.now(UTC).isoformat(),
- "totalDocuments": 0,
- "rounds": [],
- "documentReferences": []
- }
-
- # Extract document references from the index string
- lines = documentsIndex.split('\n')
- currentRound = None
- currentDocList = None
-
- for line in lines:
- line = line.strip()
- if not line:
- continue
-
- # Check for round headers
- if "Current round documents:" in line:
- currentRound = "current"
- continue
- elif "Past rounds documents:" in line:
- currentRound = "past"
- continue
-
- # Check for document list references (docList:...)
- if line.startswith("- docList:"):
- docListRef = line.replace("- docList:", "").strip()
- currentDocList = {
- "reference": docListRef,
- "round": currentRound,
- "documents": []
- }
- indexData["rounds"].append(currentDocList)
- continue
-
- # Check for individual document references (docItem:...)
- if line.startswith(" - docItem:") or line.startswith("- docItem:"):
- docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip()
- indexData["documentReferences"].append({
- "reference": docItemRef,
- "round": currentRound,
- "docList": currentDocList["reference"] if currentDocList else None
- })
- indexData["totalDocuments"] += 1
- if currentDocList:
- currentDocList["documents"].append(docItemRef)
-
- return indexData
-
- except Exception as e:
- logger.error(f"Error parsing document index: {str(e)}")
- return {
- "workflowId": getattr(workflow, 'id', 'unknown'),
- "error": f"Failed to parse document index: {str(e)}",
- "rawIndex": documentsIndex
- }
-
- def _formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
- """Format document index as Markdown."""
- try:
- md = f"# Document Index\n\n"
- md += f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n"
- md += f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n"
- md += f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n"
-
- if indexData.get('rounds'):
- md += "## Documents by Round\n\n"
- for roundInfo in indexData['rounds']:
- roundLabel = roundInfo.get('round', 'unknown').title()
- md += f"### {roundLabel} Round\n\n"
- md += f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n"
- if roundInfo.get('documents'):
- md += "**Documents:**\n\n"
- for docRef in roundInfo['documents']:
- md += f"- `{docRef}`\n"
- md += "\n"
-
- if indexData.get('documentReferences'):
- md += "## All Document References\n\n"
- for docRef in indexData['documentReferences']:
- md += f"- `{docRef.get('reference', 'unknown')}`\n"
-
- return md
-
- except Exception as e:
- logger.error(f"Error formatting as Markdown: {str(e)}")
- return f"# Document Index\n\nError formatting index: {str(e)}\n"
-
- def _formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
- """Format document index as plain text."""
- try:
- text = "Document Index\n"
- text += "=" * 50 + "\n\n"
- text += f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n"
- text += f"Generated At: {indexData.get('generatedAt', 'unknown')}\n"
- text += f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n"
-
- # Include the raw formatted index for readability
- text += rawIndex
-
- return text
-
- except Exception as e:
- logger.error(f"Error formatting as text: {str(e)}")
- return f"Document Index\n\nError formatting index: {str(e)}\n\nRaw index:\n{rawIndex}\n"
-
- @action
- async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Extract content from documents (separate from AI calls).
-
- This action performs pure content extraction without AI processing.
- The extracted ContentParts can then be used by subsequent AI processing actions.
-
- Parameters:
- - documentList (list, required): Document reference(s) to extract content from.
- - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
-
- Returns:
- - ActionResult with ActionDocument containing ContentExtracted objects
- - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
- """
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"context_extract_{workflowId}_{int(time.time())}"
-
- # Extract documentList from parameters dict
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- documentListParam = parameters.get("documentList")
- if not documentListParam:
- return ActionResult.isFailure(error="documentList is required")
-
- # Convert to DocumentReferenceList if needed
- if isinstance(documentListParam, DocumentReferenceList):
- documentList = documentListParam
- elif isinstance(documentListParam, str):
- documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
- documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
- return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Extracting content from documents",
- "Content Extraction",
- f"Documents: {len(documentList.references)}",
- parentOperationId=parentOperationId
- )
-
- # Get ChatDocuments from documentList
- self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
-
- if not chatDocuments:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No documents found in documentList")
-
- logger.info(f"Extracting content from {len(chatDocuments)} documents")
-
- # Prepare extraction options
- self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
- extractionOptionsParam = parameters.get("extractionOptions")
-
- # Convert dict to ExtractionOptions object if needed, or create defaults
- if extractionOptionsParam:
- if isinstance(extractionOptionsParam, dict):
- # Convert dict to ExtractionOptions object
- extractionOptions = ExtractionOptions(**extractionOptionsParam)
- elif isinstance(extractionOptionsParam, ExtractionOptions):
- extractionOptions = extractionOptionsParam
- else:
- # Invalid type, use defaults
- extractionOptions = None
- else:
- extractionOptions = None
-
- # If extractionOptions not provided, create defaults
- if not extractionOptions:
- # Default extraction options for pure content extraction (no AI processing)
- extractionOptions = ExtractionOptions(
- prompt="Extract all content from the document",
- mergeStrategy=MergeStrategy(
- mergeType="concatenate",
- groupBy="typeGroup",
- orderBy="id"
- ),
- processDocumentsIndividually=True
- )
-
- # Call extraction service with hierarchical progress logging
- self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
- # Pass operationId for hierarchical per-document progress logging
- extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
-
- # Build ActionDocuments from ContentExtracted results
- self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
- actionDocuments = []
- # Map extracted results back to original documents by index (results are in same order)
- for i, extracted in enumerate(extractedResults):
- # Get original document name if available
- originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
- if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
- # Use original filename with "extracted_" prefix
- baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
- documentName = f"{baseName}_extracted_{extracted.id}.json"
- else:
- # Fallback to generic name with index
- documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
-
- # Store ContentExtracted object in ActionDocument.documentData
- validationMetadata = {
- "actionType": "context.extractContent",
- "documentIndex": i,
- "extractedId": extracted.id,
- "partCount": len(extracted.parts) if extracted.parts else 0,
- "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
- }
- actionDoc = ActionDocument(
- documentName=documentName,
- documentData=extracted, # ContentExtracted object
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
- actionDocuments.append(actionDoc)
-
- self.services.chat.progressLogFinish(operationId, True)
-
- return ActionResult.isSuccess(documents=actionDocuments)
-
- except Exception as e:
- logger.error(f"Error in content extraction: {str(e)}")
-
- # Complete progress tracking with failure
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass # Don't fail on progress logging errors
-
- return ActionResult.isFailure(error=str(e))
-
- @action
- async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Trigger preprocessing server at customer tenant to update database with configuration.
-
- This action makes a POST request to the preprocessing server endpoint with the provided
- configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
-
- Parameters:
- - endpoint (str, required): The full URL endpoint for the preprocessing server API.
- - configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
- - authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
-
- Returns:
- - ActionResult with ActionDocument containing "ok" on success, or error message on failure.
- """
- try:
- endpoint = parameters.get("endpoint")
- if not endpoint:
- return ActionResult.isFailure(error="endpoint parameter is required")
-
- configJsonParam = parameters.get("configJson")
- if not configJsonParam:
- return ActionResult.isFailure(error="configJson parameter is required")
-
- authSecretConfigKey = parameters.get("authSecretConfigKey")
- if not authSecretConfigKey:
- return ActionResult.isFailure(error="authSecretConfigKey parameter is required")
-
- # Handle configJson as either dict or JSON string
- if isinstance(configJsonParam, str):
- try:
- configJson = json.loads(configJsonParam)
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"configJson is not valid JSON: {str(e)}")
- elif isinstance(configJsonParam, dict):
- configJson = configJsonParam
- else:
- return ActionResult.isFailure(error=f"configJson must be a dict or JSON string, got {type(configJsonParam)}")
-
- # Get authorization secret from APP_CONFIG using the provided config key
- authSecret = APP_CONFIG.get(authSecretConfigKey)
- if not authSecret:
- errorMsg = f"{authSecretConfigKey} not found in APP_CONFIG"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- # Prepare headers with authorization (default headers as in original function)
- headers = {
- "X-PP-API-Key": authSecret,
- "Content-Type": "application/json"
- }
-
- # Make POST request
- timeout = aiohttp.ClientTimeout(total=60)
- async with aiohttp.ClientSession(timeout=timeout) as session:
- async with session.post(
- endpoint,
- headers=headers,
- json=configJson
- ) as response:
- if response.status in [200, 201]:
- responseText = await response.text()
- logger.info(f"Preprocessing server trigger successful: {response.status}")
- logger.debug(f"Response: {responseText}")
-
- # Generate meaningful filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "preprocessing_result",
- "txt",
- workflowContext,
- "triggerPreprocessingServer"
- )
-
- # Create validation metadata
- validationMetadata = self._createValidationMetadata(
- "triggerPreprocessingServer",
- endpoint=endpoint,
- statusCode=response.status,
- responseText=responseText
- )
-
- # Return success with "ok" document
- document = ActionDocument(
- documentName=filename,
- documentData="ok",
- mimeType="text/plain",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
- else:
- errorText = await response.text()
- errorMsg = f"Preprocessing server trigger failed: {response.status} - {errorText}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- except Exception as e:
- errorMsg = f"Error triggering preprocessing server: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py
index 949ac63d..5b90ce13 100644
--- a/modules/workflows/methods/methodContext/actions/extractContent.py
+++ b/modules/workflows/methods/methodContext/actions/extractContent.py
@@ -1,49 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Extract Content action for Context operations.
-Extracts content from documents (separate from AI calls).
-"""
-
import logging
import time
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentExtracted, ContentPart
logger = logging.getLogger(__name__)
-@action
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Extract raw content parts from documents without AI processing.
-
- This action performs pure content extraction WITHOUT AI/OCR processing.
- It returns ContentParts with different typeGroups:
- - "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.)
- - "image": Images as base64-encoded data (NOT converted to text, no OCR)
- - "table": Tables as structured data
- - "structure": Structured content (JSON, etc.)
- - "container": Container elements (PDF pages, etc.)
-
- IMPORTANT:
- - Images are returned as base64 data, NOT as extracted text
- - No OCR is performed - images are preserved as visual elements
- - Text extraction only works for text-based formats (not images)
- - The extracted ContentParts can then be used by subsequent AI processing actions
-
- Parameters:
- - documentList (list, required): Document reference(s) to extract content from.
- - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
-
- Returns:
- - ActionResult with ActionDocument containing ContentExtracted objects
- - ContentExtracted.parts contains List[ContentPart] with various typeGroups
- - Each ContentPart has a typeGroup indicating its type (text, image, table, etc.)
- """
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
diff --git a/modules/workflows/methods/methodContext/actions/getDocumentIndex.py b/modules/workflows/methods/methodContext/actions/getDocumentIndex.py
index 6c9a6700..9991285b 100644
--- a/modules/workflows/methods/methodContext/actions/getDocumentIndex.py
+++ b/modules/workflows/methods/methodContext/actions/getDocumentIndex.py
@@ -1,30 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Get Document Index action for Context operations.
-Generates a comprehensive index of all documents available in the current workflow.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
- - Input requirements: No input documents required. Optional resultType parameter.
- - Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
-
- Parameters:
- - resultType (str, optional): Output format (json, txt, md). Default: json.
- """
try:
workflow = self.services.workflow
if not workflow:
diff --git a/modules/workflows/methods/methodContext/actions/neutralizeData.py b/modules/workflows/methods/methodContext/actions/neutralizeData.py
index 240fe6b1..8e3b7185 100644
--- a/modules/workflows/methods/methodContext/actions/neutralizeData.py
+++ b/modules/workflows/methods/methodContext/actions/neutralizeData.py
@@ -1,35 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Neutralize Data action for Context operations.
-Neutralizes extracted content data from ContentExtracted documents.
-"""
-
import logging
import time
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
logger = logging.getLogger(__name__)
-@action
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Neutralize data from ContentExtracted documents.
-
- This action takes documents containing ContentExtracted objects (from extractContent)
- and neutralizes the text data in ContentPart.data fields.
-
- Parameters:
- - documentList (list, required): Document reference(s) containing ContentExtracted objects.
-
- Returns:
- - ActionResult with ActionDocument containing neutralized ContentExtracted objects
- """
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
diff --git a/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py b/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
index 7ef16d5f..2f011a25 100644
--- a/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
+++ b/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
@@ -1,37 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Trigger Preprocessing Server action for Context operations.
-Triggers preprocessing server at customer tenant to update database with configuration.
-"""
-
import logging
import json
import aiohttp
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
-@action
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Trigger preprocessing server at customer tenant to update database with configuration.
-
- This action makes a POST request to the preprocessing server endpoint with the provided
- configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
-
- Parameters:
- - endpoint (str, required): The full URL endpoint for the preprocessing server API.
- - configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
- - authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
-
- Returns:
- - ActionResult with ActionDocument containing "ok" on success, or error message on failure.
- """
try:
endpoint = parameters.get("endpoint")
if not endpoint:
diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py
index 942f3f85..61afaf2e 100644
--- a/modules/workflows/methods/methodContext/methodContext.py
+++ b/modules/workflows/methods/methodContext/methodContext.py
@@ -35,6 +35,7 @@ class MethodContext(MethodBase):
"getDocumentIndex": WorkflowActionDefinition(
actionId="context.getDocumentIndex",
description="Generate a comprehensive index of all documents available in the current workflow",
+ dynamicMode=True,
parameters={
"resultType": WorkflowActionParameter(
name="resultType",
@@ -51,6 +52,7 @@ class MethodContext(MethodBase):
"extractContent": WorkflowActionDefinition(
actionId="context.extractContent",
description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.",
+ dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
diff --git a/modules/workflows/methods/methodJira.py.old b/modules/workflows/methods/methodJira.py.old
deleted file mode 100644
index 2be46c1f..00000000
--- a/modules/workflows/methods/methodJira.py.old
+++ /dev/null
@@ -1,1101 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-JIRA operations method module.
-Handles JIRA ticket operations including connection, export, import, and data processing.
-"""
-
-import logging
-import json
-import io
-import pandas as pd
-import csv as csv_module
-from io import StringIO, BytesIO
-from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-from modules.datamodels.datamodelDocref import DocumentReferenceList
-from modules.shared.configuration import APP_CONFIG
-
-logger = logging.getLogger(__name__)
-
-class MethodJira(MethodBase):
- """JIRA operations methods."""
-
- def __init__(self, services):
- super().__init__(services)
- self.name = "jira"
- self.description = "JIRA operations methods"
- # Store connections in memory (keyed by connectionId)
- self._connections: Dict[str, Any] = {}
-
- def _convertAdfToText(self, adfData):
- """Convert Atlassian Document Format (ADF) to plain text.
-
- Based on Atlassian Document Format specification for JIRA fields.
- Handles paragraphs, lists, text formatting, and other ADF node types.
-
- Args:
- adfData: ADF object or None
-
- Returns:
- str: Plain text content, or empty string if None/invalid
- """
- if not adfData or not isinstance(adfData, dict):
- return ""
-
- if adfData.get("type") != "doc":
- return str(adfData) if adfData else ""
-
- content = adfData.get("content", [])
- if not isinstance(content, list):
- return ""
-
- def extractTextFromContent(contentList, listLevel=0):
- """Recursively extract text from ADF content with proper formatting."""
- textParts = []
- listCounter = 1
-
- for item in contentList:
- if not isinstance(item, dict):
- continue
-
- itemType = item.get("type", "")
-
- if itemType == "text":
- # Extract text content, preserving formatting
- text = item.get("text", "")
- marks = item.get("marks", [])
-
- # Handle text formatting (bold, italic, etc.)
- if marks:
- for mark in marks:
- if mark.get("type") == "strong":
- text = f"**{text}**"
- elif mark.get("type") == "em":
- text = f"*{text}*"
- elif mark.get("type") == "code":
- text = f"`{text}`"
- elif mark.get("type") == "link":
- attrs = mark.get("attrs", {})
- href = attrs.get("href", "")
- if href:
- text = f"[{text}]({href})"
-
- textParts.append(text)
-
- elif itemType == "hardBreak":
- textParts.append("\n")
-
- elif itemType == "paragraph":
- paragraphContent = item.get("content", [])
- if paragraphContent:
- paragraphText = extractTextFromContent(paragraphContent, listLevel)
- if paragraphText.strip():
- textParts.append(paragraphText)
-
- elif itemType == "bulletList":
- listContent = item.get("content", [])
- for listItem in listContent:
- if listItem.get("type") == "listItem":
- listItemContent = listItem.get("content", [])
- for listParagraph in listItemContent:
- if listParagraph.get("type") == "paragraph":
- listParagraphContent = listParagraph.get("content", [])
- if listParagraphContent:
- indent = " " * listLevel
- bulletText = extractTextFromContent(listParagraphContent, listLevel + 1)
- if bulletText.strip():
- textParts.append(f"{indent}• {bulletText}")
-
- elif itemType == "orderedList":
- listContent = item.get("content", [])
- for listItem in listContent:
- if listItem.get("type") == "listItem":
- listItemContent = listItem.get("content", [])
- for listParagraph in listItemContent:
- if listParagraph.get("type") == "paragraph":
- listParagraphContent = listParagraph.get("content", [])
- if listParagraphContent:
- indent = " " * listLevel
- orderedText = extractTextFromContent(listParagraphContent, listLevel + 1)
- if orderedText.strip():
- textParts.append(f"{indent}{listCounter}. {orderedText}")
- listCounter += 1
-
- elif itemType == "listItem":
- # Handle nested list items
- listItemContent = item.get("content", [])
- if listItemContent:
- textParts.append(extractTextFromContent(listItemContent, listLevel))
-
- elif itemType == "embedCard":
- # Handle embedded content (videos, etc.)
- attrs = item.get("attrs", {})
- url = attrs.get("url", "")
- if url:
- textParts.append(f"[Embedded Content: {url}]")
-
- elif itemType == "codeBlock":
- # Handle code blocks
- codeContent = item.get("content", [])
- if codeContent:
- codeText = extractTextFromContent(codeContent, listLevel)
- if codeText.strip():
- textParts.append(f"```\n{codeText}\n```")
-
- elif itemType == "blockquote":
- # Handle blockquotes
- quoteContent = item.get("content", [])
- if quoteContent:
- quoteText = extractTextFromContent(quoteContent, listLevel)
- if quoteText.strip():
- textParts.append(f"> {quoteText}")
-
- elif itemType == "heading":
- # Handle headings
- headingContent = item.get("content", [])
- if headingContent:
- headingText = extractTextFromContent(headingContent, listLevel)
- if headingText.strip():
- level = item.get("attrs", {}).get("level", 1)
- textParts.append(f"{'#' * level} {headingText}")
-
- elif itemType == "rule":
- # Handle horizontal rules
- textParts.append("---")
-
- else:
- # Handle unknown types by trying to extract content
- if "content" in item:
- contentText = extractTextFromContent(item.get("content", []), listLevel)
- if contentText.strip():
- textParts.append(contentText)
-
- return "\n".join(textParts)
-
- result = extractTextFromContent(content)
- return result.strip()
-
- def _getDocumentData(self, documentReference: str) -> Any:
- """Get document data from a document reference (string or document object)."""
- try:
- if isinstance(documentReference, str):
- # Get document from workflow
- documentList = DocumentReferenceList.from_string_list([documentReference])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
- if not chatDocuments or len(chatDocuments) == 0:
- return None
- document = chatDocuments[0]
- return document.documentData
- else:
- # Assume it's already a document object
- return documentReference.documentData if hasattr(documentReference, 'documentData') else documentReference
- except Exception as e:
- logger.error(f"Error getting document data: {str(e)}")
- return None
-
- def _parseJsonFromDocument(self, documentReference: str) -> Optional[Dict[str, Any]]:
- """Parse JSON from a document reference."""
- data = self._getDocumentData(documentReference)
- if data is None:
- return None
-
- if isinstance(data, str):
- try:
- return json.loads(data)
- except json.JSONDecodeError:
- return None
- elif isinstance(data, dict):
- return data
- else:
- return None
-
- @action
- async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Connect to JIRA instance and create ticket interface.
-
- Parameters:
- - apiUsername (str, required): JIRA API username/email
- - apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
- - apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
- - projectCode (str, required): JIRA project code (e.g., "DCS")
- - issueType (str, required): JIRA issue type (e.g., "Task")
- - taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict
-
- Returns:
- - ActionResult with ActionDocument containing connection ID
- """
- try:
- apiUsername = parameters.get("apiUsername")
- if not apiUsername:
- return ActionResult.isFailure(error="apiUsername parameter is required")
-
- apiTokenConfigKey = parameters.get("apiTokenConfigKey")
- if not apiTokenConfigKey:
- return ActionResult.isFailure(error="apiTokenConfigKey parameter is required")
-
- apiUrl = parameters.get("apiUrl")
- if not apiUrl:
- return ActionResult.isFailure(error="apiUrl parameter is required")
-
- projectCode = parameters.get("projectCode")
- if not projectCode:
- return ActionResult.isFailure(error="projectCode parameter is required")
-
- issueType = parameters.get("issueType")
- if not issueType:
- return ActionResult.isFailure(error="issueType parameter is required")
-
- taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
- if not taskSyncDefinitionParam:
- return ActionResult.isFailure(error="taskSyncDefinition parameter is required")
-
- # Parse taskSyncDefinition
- if isinstance(taskSyncDefinitionParam, str):
- try:
- taskSyncDefinition = json.loads(taskSyncDefinitionParam)
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"taskSyncDefinition is not valid JSON: {str(e)}")
- elif isinstance(taskSyncDefinitionParam, dict):
- taskSyncDefinition = taskSyncDefinitionParam
- else:
- return ActionResult.isFailure(error=f"taskSyncDefinition must be a dict or JSON string, got {type(taskSyncDefinitionParam)}")
-
- # Get API token from APP_CONFIG
- apiToken = APP_CONFIG.get(apiTokenConfigKey)
- if not apiToken:
- errorMsg = f"{apiTokenConfigKey} not found in APP_CONFIG"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- # Create ticket interface
- syncInterface = await self.services.ticket.connectTicket(
- taskSyncDefinition=taskSyncDefinition,
- connectorType="Jira",
- connectorParams={
- "apiUsername": apiUsername,
- "apiToken": apiToken,
- "apiUrl": apiUrl,
- "projectCode": projectCode,
- "ticketType": issueType,
- },
- )
-
- # Store connection with unique ID
- import uuid
- connectionId = str(uuid.uuid4())
- self._connections[connectionId] = {
- "interface": syncInterface,
- "taskSyncDefinition": taskSyncDefinition,
- "apiUrl": apiUrl,
- "projectCode": projectCode,
- }
-
- logger.info(f"JIRA connection established: {connectionId} (Project: {projectCode})")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "jira_connection",
- "json",
- workflowContext,
- "connectJira"
- )
-
- # Create connection info document
- connectionInfo = {
- "connectionId": connectionId,
- "apiUrl": apiUrl,
- "projectCode": projectCode,
- "issueType": issueType,
- }
-
- validationMetadata = self._createValidationMetadata(
- "connectJira",
- connectionId=connectionId,
- apiUrl=apiUrl,
- projectCode=projectCode
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(connectionInfo, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error connecting to JIRA: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Export tickets from JIRA as JSON list.
-
- Parameters:
- - connectionId (str, required): Connection ID from connectJira action result
- - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
-
- Returns:
- - ActionResult with ActionDocument containing list of tickets as JSON
- """
- try:
- connectionIdParam = parameters.get("connectionId")
- if not connectionIdParam:
- return ActionResult.isFailure(error="connectionId parameter is required")
-
- # Get connection ID from document if it's a reference
- connectionId = None
- if isinstance(connectionIdParam, str):
- # Try to parse from document reference
- connectionInfo = self._parseJsonFromDocument(connectionIdParam)
- if connectionInfo and "connectionId" in connectionInfo:
- connectionId = connectionInfo["connectionId"]
- else:
- # Assume it's the connection ID directly
- connectionId = connectionIdParam
-
- if not connectionId or connectionId not in self._connections:
- return ActionResult.isFailure(error=f"Connection ID {connectionIdParam} not found. Ensure connectJira was called first.")
-
- connection = self._connections[connectionId]
- syncInterface = connection["interface"]
-
- # Export tickets
- dataList = await syncInterface.exportTicketsAsList()
-
- logger.info(f"Exported {len(dataList)} tickets from JIRA")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "jira_tickets_export",
- "json",
- workflowContext,
- "exportTicketsAsJson"
- )
-
- validationMetadata = self._createValidationMetadata(
- "exportTicketsAsJson",
- connectionId=connectionId,
- ticketCount=len(dataList)
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(dataList, indent=2, ensure_ascii=False),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error exporting tickets from JIRA: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Import ticket data from JSON back to JIRA.
-
- Parameters:
- - connectionId (str, required): Connection ID from connectJira action result
- - ticketData (str, required): Document reference containing ticket data as JSON
- - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
-
- Returns:
- - ActionResult with ActionDocument containing import result with counts
- """
- try:
- connectionIdParam = parameters.get("connectionId")
- if not connectionIdParam:
- return ActionResult.isFailure(error="connectionId parameter is required")
-
- ticketDataParam = parameters.get("ticketData")
- if not ticketDataParam:
- return ActionResult.isFailure(error="ticketData parameter is required")
-
- # Get connection ID from document if it's a reference
- connectionId = None
- if isinstance(connectionIdParam, str):
- connectionInfo = self._parseJsonFromDocument(connectionIdParam)
- if connectionInfo and "connectionId" in connectionInfo:
- connectionId = connectionInfo["connectionId"]
- else:
- connectionId = connectionIdParam
-
- if not connectionId or connectionId not in self._connections:
- return ActionResult.isFailure(error=f"Connection ID {connectionIdParam} not found. Ensure connectJira was called first.")
-
- connection = self._connections[connectionId]
- syncInterface = connection["interface"]
-
- # Get ticket data from document
- ticketDataJson = self._parseJsonFromDocument(ticketDataParam)
- if ticketDataJson is None:
- return ActionResult.isFailure(error="Could not parse ticket data from document reference")
-
- # Ensure it's a list
- if not isinstance(ticketDataJson, list):
- return ActionResult.isFailure(error="ticketData must be a JSON array")
-
- # Import tickets
- await syncInterface.importListToTickets(ticketDataJson)
-
- logger.info(f"Imported {len(ticketDataJson)} tickets to JIRA")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "jira_import_result",
- "json",
- workflowContext,
- "importTicketsFromJson"
- )
-
- importResult = {
- "imported": len(ticketDataJson),
- "connectionId": connectionId,
- }
-
- validationMetadata = self._createValidationMetadata(
- "importTicketsFromJson",
- connectionId=connectionId,
- importedCount=len(ticketDataJson)
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(importResult, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error importing tickets to JIRA: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Merge JIRA export data with existing SharePoint data.
-
- Parameters:
- - jiraData (str, required): Document reference containing JIRA ticket data as JSON array
- - existingData (str, required): Document reference containing existing SharePoint data as JSON array
- - taskSyncDefinition (str or dict, required): Field mapping definition
- - idField (str, optional): Field name to use as ID for merging (default: "ID")
-
- Returns:
- - ActionResult with ActionDocument containing merged data and merge details
- """
- try:
- jiraDataParam = parameters.get("jiraData")
- if not jiraDataParam:
- return ActionResult.isFailure(error="jiraData parameter is required")
-
- existingDataParam = parameters.get("existingData")
- if not existingDataParam:
- return ActionResult.isFailure(error="existingData parameter is required")
-
- taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
- if not taskSyncDefinitionParam:
- return ActionResult.isFailure(error="taskSyncDefinition parameter is required")
-
- idField = parameters.get("idField", "ID")
-
- # Parse taskSyncDefinition
- if isinstance(taskSyncDefinitionParam, str):
- try:
- taskSyncDefinition = json.loads(taskSyncDefinitionParam)
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"taskSyncDefinition is not valid JSON: {str(e)}")
- elif isinstance(taskSyncDefinitionParam, dict):
- taskSyncDefinition = taskSyncDefinitionParam
- else:
- return ActionResult.isFailure(error=f"taskSyncDefinition must be a dict or JSON string, got {type(taskSyncDefinitionParam)}")
-
- # Get data from documents
- jiraDataJson = self._parseJsonFromDocument(jiraDataParam)
- if jiraDataJson is None or not isinstance(jiraDataJson, list):
- return ActionResult.isFailure(error="Could not parse jiraData as JSON array")
-
- existingDataJson = self._parseJsonFromDocument(existingDataParam)
- if existingDataJson is None or not isinstance(existingDataJson, list):
- # Empty existing data is OK
- existingDataJson = []
-
- # Perform merge
- existingLookup = {row.get(idField): row for row in existingDataJson if row.get(idField)}
- mergedData: List[dict] = []
- changes: List[str] = []
- updatedCount = addedCount = unchangedCount = 0
-
- for jiraRow in jiraDataJson:
- jiraId = jiraRow.get(idField)
- if jiraId and jiraId in existingLookup:
- existingRow = existingLookup[jiraId].copy()
- rowChanges: List[str] = []
-
- for fieldName, fieldConfig in taskSyncDefinition.items():
- if fieldConfig[0] == 'get':
- oldValue = "" if existingRow.get(fieldName) is None else str(existingRow.get(fieldName))
- newValue = "" if jiraRow.get(fieldName) is None else str(jiraRow.get(fieldName))
-
- # Convert ADF data to readable text for logging
- if isinstance(newValue, dict) and newValue.get("type") == "doc":
- newValueReadable = self._convertAdfToText(newValue)
- if oldValue != newValueReadable:
- rowChanges.append(f"{fieldName}: '{oldValue[:100]}...' -> '{newValueReadable[:100]}...'")
- elif oldValue != newValue:
- # Truncate long values for logging
- oldTruncated = oldValue[:100] + "..." if len(oldValue) > 100 else oldValue
- newTruncated = newValue[:100] + "..." if len(newValue) > 100 else newValue
- rowChanges.append(f"{fieldName}: '{oldTruncated}' -> '{newTruncated}'")
-
- existingRow[fieldName] = jiraRow.get(fieldName)
-
- mergedData.append(existingRow)
- if rowChanges:
- updatedCount += 1
- changes.append(f"Row ID {jiraId} updated: {', '.join(rowChanges)}")
- else:
- unchangedCount += 1
- del existingLookup[jiraId]
- else:
- mergedData.append(jiraRow)
- addedCount += 1
- changes.append(f"Row ID {jiraId} added as new record")
-
- # Add remaining existing rows
- for remaining in existingLookup.values():
- mergedData.append(remaining)
- unchangedCount += 1
-
- mergeDetails = {
- "updated": updatedCount,
- "added": addedCount,
- "unchanged": unchangedCount,
- "changes": changes
- }
-
- logger.info(f"Merged ticket data: {updatedCount} updated, {addedCount} added, {unchangedCount} unchanged")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "merged_ticket_data",
- "json",
- workflowContext,
- "mergeTicketData"
- )
-
- result = {
- "data": mergedData,
- "mergeDetails": mergeDetails
- }
-
- validationMetadata = self._createValidationMetadata(
- "mergeTicketData",
- updated=updatedCount,
- added=addedCount,
- unchanged=unchangedCount
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2, ensure_ascii=False),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error merging ticket data: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Parse CSV content with custom headers.
-
- Parameters:
- - csvContent (str, required): Document reference containing CSV file content as bytes
- - skipRows (int, optional): Number of header rows to skip (default: 2)
- - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)
-
- Returns:
- - ActionResult with ActionDocument containing parsed data and headers as JSON
- """
- try:
- csvContentParam = parameters.get("csvContent")
- if not csvContentParam:
- return ActionResult.isFailure(error="csvContent parameter is required")
-
- skipRows = parameters.get("skipRows", 2)
- hasCustomHeaders = parameters.get("hasCustomHeaders", True)
-
- # Get CSV content from document
- csvBytes = self._getDocumentData(csvContentParam)
- if csvBytes is None:
- return ActionResult.isFailure(error="Could not get CSV content from document reference")
-
- # Convert to bytes if needed
- if isinstance(csvBytes, str):
- csvBytes = csvBytes.encode('utf-8')
- elif not isinstance(csvBytes, bytes):
- return ActionResult.isFailure(error="CSV content must be bytes or string")
-
- # Parse headers if hasCustomHeaders
- headers = {"header1": "Header 1", "header2": "Header 2"}
- if hasCustomHeaders:
- csvLines = csvBytes.decode('utf-8').split('\n')
- if len(csvLines) >= 2:
- headers["header1"] = csvLines[0].rstrip('\r\n')
- headers["header2"] = csvLines[1].rstrip('\r\n')
-
- # Parse CSV data
- df = pd.read_csv(
- io.BytesIO(csvBytes),
- skiprows=skipRows,
- quoting=1,
- escapechar='\\',
- on_bad_lines='skip',
- engine='python'
- )
-
- # Convert to dict records
- for column in df.columns:
- df[column] = df[column].astype('object').fillna('')
- data = df.to_dict(orient='records')
-
- logger.info(f"Parsed CSV: {len(data)} rows, {len(df.columns)} columns")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "parsed_csv_data",
- "json",
- workflowContext,
- "parseCsvContent"
- )
-
- result = {
- "data": data,
- "headers": headers,
- "rowCount": len(data),
- "columnCount": len(df.columns)
- }
-
- validationMetadata = self._createValidationMetadata(
- "parseCsvContent",
- rowCount=len(data),
- columnCount=len(df.columns),
- skipRows=skipRows
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2, ensure_ascii=False),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error parsing CSV content: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Parse Excel content with custom headers.
-
- Parameters:
- - excelContent (str, required): Document reference containing Excel file content as bytes
- - skipRows (int, optional): Number of header rows to skip (default: 3)
- - hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)
-
- Returns:
- - ActionResult with ActionDocument containing parsed data and headers as JSON
- """
- try:
- excelContentParam = parameters.get("excelContent")
- if not excelContentParam:
- return ActionResult.isFailure(error="excelContent parameter is required")
-
- skipRows = parameters.get("skipRows", 3)
- hasCustomHeaders = parameters.get("hasCustomHeaders", True)
-
- # Get Excel content from document
- excelBytes = self._getDocumentData(excelContentParam)
- if excelBytes is None:
- return ActionResult.isFailure(error="Could not get Excel content from document reference")
-
- # Convert to bytes if needed
- if isinstance(excelBytes, str):
- excelBytes = excelBytes.encode('latin-1') # Excel might have binary data
- elif not isinstance(excelBytes, bytes):
- return ActionResult.isFailure(error="Excel content must be bytes or string")
-
- # Parse Excel
- df = pd.read_excel(BytesIO(excelBytes), engine='openpyxl', header=None)
-
- # Extract headers if hasCustomHeaders
- headers = {"header1": "Header 1", "header2": "Header 2"}
- if hasCustomHeaders and len(df) >= 3:
- headerRow1 = df.iloc[0:1].copy()
- headerRow2 = df.iloc[1:2].copy()
- tableHeaders = df.iloc[2:3].copy()
- dfData = df.iloc[skipRows:].copy()
- dfData.columns = tableHeaders.iloc[0]
-
- headers = {
- "header1": ",".join([str(x) if pd.notna(x) else "" for x in headerRow1.iloc[0].tolist()]),
- "header2": ",".join([str(x) if pd.notna(x) else "" for x in headerRow2.iloc[0].tolist()]),
- }
- else:
- # No custom headers, use standard parsing
- if skipRows > 0:
- dfData = df.iloc[skipRows:].copy()
- if len(df) > skipRows:
- dfData.columns = df.iloc[skipRows-1]
- else:
- dfData = df.copy()
-
- # Reset index and clean data
- dfData = dfData.reset_index(drop=True)
- for column in dfData.columns:
- dfData[column] = dfData[column].astype('object').fillna('')
-
- data = dfData.to_dict(orient='records')
-
- logger.info(f"Parsed Excel: {len(data)} rows, {len(dfData.columns)} columns")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "parsed_excel_data",
- "json",
- workflowContext,
- "parseExcelContent"
- )
-
- result = {
- "data": data,
- "headers": headers,
- "rowCount": len(data),
- "columnCount": len(dfData.columns)
- }
-
- validationMetadata = self._createValidationMetadata(
- "parseExcelContent",
- rowCount=len(data),
- columnCount=len(dfData.columns),
- skipRows=skipRows
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2, ensure_ascii=False),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error parsing Excel content: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Create CSV content with custom headers.
-
- Parameters:
- - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- - headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
- - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
-
- Returns:
- - ActionResult with ActionDocument containing CSV content as bytes
- """
- try:
- dataParam = parameters.get("data")
- if not dataParam:
- return ActionResult.isFailure(error="data parameter is required")
-
- headersParam = parameters.get("headers")
- columnsParam = parameters.get("columns")
- taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
-
- # Get data from document
- dataJson = self._parseJsonFromDocument(dataParam)
- if dataJson is None:
- return ActionResult.isFailure(error="Could not parse data from document reference")
-
- # Extract data array if wrapped in object
- if isinstance(dataJson, dict) and "data" in dataJson:
- dataList = dataJson["data"]
- elif isinstance(dataJson, list):
- dataList = dataJson
- else:
- return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
-
- # Get headers
- headers = {"header1": "Header 1", "header2": "Header 2"}
- if headersParam:
- headersJson = self._parseJsonFromDocument(headersParam)
- if headersJson and isinstance(headersJson, dict) and "headers" in headersJson:
- headers = headersJson["headers"]
- elif headersJson and isinstance(headersJson, dict):
- headers = headersJson
-
- # Get columns
- if columnsParam:
- if isinstance(columnsParam, str):
- try:
- columns = json.loads(columnsParam) if columnsParam.startswith('[') or columnsParam.startswith('{') else columnsParam.split(',')
- except:
- columns = columnsParam.split(',')
- elif isinstance(columnsParam, list):
- columns = columnsParam
- else:
- columns = None
- elif taskSyncDefinitionParam:
- # Extract columns from taskSyncDefinition
- if isinstance(taskSyncDefinitionParam, str):
- taskSyncDefinition = json.loads(taskSyncDefinitionParam)
- else:
- taskSyncDefinition = taskSyncDefinitionParam
- columns = list(taskSyncDefinition.keys())
- elif dataList and len(dataList) > 0:
- columns = list(dataList[0].keys())
- else:
- columns = []
-
- # Create DataFrame
- if not dataList:
- df = pd.DataFrame(columns=columns)
- else:
- df = pd.DataFrame(dataList)
- # Ensure all columns exist
- for col in columns:
- if col not in df.columns:
- df[col] = ""
- # Reorder columns
- df = df[columns]
-
- # Clean data
- for column in df.columns:
- df[column] = df[column].astype("object").fillna("")
- df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False).str.replace('"', '""', regex=False)
-
- # Create headers with timestamp
- timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
- header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
- header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
- if len(header2Row) > 1:
- header2Row[1] = timestamp
-
- headerRow1 = pd.DataFrame([header1Row + [""] * (len(df.columns) - len(header1Row))], columns=df.columns)
- headerRow2 = pd.DataFrame([header2Row + [""] * (len(df.columns) - len(header2Row))], columns=df.columns)
- tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
- finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)
-
- # Convert to CSV bytes
- out = StringIO()
- finalDf.to_csv(out, index=False, header=False, quoting=1, escapechar='\\')
- csvBytes = out.getvalue().encode('utf-8')
-
- logger.info(f"Created CSV content: {len(dataList)} rows, {len(columns)} columns")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "ticket_sync",
- "csv",
- workflowContext,
- "createCsvContent"
- )
-
- validationMetadata = self._createValidationMetadata(
- "createCsvContent",
- rowCount=len(dataList),
- columnCount=len(columns)
- )
-
- # Store as base64 for document
- import base64
- csvBase64 = base64.b64encode(csvBytes).decode('utf-8')
-
- document = ActionDocument(
- documentName=filename,
- documentData=csvBase64,
- mimeType="application/octet-stream",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error creating CSV content: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Create Excel content with custom headers.
-
- Parameters:
- - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- - headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
- - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
-
- Returns:
- - ActionResult with ActionDocument containing Excel content as bytes
- """
- try:
- dataParam = parameters.get("data")
- if not dataParam:
- return ActionResult.isFailure(error="data parameter is required")
-
- headersParam = parameters.get("headers")
- columnsParam = parameters.get("columns")
- taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
-
- # Get data from document
- dataJson = self._parseJsonFromDocument(dataParam)
- if dataJson is None:
- return ActionResult.isFailure(error="Could not parse data from document reference")
-
- # Extract data array if wrapped in object
- if isinstance(dataJson, dict) and "data" in dataJson:
- dataList = dataJson["data"]
- elif isinstance(dataJson, list):
- dataList = dataJson
- else:
- return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
-
- # Get headers
- headers = {"header1": "Header 1", "header2": "Header 2"}
- if headersParam:
- headersJson = self._parseJsonFromDocument(headersParam)
- if headersJson and isinstance(headersJson, dict) and "headers" in headersJson:
- headers = headersJson["headers"]
- elif headersJson and isinstance(headersJson, dict):
- headers = headersJson
-
- # Get columns
- if columnsParam:
- if isinstance(columnsParam, str):
- try:
- columns = json.loads(columnsParam) if columnsParam.startswith('[') or columnsParam.startswith('{') else columnsParam.split(',')
- except:
- columns = columnsParam.split(',')
- elif isinstance(columnsParam, list):
- columns = columnsParam
- else:
- columns = None
- elif taskSyncDefinitionParam:
- # Extract columns from taskSyncDefinition
- if isinstance(taskSyncDefinitionParam, str):
- taskSyncDefinition = json.loads(taskSyncDefinitionParam)
- else:
- taskSyncDefinition = taskSyncDefinitionParam
- columns = list(taskSyncDefinition.keys())
- elif dataList and len(dataList) > 0:
- columns = list(dataList[0].keys())
- else:
- columns = []
-
- # Create DataFrame
- if not dataList:
- df = pd.DataFrame(columns=columns)
- else:
- df = pd.DataFrame(dataList)
- # Ensure all columns exist
- for col in columns:
- if col not in df.columns:
- df[col] = ""
- # Reorder columns
- df = df[columns]
-
- # Clean data
- for column in df.columns:
- df[column] = df[column].astype("object").fillna("")
- df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False).str.replace('"', '""', regex=False)
-
- # Create headers with timestamp
- timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
- header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
- header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
- if len(header2Row) > 1:
- header2Row[1] = timestamp
-
- headerRow1 = pd.DataFrame([header1Row + [""] * (len(df.columns) - len(header1Row))], columns=df.columns)
- headerRow2 = pd.DataFrame([header2Row + [""] * (len(df.columns) - len(header2Row))], columns=df.columns)
- tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
- finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)
-
- # Convert to Excel bytes
- buf = BytesIO()
- finalDf.to_excel(buf, index=False, header=False, engine='openpyxl')
- excelBytes = buf.getvalue()
-
- logger.info(f"Created Excel content: {len(dataList)} rows, {len(columns)} columns")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "ticket_sync",
- "xlsx",
- workflowContext,
- "createExcelContent"
- )
-
- validationMetadata = self._createValidationMetadata(
- "createExcelContent",
- rowCount=len(dataList),
- columnCount=len(columns)
- )
-
- # Store as base64 for document
- import base64
- excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
-
- document = ActionDocument(
- documentName=filename,
- documentData=excelBase64,
- mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error creating Excel content: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
diff --git a/modules/workflows/methods/methodJira/actions/connectJira.py b/modules/workflows/methods/methodJira/actions/connectJira.py
index 8200514a..45b60cad 100644
--- a/modules/workflows/methods/methodJira/actions/connectJira.py
+++ b/modules/workflows/methods/methodJira/actions/connectJira.py
@@ -1,37 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Connect JIRA action for JIRA operations.
-Connects to JIRA instance and creates ticket interface.
-"""
-
import logging
import json
import uuid
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
-@action
async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Connect to JIRA instance and create ticket interface.
-
- Parameters:
- - apiUsername (str, required): JIRA API username/email
- - apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
- - apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
- - projectCode (str, required): JIRA project code (e.g., "DCS")
- - issueType (str, required): JIRA issue type (e.g., "Task")
- - taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict
-
- Returns:
- - ActionResult with ActionDocument containing connection ID
- """
try:
apiUsername = parameters.get("apiUsername")
if not apiUsername:
diff --git a/modules/workflows/methods/methodJira/actions/createCsvContent.py b/modules/workflows/methods/methodJira/actions/createCsvContent.py
index c856760e..cbec7960 100644
--- a/modules/workflows/methods/methodJira/actions/createCsvContent.py
+++ b/modules/workflows/methods/methodJira/actions/createCsvContent.py
@@ -1,11 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Create CSV Content action for JIRA operations.
-Creates CSV content with custom headers.
-"""
-
import logging
import json
import base64
@@ -14,25 +9,11 @@ import csv as csv_module
from io import StringIO
from datetime import datetime, UTC
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Create CSV content with custom headers.
-
- Parameters:
- - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- - headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
- - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
-
- Returns:
- - ActionResult with ActionDocument containing CSV content as bytes
- """
try:
dataParam = parameters.get("data")
if not dataParam:
diff --git a/modules/workflows/methods/methodJira/actions/createExcelContent.py b/modules/workflows/methods/methodJira/actions/createExcelContent.py
index fbf54299..631795b3 100644
--- a/modules/workflows/methods/methodJira/actions/createExcelContent.py
+++ b/modules/workflows/methods/methodJira/actions/createExcelContent.py
@@ -1,11 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Create Excel Content action for JIRA operations.
-Creates Excel content with custom headers.
-"""
-
import logging
import json
import base64
@@ -14,25 +9,11 @@ import csv as csv_module
from io import BytesIO
from datetime import datetime, UTC
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Create Excel content with custom headers.
-
- Parameters:
- - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- - headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
- - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
-
- Returns:
- - ActionResult with ActionDocument containing Excel content as bytes
- """
try:
dataParam = parameters.get("data")
if not dataParam:
diff --git a/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py b/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
index 85926851..55d99654 100644
--- a/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
+++ b/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
@@ -1,31 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Export Tickets As JSON action for JIRA operations.
-Exports tickets from JIRA as JSON list.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Export tickets from JIRA as JSON list.
-
- Parameters:
- - connectionId (str, required): Connection ID from connectJira action result
- - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
-
- Returns:
- - ActionResult with ActionDocument containing list of tickets as JSON
- """
try:
connectionIdParam = parameters.get("connectionId")
if not connectionIdParam:
diff --git a/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py b/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
index b17519ea..b997889e 100644
--- a/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
+++ b/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
@@ -1,32 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Import Tickets From JSON action for JIRA operations.
-Imports ticket data from JSON back to JIRA.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Import ticket data from JSON back to JIRA.
-
- Parameters:
- - connectionId (str, required): Connection ID from connectJira action result
- - ticketData (str, required): Document reference containing ticket data as JSON
- - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
-
- Returns:
- - ActionResult with ActionDocument containing import result with counts
- """
try:
connectionIdParam = parameters.get("connectionId")
if not connectionIdParam:
diff --git a/modules/workflows/methods/methodJira/actions/mergeTicketData.py b/modules/workflows/methods/methodJira/actions/mergeTicketData.py
index a8f8b486..2bd7ab74 100644
--- a/modules/workflows/methods/methodJira/actions/mergeTicketData.py
+++ b/modules/workflows/methods/methodJira/actions/mergeTicketData.py
@@ -1,33 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Merge Ticket Data action for JIRA operations.
-Merges JIRA export data with existing SharePoint data.
-"""
-
import logging
import json
from typing import Dict, Any, List
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Merge JIRA export data with existing SharePoint data.
-
- Parameters:
- - jiraData (str, required): Document reference containing JIRA ticket data as JSON array
- - existingData (str, required): Document reference containing existing SharePoint data as JSON array
- - taskSyncDefinition (str or dict, required): Field mapping definition
- - idField (str, optional): Field name to use as ID for merging (default: "ID")
-
- Returns:
- - ActionResult with ActionDocument containing merged data and merge details
- """
try:
jiraDataParam = parameters.get("jiraData")
if not jiraDataParam:
diff --git a/modules/workflows/methods/methodJira/actions/parseCsvContent.py b/modules/workflows/methods/methodJira/actions/parseCsvContent.py
index 3038e566..bbdc2cc7 100644
--- a/modules/workflows/methods/methodJira/actions/parseCsvContent.py
+++ b/modules/workflows/methods/methodJira/actions/parseCsvContent.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Parse CSV Content action for JIRA operations.
-Parses CSV content with custom headers.
-"""
-
import logging
import json
import io
import pandas as pd
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Parse CSV content with custom headers.
-
- Parameters:
- - csvContent (str, required): Document reference containing CSV file content as bytes
- - skipRows (int, optional): Number of header rows to skip (default: 2)
- - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)
-
- Returns:
- - ActionResult with ActionDocument containing parsed data and headers as JSON
- """
try:
csvContentParam = parameters.get("csvContent")
if not csvContentParam:
diff --git a/modules/workflows/methods/methodJira/actions/parseExcelContent.py b/modules/workflows/methods/methodJira/actions/parseExcelContent.py
index c0d64325..5ac4e548 100644
--- a/modules/workflows/methods/methodJira/actions/parseExcelContent.py
+++ b/modules/workflows/methods/methodJira/actions/parseExcelContent.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Parse Excel Content action for JIRA operations.
-Parses Excel content with custom headers.
-"""
-
import logging
import json
import pandas as pd
from io import BytesIO
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Parse Excel content with custom headers.
-
- Parameters:
- - excelContent (str, required): Document reference containing Excel file content as bytes
- - skipRows (int, optional): Number of header rows to skip (default: 3)
- - hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)
-
- Returns:
- - ActionResult with ActionDocument containing parsed data and headers as JSON
- """
try:
excelContentParam = parameters.get("excelContent")
if not excelContentParam:
diff --git a/modules/workflows/methods/methodOutlook.py.old b/modules/workflows/methods/methodOutlook.py.old
deleted file mode 100644
index 98dfbc41..00000000
--- a/modules/workflows/methods/methodOutlook.py.old
+++ /dev/null
@@ -1,1904 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-Microsoft Outlook Email Operations Module
-"""
-
-import base64
-import re
-import logging
-from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC
-import json
-import requests
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-
-logger = logging.getLogger(__name__)
-
-class MethodOutlook(MethodBase):
- """Outlook method implementation for email operations"""
-
- def __init__(self, services):
- """Initialize the Outlook method"""
- super().__init__(services)
- self.name = "outlook"
- self.description = "Handle Microsoft Outlook email operations"
-
- def _format_timestamp_for_filename(self) -> str:
- """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
- return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
-
- def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
- """
- Helper function to get Microsoft connection details.
- """
- try:
- logger.debug(f"Getting Microsoft connection for reference: {connectionReference}")
-
- # Get the connection from the service
- userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference)
- if not userConnection:
- logger.error(f"Connection not found: {connectionReference}")
- return None
-
- logger.debug(f"Found connection: {userConnection.id}, status: {userConnection.status.value}, authority: {userConnection.authority.value}")
-
- # Get a fresh token for this connection
- token = self.services.chat.getFreshConnectionToken(userConnection.id)
- if not token:
- logger.error(f"Fresh token not found for connection: {userConnection.id}")
- logger.debug(f"Connection details: {userConnection}")
- return None
-
- logger.debug(f"Fresh token retrieved for connection {userConnection.id}")
-
- # Check if connection is active
- if userConnection.status.value != "active":
- logger.error(f"Connection is not active: {userConnection.id}, status: {userConnection.status.value}")
- return None
-
- return {
- "id": userConnection.id,
- "accessToken": token.tokenAccess,
- "refreshToken": token.tokenRefresh,
- "scopes": ["Mail.ReadWrite", "Mail.Send", "Mail.ReadWrite.Shared", "User.Read"] # Valid Microsoft Graph API scopes
- }
- except Exception as e:
- logger.error(f"Error getting Microsoft connection: {str(e)}")
- return None
-
- async def _checkPermissions(self, connection: Dict[str, Any]) -> bool:
- """
- Check if the current connection has the necessary permissions for Outlook operations.
- """
- try:
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Test permissions by trying to access the user's mail folder
- test_url = f"{graph_url}/me/mailFolders"
- response = requests.get(test_url, headers=headers)
-
- if response.status_code == 200:
-
- return True
- elif response.status_code == 403:
- logger.error("Permission denied - connection lacks necessary mail permissions")
- logger.error("Required scopes: Mail.ReadWrite, Mail.Send, Mail.ReadWrite.Shared")
- return False
- else:
- logger.warning(f"Permission check returned status {response.status_code}")
- return False
-
- except Exception as e:
- logger.error(f"Error checking permissions: {str(e)}")
- return False
-
- def _sanitizeSearchQuery(self, query: str) -> str:
- """
- Sanitize and validate search query for Microsoft Graph API
-
- Microsoft Graph API has specific requirements for search queries:
- - Escape special characters properly
- - Handle search operators correctly
- - Ensure query format is valid
- """
- if not query:
- return ""
-
- # Clean the query
- clean_query = query.strip()
-
- # Handle folder specifications first
- if clean_query.lower().startswith('folder:'):
- folder_name = clean_query[7:].strip()
- if folder_name:
- # Return the folder specification as-is
- return clean_query
-
- # Remove any double quotes that might cause issues
- clean_query = clean_query.replace('"', '')
-
- # Handle common search operators
- # Recognize Graph operators including both singular and plural forms for hasAttachments
- lowered = clean_query.lower()
- if any(op in lowered for op in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
- # This is an advanced search query, return as-is
- return clean_query
-
- # For basic text search, ensure it's safe for contains() filter
- # Remove any characters that might break the OData filter syntax
- # Remove or escape characters that could break OData filter syntax
- safe_query = re.sub(r'[\\\'"]', '', clean_query)
-
- return safe_query
-
- def _buildSearchParameters(self, query: str, folder: str, limit: int) -> Dict[str, Any]:
- """
- Build search parameters for Microsoft Graph API
-
- This method handles the complexity of building search parameters
- while avoiding conflicts between $search and $filter parameters.
- """
- params = {
- "$top": limit
- }
-
- if not query or not query.strip():
- # No query specified, just get emails from folder
- if folder and folder.lower() != "all":
- # Use folder name directly for well-known folders, or get folder ID
- if folder.lower() in ["inbox", "drafts", "sentitems", "deleteditems"]:
- params["$filter"] = f"parentFolderId eq '{folder}'"
- else:
- # For custom folders, we need to get the folder ID first
- # This will be handled by the calling method
- params["$filter"] = f"parentFolderId eq '{folder}'"
- # Add orderby for basic queries
- params["$orderby"] = "receivedDateTime desc"
- return params
-
- clean_query = self._sanitizeSearchQuery(query)
-
- # Check if this is a folder specification (e.g., "folder:Drafts", "folder:Inbox")
- if clean_query.lower().startswith('folder:'):
- folder_name = clean_query[7:].strip() # Remove "folder:" prefix
- if folder_name:
- # This is a folder specification, not a text search
- # Just filter by folder and return
- params["$filter"] = f"parentFolderId eq '{folder_name}'"
- params["$orderby"] = "receivedDateTime desc"
- return params
-
- # Check if this is a complex search query with multiple operators
- # Recognize Graph operators including both singular and plural forms for hasAttachments
- lowered = clean_query.lower()
- if any(op in lowered for op in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
- # This is an advanced search query, use $search
- # Microsoft Graph API supports complex search syntax
- params["$search"] = f'"{clean_query}"'
-
- # Note: When using $search, we cannot combine it with $orderby or $filter for folder
- # We'll need to filter results after the API call
- # Folder filtering will be done after the API call
- else:
- # Use $filter for basic text search, but keep it simple to avoid "InefficientFilter" error
- # Microsoft Graph API has limitations on complex filters
- if len(clean_query) > 50:
- # If query is too long, truncate it to avoid complex filter issues
- clean_query = clean_query[:50]
-
-
- # Use only subject search to keep filter simple
- # Handle wildcard queries specially
- if clean_query == "*" or clean_query == "":
- # For wildcard or empty query, don't use contains filter
- # Just use folder filter if specified
- if folder and folder.lower() != "all":
- params["$filter"] = f"parentFolderId eq '{folder}'"
- else:
- # No filter needed for wildcard search across all folders
- pass
- else:
- params["$filter"] = f"contains(subject,'{clean_query}')"
-
- # Add folder filter if specified
- if folder and folder.lower() != "all":
- params["$filter"] = f"{params['$filter']} and parentFolderId eq '{folder}'"
-
- # Add orderby for basic queries
- params["$orderby"] = "receivedDateTime desc"
-
-
- return params
-
- def _buildGraphFilter(self, filter_text: str) -> Dict[str, str]:
- """
- Build proper Microsoft Graph API filter parameters based on filter text
-
- Args:
- filter_text (str): The filter text to process
-
- Returns:
- Dict[str, str]: Dictionary with either $filter or $search parameter
- """
- if not filter_text:
- return {}
-
- filter_text = filter_text.strip()
-
- # Handle folder specifications (e.g., "folder:Drafts", "folder:Inbox")
- if filter_text.lower().startswith('folder:'):
- folder_name = filter_text[7:].strip() # Remove "folder:" prefix
- if folder_name:
- # This is a folder specification, return empty to let the main method handle it
- return {}
-
- # Handle search queries (from:, to:, subject:, etc.) - check this FIRST
- # Support both singular and plural forms for hasAttachments
- lt = filter_text.lower()
- if any(lt.startswith(prefix) for prefix in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
- return {"$search": f'"{filter_text}"'}
-
- # Handle email address filters (only if it's NOT a search query)
- if '@' in filter_text and '.' in filter_text and ' ' not in filter_text and not filter_text.startswith('from:'):
- return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}
-
- # Handle OData filter conditions (contains 'eq', 'ne', 'gt', 'lt', etc.)
- if any(op in filter_text.lower() for op in [' eq ', ' ne ', ' gt ', ' lt ', ' ge ', ' le ', ' and ', ' or ']):
- return {"$filter": filter_text}
-
- # Handle text content - search in subject
- return {"$filter": f"contains(subject,'{filter_text}')"}
-
- def _getFolderId(self, folder_name: str, connection: Dict[str, Any]) -> Optional[str]:
- """
- Get the folder ID for a given folder name
-
- This is needed for proper filtering when using advanced search queries
- """
- try:
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get mail folders
- api_url = f"{graph_url}/me/mailFolders"
- response = requests.get(api_url, headers=headers)
-
- if response.status_code == 200:
- folders_data = response.json()
- all_folders = folders_data.get("value", [])
-
-
-
- # Try exact match first
- for folder in all_folders:
- if folder.get("displayName", "").lower() == folder_name.lower():
-
- return folder.get("id")
-
- # Try common variations for Drafts folder
- if folder_name.lower() == "drafts":
- draft_variations = ["drafts", "draft", "entwürfe", "entwurf", "brouillons", "brouillon"]
- for folder in all_folders:
- folder_display_name = folder.get("displayName", "").lower()
- if any(variation in folder_display_name for variation in draft_variations):
-
- return folder.get("id")
-
- # Try common variations for other folders
- if folder_name.lower() == "sent items":
- sent_variations = ["sent items", "sent", "gesendete elemente", "éléments envoyés"]
- for folder in all_folders:
- folder_display_name = folder.get("displayName", "").lower()
- if any(variation in folder_display_name for variation in sent_variations):
-
- return folder.get("id")
-
- logger.warning(f"Folder '{folder_name}' not found. Available folders: {[f.get('displayName', 'Unknown') for f in all_folders]}")
- return None
- else:
- logger.warning(f"Could not retrieve folders: {response.status_code}")
- return None
-
- except Exception as e:
- logger.warning(f"Error getting folder ID for '{folder_name}': {str(e)}")
- return None
-
- @action
- async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Read emails and metadata from a mailbox folder.
- - Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
- - Output format: JSON with emails and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - folder (str, optional): Folder to read from. Default: Inbox.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - filter (str, optional): Sender, query operators, or subject text.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"outlook_read_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Read Emails",
- "Outlook Email Reading",
- f"Folder: {parameters.get('folder', 'Inbox')}",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- folder = parameters.get("folder", "Inbox")
- limit = parameters.get("limit", 10)
- filter = parameters.get("filter")
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- self.services.chat.progressLogUpdate(operationId, 0.2, "Validating parameters")
-
- # Validate limit parameter
- if limit <= 0:
- limit = 1000
- logger.warning(f"Invalid limit value ({limit}), using default value 1000")
-
- # Validate filter parameter if provided
- if filter:
- # Remove any potentially dangerous characters that could break the filter
- filter = filter.strip()
- if len(filter) > 100:
- logger.warning(f"Filter too long ({len(filter)} chars), truncating to 100 characters")
- filter = filter[:100]
-
-
- # Get Microsoft connection
- self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Read emails using Microsoft Graph API
- self.services.chat.progressLogUpdate(operationId, 0.4, "Reading emails from Microsoft Graph API")
- try:
- # Microsoft Graph API endpoint for messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get the folder ID for the specified folder
- folder_id = self._getFolderId(folder, connection)
-
- if folder_id:
- # Build the API request with folder ID
- api_url = f"{graph_url}/me/mailFolders/{folder_id}/messages"
- else:
- # Fallback: use folder name directly (for well-known folders like "Inbox")
- api_url = f"{graph_url}/me/mailFolders/{folder}/messages"
- logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
- params = {
- "$top": limit,
- "$orderby": "receivedDateTime desc"
- }
-
- if filter:
- # Build proper Graph API filter parameters
- filter_params = self._buildGraphFilter(filter)
- params.update(filter_params)
-
- # If using $search, remove $orderby as they can't be combined
- if "$search" in params:
- params.pop("$orderby", None)
-
- # If using $filter with contains(), remove $orderby as they can't be combined
- # Microsoft Graph API doesn't support contains() with orderby
- if "$filter" in params and "contains(" in params["$filter"].lower():
- params.pop("$orderby", None)
-
- # Filter applied
-
- # Make the API call
-
-
- response = requests.get(api_url, headers=headers, params=params)
-
- if response.status_code != 200:
- logger.error(f"Graph API error: {response.status_code} - {response.text}")
- logger.error(f"Request URL: {response.url}")
- logger.error(f"Request headers: {headers}")
- logger.error(f"Request params: {params}")
-
- response.raise_for_status()
-
- self.services.chat.progressLogUpdate(operationId, 0.7, "Processing email data")
- emails_data = response.json()
- email_data = {
- "emails": emails_data.get("value", []),
- "count": len(emails_data.get("value", [])),
- "folder": folder,
- "filter": filter,
- "apiMetadata": {
- "@odata.context": emails_data.get("@odata.context"),
- "@odata.count": emails_data.get("@odata.count"),
- "@odata.nextLink": emails_data.get("@odata.nextLink")
- }
- }
-
-
-
- except ImportError:
- logger.error("requests module not available")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="requests module not available")
- except requests.exceptions.HTTPError as e:
- if e.response.status_code == 400:
- logger.error(f"Bad Request (400) - Invalid filter or parameter: {e.response.text}")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"Invalid filter syntax. Please check your filter parameter. Error: {e.response.text}")
- elif e.response.status_code == 401:
- logger.error("Unauthorized (401) - Access token may be expired or invalid")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Authentication failed. Please check your connection and try again.")
- elif e.response.status_code == 403:
- logger.error("Forbidden (403) - Insufficient permissions to access emails")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Insufficient permissions to read emails from this folder.")
- else:
- logger.error(f"HTTP Error {e.response.status_code}: {e.response.text}")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"HTTP Error {e.response.status_code}: {e.response.text}")
- except Exception as e:
- logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
-
-
- # Create result data as JSON string
- result_data = {
- "connectionReference": connectionReference,
- "folder": folder,
- "limit": limit,
- "filter": filter,
- "emails": email_data,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.readEmails",
- "connectionReference": connectionReference,
- "folder": folder,
- "limit": limit,
- "filter": filter,
- "emailCount": email_data.get("count", 0),
- "outputMimeType": outputMimeType
- }
-
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {email_data.get('count', 0)} emails")
- self.services.chat.progressLogFinish(operationId, True)
-
- return ActionResult.isSuccess(
- documents=[ActionDocument(
- documentName=f"outlook_emails_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error reading emails: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass # Don't fail on progress logging errors
- return ActionResult.isFailure(
- error=str(e)
- )
-
- @action
- async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Search emails by query and return matching items with metadata.
- - Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
- - Output format: JSON with search results and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - query (str, required): Search expression.
- - folder (str, optional): Folder scope or All. Default: All.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- try:
- connectionReference = parameters.get("connectionReference")
- query = parameters.get("query")
- folder = parameters.get("folder", "All")
- limit = parameters.get("limit", 1000)
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- # Validate parameters
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Validate limit parameter
- if limit <= 0:
- limit = 1000
- logger.warning(f"Invalid limit value ({limit}), using default value 1000")
-
- if not query or not query.strip():
- return ActionResult.isFailure(error="Search query is required and cannot be empty")
-
- # Check if this is a folder specification query
- if query.strip().lower().startswith('folder:'):
- folder_name = query.strip()[7:].strip() # Remove "folder:" prefix
- if not folder_name:
- return ActionResult.isFailure(error="Invalid folder specification. Use format 'folder:FolderName'")
- logger.info(f"Search query is a folder specification: {folder_name}")
-
- # Validate limit
- try:
- limit = int(limit)
- if limit <= 0:
- limit = 1000
- logger.warning(f"Invalid limit value (<=0), using default value 1000")
- elif limit > 1000: # Microsoft Graph API has limits
- limit = 1000
- logger.warning(f"Limit {limit} exceeds maximum (1000), using 1000")
- except (ValueError, TypeError):
- limit = 1000
- logger.warning(f"Invalid limit value, using default value 1000")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Search emails using Microsoft Graph API
- try:
- # Microsoft Graph API endpoint for searching messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get the folder ID for the specified folder if needed
- folder_id = None
- if folder and folder.lower() != "all":
- folder_id = self._getFolderId(folder, connection)
- if folder_id:
- logger.debug(f"Found folder ID for '{folder}': {folder_id}")
- else:
- logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
-
- # Build the search API request
- api_url = f"{graph_url}/me/messages"
- params = self._buildSearchParameters(query, folder_id or folder, limit)
-
- # Log search parameters for debugging
- logger.debug(f"Search query: '{query}'")
- logger.debug(f"Search folder: '{folder}'")
- logger.debug(f"Search parameters: {params}")
- logger.debug(f"API URL: {api_url}")
-
- # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
-
- # Log response details for debugging
-
-
- if response.status_code != 200:
- # Log detailed error information
- try:
- error_data = response.json()
- logger.error(f"Microsoft Graph API error: {response.status_code} - {error_data}")
- except:
- logger.error(f"Microsoft Graph API error: {response.status_code} - {response.text}")
-
- # Check for specific error types and provide helpful messages
- if response.status_code == 400:
- logger.error("Bad Request (400) - Check search query format and parameters")
- logger.error(f"Search query: '{query}'")
- logger.error(f"Search parameters: {params}")
- logger.error(f"API URL: {api_url}")
- elif response.status_code == 401:
- logger.error("Unauthorized (401) - Check access token and permissions")
- elif response.status_code == 403:
- logger.error("Forbidden (403) - Check API permissions and scopes")
- elif response.status_code == 429:
- logger.error("Too Many Requests (429) - Rate limit exceeded")
-
- raise Exception(f"Microsoft Graph API returned {response.status_code}: {response.text}")
-
- response.raise_for_status()
-
- search_data = response.json()
- emails = search_data.get("value", [])
-
-
-
- # Apply folder filtering if needed and we used $search
- if folder and folder.lower() != "all" and "$search" in params:
- # Get the actual folder ID for proper filtering
- folder_id = self._getFolderId(folder, connection)
-
- if folder_id:
- # Filter results by folder ID
- filtered_emails = []
- for email in emails:
- if email.get("parentFolderId") == folder_id:
- filtered_emails.append(email)
- emails = filtered_emails
- logger.debug(f"Applied folder filtering: {len(filtered_emails)} emails found in folder {folder}")
- else:
- # Fallback: try to filter by folder name (less reliable)
- filtered_emails = []
- for email in emails:
- # Check if email has folder information
- if hasattr(email, 'parentFolderId') and email.get('parentFolderId'):
- if email.get('parentFolderId') == folder:
- filtered_emails.append(email)
- else:
- # If no folder info, include the email (less strict filtering)
- filtered_emails.append(email)
-
- emails = filtered_emails
- logger.debug(f"Applied fallback folder filtering: {len(filtered_emails)} emails found in folder {folder}")
-
- # Special handling for folder specification queries
- if query.strip().lower().startswith('folder:'):
- folder_name = query.strip()[7:].strip()
- folder_id = self._getFolderId(folder_name, connection)
- if folder_id:
- # Filter results to only include emails from the specified folder
- filtered_emails = []
- for email in emails:
- if email.get("parentFolderId") == folder_id:
- filtered_emails.append(email)
- emails = filtered_emails
- logger.debug(f"Applied folder specification filtering: {len(filtered_emails)} emails found in folder {folder_name}")
- else:
- logger.warning(f"Could not find folder ID for folder specification: {folder_name}")
-
-
- search_result = {
- "query": query,
- "results": emails,
- "count": len(emails),
- "folder": folder,
- "limit": limit,
- "apiMetadata": {
- "@odata.context": search_data.get("@odata.context"),
- "@odata.count": search_data.get("@odata.count"),
- "@odata.nextLink": search_data.get("@odata.nextLink")
- },
- "searchParams": params
- }
-
-
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to search emails: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
-
-
- result_data = {
- "connectionReference": connectionReference,
- "query": query,
- "folder": folder,
- "limit": limit,
- "searchResults": search_result,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.searchEmails",
- "connectionReference": connectionReference,
- "query": query,
- "folder": folder,
- "limit": limit,
- "resultCount": search_result.get("count", 0),
- "outputMimeType": outputMimeType
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_email_search_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error searching emails: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- async def listDrafts(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: List draft emails from a folder.
- - Input requirements: connectionReference (required); optional folder, limit, outputMimeType.
- - Output format: JSON with draft items and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - folder (str, optional): Drafts folder to list. Default: Drafts.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- try:
- connectionReference = parameters.get("connectionReference")
- folder = parameters.get("folder", "Drafts")
- limit = parameters.get("limit", 1000)
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # List drafts using Microsoft Graph API
- try:
- # Microsoft Graph API endpoint for messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get the folder ID for the specified folder
- folder_id = self._getFolderId(folder, connection)
-
- if folder_id:
- # List messages in the specific folder
- api_url = f"{graph_url}/me/mailFolders/{folder_id}/messages"
-
- else:
- # Fallback: list all messages (might include drafts)
- api_url = f"{graph_url}/me/messages"
- logger.warning(f"Could not find folder '{folder}', listing all messages")
-
- params = {
- "$top": limit,
- "$orderby": "lastModifiedDateTime desc",
- "$select": "id,subject,from,toRecipients,ccRecipients,bccRecipients,receivedDateTime,lastModifiedDateTime,parentFolderId,isDraft"
- }
-
- # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
- response.raise_for_status()
-
- messages_data = response.json()
- messages = messages_data.get("value", [])
-
- # Filter for drafts if we're looking at all messages
- if not folder_id:
- drafts = [msg for msg in messages if msg.get("isDraft", False)]
- messages = drafts
-
-
- drafts_result = {
- "folder": folder,
- "folderId": folder_id,
- "drafts": messages,
- "count": len(messages),
- "limit": limit,
- "apiResponse": messages_data
- }
-
-
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error listing drafts via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to list drafts: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
-
-
- result_data = {
- "connectionReference": connectionReference,
- "folder": folder,
- "limit": limit,
- "draftsResult": drafts_result,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.listDrafts",
- "connectionReference": connectionReference,
- "folder": folder,
- "limit": limit,
- "draftCount": drafts_result.get("count", 0),
- "outputMimeType": outputMimeType
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_drafts_list_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error listing drafts: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- async def findDrafts(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Find draft emails across folders.
- - Input requirements: connectionReference (required); optional limit, outputMimeType.
- - Output format: JSON with drafts and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - limit (int, optional): Maximum items to return. Default: 50.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- try:
- connectionReference = parameters.get("connectionReference")
- limit = parameters.get("limit", 50)
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Find drafts using Microsoft Graph API
- try:
- # Microsoft Graph API endpoint for messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get all messages and filter for drafts
- api_url = f"{graph_url}/me/messages"
- params = {
- "$top": limit,
- "$select": "id,subject,from,toRecipients,ccRecipients,bccRecipients,receivedDateTime,lastModifiedDateTime,parentFolderId,isDraft,webLink",
- "$filter": "isDraft eq true"
- }
-
-
-
- # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
- response.raise_for_status()
-
- messages_data = response.json()
- drafts = messages_data.get("value", [])
-
- # Get folder information for each draft
- for draft in drafts:
- if "parentFolderId" in draft:
- folder_info = self._getFolderNameById(draft["parentFolderId"], connection)
- draft["folderName"] = folder_info
-
- drafts_result = {
- "totalDrafts": len(drafts),
- "drafts": drafts,
- "limit": limit,
- "apiResponse": messages_data
- }
-
-
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error finding drafts via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to find drafts: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
-
-
- result_data = {
- "connectionReference": connectionReference,
- "limit": limit,
- "draftsResult": drafts_result,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.findDrafts",
- "connectionReference": connectionReference,
- "limit": limit,
- "totalDrafts": drafts_result.get("totalDrafts", 0),
- "outputMimeType": outputMimeType
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_drafts_found_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error finding drafts: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- def _getFolderNameById(self, folder_id: str, connection: Dict[str, Any]) -> str:
- """
- Get folder name by folder ID
-
- This is a helper method to identify which folder a draft is in
- """
- try:
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get folder information
- api_url = f"{graph_url}/me/mailFolders/{folder_id}"
- response = requests.get(api_url, headers=headers)
-
- if response.status_code == 200:
- folder_data = response.json()
- return folder_data.get("displayName", f"Unknown Folder ({folder_id})")
- else:
- return f"Unknown Folder ({folder_id})"
-
- except Exception as e:
- logger.warning(f"Error getting folder name for ID '{folder_id}': {str(e)}")
- return f"Unknown Folder ({folder_id})"
-
- async def checkDraftsFolder(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Check contents of the Drafts folder.
- - Input requirements: connectionReference (required); optional limit, outputMimeType.
- - Output format: JSON with drafts and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - limit (int, optional): Maximum items to return. Default: 20.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
- try:
- connectionReference = parameters.get("connectionReference")
- limit = parameters.get("limit", 20)
- outputMimeType = parameters.get("outputMimeType", "application/json")
-
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Check Drafts folder directly
- try:
- # Microsoft Graph API endpoint for messages
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- # Get the Drafts folder ID
- drafts_folder_id = self._getFolderId("Drafts", connection)
-
- if not drafts_folder_id:
- return ActionResult.isFailure(error="Could not find Drafts folder")
-
- # Get messages directly from Drafts folder
- api_url = f"{graph_url}/me/mailFolders/{drafts_folder_id}/messages"
- params = {
- "$top": limit,
- "$select": "id,subject,from,toRecipients,ccRecipients,bccRecipients,receivedDateTime,lastModifiedDateTime,isDraft,webLink",
- "$orderby": "lastModifiedDateTime desc"
- }
-
-
-
- # Make the API call
- response = requests.get(api_url, headers=headers, params=params)
- response.raise_for_status()
-
- messages_data = response.json()
- drafts = messages_data.get("value", [])
-
-
-
- drafts_result = {
- "draftsFolderId": drafts_folder_id,
- "totalDrafts": len(drafts),
- "drafts": drafts,
- "limit": limit,
- "apiResponse": messages_data,
- "apiUrl": api_url
- }
-
-
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error checking Drafts folder via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to check Drafts folder: {str(e)}")
-
- # Determine output format based on MIME type
- mime_type_mapping = {
- "application/json": ".json",
- "text/plain": ".txt",
- "text/csv": ".csv"
- }
- output_extension = mime_type_mapping.get(outputMimeType, ".json")
- output_mime_type = outputMimeType
- logger.info(f"Using output format: {output_extension} ({output_mime_type})")
-
-
-
- result_data = {
- "connectionReference": connectionReference,
- "limit": limit,
- "draftsResult": drafts_result,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- validationMetadata = {
- "actionType": "outlook.checkDraftsFolder",
- "connectionReference": connectionReference,
- "limit": limit,
- "totalDrafts": drafts_result.get("totalDrafts", 0),
- "draftsFolderId": drafts_result.get("draftsFolderId"),
- "outputMimeType": outputMimeType
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_drafts_folder_check_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except Exception as e:
- logger.error(f"Error checking Drafts folder: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- @action
- async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Compose email content using AI from context and optional documents, then create a draft.
- - Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
- - Output format: JSON confirmation with AI-generated draft metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - to (list, required): Recipient email addresses.
- - context (str, required): Detailled context for composing the email.
- - documentList (list, optional): Document references for context/attachments.
- - cc (list, optional): CC recipients.
- - bcc (list, optional): BCC recipients.
- - emailStyle (str, optional): formal | casual | business. Default: business.
- - maxLength (int, optional): Maximum length for generated content. Default: 1000.
- """
- try:
- connectionReference = parameters.get("connectionReference")
- to = parameters.get("to")
- context = parameters.get("context")
- documentList = parameters.get("documentList", [])
- cc = parameters.get("cc", [])
- bcc = parameters.get("bcc", [])
- emailStyle = parameters.get("emailStyle", "business")
- maxLength = parameters.get("maxLength", 1000)
-
- if not connectionReference or not to or not context:
- return ActionResult.isFailure(error="connectionReference, to, and context are required")
-
- # Convert single values to lists for all recipient parameters
- if isinstance(to, str):
- to = [to]
- if isinstance(cc, str):
- cc = [cc]
- if isinstance(bcc, str):
- bcc = [bcc]
- if isinstance(documentList, str):
- documentList = [documentList]
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found")
-
- # Check permissions
- permissions_ok = await self._checkPermissions(connection)
- if not permissions_ok:
- return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")
-
- # Prepare documents for AI processing
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- chatDocuments = []
- if documentList:
- # Convert to DocumentReferenceList if needed
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- elif isinstance(documentList, str):
- docRefList = DocumentReferenceList.from_string_list([documentList])
- else:
- docRefList = DocumentReferenceList(references=[])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
-
- # Create AI prompt for email composition
- # Build document reference list for AI with expanded list contents when possible
- doc_references = documentList
- doc_list_text = ""
- if doc_references:
- lines = ["Available_Document_References:"]
- for ref in doc_references:
- # Each item is a label: resolve to its document list and render contained items
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- list_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([ref])) or []
- if list_docs:
- for d in list_docs:
- doc_ref_label = self.services.chat.getDocumentReferenceFromChatDocument(d)
- lines.append(f"- {doc_ref_label}")
- else:
- lines.append(" - (no documents)")
- doc_list_text = "\n" + "\n".join(lines)
- else:
- doc_list_text = "Available_Document_References: (No documents available for attachment)"
-
- # Escape only the user-controlled context to prevent prompt injection
- escaped_context = context.replace('"', '\\"').replace('\n', '\\n').replace('\r', '\\r')
-
- ai_prompt = f"""Compose an email based on this context:
--------
-{escaped_context}
--------
-
-Recipients: {to}
-Style: {emailStyle}
-Max length: {maxLength} characters
-{doc_list_text}
-
-Based on the context, decide which documents to attach.
-
-CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:
' in cleaned_body:
- html_body = cleaned_body
- else:
- # Convert plain text to proper HTML formatting
- html_body = cleaned_body.replace('\n', '
')
- html_body = f"{html_body}"
-
- # Build the email message
- message = {
- "subject": subject,
- "body": {
- "contentType": "HTML",
- "content": html_body
- },
- "toRecipients": [{"emailAddress": {"address": email}} for email in to],
- "ccRecipients": [{"emailAddress": {"address": email}} for email in cc] if cc else [],
- "bccRecipients": [{"emailAddress": {"address": email}} for email in bcc] if bcc else []
- }
-
- # Add documents as attachments if provided
- if documentList:
- message["attachments"] = []
- for attachment_ref in documentList:
- # Get attachment document from service center
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- attachment_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([attachment_ref]))
- if attachment_docs:
- for doc in attachment_docs:
- file_id = getattr(doc, 'fileId', None)
- if file_id:
- try:
- file_content = self.services.chat.getFileData(file_id)
- if file_content:
- if isinstance(file_content, bytes):
- content_bytes = file_content
- else:
- content_bytes = str(file_content).encode('utf-8')
-
- base64_content = base64.b64encode(content_bytes).decode('utf-8')
-
- attachment = {
- "@odata.type": "#microsoft.graph.fileAttachment",
- "name": doc.fileName,
- "contentType": doc.mimeType or "application/octet-stream",
- "contentBytes": base64_content
- }
- message["attachments"].append(attachment)
- except Exception as e:
- logger.error(f"Error reading attachment file {doc.fileName}: {str(e)}")
-
- # Create the draft message
- drafts_folder_id = self._getFolderId("Drafts", connection)
-
- if drafts_folder_id:
- api_url = f"{graph_url}/me/mailFolders/{drafts_folder_id}/messages"
- else:
- api_url = f"{graph_url}/me/messages"
- logger.warning("Could not find Drafts folder, creating draft in default location")
-
- response = requests.post(api_url, headers=headers, json=message)
-
- if response.status_code in [200, 201]:
- draft_data = response.json()
- draft_id = draft_data.get("id", "Unknown")
-
- # Create draft result data with full draft information
- draftResultData = {
- "status": "draft",
- "message": "Email draft created successfully with AI-generated content",
- "draftId": draft_id,
- "folder": "Drafts (Entwürfe)",
- "mailbox": connection.get('userEmail', 'Unknown'),
- "subject": subject,
- "body": body,
- "recipients": to,
- "cc": cc,
- "bcc": bcc,
- "attachments": len(documentList),
- "aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
- "aiGenerated": True,
- "context": context,
- "emailStyle": emailStyle,
- "timestamp": self.services.utils.timestampGetUtc(),
- "draftData": draft_data
- }
-
- # Extract attachment filenames for validation metadata
- attachmentFilenames = []
- attachmentReferences = []
- if documentList:
- try:
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
- attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
- # Store normalized document references (with filenames) - use normalized_ai_attachments if available
- attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
- except Exception:
- pass
-
- # Create validation metadata for content validator
- validationMetadata = {
- "actionType": "outlook.composeAndDraftEmailWithContext",
- "emailRecipients": to,
- "emailCc": cc,
- "emailBcc": bcc,
- "emailSubject": subject,
- "emailAttachments": attachmentFilenames,
- "emailAttachmentReferences": attachmentReferences,
- "emailAttachmentCount": len(attachmentFilenames),
- "emailStyle": emailStyle,
- "hasAttachments": len(attachmentFilenames) > 0
- }
-
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(draftResultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
- else:
- logger.error(f"Failed to create draft. Status: {response.status_code}, Response: {response.text}")
- return ActionResult.isFailure(error=f"Failed to create email draft: {response.status_code} - {response.text}")
-
- except Exception as e:
- logger.error(f"Error creating email via Microsoft Graph API: {str(e)}")
- return ActionResult.isFailure(error=f"Failed to create email: {str(e)}")
-
- except Exception as e:
- logger.error(f"Error in composeAndDraftEmailWithContext: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- @action
- async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
- - Input requirements: connectionReference (required); documentList with draft email JSON documents (required).
- - Output format: JSON confirmation with sent mail metadata for all emails.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function).
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"outlook_send_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Send Draft Email",
- "Outlook Email Sending",
- f"Processing {len(parameters.get('documentList', []))} draft(s)",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- documentList = parameters.get("documentList", [])
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- if not documentList:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="documentList is required and cannot be empty")
-
- # Convert single value to list if needed
- if isinstance(documentList, str):
- documentList = [documentList]
-
- # Get Microsoft connection
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Check permissions
- self.services.chat.progressLogUpdate(operationId, 0.3, "Checking permissions")
- permissions_ok = await self._checkPermissions(connection)
- if not permissions_ok:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")
-
- # Read draft email JSON documents from documentList
- self.services.chat.progressLogUpdate(operationId, 0.4, "Reading draft email documents")
- draftEmails = []
- for docRef in documentList:
- try:
- # Get documents from document reference
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([docRef]))
- if not chatDocuments:
- logger.warning(f"No documents found for reference: {docRef}")
- continue
-
- # Process each document in the reference
- for doc in chatDocuments:
- try:
- # Read file data
- fileId = getattr(doc, 'fileId', None)
- if not fileId:
- logger.warning(f"Document {doc.fileName} has no fileId")
- continue
-
- fileData = self.services.chat.getFileData(fileId)
- if not fileData:
- logger.warning(f"No file data found for document: {doc.fileName}")
- continue
-
- # Parse JSON content
- if isinstance(fileData, bytes):
- jsonContent = fileData.decode('utf-8')
- else:
- jsonContent = str(fileData)
-
- # Parse JSON - handle both direct JSON and JSON wrapped in documentData
- try:
- draftEmailData = json.loads(jsonContent)
-
- # If the JSON contains a 'documentData' field, extract it
- if isinstance(draftEmailData, dict) and 'documentData' in draftEmailData:
- documentDataStr = draftEmailData['documentData']
- if isinstance(documentDataStr, str):
- draftEmailData = json.loads(documentDataStr)
-
- # Validate draft email structure
- if not isinstance(draftEmailData, dict):
- logger.warning(f"Document {doc.fileName} does not contain a valid draft email JSON object")
- continue
-
- draftId = draftEmailData.get("draftId")
- if not draftId:
- logger.warning(f"Document {doc.fileName} does not contain 'draftId' field")
- continue
-
- draftEmails.append({
- "draftEmailJson": draftEmailData,
- "draftId": draftId,
- "sourceDocument": doc.fileName,
- "sourceReference": docRef
- })
-
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse JSON from document {doc.fileName}: {str(e)}")
- continue
-
- except Exception as e:
- logger.error(f"Error processing document {doc.fileName}: {str(e)}")
- continue
-
- except Exception as e:
- logger.error(f"Error reading documents from reference {docRef}: {str(e)}")
- continue
-
- if not draftEmails:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid draft email JSON documents found in documentList")
-
- self.services.chat.progressLogUpdate(operationId, 0.6, f"Found {len(draftEmails)} draft email(s) to send")
-
- # Send all draft emails
- graph_url = "https://graph.microsoft.com/v1.0"
- headers = {
- "Authorization": f"Bearer {connection['accessToken']}",
- "Content-Type": "application/json"
- }
-
- sentResults = []
- failedResults = []
-
- self.services.chat.progressLogUpdate(operationId, 0.7, "Sending emails")
- for idx, draftEmail in enumerate(draftEmails):
- draftEmailJson = draftEmail["draftEmailJson"]
- draftId = draftEmail["draftId"]
- sourceDocument = draftEmail["sourceDocument"]
-
- try:
- send_url = f"{graph_url}/me/messages/{draftId}/send"
- sendResponse = requests.post(send_url, headers=headers)
-
- # Extract email details from draft JSON for confirmation
- subject = draftEmailJson.get("subject", "Unknown")
- recipients = draftEmailJson.get("recipients", [])
- cc = draftEmailJson.get("cc", [])
- bcc = draftEmailJson.get("bcc", [])
- attachmentsCount = draftEmailJson.get("attachments", 0)
-
- if sendResponse.status_code in [200, 202, 204]:
- sentResults.append({
- "status": "sent",
- "message": "Email sent successfully",
- "draftId": draftId,
- "subject": subject,
- "recipients": recipients,
- "cc": cc,
- "bcc": bcc,
- "attachments": attachmentsCount,
- "sentTimestamp": self.services.utils.timestampGetUtc(),
- "sourceDocument": sourceDocument
- })
- logger.info(f"Email sent successfully. Draft ID: {draftId}, Subject: {subject}")
- self.services.chat.progressLogUpdate(operationId, 0.7 + (idx + 1) * 0.2 / len(draftEmails), f"Sent {idx + 1}/{len(draftEmails)}: {subject}")
- else:
- errorResult = {
- "status": "error",
- "message": "Failed to send draft email",
- "draftId": draftId,
- "subject": subject,
- "recipients": recipients,
- "sendError": {
- "statusCode": sendResponse.status_code,
- "response": sendResponse.text
- },
- "sentTimestamp": self.services.utils.timestampGetUtc(),
- "sourceDocument": sourceDocument
- }
- failedResults.append(errorResult)
- logger.error(f"Failed to send email. Draft ID: {draftId}, Status: {sendResponse.status_code}, Response: {sendResponse.text}")
-
- except Exception as e:
- errorResult = {
- "status": "error",
- "message": f"Exception while sending draft email: {str(e)}",
- "draftId": draftId,
- "subject": draftEmailJson.get("subject", "Unknown"),
- "recipients": draftEmailJson.get("recipients", []),
- "exception": str(e),
- "sentTimestamp": self.services.utils.timestampGetUtc(),
- "sourceDocument": sourceDocument
- }
- failedResults.append(errorResult)
- logger.error(f"Error sending draft email {draftId}: {str(e)}")
-
- # Build result summary
- totalEmails = len(draftEmails)
- successfulEmails = len(sentResults)
- failedEmails = len(failedResults)
-
- resultData = {
- "totalEmails": totalEmails,
- "successfulEmails": successfulEmails,
- "failedEmails": failedEmails,
- "sentResults": sentResults,
- "failedResults": failedResults,
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- # Determine overall success status
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Sent {successfulEmails}/{totalEmails} email(s)")
- if successfulEmails == 0:
- self.services.chat.progressLogFinish(operationId, False)
- validationMetadata = {
- "actionType": "outlook.sendDraftEmail",
- "connectionReference": connectionReference,
- "totalEmails": totalEmails,
- "successfulEmails": successfulEmails,
- "failedEmails": failedEmails,
- "status": "all_failed"
- }
- return ActionResult.isFailure(
- error=f"Failed to send all {totalEmails} email(s)",
- documents=[ActionDocument(
- documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(resultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
- elif failedEmails > 0:
- # Partial success
- logger.warning(f"Sent {successfulEmails} out of {totalEmails} emails. {failedEmails} failed.")
- validationMetadata = {
- "actionType": "outlook.sendDraftEmail",
- "connectionReference": connectionReference,
- "totalEmails": totalEmails,
- "successfulEmails": successfulEmails,
- "failedEmails": failedEmails,
- "status": "partial_success"
- }
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(resultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
- else:
- # All successful
- logger.info(f"Successfully sent all {totalEmails} email(s)")
- validationMetadata = {
- "actionType": "outlook.sendDraftEmail",
- "connectionReference": connectionReference,
- "totalEmails": totalEmails,
- "successfulEmails": successfulEmails,
- "failedEmails": failedEmails,
- "status": "all_successful"
- }
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(resultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
-
- except ImportError:
- logger.error("requests module not available")
- return ActionResult.isFailure(error="requests module not available")
- except Exception as e:
- logger.error(f"Error in sendDraftEmail: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
- async def checkPermissions(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Verify that the connection has required permissions for Outlook operations.
- - Input requirements: connectionReference (required).
- - Output format: JSON with permission status and details.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label to check.
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="Failed to get Microsoft connection")
-
- # Check permissions
- permissions_ok = await self._checkPermissions(connection)
-
- if permissions_ok:
- result_data = {
- "permissions": "✅ All necessary permissions are available",
- "scopes": connection.get("scopes", []),
- "connectionId": connection.get("id"),
- "status": "ready"
- }
-
- validationMetadata = {
- "actionType": "outlook.checkPermissions",
- "connectionReference": connectionReference,
- "permissionsStatus": "ready",
- "hasPermissions": True
- }
- return ActionResult(
- success=True,
- documents=[ActionDocument(
- documentName=f"outlook_permissions_check_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )]
- )
- else:
- result_data = {
- "permissions": "❌ Missing necessary permissions",
- "requiredScopes": ["Mail.ReadWrite", "Mail.Send", "Mail.ReadWrite.Shared", "User.Read"],
- "currentScopes": connection.get("scopes", []),
- "connectionId": connection.get("id"),
- "status": "needs_reauthentication",
- "message": "Please re-authenticate your Microsoft connection to get updated permissions."
- }
-
- validationMetadata = {
- "actionType": "outlook.checkPermissions",
- "connectionReference": connectionReference,
- "permissionsStatus": "needs_reauthentication",
- "hasPermissions": False
- }
- return ActionResult(
- success=False,
- documents=[ActionDocument(
- documentName=f"outlook_permissions_check_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(result_data, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )],
- error="Connection lacks necessary permissions for Outlook operations"
- )
-
- except Exception as e:
- logger.error(f"Error checking permissions: {str(e)}")
- return ActionResult.isFailure(error=str(e))
-
diff --git a/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py b/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
index 2bad3838..59604896 100644
--- a/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
+++ b/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
@@ -1,39 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Compose And Draft Email With Context action for Outlook operations.
-Composes email content using AI from context and optional documents, then creates a draft.
-"""
-
import logging
import json
import base64
import requests
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Compose email content using AI from context and optional documents, then create a draft.
- - Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
- - Output format: JSON confirmation with AI-generated draft metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - to (list, required): Recipient email addresses.
- - context (str, required): Detailled context for composing the email.
- - documentList (list, optional): Document references for context/attachments.
- - cc (list, optional): CC recipients.
- - bcc (list, optional): BCC recipients.
- - emailStyle (str, optional): formal | casual | business. Default: business.
- - maxLength (int, optional): Maximum length for generated content. Default: 1000.
- """
try:
connectionReference = parameters.get("connectionReference")
to = parameters.get("to")
diff --git a/modules/workflows/methods/methodOutlook/actions/readEmails.py b/modules/workflows/methods/methodOutlook/actions/readEmails.py
index e698cb9f..2d325d9f 100644
--- a/modules/workflows/methods/methodOutlook/actions/readEmails.py
+++ b/modules/workflows/methods/methodOutlook/actions/readEmails.py
@@ -1,36 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Read Emails action for Outlook operations.
-Reads emails and metadata from a mailbox folder.
-"""
-
import logging
import time
import json
import requests
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Read emails and metadata from a mailbox folder.
- - Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
- - Output format: JSON with emails and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - folder (str, optional): Folder to read from. Default: Inbox.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - filter (str, optional): Sender, query operators, or subject text.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodOutlook/actions/searchEmails.py b/modules/workflows/methods/methodOutlook/actions/searchEmails.py
index 72830caf..f8831d59 100644
--- a/modules/workflows/methods/methodOutlook/actions/searchEmails.py
+++ b/modules/workflows/methods/methodOutlook/actions/searchEmails.py
@@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Search Emails action for Outlook operations.
-Searches emails by query and returns matching items with metadata.
-"""
-
import logging
import json
import requests
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Search emails by query and return matching items with metadata.
- - Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
- - Output format: JSON with search results and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - query (str, required): Search expression.
- - folder (str, optional): Folder scope or All. Default: All.
- - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
- """
try:
connectionReference = parameters.get("connectionReference")
query = parameters.get("query")
diff --git a/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py b/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
index ffae4c8d..9b7fb011 100644
--- a/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
+++ b/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
@@ -1,33 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Send Draft Email action for Outlook operations.
-Sends draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
-"""
-
import logging
import time
import json
import requests
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
- - Input requirements: connectionReference (required); documentList with draft email JSON documents (required).
- - Output format: JSON confirmation with sent mail metadata for all emails.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function).
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodOutlook/methodOutlook.py b/modules/workflows/methods/methodOutlook/methodOutlook.py
index 31bc7dc3..4a978b7a 100644
--- a/modules/workflows/methods/methodOutlook/methodOutlook.py
+++ b/modules/workflows/methods/methodOutlook/methodOutlook.py
@@ -39,6 +39,7 @@ class MethodOutlook(MethodBase):
"readEmails": WorkflowActionDefinition(
actionId="outlook.readEmails",
description="Read emails and metadata from a mailbox folder",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -87,6 +88,7 @@ class MethodOutlook(MethodBase):
"searchEmails": WorkflowActionDefinition(
actionId="outlook.searchEmails",
description="Search emails by query and return matching items with metadata",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -135,6 +137,7 @@ class MethodOutlook(MethodBase):
"composeAndDraftEmailWithContext": WorkflowActionDefinition(
actionId="outlook.composeAndDraftEmailWithContext",
description="Compose email content using AI from context and optional documents, then create a draft",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -202,6 +205,7 @@ class MethodOutlook(MethodBase):
"sendDraftEmail": WorkflowActionDefinition(
actionId="outlook.sendDraftEmail",
description="Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
diff --git a/modules/workflows/methods/methodSharepoint.py.old b/modules/workflows/methods/methodSharepoint.py.old
deleted file mode 100644
index d12b53eb..00000000
--- a/modules/workflows/methods/methodSharepoint.py.old
+++ /dev/null
@@ -1,2840 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-SharePoint operations method module.
-Handles SharePoint document operations using the SharePoint service.
-"""
-
-import logging
-import re
-import json
-from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC, timedelta, timezone
-import urllib
-import aiohttp
-import asyncio
-
-from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-
-logger = logging.getLogger(__name__)
-
-class MethodSharepoint(MethodBase):
- """SharePoint operations methods."""
-
- def __init__(self, services):
- super().__init__(services)
- self.name = "sharepoint"
- self.description = "SharePoint operations methods"
-
- def _format_timestamp_for_filename(self) -> str:
- """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
- return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
-
- def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
- """Get Microsoft connection from connection reference and configure SharePoint service"""
- try:
- userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference)
- if not userConnection:
- logger.warning(f"No user connection found for reference: {connectionReference}")
- return None
-
- if userConnection.authority.value != "msft":
- logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
- return None
-
- # Check if connection is active or pending (pending means OAuth in progress)
- if userConnection.status.value not in ["active", "pending"]:
- logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
- return None
-
- # Configure SharePoint service with the UserConnection
- if not self.services.sharepoint.setAccessTokenFromConnection(userConnection):
- logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
- return None
-
- logger.info(f"Successfully configured SharePoint service with Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")
-
- return {
- "id": userConnection.id,
- "userConnection": userConnection,
- "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"] # SharePoint scopes
- }
- except Exception as e:
- logger.error(f"Error getting Microsoft connection: {str(e)}")
- return None
-
- async def _discoverSharePointSites(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
- """
- Discover SharePoint sites accessible to the user via Microsoft Graph API
-
- Parameters:
- limit (Optional[int]): Limit number of sites to return (for optimization when only hostname is needed)
-
- Returns:
- List[Dict[str, Any]]: List of SharePoint site information
- """
- try:
- # Query Microsoft Graph to get sites the user has access to
- endpoint = "sites?search=*"
- if limit:
- endpoint += f"&$top={limit}"
-
- result = await self._makeGraphApiCall(endpoint)
-
- if "error" in result:
- logger.error(f"Error discovering SharePoint sites: {result['error']}")
- return []
-
- sites = result.get("value", [])
- if limit:
- sites = sites[:limit]
-
- logger.info(f"Discovered {len(sites)} SharePoint sites" + (f" (limited to {limit})" if limit else ""))
-
- # Process and return site information
- processedSites = []
- for site in sites:
- siteInfo = {
- "id": site.get("id"),
- "displayName": site.get("displayName"),
- "name": site.get("name"),
- "webUrl": site.get("webUrl"),
- "description": site.get("description"),
- "createdDateTime": site.get("createdDateTime"),
- "lastModifiedDateTime": site.get("lastModifiedDateTime")
- }
- processedSites.append(siteInfo)
- logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")
-
- return processedSites
-
- except Exception as e:
- logger.error(f"Error discovering SharePoint sites: {str(e)}")
- return []
-
- def _extractHostnameFromWebUrl(self, webUrl: str) -> Optional[str]:
- """Extract hostname from SharePoint webUrl (e.g., https://pcuster.sharepoint.com)"""
- try:
- if not webUrl:
- return None
- parsed = urllib.parse.urlparse(webUrl)
- return parsed.hostname
- except Exception as e:
- logger.error(f"Error extracting hostname from webUrl '{webUrl}': {str(e)}")
- return None
-
- def _extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]:
- """
- Extract site name from Microsoft-standard server-relative path.
- Delegates to SharePoint service.
- """
- return self.services.sharepoint.extractSiteFromStandardPath(pathQuery)
-
- async def _getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]:
- """
- Get SharePoint site directly by Microsoft-standard path.
- Delegates to SharePoint service.
- """
- return await self.services.sharepoint.getSiteByStandardPath(sitePath)
-
- def _filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
- """
- Filter discovered sites by a human-entered site hint.
- Delegates to SharePoint service.
- """
- return self.services.sharepoint.filterSitesByHint(sites, siteHint)
-
- def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
- """
- Parse searchQuery to extract path, search terms, search type, and search options.
-
- CRITICAL: NEVER convert words to paths! Words stay as search terms.
- - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson")
- - "root, gose" → fileQuery="root, gose" (NOT "/root/gose")
- - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject"
-
- Parameters:
- searchQuery (str): Enhanced search query with options:
- - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
- - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={}
- - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={}
- - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
- - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
- - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
- - "exact:\"Operations 2025\"" -> exact phrase matching
- - "regex:^Operations.*2025$" -> regex pattern matching
- - "case:DELTA" -> case-sensitive search
- - "and:DELTA AND 2025 Mars AND Group" -> all AND terms must be present
-
- Returns:
- tuple[str, str, str, dict]: (pathQuery, fileQuery, searchType, searchOptions)
- """
- try:
- if not searchQuery or not searchQuery.strip() or searchQuery.strip() == "*":
- return "*", "*", "all", {}
-
- searchQuery = searchQuery.strip()
- searchOptions = {}
-
- # CRITICAL: Do NOT convert space-separated or comma-separated words to paths!
- # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson"
- # "root, gose" should stay as "root, gose", NOT "/root/gose"
-
- # Check for search type specification (files:, folders:, all:) FIRST
- searchType = "all" # Default
- if searchQuery.startswith(("files:", "folders:", "all:")):
- typeParts = searchQuery.split(':', 1)
- searchType = typeParts[0].strip()
- searchQuery = typeParts[1].strip()
-
- # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
- def _extractSiteHint(q: str) -> tuple[str, Optional[str]]:
- try:
- qStrip = q.strip()
- # Leading form: site:KM LayerFinance ...
- if qStrip.lower().startswith("site:"):
- after = qStrip[5:].lstrip()
- # site name until next space or end
- if ' ' in after:
- siteName, rest = after.split(' ', 1)
- else:
- siteName, rest = after, ''
- return rest.strip(), siteName.strip()
- # Inline key=value form anywhere
- m = re.search(r"\bsite=([^;\s]+)", qStrip, flags=re.IGNORECASE)
- if m:
- siteName = m.group(1).strip()
- # remove the token from query
- qNew = re.sub(r"\bsite=[^;\s]+;?", "", qStrip, flags=re.IGNORECASE).strip()
- return qNew, siteName
- except Exception:
- pass
- return q, None
-
- searchQuery, extractedSite = _extractSiteHint(searchQuery)
- if extractedSite:
- searchOptions["site_hint"] = extractedSite
- logger.info(f"Extracted site hint: '{extractedSite}'")
-
- # Extract name="..." if present (for quoted multi-word names)
- nameMatch = re.search(r"name=\"([^\"]+)\"", searchQuery)
- if nameMatch:
- searchQuery = nameMatch.group(1)
- logger.info(f"Extracted name from quotes: '{searchQuery}'")
-
- # Check for search mode specification (exact:, regex:, case:, and:)
- if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
- modeParts = searchQuery.split(':', 1)
- mode = modeParts[0].strip()
- searchQuery = modeParts[1].strip()
-
- if mode == "exact":
- searchOptions["exact_match"] = True
- # Remove quotes if present
- if searchQuery.startswith('"') and searchQuery.endswith('"'):
- searchQuery = searchQuery[1:-1]
- elif mode == "regex":
- searchOptions["regex_match"] = True
- elif mode == "case":
- searchOptions["case_sensitive"] = True
- elif mode == "and":
- searchOptions["and_terms"] = True
-
- # Check if it contains path:search format
- # Microsoft-standard paths: /sites/SiteName/Path:files:.pdf
- if ':' in searchQuery:
- # For Microsoft-standard paths (/sites/...), find the colon that separates path from search
- if searchQuery.startswith('/sites/'):
- # Find the colon that separates path from search (after the full path)
- # Look for pattern: /sites/SiteName/Path/...:files:.pdf
- # We need to find the colon that's followed by search type or file extension
- colonPositions = []
- for i, char in enumerate(searchQuery):
- if char == ':':
- colonPositions.append(i)
-
- # If we have colons, find the one that's followed by search type or file extension
- splitPos = None
- if colonPositions:
- for pos in colonPositions:
- afterColon = searchQuery[pos+1:pos+10].strip().lower()
- # Check if this colon is followed by search type or looks like a file extension
- if afterColon.startswith(('files:', 'folders:', 'all:', '.')) or afterColon == '':
- splitPos = pos
- break
-
- # If no clear split found, use the last colon
- if splitPos is None and colonPositions:
- splitPos = colonPositions[-1]
-
- if splitPos:
- pathPart = searchQuery[:splitPos].strip()
- searchPart = searchQuery[splitPos+1:].strip()
- else:
- # Fallback: split on first colon
- parts = searchQuery.split(':', 1)
- pathPart = parts[0].strip()
- searchPart = parts[1].strip()
- else:
- # Regular path:search format - split on first colon
- parts = searchQuery.split(':', 1)
- pathPart = parts[0].strip()
- searchPart = parts[1].strip()
-
- # Check if searchPart starts with search type (files:, folders:, all:)
- if searchPart.startswith(("files:", "folders:", "all:")):
- typeParts = searchPart.split(':', 1)
- searchType = typeParts[0].strip() # Update searchType
- searchPart = typeParts[1].strip() if len(typeParts) > 1 else ""
-
- # Handle path part
- if not pathPart or pathPart == "*":
- pathQuery = "*"
- elif pathPart.startswith('/'):
- pathQuery = pathPart
- else:
- pathQuery = f"/Documents/{pathPart}"
-
- # Handle search part
- if not searchPart or searchPart == "*":
- fileQuery = "*"
- else:
- fileQuery = searchPart
-
- return pathQuery, fileQuery, searchType, searchOptions
-
- # No colon - check if it looks like a path
- elif searchQuery.startswith('/'):
- # It's a path only
- return searchQuery, "*", searchType, searchOptions
-
- else:
- # It's a search term only - keep words as-is, do NOT convert to paths
- # "root document lesson" stays as "root document lesson"
- # "root, gose" stays as "root, gose"
- return "*", searchQuery, searchType, searchOptions
-
- except Exception as e:
- logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
- raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}")
-
- def _resolvePathQuery(self, pathQuery: str) -> List[str]:
- """
- Resolve pathQuery into a list of search paths for SharePoint operations.
-
- Parameters:
- pathQuery (str): Query string that can contain:
- - Direct paths (e.g., "/Documents/Project1")
- - Wildcards (e.g., "/Documents/*")
- - Multiple paths separated by semicolons (e.g., "/Docs; /Files")
- - Single word relative paths (e.g., "Project1" -> resolved to default folder)
- - Empty string or "*" for global search
- - Space-separated words are treated as search terms, NOT folder paths
-
- Returns:
- List[str]: List of resolved paths
- """
- try:
- if not pathQuery or not pathQuery.strip() or pathQuery.strip() == "*":
- return ["*"] # Global search across all sites
-
- # Split by semicolon to handle multiple paths
- rawPaths = [path.strip() for path in pathQuery.split(';') if path.strip()]
- resolvedPaths = []
-
- for rawPath in rawPaths:
- # Handle wildcards - return as-is
- if '*' in rawPath:
- resolvedPaths.append(rawPath)
- # Handle absolute paths
- elif rawPath.startswith('/'):
- resolvedPaths.append(rawPath)
- # Handle single word relative paths - prepend default folder
- # BUT NOT space-separated words (those are search terms, not paths)
- elif ' ' not in rawPath:
- resolvedPaths.append(f"/Documents/{rawPath}")
- else:
- # Check if this looks like a path (has path separators) or search terms
- if '\\' in rawPath or '/' in rawPath:
- # This looks like a path with spaces in folder names - treat as valid path
- resolvedPaths.append(rawPath)
- logger.info(f"Path with spaces '{rawPath}' treated as valid folder path")
- else:
- # Space-separated words without path separators are search terms
- # Return as "*" to search globally
- logger.info(f"Space-separated words '{rawPath}' treated as search terms, not folder path")
- resolvedPaths.append("*")
-
- # Remove duplicates while preserving order
- seen = set()
- uniquePaths = []
- for path in resolvedPaths:
- if path not in seen:
- seen.add(path)
- uniquePaths.append(path)
-
- logger.info(f"Resolved pathQuery '{pathQuery}' to {len(uniquePaths)} paths: {uniquePaths}")
- return uniquePaths
-
- except Exception as e:
- logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
- raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}")
-
- def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
- """Parse SharePoint site URL to extract hostname and site path"""
- try:
- parsed = urllib.parse.urlparse(siteUrl)
- hostname = parsed.hostname
- path = parsed.path.strip('/')
-
- return {
- "hostname": hostname,
- "sitePath": path
- }
- except Exception as e:
- logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
- return {"hostname": "", "sitePath": ""}
-
- def _cleanSearchQuery(self, query: str) -> str:
- """
- Clean search query to make it compatible with Graph API KQL syntax.
- Removes path-like syntax and invalid KQL constructs.
-
- Parameters:
- query (str): Raw search query that may contain paths and invalid syntax
-
- Returns:
- str: Cleaned query suitable for Graph API search endpoint
- """
- if not query or not query.strip():
- return ""
-
- query = query.strip()
-
- # Handle patterns like: "Company Share/Freigegebene Dokumente/.../expenses:files:.pdf"
- # Extract the search term and file extension
-
- # First, extract file extension if present (format: :files:.pdf or just .pdf at the end)
- fileExtension = ""
- if ':files:' in query.lower() or ':folders:' in query.lower():
- # Extract extension after the type filter
- extMatch = re.search(r':(?:files|folders):(\.\w+)', query, re.IGNORECASE)
- if extMatch:
- fileExtension = extMatch.group(1)
- # Remove the type filter part
- query = re.sub(r':(?:files|folders):\.?\w*', '', query, flags=re.IGNORECASE)
- elif query.endswith(('.pdf', '.doc', '.docx', '.xls', '.xlsx', '.txt', '.csv', '.ppt', '.pptx')):
- # Extract extension from end
- extMatch = re.search(r'(\.\w+)$', query)
- if extMatch:
- fileExtension = extMatch.group(1)
- query = query[:-len(fileExtension)]
-
- # Extract search term: get the last segment after the last slash (filename part)
- queryNormalized = query.replace('\\', '/')
- if '/' in queryNormalized:
- # Extract the last segment (usually the filename/search term)
- lastSegment = queryNormalized.split('/')[-1]
- # Remove any remaining colons or type filters
- if ':' in lastSegment:
- lastSegment = lastSegment.split(':')[0]
- searchTerm = lastSegment.strip()
- else:
- # No path separators, use the query as-is but remove type filters
- if ':' in query:
- searchTerm = query.split(':')[0].strip()
- else:
- searchTerm = query.strip()
-
- # Remove any remaining type filters or invalid syntax
- searchTerm = re.sub(r':(?:files|folders|all):?', '', searchTerm, flags=re.IGNORECASE)
- searchTerm = searchTerm.strip()
-
- # If we have a file extension, include it in the search term
- # Note: Graph API search endpoint may not support filetype: syntax
- # So we include the extension as part of the search term or filter results after
- if fileExtension:
- extWithoutDot = fileExtension.lstrip('.')
- # Try simple approach: add extension as search term
- # If this doesn't work, we'll filter results after search
- if searchTerm:
- # Include extension in search - Graph API will search in filename
- searchTerm = f"{searchTerm} {extWithoutDot}"
- else:
- searchTerm = extWithoutDot
-
- # Final cleanup: remove any remaining invalid characters for KQL
- # Keep alphanumeric, spaces, hyphens, underscores, dots, and common search operators
- searchTerm = re.sub(r'[^\w\s\-\.\*]', ' ', searchTerm)
- searchTerm = ' '.join(searchTerm.split()) # Normalize whitespace
-
- return searchTerm if searchTerm else "*"
-
- async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
- """Make a Microsoft Graph API call with timeout and detailed logging"""
- try:
- if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.accessToken:
- return {"error": "SharePoint service not configured with access token"}
-
- headers = {
- "Authorization": f"Bearer {self.services.sharepoint._target.accessToken}",
- "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
- }
-
- url = f"https://graph.microsoft.com/v1.0/{endpoint}"
- logger.info(f"Making Graph API call: {method} {url}")
-
- # Set timeout to 30 seconds
- timeout = aiohttp.ClientTimeout(total=30)
-
- async with aiohttp.ClientSession(timeout=timeout) as session:
- if method == "GET":
- logger.debug(f"Starting GET request to {url}")
- async with session.get(url, headers=headers) as response:
- logger.info(f"Graph API response: {response.status}")
- if response.status == 200:
- result = await response.json()
- logger.debug(f"Graph API success: {len(str(result))} characters response")
- return result
- else:
- errorText = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {errorText}")
- return {"error": f"API call failed: {response.status} - {errorText}"}
-
- elif method == "PUT":
- logger.debug(f"Starting PUT request to {url}")
- async with session.put(url, headers=headers, data=data) as response:
- logger.info(f"Graph API response: {response.status}")
- if response.status in [200, 201]:
- result = await response.json()
- logger.debug(f"Graph API success: {len(str(result))} characters response")
- return result
- else:
- errorText = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {errorText}")
- return {"error": f"API call failed: {response.status} - {errorText}"}
-
- elif method == "POST":
- logger.debug(f"Starting POST request to {url}")
- async with session.post(url, headers=headers, data=data) as response:
- logger.info(f"Graph API response: {response.status}")
- if response.status in [200, 201]:
- result = await response.json()
- logger.debug(f"Graph API success: {len(str(result))} characters response")
- return result
- else:
- errorText = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {errorText}")
- return {"error": f"API call failed: {response.status} - {errorText}"}
-
- except asyncio.TimeoutError:
- logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
- return {"error": f"API call timed out after 30 seconds: {endpoint}"}
- except Exception as e:
- logger.error(f"Error making Graph API call: {str(e)}")
- return {"error": f"Error making Graph API call: {str(e)}"}
-
- async def _getSiteId(self, hostname: str, sitePath: str) -> str:
- """Get SharePoint site ID from hostname and site path"""
- try:
- endpoint = f"sites/{hostname}:/{sitePath}"
- result = await self._makeGraphApiCall(endpoint)
-
- if "error" in result:
- logger.error(f"Error getting site ID: {result['error']}")
- return ""
-
- return result.get("id", "")
- except Exception as e:
- logger.error(f"Error getting site ID: {str(e)}")
- return ""
-
- async def _parseDocumentListForFoundDocuments(self, documentList: Any) -> tuple[Optional[List[Dict[str, Any]]], Optional[List[Dict[str, Any]]], Optional[str]]:
- """
- Parse documentList to extract foundDocuments and site information.
-
- Parameters:
- documentList: Document list (can be list, DocumentReferenceList, or string)
-
- Returns:
- tuple: (foundDocuments, sites, errorMessage)
- - foundDocuments: List of found documents from findDocumentPath result
- - sites: List of site dictionaries with id, displayName, webUrl
- - errorMessage: Error message if parsing failed, None otherwise
- """
- try:
- if isinstance(documentList, str):
- documentList = [documentList]
-
- # Resolve documentList to get actual documents
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- else:
- docRefList = DocumentReferenceList(references=[])
-
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
- if not chatDocuments:
- return None, None, "No documents found for the provided document list"
-
- firstDocument = chatDocuments[0]
- fileData = self.services.chat.getFileData(firstDocument.fileId)
- if not fileData:
- return None, None, None # No fileData, but not an error (might be regular file)
-
- try:
- resultData = json.loads(fileData)
- foundDocuments = resultData.get("foundDocuments", [])
-
- # If no foundDocuments, check if it's a listDocuments result (has listResults)
- if not foundDocuments and "listResults" in resultData:
- logger.info(f"documentList contains listResults from listDocuments, converting to foundDocuments format")
- listResults = resultData.get("listResults", [])
- foundDocuments = []
- siteIdFromList = None
- siteNameFromList = None
-
- for listResult in listResults:
- siteResults = listResult.get("siteResults", [])
- for siteResult in siteResults:
- items = siteResult.get("items", [])
- # Extract site info from first item if available
- if items and not siteIdFromList:
- siteNameFromList = items[0].get("siteName")
-
- for item in items:
- # Convert listDocuments item format to foundDocuments format
- if item.get("type") == "file":
- foundDoc = {
- "id": item.get("id"),
- "name": item.get("name"),
- "type": "file",
- "siteName": item.get("siteName"),
- "siteId": None, # Will be determined from site discovery
- "webUrl": item.get("webUrl"),
- "fullPath": item.get("webUrl", ""),
- "parentPath": item.get("parentPath", "")
- }
- foundDocuments.append(foundDoc)
-
- # Discover sites to get siteId if we have siteName
- if foundDocuments and siteNameFromList and not siteIdFromList:
- logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
- if matchingSites:
- siteIdFromList = matchingSites[0].get("id")
- # Update all foundDocuments with siteId
- for doc in foundDocuments:
- doc["siteId"] = siteIdFromList
- logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
-
- logger.info(f"Converted {len(foundDocuments)} files from listResults format")
-
- if not foundDocuments:
- return None, None, None # No foundDocuments, but not an error
-
- # Extract site information from foundDocuments
- firstDoc = foundDocuments[0]
- siteName = firstDoc.get("siteName")
- siteId = firstDoc.get("siteId")
-
- # If siteId is missing (from listDocuments conversion), discover sites to find it
- if siteName and not siteId:
- logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteName)
- if matchingSites:
- siteId = matchingSites[0].get("id")
- logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
-
- sites = None
- if siteName and siteId:
- sites = [{
- "id": siteId,
- "displayName": siteName,
- "webUrl": firstDoc.get("webUrl", "")
- }]
- logger.info(f"Using specific site from documentList: {siteName} (ID: {siteId})")
- elif siteName:
- # Try to get site by name
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteName)
- if matchingSites:
- sites = [{
- "id": matchingSites[0].get("id"),
- "displayName": siteName,
- "webUrl": matchingSites[0].get("webUrl", "")
- }]
- logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
- else:
- return None, None, f"Site '{siteName}' not found. Cannot determine target site."
- else:
- return None, None, "Site information missing from documentList. Cannot determine target site."
-
- return foundDocuments, sites, None
-
- except json.JSONDecodeError as e:
- return None, None, f"Invalid JSON in documentList: {str(e)}"
- except Exception as e:
- return None, None, f"Error processing documentList: {str(e)}"
-
- except Exception as e:
- logger.error(f"Error parsing documentList: {str(e)}")
- return None, None, f"Error parsing documentList: {str(e)}"
-
- async def _resolveSitesFromPathQuery(self, pathQuery: str) -> tuple[List[Dict[str, Any]], Optional[str]]:
- """
- Resolve sites from pathQuery using SharePoint service helper methods.
-
- Parameters:
- pathQuery (str): Path query string
-
- Returns:
- tuple: (sites, errorMessage)
- - sites: List of site dictionaries
- - errorMessage: Error message if resolution failed, None otherwise
- """
- try:
- # Validate pathQuery format
- isValid, errorMsg = self.services.sharepoint.validatePathQuery(pathQuery)
- if not isValid:
- return [], errorMsg
-
- # Resolve sites using service helper
- sites = await self.services.sharepoint.resolveSitesFromPathQuery(pathQuery)
- if not sites:
- return [], "No SharePoint sites found or accessible"
-
- return sites, None
- except Exception as e:
- logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}")
- return [], f"Error resolving sites from pathQuery: {str(e)}"
-
-
- @action
- async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Find documents and folders by name/path across sites.
- - Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
- - Output format: JSON with found items and paths.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - site (str, optional): Site hint.
- - searchQuery (str, required): Search terms or path.
- - maxResults (int, optional): Maximum items to return. Default: 1000.
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_find_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Find Document Path",
- "SharePoint Search",
- f"Query: {parameters.get('searchQuery', '*')}",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- site = parameters.get("site")
- searchQuery = parameters.get("searchQuery", "*")
- maxResults = parameters.get("maxResults", 1000)
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Parse searchQuery to extract path, search terms, search type, and options
- pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)
- logger.debug(f"Parsed searchQuery '{searchQuery}' -> pathQuery='{pathQuery}', fileQuery='{fileQuery}', searchType='{searchType}'")
-
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Extract site name from pathQuery if it contains Microsoft-standard path (/sites/SiteName/...)
- siteFromPath = None
- directSite = None
- if pathQuery and pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- siteFromPath = parsedPath.get("siteName")
- logger.info(f"Extracted site from Microsoft-standard pathQuery '{pathQuery}': '{siteFromPath}'")
-
- # Try to get site directly by path (optimization - no need to load all 60 sites)
- directSite = await self._getSiteByStandardPath(siteFromPath)
- if directSite:
- logger.info(f"Got site directly by standard path - no need to discover all sites")
- sites = [directSite]
- else:
- logger.warning(f"Could not get site directly, falling back to site discovery")
- directSite = None
- else:
- logger.warning(f"Failed to parse site from standard pathQuery '{pathQuery}'")
-
- # If we didn't get the site directly, use discovery and filtering
- if not directSite:
- # Determine which site hint to use (priority: site parameter > site from pathQuery > site_hint from searchOptions)
- siteHintToUse = site or siteFromPath or searchOptions.get("site_hint")
-
- # Discover SharePoint sites - use targeted approach when site hint is available
- self.services.chat.progressLogUpdate(operationId, 0.3, "Discovering SharePoint sites")
- if siteHintToUse:
- # When site hint is available, discover all sites first, then filter
- allSites = await self._discoverSharePointSites()
- if not allSites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- sites = self._filterSitesByHint(allSites, siteHintToUse)
- logger.info(f"Filtered sites by site hint '{siteHintToUse}' -> {len(sites)} sites")
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"No SharePoint sites found matching '{siteHintToUse}'")
- else:
- # No site hint - discover all sites
- sites = await self._discoverSharePointSites()
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- # Resolve path query into search paths
- searchPaths = self._resolvePathQuery(pathQuery)
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Searching across {len(sites)} site(s)")
-
- try:
- # Search across all discovered sites
- foundDocuments = []
- allSitesSearched = []
-
- # Handle different search approaches based on search type
- if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
- # Use unified search for folders - this is global and searches all sites
- try:
-
- # Use Microsoft Graph Search API syntax (simple term search only)
- terms = [t for t in fileQuery.split() if t.strip()]
-
- if len(terms) > 1:
- # Multiple terms: search for ALL terms (AND) - more specific results
- queryString = " AND ".join(terms)
- else:
- # Single term: search for the term
- queryString = terms[0] if terms else fileQuery
- logger.info(f"Using unified search for folders: {queryString}")
-
- payload = {
- "requests": [
- {
- "entityTypes": ["driveItem"],
- "query": {"queryString": queryString},
- "from": 0,
- "size": 50
- }
- ]
- }
- logger.info(f"Using unified search API for folders with queryString: {queryString}")
-
- # Use global search endpoint (site-specific search not available)
- unifiedResult = await self._makeGraphApiCall(
- "search/query",
- method="POST",
- data=json.dumps(payload).encode("utf-8")
- )
-
- if "error" in unifiedResult:
- logger.warning(f"Unified search failed: {unifiedResult['error']}")
- items = []
- else:
- # Flatten hits -> driveItem resources
- items = []
- for container in (unifiedResult.get("value", []) or []):
- for hitsContainer in (container.get("hitsContainers", []) or []):
- for hit in (hitsContainer.get("hits", []) or []):
- resource = hit.get("resource")
- if resource:
- items.append(resource)
-
- logger.info(f"Unified search returned {len(items)} items (pre-filter)")
-
- # Apply our improved folder detection logic
- folderItems = []
- for item in items:
- resource = item
-
- # Use the same detection logic as our test
- isFolder = self.services.sharepoint.detectFolderType(resource)
-
- if isFolder:
- folderItems.append(item)
-
- items = folderItems
- logger.info(f"Filtered to {len(items)} folders using improved detection logic")
-
- # Process unified search results - extract site information from webUrl
- for item in items:
- itemName = item.get("name", "")
- webUrl = item.get("webUrl", "")
-
- # Extract site information from webUrl
- siteName = "Unknown Site"
- siteId = "unknown"
-
- if webUrl and '/sites/' in webUrl:
- try:
- # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
- urlParts = webUrl.split('/sites/')
- if len(urlParts) > 1:
- sitePath = urlParts[1].split('/')[0]
- # Find matching site from discovered sites
- # First try to match by site name (URL path)
- for site in sites:
- if site.get("name") == sitePath:
- siteName = site.get("displayName", sitePath)
- siteId = site.get("id", "unknown")
- break
- else:
- # If no match by name, try to match by displayName
- for site in sites:
- if site.get("displayName") == sitePath:
- siteName = site.get("displayName", sitePath)
- siteId = site.get("id", "unknown")
- break
- else:
- # If no exact match, use the site path as site name
- siteName = sitePath
- # Try to find a site with similar name
- for site in sites:
- if sitePath.lower() in site.get("name", "").lower() or sitePath.lower() in site.get("displayName", "").lower():
- siteName = site.get("displayName", sitePath)
- siteId = site.get("id", "unknown")
- break
- except Exception as e:
- logger.warning(f"Error extracting site info from URL {webUrl}: {e}")
-
- # Use improved folder detection logic
- isFolder = self.services.sharepoint.detectFolderType(item)
- itemType = "folder" if isFolder else "file"
- itemPath = item.get("parentReference", {}).get("path", "")
- logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
-
- # Simple filtering like test file - just check search type
- if searchType == "files" and isFolder:
- continue # Skip folders when searching for files
- elif searchType == "folders" and not isFolder:
- continue # Skip files when searching for folders
-
- # Simple approach like test file - no complex filtering
- logger.debug(f"Item '{itemName}' found - adding to results")
-
- # Create result with full path information for proper action chaining
- parentPath = item.get("parentReference", {}).get("path", "")
-
- # Extract the full SharePoint path from webUrl or parentReference
- fullPath = ""
- if webUrl:
- # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
- if '/sites/' in webUrl:
- pathPart = webUrl.split('/sites/')[1]
- # Decode URL encoding and convert to backslash format
- decodedPath = urllib.parse.unquote(pathPart)
- fullPath = "\\" + decodedPath.replace('/', '\\')
- elif parentPath:
- # Use parentReference path if available
- fullPath = parentPath.replace('/', '\\')
-
- docInfo = {
- "id": item.get("id"),
- "name": item.get("name"),
- "type": "folder" if isFolder else "file",
- "siteName": siteName,
- "siteId": siteId,
- "webUrl": webUrl,
- "fullPath": fullPath,
- "parentPath": parentPath
- }
-
- foundDocuments.append(docInfo)
-
- logger.info(f"Found {len(foundDocuments)} documents from unified search")
-
- except Exception as e:
- logger.error(f"Error performing unified folder search: {str(e)}")
- # Fallback to site-by-site search
- pass
-
- # If no unified search was performed or it failed, fall back to site-by-site search
- if not foundDocuments:
- # Use simple approach like test file - no complex filtering
- siteScopedSites = sites
-
- for site in siteScopedSites:
- siteId = site["id"]
- siteName = site["displayName"]
- siteUrl = site["webUrl"]
-
- logger.info(f"Searching in site: {siteName} ({siteUrl})")
-
- # Check if pathQuery contains a specific folder path (not just /sites/SiteName)
- folderPath = None
- if pathQuery and pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- innerPath = parsedPath.get("innerPath", "")
- if innerPath and innerPath.strip():
- # Remove leading slash if present
- folderPath = innerPath.lstrip('/')
-
- # Generic approach: Try to find the folder, if it fails, remove first segment
- # This works for all languages because we test the actual API response
- # In SharePoint Graph API, /drive/root already points to the default document library,
- # so library names in paths should be removed
- pathSegments = [s for s in folderPath.split('/') if s.strip()]
- if len(pathSegments) > 1:
- # Try with first segment removed (first segment is likely the document library)
- testPath = '/'.join(pathSegments[1:])
- # Quick test: try to get folder info (this is fast and doesn't require full search)
- testEndpoint = f"sites/{siteId}/drive/root:/{urllib.parse.quote(testPath, safe='')}:"
- testResult = await self._makeGraphApiCall(testEndpoint)
- if testResult and "error" not in testResult:
- # Path without first segment works - first segment was likely the document library
- folderPath = testPath
- logger.info(f"Removed document library name '{pathSegments[0]}' from folder path (tested via API)")
- else:
- # Keep original path - first segment is not a document library
- logger.info(f"Keeping original folder path '{folderPath}' (first segment is not a document library)")
- elif len(pathSegments) == 1:
- # Only one segment - likely the document library itself, use root
- folderPath = None
- logger.info(f"Only one segment '{pathSegments[0]}' found, likely document library - using root")
-
- if folderPath:
- logger.info(f"Extracted folder path from pathQuery: '{folderPath}'")
- else:
- logger.info(f"Folder path resolved to root (only document library in path)")
-
- # Use Microsoft Graph API for this specific site
- # Handle empty or wildcard queries
- if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
- # For wildcard/empty queries, list all items
- if folderPath:
- # List items in specific folder
- encodedPath = urllib.parse.quote(folderPath, safe='')
- endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/children"
- logger.info(f"Listing items in folder: '{folderPath}'")
- else:
- # List all items in the drive root
- endpoint = f"sites/{siteId}/drive/root/children"
-
- # Make the API call to list items
- listResult = await self._makeGraphApiCall(endpoint)
- if "error" in listResult:
- logger.warning(f"List failed for site {siteName}: {listResult['error']}")
- continue
- # Process list results for this site
- items = listResult.get("value", [])
- logger.info(f"Retrieved {len(items)} items from site {siteName}")
- else:
- # For files, use regular search API
- # Clean the query: remove path-like syntax and invalid KQL syntax
- searchQuery = self._cleanSearchQuery(fileQuery)
- # URL-encode the query parameter
- encodedQuery = urllib.parse.quote(searchQuery, safe='')
-
- if folderPath:
- # Search in specific folder
- encodedPath = urllib.parse.quote(folderPath, safe='')
- endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/search(q='{encodedQuery}')"
- logger.info(f"Searching in folder '{folderPath}' with query: '{searchQuery}' (encoded: '{encodedQuery}')")
- else:
- # Search in drive root
- endpoint = f"sites/{siteId}/drive/root/search(q='{encodedQuery}')"
- logger.info(f"Using search API for files with query: '{searchQuery}' (encoded: '{encodedQuery}')")
-
- # Make the search API call (files)
- searchResult = await self._makeGraphApiCall(endpoint)
- if "error" in searchResult:
- logger.warning(f"Search failed for site {siteName}: {searchResult['error']}")
- continue
- # Process search results for this site (files)
- items = searchResult.get("value", [])
- logger.info(f"Retrieved {len(items)} items from site {siteName}")
-
- siteDocuments = []
-
- for item in items:
- itemName = item.get("name", "")
-
- # Use improved folder detection logic
- isFolder = self.services.sharepoint.detectFolderType(item)
-
- itemType = "folder" if isFolder else "file"
- itemPath = item.get("parentReference", {}).get("path", "")
- logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
-
- # Simple filtering like test file - just check search type
- if searchType == "files" and isFolder:
- continue # Skip folders when searching for files
- elif searchType == "folders" and not isFolder:
- continue # Skip files when searching for folders
-
- # Simple approach like test file - no complex filtering
- logger.debug(f"Item '{itemName}' found - adding to results")
-
- # Create result with full path information for proper action chaining
- webUrl = item.get("webUrl", "")
- parentPath = item.get("parentReference", {}).get("path", "")
-
- # Extract the full SharePoint path from webUrl or parentReference
- fullPath = ""
- if webUrl:
- # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
- if '/sites/' in webUrl:
- pathPart = webUrl.split('/sites/')[1]
- # Decode URL encoding and convert to backslash format
- decodedPath = urllib.parse.unquote(pathPart)
- fullPath = "\\" + decodedPath.replace('/', '\\')
- elif parentPath:
- # Use parentReference path if available
- fullPath = parentPath.replace('/', '\\')
-
- docInfo = {
- "id": item.get("id"),
- "name": item.get("name"),
- "type": "folder" if isFolder else "file",
- "siteName": siteName,
- "siteId": siteId,
- "webUrl": webUrl,
- "fullPath": fullPath,
- "parentPath": parentPath
- }
-
- siteDocuments.append(docInfo)
-
- foundDocuments.extend(siteDocuments)
- allSitesSearched.append({
- "siteName": siteName,
- "siteUrl": siteUrl,
- "siteId": siteId,
- "documentsFound": len(siteDocuments)
- })
-
- logger.info(f"Found {len(siteDocuments)} documents in site {siteName}")
-
- # Limit total results to maxResults
- if len(foundDocuments) > maxResults:
- foundDocuments = foundDocuments[:maxResults]
- logger.info(f"Limited results to {maxResults} items")
-
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {len(foundDocuments)} document(s)")
-
- resultData = {
- "searchQuery": searchQuery,
- "totalResults": len(foundDocuments),
- "maxResults": maxResults,
- "foundDocuments": foundDocuments,
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- except Exception as e:
- logger.error(f"Error searching SharePoint: {str(e)}")
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=str(e))
-
- # Use default JSON format for output
- outputExtension = ".json" # Default
- outputMimeType = "application/json" # Default
-
- validationMetadata = {
- "actionType": "sharepoint.findDocumentPath",
- "searchQuery": searchQuery,
- "maxResults": maxResults,
- "totalResults": len(foundDocuments),
- "hasResults": len(foundDocuments) > 0
- }
-
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{outputExtension}",
- documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType,
- validationMetadata=validationMetadata
- )
- ]
- )
-
- except Exception as e:
- logger.error(f"Error finding document path: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass
- return ActionResult.isFailure(error=str(e))
-
- @action
- async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Read documents from SharePoint and extract content/metadata.
- - Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
- - Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
- - Binary files (PDFs, etc.) are Base64-encoded in documentData.
- - Text files are stored as plain text in documentData.
- - Returns ActionResult with documents list for template processing.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
- - pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
- - includeMetadata (bool, optional): Include metadata. Default: True.
-
- Returns:
- - ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
- - documentName: File name
- - documentData: Base64-encoded content (binary files) or plain text (text files)
- - mimeType: MIME type (e.g., application/pdf, text/plain)
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_read_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Read Documents",
- "SharePoint Document Reading",
- "Processing document list",
- parentOperationId=parentOperationId
- )
-
- documentList = parameters.get("documentList")
- pathQuery = parameters.get("pathQuery", "*")
- connectionReference = parameters.get("connectionReference")
- includeMetadata = parameters.get("includeMetadata", True)
-
- # Validate connection reference
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Require either documentList or pathQuery
- if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList or pathQuery is required")
-
- # Get connection first
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Parse documentList to extract foundDocuments and site information
- sharePointFileIds = None
- sites = None
-
- if documentList:
- foundDocuments, sites, errorMsg = await self._parseDocumentListForFoundDocuments(documentList)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- if foundDocuments:
- # Extract SharePoint file IDs from foundDocuments
- sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
- if not sharePointFileIds:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No files found in documentList from findDocumentPath result")
- logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from documentList")
-
- # If we have SharePoint file IDs from documentList (findDocumentPath result), read them directly
- if sharePointFileIds and sites:
- # Read SharePoint files directly using their IDs
- readResults = []
- siteId = sites[0]['id']
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Reading {len(sharePointFileIds)} file(s) from SharePoint")
- for idx, fileId in enumerate(sharePointFileIds):
- try:
- self.services.chat.progressLogUpdate(operationId, 0.5 + (idx * 0.3 / len(sharePointFileIds)), f"Reading file {idx + 1}/{len(sharePointFileIds)}")
- # Get file info from SharePoint
- endpoint = f"sites/{siteId}/drive/items/{fileId}"
- fileInfo = await self._makeGraphApiCall(endpoint)
-
- if "error" in fileInfo:
- logger.warning(f"Failed to get file info for {fileId}: {fileInfo['error']}")
- continue
-
- # Get file content using SharePoint service (handles binary data correctly)
- fileName = fileInfo.get("name", f"file_{fileId}")
- fileContent = await self.services.sharepoint.downloadFile(siteId, fileId)
-
- # Create result document
- resultItem = {
- "fileId": fileId,
- "fileName": fileName,
- "sharepointFileId": fileId,
- "siteName": sites[0]['displayName'],
- "siteUrl": sites[0]['webUrl'],
- "size": fileInfo.get("size", 0),
- "createdDateTime": fileInfo.get("createdDateTime"),
- "lastModifiedDateTime": fileInfo.get("lastModifiedDateTime"),
- "webUrl": fileInfo.get("webUrl")
- }
-
- # Add content if available
- if fileContent:
- resultItem["content"] = fileContent
-
- # Add metadata if requested
- if includeMetadata:
- resultItem["metadata"] = {
- "mimeType": fileInfo.get("file", {}).get("mimeType"),
- "downloadUrl": fileInfo.get("@microsoft.graph.downloadUrl"),
- "createdBy": fileInfo.get("createdBy", {}),
- "lastModifiedBy": fileInfo.get("lastModifiedBy", {}),
- "parentReference": fileInfo.get("parentReference", {})
- }
-
- readResults.append(resultItem)
- except Exception as e:
- logger.error(f"Error reading file {fileId}: {str(e)}")
- continue
-
- if not readResults:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No files could be read from documentList")
-
- # Convert read results to ActionDocument objects
- # IMPORTANT: For binary files (PDFs), store Base64-encoded content directly in documentData
- # The system will create FileData and ChatDocument automatically
- self.services.chat.progressLogUpdate(operationId, 0.8, f"Processing {len(readResults)} document(s)")
- from modules.datamodels.datamodelChat import ActionDocument
- import base64
-
- actionDocuments = []
- for resultItem in readResults:
- fileContent = resultItem.get("content")
- fileName = resultItem.get("fileName", f"file_{resultItem.get('fileId')}")
-
- # Determine MIME type from metadata or file extension
- mimeType = "application/octet-stream"
- if resultItem.get("metadata", {}).get("mimeType"):
- mimeType = resultItem["metadata"]["mimeType"]
- elif fileName:
- if fileName.endswith('.pdf'):
- mimeType = "application/pdf"
- elif fileName.endswith('.txt'):
- mimeType = "text/plain"
- elif fileName.endswith('.json'):
- mimeType = "application/json"
-
- # For binary files (PDFs, etc.), store Base64-encoded content directly
- # The GenerationService will detect PDF mimeType and handle base64 decoding
- if fileContent and isinstance(fileContent, bytes):
- # Encode binary content as Base64 string
- base64Content = base64.b64encode(fileContent).decode('utf-8')
- validationMetadata = {
- "actionType": "sharepoint.readDocuments",
- "fileName": fileName,
- "sharepointFileId": resultItem.get("sharepointFileId"),
- "siteName": resultItem.get("siteName"),
- "mimeType": mimeType,
- "contentType": "binary",
- "size": len(fileContent),
- "includeMetadata": includeMetadata
- }
- actionDoc = ActionDocument(
- documentName=fileName,
- documentData=base64Content, # Base64 string for binary files
- mimeType=mimeType,
- validationMetadata=validationMetadata
- )
- actionDocuments.append(actionDoc)
- logger.info(f"Stored binary file {fileName} ({len(fileContent)} bytes) as Base64 in ActionDocument")
- elif fileContent:
- # Text content - store directly in documentData
- validationMetadata = {
- "actionType": "sharepoint.readDocuments",
- "fileName": fileName,
- "sharepointFileId": resultItem.get("sharepointFileId"),
- "siteName": resultItem.get("siteName"),
- "mimeType": mimeType,
- "contentType": "text",
- "includeMetadata": includeMetadata
- }
- actionDoc = ActionDocument(
- documentName=fileName,
- documentData=fileContent if isinstance(fileContent, str) else str(fileContent),
- mimeType=mimeType,
- validationMetadata=validationMetadata
- )
- actionDocuments.append(actionDoc)
- else:
- # No content - store metadata only
- docData = {
- "fileName": fileName,
- "sharepointFileId": resultItem.get("sharepointFileId"),
- "siteName": resultItem.get("siteName"),
- "siteUrl": resultItem.get("siteUrl"),
- "size": resultItem.get("size"),
- "createdDateTime": resultItem.get("createdDateTime"),
- "lastModifiedDateTime": resultItem.get("lastModifiedDateTime"),
- "webUrl": resultItem.get("webUrl")
- }
- if resultItem.get("metadata"):
- docData["metadata"] = resultItem["metadata"]
-
- validationMetadata = {
- "actionType": "sharepoint.readDocuments",
- "fileName": fileName,
- "sharepointFileId": resultItem.get("sharepointFileId"),
- "siteName": resultItem.get("siteName"),
- "mimeType": mimeType,
- "contentType": "metadata_only",
- "includeMetadata": includeMetadata
- }
- actionDoc = ActionDocument(
- documentName=fileName,
- documentData=json.dumps(docData, indent=2),
- mimeType=mimeType,
- validationMetadata=validationMetadata
- )
- actionDocuments.append(actionDoc)
-
- # Return success with action documents
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Read {len(actionDocuments)} document(s)")
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult.isSuccess(documents=actionDocuments)
-
- # If no sites from documentList, try pathQuery fallback
- if not sites and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # If still no sites, return error
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with file information, or pathQuery must be provided. Use findDocumentPath first to get file paths, or provide pathQuery directly.")
-
- # This should never be reached if logic above is correct
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Unexpected error: could not process documentList or pathQuery")
- except Exception as e:
- logger.error(f"Error reading SharePoint documents: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass # Don't fail on progress logging errors
- return ActionResult(
- success=False,
- error=str(e)
- )
-
- @action
- async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
- - Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
- - Output format: JSON with upload status and file info.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
- - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Upload Document",
- "SharePoint Upload",
- "Processing document list",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- documentList = parameters.get("documentList")
- pathQuery = parameters.get("pathQuery")
- if isinstance(documentList, str):
- documentList = [documentList]
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- if not documentList:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Document list is required")
-
- # Parse documentList to extract folder path and site information
- uploadPath, sites, filesToUpload, errorMsg = await self._parseDocumentListForFolder(documentList)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # If no folder path found from documentList, use pathQuery if provided
- if not uploadPath and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- uploadPath = pathQuery
- logger.info(f"Using pathQuery for upload path: {uploadPath}")
- # Resolve sites from pathQuery
- sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # Validate required parameters
- if not uploadPath:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.")
-
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Site information missing. Cannot determine target site for upload.")
-
- if not filesToUpload:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No files to upload found in documentList.")
-
- # Get connection
- self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Process upload paths
- uploadPaths = []
- if uploadPath.startswith('01PPXICCB') or uploadPath.startswith('01'):
- # It's a folder ID - use it directly
- uploadPaths = [uploadPath]
- logger.info(f"Using folder ID directly for upload: {uploadPath}")
- else:
- # It's a path - resolve it normally
- uploadPaths = self._resolvePathQuery(uploadPath)
-
- # Process each document upload
- uploadResults = []
-
- # Extract file names from documents
- fileNames = [doc.fileName for doc in filesToUpload]
- logger.info(f"Using file names from documentList: {fileNames}")
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {len(filesToUpload)} document(s)")
-
- # Process upload paths
-
- # Process each document upload
- uploadResults = []
-
- # Extract file names from documents
- fileNames = [doc.fileName for doc in filesToUpload]
- logger.info(f"Using file names from documentList: {fileNames}")
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {len(filesToUpload)} document(s)")
-
- for i, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)):
- try:
- fileId = chatDocument.fileId
- fileData = self.services.chat.getFileData(fileId)
-
- if not fileData:
- logger.warning(f"File data not found for fileId: {fileId}")
- uploadResults.append({
- "fileName": fileName,
- "fileId": fileId,
- "error": "File data not found",
- "uploadStatus": "failed"
- })
- continue
-
- # Upload to the first available site (or could be made configurable)
- uploadSuccessful = False
-
- for site in sites:
- siteId = site["id"]
- siteName = site["displayName"]
- siteUrl = site["webUrl"]
-
- # Use the first upload path or default to Documents
- uploadPath = uploadPaths[0] if uploadPaths else "/Documents"
-
- # Handle wildcard paths - replace with default Documents folder
- if uploadPath == "*":
- uploadPath = "/Documents"
- logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")
-
- # Check if uploadPath is a folder ID or a regular path
- if uploadPath.startswith('01PPXICCB') or uploadPath.startswith('01'):
- # It's a folder ID - use the folder-specific upload endpoint
- uploadEndpoint = f"sites/{siteId}/drive/items/{uploadPath}:/{fileName}:/content"
- logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
- else:
- # It's a regular path - use the root-based upload endpoint
- uploadPath = uploadPath.rstrip('/') + '/' + fileName
- uploadPathClean = uploadPath.lstrip('/')
- uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
- logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")
-
- # Upload endpoint for small files (< 4MB)
- if len(fileData) < 4 * 1024 * 1024: # 4MB
-
- # Upload the file
- uploadResult = await self._makeGraphApiCall(
- uploadEndpoint,
- method="PUT",
- data=fileData
- )
-
- if "error" not in uploadResult:
- uploadResults.append({
- "fileName": fileName,
- "fileId": fileId,
- "uploadStatus": "success",
- "siteName": siteName,
- "siteUrl": siteUrl,
- "uploadPath": uploadPath,
- "uploadEndpoint": uploadEndpoint,
- "sharepointFileId": uploadResult.get("id"),
- "webUrl": uploadResult.get("webUrl"),
- "size": uploadResult.get("size"),
- "createdDateTime": uploadResult.get("createdDateTime")
- })
- uploadSuccessful = True
- break
- else:
- logger.warning(f"Upload failed to site {siteName}: {uploadResult['error']}")
- else:
- # For large files, we would need to implement resumable upload
- logger.warning(f"File too large ({len(fileData)} bytes) for site {siteName}")
- continue
-
- if not uploadSuccessful:
- uploadResults.append({
- "fileName": fileName,
- "fileId": fileId,
- "error": f"File too large ({len(fileData)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
- "uploadStatus": "failed"
- })
-
- except Exception as e:
- logger.error(f"Error uploading document {fileName}: {str(e)}")
- uploadResults.append({
- "fileName": fileName,
- "fileId": fileId,
- "error": str(e),
- "uploadStatus": "failed"
- })
-
- # Update progress for each file
- self.services.chat.progressLogUpdate(operationId, 0.5 + (i * 0.4 / len(filesToUpload)), f"Uploaded {i + 1}/{len(filesToUpload)} file(s)")
-
- # Create result data
- resultData = {
- "connectionReference": connectionReference,
- "uploadPath": uploadPath,
- "documentList": documentList,
- "fileNames": fileNames,
- "sitesAvailable": len(sites),
- "uploadResults": uploadResults,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- # Use default JSON format for output
- outputExtension = ".json" # Default
- outputMimeType = "application/json" # Default
-
- validationMetadata = {
- "actionType": "sharepoint.uploadDocument",
- "connectionReference": connectionReference,
- "uploadPath": uploadPath,
- "fileNames": fileNames,
- "uploadCount": len(uploadResults),
- "successfulUploads": len([r for r in uploadResults if r.get("uploadStatus") == "success"]),
- "failedUploads": len([r for r in uploadResults if r.get("uploadStatus") == "failed"])
- }
-
- successfulUploads = len([r for r in uploadResults if r.get("uploadStatus") == "success"])
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)")
- self.services.chat.progressLogFinish(operationId, successfulUploads > 0)
-
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{outputExtension}",
- documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType,
- validationMetadata=validationMetadata
- )
- ]
- )
-
- except Exception as e:
- logger.error(f"Error uploading to SharePoint: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass
- return ActionResult(
- success=False,
- error=str(e)
- )
-
- @action
- async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: List documents and folders in SharePoint paths across sites.
- - Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional).
- - Output format: JSON with folder items and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document list reference(s) containing findDocumentPath result.
- - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_list_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "List Documents",
- "SharePoint Listing",
- "Processing document list",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- documentList = parameters.get("documentList")
- pathQuery = parameters.get("pathQuery", "*")
- if isinstance(documentList, str):
- documentList = [documentList]
- includeSubfolders = parameters.get("includeSubfolders", False) # Default to False for better UX
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Require either documentList or pathQuery
- if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList or pathQuery is required")
-
- # Parse documentList to extract folder path and site information
- listQuery, sites, _, errorMsg = await self._parseDocumentListForFolder(documentList)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # If no folder path found from documentList, use pathQuery if provided
- if not listQuery and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- listQuery = pathQuery
- logger.info(f"Using pathQuery for list query: {listQuery}")
- # Resolve sites from pathQuery
- sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- # Validate required parameters
- if not listQuery:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")
-
- if not sites:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Site information missing. Cannot determine target site for list operation.")
-
- # Get connection
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
- logger.debug(f"Connection ID: {connection['id']}")
-
- self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path")
-
- # Parse listQuery to extract path, search terms, search type, and options
- pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(listQuery)
-
- # Check if listQuery is a folder ID (starts with 01PPXICCB...)
- if listQuery.startswith('01PPXICCB') or listQuery.startswith('01'):
- # Direct folder ID - use it directly
- folderPaths = [listQuery]
- logger.info(f"Using direct folder ID: {listQuery}")
- else:
- # Remove site prefix from pathQuery before resolving (it's only for site filtering)
- pathQueryForResolve = pathQuery
- # Microsoft-standard path: /sites/SiteName/Path -> /Path
- if pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- innerPath = parsedPath.get("innerPath", "")
- pathQueryForResolve = '/' + innerPath if innerPath else '/'
- else:
- pathQueryForResolve = '/'
-
- # Remove first path segment if it looks like a document library name
- # In SharePoint Graph API, /drive/root already points to the default document library,
- # so library names in paths should be removed
- # Generic approach: if path has multiple segments, store original for fallback
- pathSegments = [s for s in pathQueryForResolve.split('/') if s.strip()]
- if len(pathSegments) > 1:
- # Path has multiple segments - first might be a library name
- # Store original for potential fallback
- originalPath = pathQueryForResolve
- # Try without first segment (assuming it's a library name)
- pathQueryForResolve = '/' + '/'.join(pathSegments[1:])
- logger.info(f"Removed first path segment (potential library name), path changed from '{originalPath}' to '{pathQueryForResolve}'")
- elif len(pathSegments) == 1:
- # Only one segment - if it's a common library-like name, use root
- firstSegmentLower = pathSegments[0].lower()
- libraryIndicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
- if any(indicator in firstSegmentLower for indicator in libraryIndicators):
- pathQueryForResolve = '/'
- logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")
-
- # Resolve path query into folder paths
- folderPaths = self._resolvePathQuery(pathQueryForResolve)
- logger.info(f"Resolved folder paths: {folderPaths}")
-
- # Process each folder path across all sites
- listResults = []
-
- self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)")
-
- for folderPath in folderPaths:
- try:
- folderResults = []
-
- for site in sites:
- siteId = site["id"]
- siteName = site["displayName"]
- siteUrl = site["webUrl"]
-
- logger.info(f"Listing folder {folderPath} in site: {siteName}")
-
- # Determine the endpoint based on folder path
- if folderPath in ["/", ""] or folderPath == "*":
- # Root folder
- endpoint = f"sites/{siteId}/drive/root/children"
- elif folderPath.startswith('01PPXICCB') or folderPath.startswith('01'):
- # Direct folder ID
- endpoint = f"sites/{siteId}/drive/items/{folderPath}/children"
- else:
- # Specific folder path - remove leading slash if present and URL encode
- folderPathClean = folderPath.lstrip('/')
- # URL encode the path for Graph API (spaces and special characters need encoding)
- folderPathEncoded = urllib.parse.quote(folderPathClean, safe='/')
- endpoint = f"sites/{siteId}/drive/root:/{folderPathEncoded}:/children"
-
- # Make the API call to list folder contents
- apiResult = await self._makeGraphApiCall(endpoint)
-
- if "error" in apiResult:
- logger.warning(f"Failed to list folder {folderPath} in site {siteName}: {apiResult['error']}")
- continue
-
- # Process the results
- items = apiResult.get("value", [])
- processedItems = []
-
- for item in items:
- # Use improved folder detection logic
- isFolder = self.services.sharepoint.detectFolderType(item)
-
- itemInfo = {
- "id": item.get("id"),
- "name": item.get("name"),
- "size": item.get("size", 0),
- "createdDateTime": item.get("createdDateTime"),
- "lastModifiedDateTime": item.get("lastModifiedDateTime"),
- "webUrl": item.get("webUrl"),
- "type": "folder" if isFolder else "file",
- "siteName": siteName,
- "siteUrl": siteUrl
- }
-
- # Add file-specific information
- if "file" in item:
- itemInfo.update({
- "mimeType": item["file"].get("mimeType"),
- "downloadUrl": item.get("@microsoft.graph.downloadUrl")
- })
-
- # Add folder-specific information
- if "folder" in item:
- itemInfo.update({
- "childCount": item["folder"].get("childCount", 0)
- })
-
- processedItems.append(itemInfo)
-
- # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
- if includeSubfolders:
- folderItems = [item for item in processedItems if item['type'] == 'folder']
- logger.info(f"Including subfolders - processing {len(folderItems)} folders")
- subfolderCount = 0
- maxSubfolders = 10 # Limit to prevent infinite loops
-
- for item in processedItems[:]: # Use slice to avoid modifying list during iteration
- if item["type"] == "folder" and subfolderCount < maxSubfolders:
- subfolderCount += 1
- subfolderPath = f"{folderPath.rstrip('/')}/{item['name']}"
- subfolderEndpoint = f"sites/{siteId}/drive/items/{item['id']}/children"
-
- logger.debug(f"Getting contents of subfolder: {item['name']}")
- subfolderResult = await self._makeGraphApiCall(subfolderEndpoint)
- if "error" not in subfolderResult:
- subfolderItems = subfolderResult.get("value", [])
- logger.debug(f"Found {len(subfolderItems)} items in subfolder {item['name']}")
-
- for subfolderItem in subfolderItems:
- # Use improved folder detection logic for subfolder items
- subfolderIsFolder = self.services.sharepoint.detectFolderType(subfolderItem)
-
- # Only add files and direct subfolders, NO RECURSION
- subfolderItemInfo = {
- "id": subfolderItem.get("id"),
- "name": subfolderItem.get("name"),
- "size": subfolderItem.get("size", 0),
- "createdDateTime": subfolderItem.get("createdDateTime"),
- "lastModifiedDateTime": subfolderItem.get("lastModifiedDateTime"),
- "webUrl": subfolderItem.get("webUrl"),
- "type": "folder" if subfolderIsFolder else "file",
- "parentPath": subfolderPath,
- "siteName": siteName,
- "siteUrl": siteUrl
- }
-
- if "file" in subfolderItem:
- subfolderItemInfo.update({
- "mimeType": subfolderItem["file"].get("mimeType"),
- "downloadUrl": subfolderItem.get("@microsoft.graph.downloadUrl")
- })
-
- processedItems.append(subfolderItemInfo)
- else:
- logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolderResult.get('error')}")
- elif subfolderCount >= maxSubfolders:
- logger.warning(f"Reached maximum subfolder limit ({maxSubfolders}), skipping remaining folders")
- break
-
- logger.info(f"Processed {subfolderCount} subfolders, total items: {len(processedItems)}")
-
- folderResults.append({
- "siteName": siteName,
- "siteUrl": siteUrl,
- "itemCount": len(processedItems),
- "items": processedItems
- })
-
- listResults.append({
- "folderPath": folderPath,
- "sitesProcessed": len(folderResults),
- "siteResults": folderResults
- })
-
- except Exception as e:
- logger.error(f"Error listing folder {folderPath}: {str(e)}")
- listResults.append({
- "folderPath": folderPath,
- "error": str(e),
- "siteResults": []
- })
-
- totalItems = sum(len(result.get("siteResults", [])) for result in listResults)
- self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s)")
-
- # Create result data
- resultData = {
- "pathQuery": listQuery,
- "includeSubfolders": includeSubfolders,
- "sitesSearched": len(sites),
- "listResults": listResults,
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- # Use default JSON format for output
- outputExtension = ".json" # Default
- outputMimeType = "application/json" # Default
-
- validationMetadata = {
- "actionType": "sharepoint.listDocuments",
- "pathQuery": listQuery,
- "includeSubfolders": includeSubfolders,
- "sitesSearched": len(sites),
- "folderCount": len(listResults),
- "totalItems": totalItems
- }
-
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{outputExtension}",
- documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType,
- validationMetadata=validationMetadata
- )
- ]
- )
-
- except Exception as e:
- logger.error(f"Error listing SharePoint documents: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass
- return ActionResult(
- success=False,
- error=str(e)
- )
-
- @action
- async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Analyze usage intensity of folders and files in SharePoint.
- - Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
- - Output format: JSON with usage analytics grouped by time intervals.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document list reference(s) containing findDocumentPath result.
- - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
- - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
- - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
- """
- import time
- operationId = None
- try:
- # Init progress logger
- workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
- operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"
-
- # Start progress tracking
- parentOperationId = parameters.get('parentOperationId')
- self.services.chat.progressLogStart(
- operationId,
- "Analyze Folder Usage",
- "SharePoint Analytics",
- "Processing document list",
- parentOperationId=parentOperationId
- )
-
- connectionReference = parameters.get("connectionReference")
- documentList = parameters.get("documentList")
- pathQuery = parameters.get("pathQuery")
- if isinstance(documentList, str):
- documentList = [documentList]
- startDateTime = parameters.get("startDateTime")
- endDateTime = parameters.get("endDateTime")
- interval = parameters.get("interval", "day")
-
- if not connectionReference:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Connection reference is required")
-
- # Require either documentList or pathQuery
- if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList or pathQuery is required")
-
- # Resolve folder/item information from documentList or pathQuery
- siteId = None
- driveId = None
- itemId = None
- folderPath = None
- folderName = None
-
- if documentList:
- foundDocuments, sites, errorMsg = await self._parseDocumentListForFoundDocuments(documentList)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- if not foundDocuments:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No documents found in documentList")
-
- # Get siteId from first document (all should be from same site)
- firstItem = foundDocuments[0]
- siteId = firstItem.get("siteId")
- if not siteId:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Site ID missing from documentList")
-
- # Get drive ID (needed for analytics)
- driveId = await self.services.sharepoint.getDriveId(siteId)
- if not driveId:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Could not determine drive ID for the site")
-
- # If no items from documentList, try pathQuery fallback
- if not foundDocuments and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
- if errorMsg:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=errorMsg)
-
- if sites:
- siteId = sites[0].get("id")
- # Parse pathQuery to find the folder/item
- pathQueryParsed, fileQuery, searchType, searchOptions = self._parseSearchQuery(pathQuery)
-
- # Extract folder path from pathQuery
- folderPath = '/'
- if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQueryParsed)
- if parsedPath:
- innerPath = parsedPath.get("innerPath", "")
- folderPath = '/' + innerPath if innerPath else '/'
- elif pathQueryParsed:
- folderPath = pathQueryParsed
-
- # Get drive ID
- driveId = await self.services.sharepoint.getDriveId(siteId)
- if not driveId:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Could not determine drive ID for the site")
-
- # Get folder/item by path
- folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
- if not folderInfo:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"Folder or file not found at path: {folderPath}")
-
- # Add pathQuery item to foundDocuments for processing
- foundDocuments = [{
- "id": folderInfo.get("id"),
- "name": folderInfo.get("name", ""),
- "type": "folder" if folderInfo.get("folder") else "file",
- "siteId": siteId,
- "fullPath": folderPath,
- "webUrl": folderInfo.get("webUrl", "")
- }]
-
- if not siteId or not driveId:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")
-
- self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Set access token
- if not self.services.sharepoint.setAccessTokenFromConnection(connection):
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="Failed to set SharePoint access token")
-
- # Process all items from documentList or pathQuery
- # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage")
- itemsToAnalyze = []
- if foundDocuments:
- for item in foundDocuments:
- itemId = item.get("id")
- itemType = item.get("type", "").lower()
-
- # Only process folders, skip files and site-level items
- if itemId and itemType == "folder":
- itemsToAnalyze.append({
- "id": itemId,
- "name": item.get("name", ""),
- "type": itemType,
- "path": item.get("fullPath", ""),
- "webUrl": item.get("webUrl", "")
- })
-
- if not itemsToAnalyze:
- if operationId:
- self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")
-
- self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {len(itemsToAnalyze)} folder(s)")
-
- # Analyze each item
- allAnalytics = []
- totalActivities = 0
- uniqueUsers = set()
- activityTypes = {}
-
- # Compute actual date range values (getFolderUsageAnalytics will set defaults if None)
- # We need to compute them here to store in output, since getFolderUsageAnalytics modifies them
- actualStartDateTime = startDateTime
- actualEndDateTime = endDateTime
- if not actualEndDateTime:
- actualEndDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
- if not actualStartDateTime:
- startDate = datetime.now(timezone.utc) - timedelta(days=30)
- actualStartDateTime = startDate.isoformat().replace('+00:00', 'Z')
-
- for idx, item in enumerate(itemsToAnalyze):
- progress = 0.4 + (idx / len(itemsToAnalyze)) * 0.5
- self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{len(itemsToAnalyze)})")
-
- # Get usage analytics for this folder
- analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
- siteId=siteId,
- driveId=driveId,
- itemId=item["id"],
- startDateTime=startDateTime,
- endDateTime=endDateTime,
- interval=interval
- )
-
- if "error" in analyticsResult:
- logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}")
- # Continue with other items even if one fails
- itemAnalytics = {
- "itemId": item["id"],
- "itemName": item["name"],
- "itemType": item["type"],
- "itemPath": item["path"],
- "error": analyticsResult.get("error", "Unknown error")
- }
- else:
- # Process analytics for this item
- itemActivities = 0
- itemUsers = set()
- itemActivityTypes = {}
-
- if "value" in analyticsResult:
- for intervalData in analyticsResult["value"]:
- activities = intervalData.get("activities", [])
- for activity in activities:
- itemActivities += 1
- totalActivities += 1
-
- action = activity.get("action", {})
- actionType = action.get("verb", "unknown")
- itemActivityTypes[actionType] = itemActivityTypes.get(actionType, 0) + 1
- activityTypes[actionType] = activityTypes.get(actionType, 0) + 1
-
- actor = activity.get("actor", {})
- userPrincipalName = actor.get("userPrincipalName", "")
- if userPrincipalName:
- itemUsers.add(userPrincipalName)
- uniqueUsers.add(userPrincipalName)
-
- itemAnalytics = {
- "itemId": item["id"],
- "itemName": item["name"],
- "itemType": item["type"],
- "itemPath": item["path"],
- "webUrl": item["webUrl"],
- "analytics": analyticsResult,
- "summary": {
- "totalActivities": itemActivities,
- "uniqueUsers": len(itemUsers),
- "activityTypes": itemActivityTypes
- }
- }
-
- # Include note if analytics are not available
- if "note" in analyticsResult:
- itemAnalytics["note"] = analyticsResult["note"]
-
- allAnalytics.append(itemAnalytics)
-
- self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")
-
- # Process and format analytics data
- resultData = {
- "siteId": siteId,
- "driveId": driveId,
- "startDateTime": actualStartDateTime, # Store computed date range (not None)
- "endDateTime": actualEndDateTime, # Store computed date range (not None)
- "interval": interval,
- "itemsAnalyzed": len(itemsToAnalyze),
- "foldersAnalyzed": len([item for item in allAnalytics if item.get("itemType") == "folder"]),
- "items": allAnalytics,
- "summary": {
- "totalActivities": totalActivities,
- "uniqueUsers": len(uniqueUsers),
- "activityTypes": activityTypes
- },
- "note": f"Analyzed {len(itemsToAnalyze)} folder(s) from {actualStartDateTime} to {actualEndDateTime}. " +
- f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)." +
- (f" Note: {len([item for item in allAnalytics if 'error' in item])} folder(s) had errors or no analytics data available." if any('error' in item for item in allAnalytics) else ""),
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {len(itemsToAnalyze)} folder(s)")
-
- validationMetadata = {
- "actionType": "sharepoint.analyzeFolderUsage",
- "itemsAnalyzed": len(itemsToAnalyze),
- "interval": interval,
- "totalActivities": totalActivities,
- "uniqueUsers": len(uniqueUsers)
- }
-
- self.services.chat.progressLogFinish(operationId, True)
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_usage_analysis_{self._format_timestamp_for_filename()}.json",
- documentData=json.dumps(resultData, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
- ]
- )
-
- except Exception as e:
- logger.error(f"Error analyzing folder usage: {str(e)}")
- if operationId:
- try:
- self.services.chat.progressLogFinish(operationId, False)
- except:
- pass
- return ActionResult(
- success=False,
- error=str(e)
- )
-
- @action
- async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Find SharePoint site by hostname and site path.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
- - sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")
-
- Returns:
- - ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="connectionReference parameter is required")
-
- hostname = parameters.get("hostname")
- if not hostname:
- return ActionResult.isFailure(error="hostname parameter is required")
-
- sitePath = parameters.get("sitePath")
- if not sitePath:
- return ActionResult.isFailure(error="sitePath parameter is required")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Find site by URL
- siteInfo = await self.services.sharepoint.findSiteByUrl(
- hostname=hostname,
- sitePath=sitePath
- )
-
- if not siteInfo:
- return ActionResult.isFailure(error=f"Site not found: {hostname}:/sites/{sitePath}")
-
- logger.info(f"Found SharePoint site: {siteInfo.get('displayName')} (ID: {siteInfo.get('id')})")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "sharepoint_site",
- "json",
- workflowContext,
- "findSiteByUrl"
- )
-
- validationMetadata = self._createValidationMetadata(
- "findSiteByUrl",
- hostname=hostname,
- sitePath=sitePath,
- siteId=siteInfo.get("id")
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(siteInfo, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error finding SharePoint site: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Download file from SharePoint by exact file path.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")
-
- Returns:
- - ActionResult with ActionDocument containing file content as base64-encoded bytes
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="connectionReference parameter is required")
-
- siteIdParam = parameters.get("siteId")
- if not siteIdParam:
- return ActionResult.isFailure(error="siteId parameter is required")
-
- filePath = parameters.get("filePath")
- if not filePath:
- return ActionResult.isFailure(error="filePath parameter is required")
-
- # Extract siteId from document if it's a reference
- siteId = None
- if isinstance(siteIdParam, str):
- # Try to parse from document reference
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- try:
- docList = DocumentReferenceList.from_string_list([siteIdParam])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
- if chatDocuments and len(chatDocuments) > 0:
- siteInfoJson = json.loads(chatDocuments[0].documentData)
- siteId = siteInfoJson.get("id")
- except:
- pass
-
- if not siteId:
- # Assume it's the site ID directly
- siteId = siteIdParam
- else:
- siteId = siteIdParam
-
- if not siteId:
- return ActionResult.isFailure(error="Could not extract siteId from parameter")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Download file
- fileContent = await self.services.sharepoint.downloadFileByPath(
- siteId=siteId,
- filePath=filePath
- )
-
- if fileContent is None:
- return ActionResult.isFailure(error=f"File not found or could not be downloaded: {filePath}")
-
- logger.info(f"Downloaded file from SharePoint: {filePath} ({len(fileContent)} bytes)")
-
- # Generate filename from filePath
- import os
- fileName = os.path.basename(filePath) or "downloaded_file"
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- fileName.split('.')[0] if '.' in fileName else fileName,
- fileName.split('.')[-1] if '.' in fileName else "bin",
- workflowContext,
- "downloadFileByPath"
- )
-
- # Encode as base64
- import base64
- fileBase64 = base64.b64encode(fileContent).decode('utf-8')
-
- validationMetadata = self._createValidationMetadata(
- "downloadFileByPath",
- siteId=siteId,
- filePath=filePath,
- fileSize=len(fileContent)
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=fileBase64,
- mimeType="application/octet-stream",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error downloading file from SharePoint: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Copy file within SharePoint.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - sourceFolder (str, required): Source folder path relative to site root
- - sourceFile (str, required): Source file name
- - destFolder (str, required): Destination folder path relative to site root
- - destFile (str, required): Destination file name
-
- Returns:
- - ActionResult with ActionDocument containing copy result
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="connectionReference parameter is required")
-
- siteIdParam = parameters.get("siteId")
- if not siteIdParam:
- return ActionResult.isFailure(error="siteId parameter is required")
-
- sourceFolder = parameters.get("sourceFolder")
- if not sourceFolder:
- return ActionResult.isFailure(error="sourceFolder parameter is required")
-
- sourceFile = parameters.get("sourceFile")
- if not sourceFile:
- return ActionResult.isFailure(error="sourceFile parameter is required")
-
- destFolder = parameters.get("destFolder")
- if not destFolder:
- return ActionResult.isFailure(error="destFolder parameter is required")
-
- destFile = parameters.get("destFile")
- if not destFile:
- return ActionResult.isFailure(error="destFile parameter is required")
-
- # Extract siteId from document if it's a reference
- siteId = None
- if isinstance(siteIdParam, str):
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- try:
- docList = DocumentReferenceList.from_string_list([siteIdParam])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
- if chatDocuments and len(chatDocuments) > 0:
- siteInfoJson = json.loads(chatDocuments[0].documentData)
- siteId = siteInfoJson.get("id")
- except:
- pass
-
- if not siteId:
- siteId = siteIdParam
- else:
- siteId = siteIdParam
-
- if not siteId:
- return ActionResult.isFailure(error="Could not extract siteId from parameter")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Copy file
- await self.services.sharepoint.copyFileAsync(
- siteId=siteId,
- sourceFolder=sourceFolder,
- sourceFile=sourceFile,
- destFolder=destFolder,
- destFile=destFile
- )
-
- logger.info(f"Copied file in SharePoint: {sourceFolder}/{sourceFile} -> {destFolder}/{destFile}")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "file_copy_result",
- "json",
- workflowContext,
- "copyFile"
- )
-
- result = {
- "success": True,
- "siteId": siteId,
- "sourcePath": f"{sourceFolder}/{sourceFile}",
- "destPath": f"{destFolder}/{destFile}"
- }
-
- validationMetadata = self._createValidationMetadata(
- "copyFile",
- siteId=siteId,
- sourcePath=f"{sourceFolder}/{sourceFile}",
- destPath=f"{destFolder}/{destFile}"
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- # Handle file not found gracefully
- if "itemNotFound" in str(e) or "404" in str(e):
- logger.warning(f"File not found for copy: {parameters.get('sourceFolder')}/{parameters.get('sourceFile')}")
- # Return success with skipped status
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "file_copy_result",
- "json",
- workflowContext,
- "copyFile"
- )
-
- result = {
- "success": True,
- "skipped": True,
- "reason": "File not found (may not exist yet)"
- }
-
- validationMetadata = self._createValidationMetadata(
- "copyFile",
- skipped=True
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- errorMsg = f"Error copying file in SharePoint: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
-
- @action
- async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Upload raw file content (bytes) to SharePoint.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - folderPath (str, required): Folder path relative to site root
- - fileName (str, required): File name
- - content (str, required): Document reference containing file content as base64-encoded bytes
-
- Returns:
- - ActionResult with ActionDocument containing upload result
- """
- try:
- connectionReference = parameters.get("connectionReference")
- if not connectionReference:
- return ActionResult.isFailure(error="connectionReference parameter is required")
-
- siteIdParam = parameters.get("siteId")
- if not siteIdParam:
- return ActionResult.isFailure(error="siteId parameter is required")
-
- folderPath = parameters.get("folderPath")
- if not folderPath:
- return ActionResult.isFailure(error="folderPath parameter is required")
-
- fileName = parameters.get("fileName")
- if not fileName:
- return ActionResult.isFailure(error="fileName parameter is required")
-
- contentParam = parameters.get("content")
- if not contentParam:
- return ActionResult.isFailure(error="content parameter is required")
-
- # Extract siteId from document if it's a reference
- siteId = None
- if isinstance(siteIdParam, str):
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- try:
- docList = DocumentReferenceList.from_string_list([siteIdParam])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
- if chatDocuments and len(chatDocuments) > 0:
- siteInfoJson = json.loads(chatDocuments[0].documentData)
- siteId = siteInfoJson.get("id")
- except:
- pass
-
- if not siteId:
- siteId = siteIdParam
- else:
- siteId = siteIdParam
-
- if not siteId:
- return ActionResult.isFailure(error="Could not extract siteId from parameter")
-
- # Get file content from document
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- docList = DocumentReferenceList.from_string_list([contentParam] if isinstance(contentParam, str) else contentParam)
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
- if not chatDocuments or len(chatDocuments) == 0:
- return ActionResult.isFailure(error="Could not get file content from document reference")
-
- fileContentBase64 = chatDocuments[0].documentData
-
- # Decode base64
- import base64
- try:
- fileContent = base64.b64decode(fileContentBase64)
- except Exception as e:
- return ActionResult.isFailure(error=f"Could not decode base64 file content: {str(e)}")
-
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
- # Upload file
- uploadResult = await self.services.sharepoint.uploadFile(
- siteId=siteId,
- folderPath=folderPath,
- fileName=fileName,
- content=fileContent
- )
-
- if "error" in uploadResult:
- return ActionResult.isFailure(error=f"Upload failed: {uploadResult['error']}")
-
- logger.info(f"Uploaded file to SharePoint: {folderPath}/{fileName} ({len(fileContent)} bytes)")
-
- # Generate filename
- workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
- filename = self._generateMeaningfulFileName(
- "file_upload_result",
- "json",
- workflowContext,
- "uploadFile"
- )
-
- result = {
- "success": True,
- "siteId": siteId,
- "filePath": f"{folderPath}/{fileName}",
- "fileSize": len(fileContent),
- "uploadResult": uploadResult
- }
-
- validationMetadata = self._createValidationMetadata(
- "uploadFile",
- siteId=siteId,
- filePath=f"{folderPath}/{fileName}",
- fileSize=len(fileContent)
- )
-
- document = ActionDocument(
- documentName=filename,
- documentData=json.dumps(result, indent=2),
- mimeType="application/json",
- validationMetadata=validationMetadata
- )
-
- return ActionResult.isSuccess(documents=[document])
-
- except Exception as e:
- errorMsg = f"Error uploading file to SharePoint: {str(e)}"
- logger.error(errorMsg)
- return ActionResult.isFailure(error=errorMsg)
\ No newline at end of file
diff --git a/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py b/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
index 075c8b96..a4bf18b6 100644
--- a/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
+++ b/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
@@ -1,36 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Analyze Folder Usage action for SharePoint operations.
-Analyzes usage intensity of folders and files in SharePoint.
-"""
-
import logging
import time
import json
from datetime import datetime, timezone, timedelta
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Analyze usage intensity of folders and files in SharePoint.
- - Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
- - Output format: JSON with usage analytics grouped by time intervals.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document list reference(s) containing findDocumentPath result.
- - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
- - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
- - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodSharepoint/actions/copyFile.py b/modules/workflows/methods/methodSharepoint/actions/copyFile.py
index 1b6d821d..f149e482 100644
--- a/modules/workflows/methods/methodSharepoint/actions/copyFile.py
+++ b/modules/workflows/methods/methodSharepoint/actions/copyFile.py
@@ -1,35 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Copy File action for SharePoint operations.
-Copies file within SharePoint.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Copy file within SharePoint.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - sourceFolder (str, required): Source folder path relative to site root
- - sourceFile (str, required): Source file name
- - destFolder (str, required): Destination folder path relative to site root
- - destFile (str, required): Destination file name
-
- Returns:
- - ActionResult with ActionDocument containing copy result
- """
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:
diff --git a/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py b/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
index d6e291a8..c64a6637 100644
--- a/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
+++ b/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Download File By Path action for SharePoint operations.
-Downloads file from SharePoint by exact file path.
-"""
-
import logging
import json
import base64
import os
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Download file from SharePoint by exact file path.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")
-
- Returns:
- - ActionResult with ActionDocument containing file content as base64-encoded bytes
- """
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:
diff --git a/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py b/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
index 01c1baf3..722dbc99 100644
--- a/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
+++ b/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
@@ -1,35 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Find Document Path action for SharePoint operations.
-Finds documents and folders by name/path across SharePoint sites.
-"""
-
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Find documents and folders by name/path across sites.
- - Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
- - Output format: JSON with found items and paths.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - site (str, optional): Site hint.
- - searchQuery (str, required): Search terms or path.
- - maxResults (int, optional): Maximum items to return. Default: 1000.
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py b/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
index 405b35f2..62b6dd94 100644
--- a/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
+++ b/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
@@ -1,32 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Find Site By URL action for SharePoint operations.
-Finds SharePoint site by hostname and site path.
-"""
-
import logging
import json
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Find SharePoint site by hostname and site path.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
- - sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")
-
- Returns:
- - ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
- """
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:
diff --git a/modules/workflows/methods/methodSharepoint/actions/listDocuments.py b/modules/workflows/methods/methodSharepoint/actions/listDocuments.py
index 78aabadc..318271c3 100644
--- a/modules/workflows/methods/methodSharepoint/actions/listDocuments.py
+++ b/modules/workflows/methods/methodSharepoint/actions/listDocuments.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-List Documents action for SharePoint operations.
-Lists documents and folders in SharePoint paths across sites.
-"""
-
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: List documents and folders in SharePoint paths across sites.
- - Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional).
- - Output format: JSON with folder items and metadata.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document list reference(s) containing findDocumentPath result.
- - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodSharepoint/actions/readDocuments.py b/modules/workflows/methods/methodSharepoint/actions/readDocuments.py
index 2bc2688c..73cdb730 100644
--- a/modules/workflows/methods/methodSharepoint/actions/readDocuments.py
+++ b/modules/workflows/methods/methodSharepoint/actions/readDocuments.py
@@ -1,44 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Read Documents action for SharePoint operations.
-Reads documents from SharePoint and extracts content/metadata.
-"""
-
import logging
import time
import json
import base64
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Read documents from SharePoint and extract content/metadata.
- - Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
- - Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
- - Binary files (PDFs, etc.) are Base64-encoded in documentData.
- - Text files are stored as plain text in documentData.
- - Returns ActionResult with documents list for template processing.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
- - pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
- - includeMetadata (bool, optional): Include metadata. Default: True.
-
- Returns:
- - ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
- - documentName: File name
- - documentData: Base64-encoded content (binary files) or plain text (text files)
- - mimeType: MIME type (e.g., application/pdf, text/plain)
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
index 82c93434..cfe4cf86 100644
--- a/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
+++ b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
@@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Upload Document action for SharePoint operations.
-Uploads documents to SharePoint.
-"""
-
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- GENERAL:
- - Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
- - Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
- - Output format: JSON with upload status and file info.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
- - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
- """
operationId = None
try:
# Init progress logger
diff --git a/modules/workflows/methods/methodSharepoint/actions/uploadFile.py b/modules/workflows/methods/methodSharepoint/actions/uploadFile.py
index 3d8a9499..1f469b80 100644
--- a/modules/workflows/methods/methodSharepoint/actions/uploadFile.py
+++ b/modules/workflows/methods/methodSharepoint/actions/uploadFile.py
@@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-"""
-Upload File action for SharePoint operations.
-Uploads raw file content (bytes) to SharePoint.
-"""
-
import logging
import json
import base64
from typing import Dict, Any
-from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
-@action
async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
- """
- Upload raw file content (bytes) to SharePoint.
-
- Parameters:
- - connectionReference (str, required): Microsoft connection label.
- - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- - folderPath (str, required): Folder path relative to site root
- - fileName (str, required): File name
- - content (str, required): Document reference containing file content as base64-encoded bytes
-
- Returns:
- - ActionResult with ActionDocument containing upload result
- """
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:
diff --git a/modules/workflows/methods/methodSharepoint/methodSharepoint.py b/modules/workflows/methods/methodSharepoint/methodSharepoint.py
index 299d3fed..e8d41905 100644
--- a/modules/workflows/methods/methodSharepoint/methodSharepoint.py
+++ b/modules/workflows/methods/methodSharepoint/methodSharepoint.py
@@ -51,6 +51,7 @@ class MethodSharepoint(MethodBase):
"findDocumentPath": WorkflowActionDefinition(
actionId="sharepoint.findDocumentPath",
description="Find documents and folders by name/path across sites",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -88,6 +89,7 @@ class MethodSharepoint(MethodBase):
"readDocuments": WorkflowActionDefinition(
actionId="sharepoint.readDocuments",
description="Read documents from SharePoint and extract content/metadata",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -124,6 +126,7 @@ class MethodSharepoint(MethodBase):
"uploadDocument": WorkflowActionDefinition(
actionId="sharepoint.uploadDocument",
description="Upload documents to SharePoint",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -152,6 +155,7 @@ class MethodSharepoint(MethodBase):
"listDocuments": WorkflowActionDefinition(
actionId="sharepoint.listDocuments",
description="List documents and folders in SharePoint paths across sites",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -181,6 +185,7 @@ class MethodSharepoint(MethodBase):
"analyzeFolderUsage": WorkflowActionDefinition(
actionId="sharepoint.analyzeFolderUsage",
description="Analyze usage intensity of folders and files in SharePoint",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -225,6 +230,7 @@ class MethodSharepoint(MethodBase):
"findSiteByUrl": WorkflowActionDefinition(
actionId="sharepoint.findSiteByUrl",
description="Find SharePoint site by hostname and site path",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -253,6 +259,7 @@ class MethodSharepoint(MethodBase):
"downloadFileByPath": WorkflowActionDefinition(
actionId="sharepoint.downloadFileByPath",
description="Download file from SharePoint by exact file path",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -281,6 +288,7 @@ class MethodSharepoint(MethodBase):
"copyFile": WorkflowActionDefinition(
actionId="sharepoint.copyFile",
description="Copy file within SharePoint",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@@ -330,6 +338,7 @@ class MethodSharepoint(MethodBase):
"uploadFile": WorkflowActionDefinition(
actionId="sharepoint.uploadFile",
description="Upload raw file content (bytes) to SharePoint",
+ dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index 36673ed0..32f9c528 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -24,7 +24,7 @@ class ContentValidator:
self.services = services
self.learningEngine = learningEngine
- async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
+ async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
Args:
@@ -34,8 +34,9 @@ class ContentValidator:
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
actionHistory: Optional list of previously executed actions in the workflow (for multi-step workflow context)
+ context: Optional context object to access all documents delivered in the current round
"""
- return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory)
+ return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory, context)
def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]:
"""Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs."""
@@ -533,13 +534,13 @@ class ContentValidator:
return False
- async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
+ async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
"""AI-based comprehensive validation - generic approach"""
try:
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
return self._createFailedValidationResult("AI service not available")
- # Use taskStep.objective if available, otherwise fall back to intent.primaryGoal
+ # Use taskStep.objective if available, otherwise fall back to workflow intent
taskObjective = None
if taskStep and hasattr(taskStep, 'objective'):
taskObjective = taskStep.objective
@@ -566,7 +567,9 @@ class ContentValidator:
expectedFormats = intent.get('expectedFormats', [])
# Determine objective text and label
- objectiveText = taskObjective if taskObjective else intent.get('primaryGoal', 'Unknown')
+ workflowIntent = getattr(self.services.workflow, '_workflowIntent', {}) if hasattr(self.services, 'workflow') and self.services.workflow else {}
+ intentText = workflowIntent.get('intent', 'Unknown')
+ objectiveText = taskObjective if taskObjective else intentText
objectiveLabel = "TASK OBJECTIVE" if taskObjective else "USER REQUEST"
# Build prompt base WITHOUT document summaries first
@@ -636,9 +639,46 @@ class ContentValidator:
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
+ # Build document index context (all documents delivered in current round)
+ documentIndexContext = ""
+ if context and self.services and hasattr(self.services, 'chat') and hasattr(self.services, 'workflow') and self.services.workflow:
+ try:
+ documentIndex = self.services.chat.getAvailableDocuments(self.services.workflow)
+ if documentIndex and documentIndex.strip() and documentIndex != "No documents available":
+ # Extract only "Current round documents" section if present
+ lines = documentIndex.split('\n')
+ currentRoundSection = []
+ inCurrentRound = False
+ for line in lines:
+ if "Current round documents:" in line:
+ inCurrentRound = True
+ currentRoundSection.append(line)
+ elif inCurrentRound:
+ if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
+ currentRoundSection.append(line)
+ elif line.strip() == "":
+ # Empty line is okay, continue
+ continue
+ elif "Past rounds documents:" in line or "AVAILABLE_CONNECTIONS_INDEX:" in line:
+ # End of current round section
+ break
+ else:
+ # Still in current round section
+ currentRoundSection.append(line)
+
+ if currentRoundSection:
+ documentIndexContext = "\n\n=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection)
+ documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round, not just the ones being validated in this step. Use this to check if all required formats/documents are present across the entire round."
+ except Exception as e:
+ logger.warning(f"Error extracting document index for validation: {str(e)}")
+ # Continue without document index - not critical
+
+ # Transform criteria that require data access into metadata-only checks
+ transformedCriteria = self._transformCriteriaForMetadataOnly(successCriteria)
+
# Format success criteria for display with index numbers
- if successCriteria:
- criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
+ if transformedCriteria:
+ criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(transformedCriteria)])
else:
criteriaDisplay = "[]"
@@ -647,7 +687,7 @@ class ContentValidator:
=== TASK INFORMATION ===
{objectiveLabel}: '{objectiveText}'
EXPECTED DATA TYPE: {dataType}
-EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}
+EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}{documentIndexContext}
=== VALIDATION INSTRUCTIONS ===
@@ -661,6 +701,7 @@ VALIDATION RULES:
5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done").
6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria.
7. NO ASSUMPTIONS: Do NOT assume content was AI-generated vs extracted. If a section exists with content_type, the content was delivered. Only validate what is present in the metadata.
+8. DATA-LEVEL CRITERIA TRANSFORMATION: Criteria mentioning accuracy percentages (e.g., "95% accuracy"), completeness percentages (e.g., "98% completeness"), or "all X extracted" have been transformed to metadata-only checks. For accuracy/completeness: Check if contentPartIds reference all source documents and if structure metadata shows expected data types (tables, lists, etc.) exist. For "all X extracted": Check if contentPartIds reference all source documents mentioned in ACTION HISTORY or document index. NEVER attempt to verify accuracy/completeness by comparing actual data values - only use metadata indicators.
VALIDATION STEPS:
- Check ACTION HISTORY for process-oriented criteria
@@ -812,6 +853,52 @@ DELIVERED DOCUMENTS ({len(documents)} items):
logger.error(f"AI validation failed: {str(e)}")
raise
+ def _transformCriteriaForMetadataOnly(self, criteria: List[str]) -> List[str]:
+ """
+ Transform criteria that require data access into metadata-only checks.
+
+ Preserves original criterion intent while converting data-level checks to metadata checks.
+ Examples:
+ - "95% accuracy" → "[METADATA ONLY] Data structure indicates extraction completed (check contentPartIds reference all source documents)"
+ - "98% completeness" → "[METADATA ONLY] All source documents referenced in contentPartIds (verify source count matches)"
+ - "all transactions extracted" → "[METADATA ONLY] All source documents referenced in contentPartIds (verify source count matches)"
+ """
+ if not criteria:
+ return []
+
+ transformed = []
+ for criterion in criteria:
+ original = criterion.strip()
+ transformed_criterion = original
+
+ # Pattern: accuracy percentage (e.g., "95% accuracy", "accuracy meets or exceeds 95% threshold")
+ if re.search(r'\d+%?\s*accuracy|accuracy.*\d+%', original, re.IGNORECASE):
+ # Extract the main subject (e.g., "transactions", "data", etc.)
+ subject_match = re.search(r'(transactions?|data|items?|records?|entries?)', original, re.IGNORECASE)
+ subject = subject_match.group(1).lower() if subject_match else "data"
+
+ transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference all source documents and jsonStructure shows expected {subject} structure exists (tables/lists with rowCount/itemCount > 0). Cannot verify actual {subject} accuracy values from metadata."
+
+ # Pattern: completeness percentage or "all X extracted" (e.g., "98% completeness", "all transactions extracted")
+ elif re.search(r'\d+%?\s*completeness|completeness.*\d+%|all\s+.*extracted|extract.*all', original, re.IGNORECASE):
+ # Extract the main subject
+ subject_match = re.search(r'(transactions?|data|items?|records?|entries?|statements?|documents?)', original, re.IGNORECASE)
+ subject = subject_match.group(1).lower() if subject_match else "items"
+
+ transformed_criterion = f"[METADATA ONLY] {original}: Verify that contentPartIds reference all source documents mentioned in ACTION HISTORY/document index, and jsonStructure shows {subject} structure exists (check rowCount/itemCount in tables/lists). Cannot verify actual {subject} count from metadata."
+
+ # Pattern: "no missing data" or "no incorrect data"
+ elif re.search(r'no\s+missing|no\s+incorrect|no\s+errors?', original, re.IGNORECASE):
+ transformed_criterion = f"[METADATA ONLY] {original}: Check that jsonStructure.content_type shows expected data types present (tables, lists, etc.) and contentPreview.looksLikeRenderedContent=true. Cannot verify actual data values from metadata."
+
+ # Pattern: data accuracy without percentage (e.g., "data is accurate", "accurate data")
+ elif re.search(r'data.*accurate|accurate.*data', original, re.IGNORECASE) and '%' not in original:
+ transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference source documents and jsonStructure shows expected data structure exists. Cannot verify actual data accuracy values from metadata."
+
+ transformed.append(transformed_criterion)
+
+ return transformed
+
def _createFailedValidationResult(self, errorMessage: str) -> Dict[str, Any]:
"""Create a standardized failed validation result"""
return {
diff --git a/modules/workflows/processing/adaptive/progressTracker.py b/modules/workflows/processing/adaptive/progressTracker.py
index 2b6cf8b3..80c570ed 100644
--- a/modules/workflows/processing/adaptive/progressTracker.py
+++ b/modules/workflows/processing/adaptive/progressTracker.py
@@ -28,7 +28,8 @@ class ProgressTracker:
improvementSuggestions = validation.get('improvementSuggestions', [])
# Get task objective from taskIntent (task-level, not workflow-level)
- taskObjective = taskIntent.get('taskObjective', taskIntent.get('primaryGoal', 'Unknown'))
+ # Fallback to 'Unknown' if task objective not available
+ taskObjective = taskIntent.get('taskObjective', 'Unknown')
# If validation is not schema compliant, treat as indeterminate (do not count as failure)
if not schemaCompliant or overallSuccess is None or qualityScore is None:
diff --git a/modules/workflows/processing/core/taskPlanner.py b/modules/workflows/processing/core/taskPlanner.py
index 20abccde..0fac427c 100644
--- a/modules/workflows/processing/core/taskPlanner.py
+++ b/modules/workflows/processing/core/taskPlanner.py
@@ -64,7 +64,7 @@ class TaskPlanner:
# Use workflowIntent from workflow object (set in workflowManager from userintention analysis)
workflowIntent = getattr(workflow, '_workflowIntent', None)
if workflowIntent and isinstance(workflowIntent, dict):
- cleanedObjective = workflowIntent.get('primaryGoal', actualUserPrompt)
+ cleanedObjective = workflowIntent.get('intent', actualUserPrompt)
else:
# Fallback: use user prompt directly if workflowIntent not available
cleanedObjective = actualUserPrompt
diff --git a/modules/workflows/processing/modes/modeDynamic.py b/modules/workflows/processing/modes/modeDynamic.py
index 50889b22..f7754eab 100644
--- a/modules/workflows/processing/modes/modeDynamic.py
+++ b/modules/workflows/processing/modes/modeDynamic.py
@@ -149,21 +149,63 @@ class DynamicMode(BaseMode):
})
# Content validation (against original cleaned user prompt / workflow intent)
- if getattr(self, 'workflowIntent', None) and result.documents:
- # Pass ALL documents to validator - validator decides what to validate (generic approach)
- # Pass taskStep so validator can use task.objective and format fields
- # Pass action name so validator knows which action created the documents
- # Pass action parameters so validator can verify parameter-specific requirements
- # Pass action history so validator can validate process-oriented criteria in multi-step workflows
- actionName = selection.get('action', 'unknown')
- actionParameters = selection.get('parameters', {})
- actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
- validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory)
- observation.contentValidation = validationResult
- quality_score = validationResult.get('qualityScore', 0.0)
- if quality_score is None:
- quality_score = 0.0
- logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})")
+ if getattr(self, 'workflowIntent', None):
+ # Collect ALL documents from current round, not just from last action
+ # Start with documents from current action (ActionDocument objects with metadata)
+ allRoundDocuments = list(result.documents) if result and result.documents else []
+
+ # Also collect ChatDocument references from all messages in current round
+ # These provide document existence info even if we don't have full metadata
+ if workflow and hasattr(workflow, 'messages') and workflow.messages:
+ currentRound = getattr(workflow, 'currentRound', 0)
+ currentTask = getattr(workflow, 'currentTask', 0)
+ # Collect documents from all messages in current round
+ for message in workflow.messages:
+ if hasattr(message, 'documents') and message.documents:
+ for chatDoc in message.documents:
+ # Include documents from current round and current task
+ docRound = getattr(chatDoc, 'roundNumber', None)
+ docTask = getattr(chatDoc, 'taskNumber', None)
+ if docRound == currentRound and (docTask is None or docTask == currentTask):
+ # Avoid duplicates - check if document already in list by fileId
+ chatDocFileId = getattr(chatDoc, 'fileId', None)
+ if chatDocFileId:
+ # Check if we already have this document (by fileId for ChatDocument, by documentName for ActionDocument)
+ isDuplicate = False
+ for existingDoc in allRoundDocuments:
+ existingFileId = getattr(existingDoc, 'fileId', None)
+ existingDocName = getattr(existingDoc, 'documentName', None)
+ # Match by fileId or by documentName matching fileName
+ if (existingFileId == chatDocFileId) or \
+ (existingDocName and hasattr(chatDoc, 'fileName') and existingDocName == chatDoc.fileName):
+ isDuplicate = True
+ break
+ if not isDuplicate:
+ allRoundDocuments.append(chatDoc)
+
+ # Only validate if we have documents to validate
+ if allRoundDocuments:
+ # Pass ALL documents from current round to validator
+ # Pass taskStep so validator can use task.objective and format fields
+ # Pass action name so validator knows which action created the documents
+ # Pass action parameters so validator can verify parameter-specific requirements
+ # Pass action history so validator can validate process-oriented criteria in multi-step workflows
+ actionName = selection.get('action', 'unknown')
+ actionParameters = selection.get('parameters', {})
+ actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
+ validationResult = await self.contentValidator.validateContent(allRoundDocuments, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory, context)
+ else:
+ # No documents to validate
+ validationResult = None
+
+ if validationResult:
+ observation.contentValidation = validationResult
+ quality_score = validationResult.get('qualityScore', 0.0)
+ if quality_score is None:
+ quality_score = 0.0
+ logger.info(f"Content validation: {validationResult.get('overallSuccess', False)} (quality: {quality_score:.2f})")
+ else:
+ logger.info("Content validation skipped: no documents to validate")
# NEW: Record validation result for adaptive learning
actionValue = selection.get('action', 'unknown')
@@ -194,6 +236,31 @@ class DynamicMode(BaseMode):
if decision: # Only append if decision is not None
context.previousReviewResult.append(decision)
+ # Send ChatLog message if userMessage is present in refinement response
+ if decision and decision.userMessage:
+ try:
+ currentRound = getattr(workflow, 'currentRound', 0)
+ currentTask = getattr(workflow, 'currentTask', 0)
+
+ messageData = {
+ "workflowId": workflow.id,
+ "role": "assistant",
+ "message": decision.userMessage,
+ "status": "refinement",
+ "sequenceNr": len(workflow.messages) + 1,
+ "publishedAt": self.services.utils.timestampGetUtc(),
+ "documentsLabel": None,
+ "documents": [],
+ "roundNumber": currentRound,
+ "taskNumber": currentTask,
+ "actionNumber": step
+ }
+
+ self.services.chat.storeMessageWithDocuments(workflow, messageData, [])
+ logger.info(f"Sent refinement userMessage to UI: {decision.userMessage[:100]}...")
+ except Exception as e:
+ logger.warning(f"Failed to send refinement userMessage to UI: {str(e)}")
+
# Store next action guidance from decision for use in next iteration
if decision and decision.status == "continue" and decision.nextAction:
# Set nextActionGuidance directly (now defined in TaskContext model)
diff --git a/modules/workflows/processing/shared/placeholderFactory.py b/modules/workflows/processing/shared/placeholderFactory.py
index c8920247..0be4e029 100644
--- a/modules/workflows/processing/shared/placeholderFactory.py
+++ b/modules/workflows/processing/shared/placeholderFactory.py
@@ -68,6 +68,52 @@ def extractUserPrompt(context: Any) -> str:
return context.taskStep.objective
return 'No request specified'
+def extractNormalizedRequest(services: Any) -> str:
+ """Extract normalized user request from services. Maps to {{KEY:NORMALIZED_REQUEST}}.
+ Returns the full normalized request from user input analysis (preserves all constraints and details).
+ CRITICAL: Must return the actual normalizedRequest from analysis, NOT intent.
+ """
+ try:
+ # Get normalized request from currentUserPromptNormalized (stores the normalizedRequest from analysis)
+ if services and getattr(services, 'currentUserPromptNormalized', None):
+ normalized = services.currentUserPromptNormalized
+ # Validate that it's not the intent (which is shorter and less detailed)
+ # Intent is typically a concise objective, normalized request should be longer and more detailed
+ workflowIntent = getattr(services.workflow, '_workflowIntent', {}) if hasattr(services, 'workflow') and services.workflow else {}
+ intent = workflowIntent.get('intent', '')
+
+ # If normalized matches intent exactly, it's wrong - log warning
+ if intent and normalized == intent:
+ logger.warning(f"extractNormalizedRequest: normalized request matches intent - this is incorrect! normalized={normalized[:100]}...")
+ # Try to get from workflow intent or return error message
+ return f"ERROR: Normalized request not properly stored. Expected detailed request, got intent: {intent}"
+
+ return normalized
+
+ return 'No normalized request specified'
+ except Exception as e:
+ logger.error(f"Error extracting normalized request: {str(e)}")
+ return 'No normalized request specified'
+
+def extractUserIntent(services: Any) -> str:
+ """Extract user intent from services. Maps to {{KEY:USER_INTENT}}.
+ Returns the concise intent from user input analysis, or falls back to normalized request.
+ """
+ try:
+ # Get intent from currentUserPrompt (stores the intent from analysis)
+ if services and getattr(services, 'currentUserPrompt', None):
+ intent = services.currentUserPrompt
+ # If intent is same as normalized, it's fine - use it
+ return intent
+
+ # Fallback to normalized request if intent not available
+ if services and getattr(services, 'currentUserPromptNormalized', None):
+ return services.currentUserPromptNormalized
+
+ return 'No intent specified'
+ except Exception:
+ return 'No intent specified'
+
def extractWorkflowHistory(service: Any) -> str:
"""Extract workflow history. Maps to {{KEY:WORKFLOW_HISTORY}}
Reverse-chronological, enriched with message summaries and document labels.
@@ -79,8 +125,13 @@ def extractWorkflowHistory(service: Any) -> str:
logger.error(f"Error getting workflow history: {str(e)}")
return "No previous workflow rounds available"
-def extractAvailableMethods(service: Any) -> str:
- """Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}"""
+def extractAvailableMethods(service: Any, filterDynamicMode: bool = True) -> str:
+ """Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}
+
+ Args:
+ service: Service object
+ filterDynamicMode: If True, only include actions with dynamicMode=True flag (default: True for dynamic workflow prompts)
+ """
try:
# Get the methods dictionary directly from the global methods variable
if not methods:
@@ -105,7 +156,21 @@ def extractAvailableMethods(service: Any) -> str:
processed_methods.add(shortName)
+ # Get method instance to access _actions dictionary with WorkflowActionDefinition objects
+ methodInstance = methodInfo.get('instance')
+ if not methodInstance:
+ continue
+
for actionName, actionInfo in methodInfo['actions'].items():
+ # Check dynamicMode flag if filtering is enabled
+ if filterDynamicMode:
+ # Access original WorkflowActionDefinition from _actions dictionary
+ if hasattr(methodInstance, '_actions') and actionName in methodInstance._actions:
+ actionDef = methodInstance._actions[actionName]
+ # Only include actions with dynamicMode=True
+ if not getattr(actionDef, 'dynamicMode', False):
+ continue
+
# Create compound action name: method.action
compoundActionName = f"{shortName}.{actionName}"
# Get the action description
diff --git a/modules/workflows/processing/shared/promptGenerationTaskplan.py b/modules/workflows/processing/shared/promptGenerationTaskplan.py
index 1d4d999a..11a54ca1 100644
--- a/modules/workflows/processing/shared/promptGenerationTaskplan.py
+++ b/modules/workflows/processing/shared/promptGenerationTaskplan.py
@@ -12,6 +12,8 @@ from modules.workflows.processing.shared.placeholderFactory import (
extractUserPrompt,
extractAvailableDocumentsSummary,
extractWorkflowHistory,
+ extractUserIntent,
+ extractNormalizedRequest,
)
logger = logging.getLogger(__name__)
@@ -41,13 +43,13 @@ def generateTaskPlanningPrompt(services, context: Any) -> PromptBundle:
- Data Type: {workflowIntent.get('dataType', 'unknown')}
- Expected Formats: {workflowIntent.get('expectedFormats', [])}
- Quality Requirements: {workflowIntent.get('qualityRequirements', {})}
-- Primary Goal: {workflowIntent.get('primaryGoal', '')}
Note: Tasks can override these if task-specific needs differ (e.g., workflow wants PDF, but task needs CSV for intermediate step).
"""
placeholders: List[PromptPlaceholder] = [
- PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
+ PromptPlaceholder(label="NORMALIZED_REQUEST", content=extractNormalizedRequest(services), summaryAllowed=False),
+ PromptPlaceholder(label="USER_INTENT", content=extractUserIntent(services), summaryAllowed=False),
PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services), summaryAllowed=True),
PromptPlaceholder(label="USER_LANGUAGE", content=userLanguage, summaryAllowed=False),
@@ -62,9 +64,17 @@ Break down user requests into logical, executable task steps.
## 📋 Context
-### User Request
-The following is the user's normalized request:
-{{KEY:USER_PROMPT}}
+### Normalized User Request
+The following is the user's full normalized request (preserves all constraints and details):
+```
+{{KEY:NORMALIZED_REQUEST}}
+```
+
+### User Intent
+The following is the user's intent (concise objective):
+```
+{{KEY:USER_INTENT}}
+```
### Workflow Intent
{{KEY:WORKFLOW_INTENT}}
diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py
index 593ba555..9806060a 100644
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@@ -155,6 +155,15 @@ class WorkflowManager:
async def _workflowProcess(self, userInput: UserInputRequest) -> None:
"""Process a workflow with user input"""
try:
+ # Send ChatLog message immediately when workflow starts
+ workflow = self.services.workflow
+ self.services.chat.storeLog(workflow, {
+ "message": "Workflow started...",
+ "type": "info",
+ "status": "running",
+ "progress": 0.0
+ })
+
# Store the current user prompt in services for easy access throughout the workflow
self.services.rawUserPrompt = userInput.prompt
self.services.currentUserPrompt = userInput.prompt
@@ -203,7 +212,7 @@ class WorkflowManager:
# Extract intent analysis fields and store as workflowIntent
workflowIntent = {
- 'primaryGoal': analysisResult.get('primaryGoal'),
+ 'intent': intentText, # Use intent instead of primaryGoal
'dataType': analysisResult.get('dataType', 'unknown'),
'expectedFormats': analysisResult.get('expectedFormats', []),
'qualityRequirements': analysisResult.get('qualityRequirements', {}),
@@ -220,8 +229,16 @@ class WorkflowManager:
self.services.workflow._workflowIntent = workflowIntent
# Store normalized request and intent
+ # CRITICAL: normalizedRequest MUST be used if available, do NOT fall back to intent
self.services.currentUserPrompt = intentText or userInput.prompt
- self.services.currentUserPromptNormalized = normalizedRequest or intentText or userInput.prompt
+ if normalizedRequest and normalizedRequest.strip():
+ # Use normalizedRequest if available and not empty
+ self.services.currentUserPromptNormalized = normalizedRequest
+ logger.info(f"Stored normalized request (length: {len(normalizedRequest)}, preview: {normalizedRequest[:100]}...)")
+ else:
+ # Fallback only if normalizedRequest is None or empty
+ logger.warning(f"normalizedRequest is None or empty, falling back to intentText. normalizedRequest={normalizedRequest}, intentText={intentText[:100] if intentText else None}...")
+ self.services.currentUserPromptNormalized = intentText or userInput.prompt
if contextItems is not None:
self.services.currentUserContextItems = contextItems
@@ -280,7 +297,6 @@ class WorkflowManager:
- complexity: "simple" | "moderate" | "complex"
- needsWorkflowHistory: bool
- fastTrack: bool
- - primaryGoal: Hauptziel
- dataType: Datentyp
- expectedFormats: Erwartete Formate
- qualityRequirements: Qualitätsanforderungen
@@ -304,11 +320,10 @@ class WorkflowManager:
- "complex": Multi-task workflow, many documents, research needed, content generation required, multi-step planning (60-120s)
6. needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work)
7. fastTrack: Boolean indicating if Fast Track is possible (simple requests without documents and without workflow history)
-8. primaryGoal: The main objective the user wants to achieve
-9. dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown)
-10. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
-11. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
-12. successCriteria: Specific success criteria that define completion (array of strings)
+8. dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown)
+9. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
+10. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
+11. successCriteria: Specific success criteria that define completion (array of strings)
Rules:
- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained
@@ -336,7 +351,6 @@ Return ONLY JSON (no markdown) with this exact structure:
"complexity": "simple" | "moderate" | "complex",
"needsWorkflowHistory": true|false,
"fastTrack": true|false,
- "primaryGoal": "The main objective the user wants to achieve",
"dataType": "numbers|text|documents|analysis|code|unknown",
"expectedFormats": ["pdf", "docx", "xlsx", "txt", "json", "csv", "html", "md"],
"qualityRequirements": {{
@@ -386,7 +400,6 @@ The following is the user's original input message. Analyze intent, normalize th
"complexity": "moderate",
"needsWorkflowHistory": False,
"fastTrack": False,
- "primaryGoal": None,
"dataType": "unknown",
"expectedFormats": [],
"qualityRequirements": {
@@ -514,10 +527,14 @@ The following is the user's original input message. Analyze intent, normalize th
roundNum = workflow.currentRound
contextLabel = f"round{roundNum}_usercontext"
+ # Use normalized request if available (from combined analysis), otherwise use original prompt
+ # This ensures the first message uses the normalized request for security
+ normalizedRequest = getattr(self.services, 'currentUserPromptNormalized', None) or userInput.prompt
+
messageData = {
"workflowId": workflow.id,
"role": "user",
- "message": userInput.prompt,
+ "message": normalizedRequest, # Use normalized request instead of original prompt
"status": "first",
"sequenceNr": 1,
"publishedAt": self.services.utils.timestampGetUtc(),
@@ -593,12 +610,11 @@ The following is the user's original input message. Analyze intent, normalize th
"2) normalizedRequest: full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details.\n"
"3) intent: concise single-paragraph core request in the detected language for high-level routing.\n"
"4) contextItems: supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content.\n"
- "5) primaryGoal: The main objective the user wants to achieve.\n"
- "6) dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown).\n"
- "7) expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., [\"xlsx\", \"pdf\"]). If format is unclear or not specified, use empty list [].\n"
- "8) qualityRequirements: Quality requirements they have (accuracy, completeness) as {accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}.\n"
- "9) successCriteria: Specific success criteria that define completion (array of strings).\n"
- "10) needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history to be understood or completed (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work). Return true if the request is a continuation, retry, modification, or builds upon previous work.\n\n"
+ "5) dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown).\n"
+ "6) expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., [\"xlsx\", \"pdf\"]). If format is unclear or not specified, use empty list [].\n"
+ "7) qualityRequirements: Quality requirements they have (accuracy, completeness) as {accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}.\n"
+ "8) successCriteria: Specific success criteria that define completion (array of strings).\n"
+ "9) needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history to be understood or completed (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work). Return true if the request is a continuation, retry, modification, or builds upon previous work.\n\n"
"Rules:\n"
"- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained.\n"
"- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear.\n"
@@ -616,7 +632,6 @@ The following is the user's original input message. Analyze intent, normalize th
" \"content\": \"Full extracted content block here\"\n"
" }\n"
" ],\n"
- " \"primaryGoal\": \"The main objective the user wants to achieve\",\n"
" \"dataType\": \"numbers|text|documents|analysis|code|unknown\",\n"
" \"expectedFormats\": [\"pdf\", \"docx\", \"xlsx\", \"txt\", \"json\", \"csv\", \"html\", \"md\"],\n"
" \"qualityRequirements\": {\n"
@@ -659,8 +674,9 @@ The following is the user's original input message. Analyze intent, normalize th
contextItems = parsed.get('contextItems') or []
# Extract intent analysis fields and store as workflowIntent
+ intentText = parsed.get('intent') or userInput.prompt
workflowIntent = {
- 'primaryGoal': parsed.get('primaryGoal'),
+ 'intent': intentText, # Use intent instead of primaryGoal
'dataType': parsed.get('dataType', 'unknown'),
'expectedFormats': parsed.get('expectedFormats', []),
'qualityRequirements': parsed.get('qualityRequirements', {}),
@@ -684,18 +700,56 @@ The following is the user's original input message. Analyze intent, normalize th
setattr(self.services, '_needsWorkflowHistory', False)
# Update services state
+ # CRITICAL: Validate language from AI response
+ # If AI didn't return language or invalid → use user language
+ # If user language not set → use "en"
+ validatedLanguage = None
+
+ # Validate AI-detected language
if detectedLanguage and isinstance(detectedLanguage, str):
- self._setUserLanguage(detectedLanguage)
- try:
- setattr(self.services, 'currentUserLanguage', detectedLanguage)
- except Exception:
- pass
+ detectedLanguage = detectedLanguage.strip().lower()
+ # Check if it's a valid 2-character ISO code
+ if len(detectedLanguage) == 2 and detectedLanguage.isalpha():
+ validatedLanguage = detectedLanguage
+
+ # If AI didn't return valid language, use user language
+ if not validatedLanguage:
+ userLanguage = getattr(self.services.user, 'language', None) if hasattr(self.services, 'user') and self.services.user else None
+ if userLanguage and isinstance(userLanguage, str):
+ userLanguage = userLanguage.strip().lower()
+ if len(userLanguage) == 2 and userLanguage.isalpha():
+ validatedLanguage = userLanguage
+
+ # Final fallback to "en"
+ if not validatedLanguage:
+ validatedLanguage = "en"
+ logger.warning("Language not detected from AI and user language not set - using default 'en'")
+
+ # Set validated language
+ self._setUserLanguage(validatedLanguage)
+ try:
+ setattr(self.services, 'currentUserLanguage', validatedLanguage)
+ logger.debug(f"Set currentUserLanguage to validated value: {validatedLanguage}")
+ except Exception:
+ pass
self.services.currentUserPrompt = intentText or userInput.prompt
# Always set currentUserPromptNormalized - use normalizedRequest if available, otherwise fallback to currentUserPrompt
- normalizedValue = normalizedRequest or intentText or userInput.prompt
- self.services.currentUserPromptNormalized = normalizedValue
+ # CRITICAL: normalizedRequest MUST be used if available, do NOT fall back to intent
+ if normalizedRequest and normalizedRequest.strip():
+ # Use normalizedRequest if available and not empty
+ self.services.currentUserPromptNormalized = normalizedRequest
+ logger.debug(f"Stored normalized request from analysis (length: {len(normalizedRequest)})")
+ else:
+ # Fallback only if normalizedRequest is None or empty
+ logger.warning(f"normalizedRequest is None or empty in analysis, falling back to intentText. normalizedRequest={normalizedRequest}, intentText={intentText}")
+ self.services.currentUserPromptNormalized = intentText or userInput.prompt
if contextItems is not None:
self.services.currentUserContextItems = contextItems
+
+ # Update message with normalized request if analysis produced one
+ if normalizedRequest and normalizedRequest != userInput.prompt:
+ messageData["message"] = normalizedRequest
+ logger.debug(f"Updated first message with normalized request (length: {len(normalizedRequest)})")
# Create documents for context items
if contextItems and isinstance(contextItems, list):
@@ -749,6 +803,34 @@ The following is the user's original input message. Analyze intent, normalize th
# Finally, persist and bind the first message with combined documents (context + user)
self.services.chat.storeMessageWithDocuments(workflow, messageData, createdDocs)
+ # Create ChatMessage with success criteria (KPI) AFTER the first user message
+ # This ensures the KPI message appears after the user message in the UI
+ workflowIntent = getattr(workflow, '_workflowIntent', None)
+ if workflowIntent and isinstance(workflowIntent, dict):
+ successCriteria = workflowIntent.get('successCriteria', [])
+ if successCriteria and isinstance(successCriteria, list) and len(successCriteria) > 0:
+ try:
+ # Format success criteria as message with "KPI" title
+ criteriaText = "**KPI**\n\n" + "\n".join([f"• {criterion}" for criterion in successCriteria])
+
+ kpiMessageData = {
+ "workflowId": workflow.id,
+ "role": "system",
+ "message": criteriaText,
+ "summary": f"KPI: {len(successCriteria)} success criteria",
+ "status": "step",
+ "sequenceNr": len(workflow.messages) + 1, # After user message
+ "publishedAt": self.services.utils.timestampGetUtc(),
+ "roundNumber": workflow.currentRound,
+ "taskNumber": 0,
+ "actionNumber": 0
+ }
+
+ self.services.chat.storeMessageWithDocuments(workflow, kpiMessageData, [])
+ logger.info(f"Created KPI message with {len(successCriteria)} success criteria after first user message")
+ except Exception as e:
+ logger.error(f"Error creating KPI message: {str(e)}")
+
except Exception as e:
logger.error(f"Error sending first message: {str(e)}")
raise
diff --git a/tests/functional/test01_ai_model_selection.py b/tests/functional/test01_ai_model_selection.py
index 84b22494..b06e9c64 100644
--- a/tests/functional/test01_ai_model_selection.py
+++ b/tests/functional/test01_ai_model_selection.py
@@ -252,7 +252,7 @@ class ModelSelectionTester:
print(f"{'='*80}")
options = AiCallOptions(
- operationType=OperationTypeEnum.WEB_SEARCH,
+ operationType=OperationTypeEnum.WEB_SEARCH_DATA,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.05,
@@ -269,7 +269,7 @@ class ModelSelectionTester:
print(f"{'='*80}")
options = AiCallOptions(
- operationType=OperationTypeEnum.WEB_SEARCH,
+ operationType=OperationTypeEnum.WEB_SEARCH_DATA,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.BASIC,
maxCost=0.01,
@@ -327,7 +327,7 @@ class ModelSelectionTester:
# This method uses webQuery internally, so it uses the same model selection as web research
options = AiCallOptions(
- operationType=OperationTypeEnum.WEB_SEARCH,
+ operationType=OperationTypeEnum.WEB_SEARCH_DATA,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.03,
@@ -436,7 +436,7 @@ class ModelSelectionTester:
print("\n Testing: aiObjects.webQuery() - Web Research")
try:
options = AiCallOptions(
- operationType=OperationTypeEnum.WEB_SEARCH,
+ operationType=OperationTypeEnum.WEB_SEARCH_DATA,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.05,
diff --git a/tests/functional/test02_ai_models.py b/tests/functional/test02_ai_models.py
index 0578ba7b..12a374f8 100644
--- a/tests/functional/test02_ai_models.py
+++ b/tests/functional/test02_ai_models.py
@@ -11,7 +11,7 @@ This script tests all available models with all their supported operation types:
- DATA_EXTRACT: Data extraction
- IMAGE_ANALYSE: Image analysis
- IMAGE_GENERATE: Image generation
-- WEB_SEARCH: Web search
+- WEB_SEARCH_DATA: Web search
- WEB_CRAWL: Web crawling
For each model, it tests every operation type the model supports and validates
@@ -119,7 +119,7 @@ class AIModelsTester:
OperationTypeEnum.DATA_EXTRACT: "Extract key information from this text about artificial intelligence trends.",
OperationTypeEnum.IMAGE_ANALYSE: "Describe what you see in this image.",
OperationTypeEnum.IMAGE_GENERATE: "A futuristic cityscape with flying cars and neon lights.",
- OperationTypeEnum.WEB_SEARCH: "Who works in valueon ag in switzerland?", # Search query for valueon.ch
+ OperationTypeEnum.WEB_SEARCH_DATA: "Who works in valueon ag in switzerland?", # Search query for valueon.ch
OperationTypeEnum.WEB_CRAWL: "https://www.valueon.ch" # URL to crawl
}
return prompts.get(operationType, "Test prompt for this operation type.")
@@ -195,7 +195,7 @@ class AIModelsTester:
)
# Update message content to JSON format
messages[0]["content"] = json.dumps(imagePrompt.model_dump())
- elif operationType == OperationTypeEnum.WEB_SEARCH:
+ elif operationType == OperationTypeEnum.WEB_SEARCH_DATA:
# Create structured prompt for web search
webSearchPrompt = AiCallPromptWebSearch(
instruction=testPrompt,
diff --git a/tests/functional/test03_ai_operations.py b/tests/functional/test03_ai_operations.py
index 259932c2..36a8505a 100644
--- a/tests/functional/test03_ai_operations.py
+++ b/tests/functional/test03_ai_operations.py
@@ -74,7 +74,7 @@ class MethodAiOperationsTester:
"aiPrompt": "A beautiful sunset over the ocean with purple and orange hues",
"resultType": "png"
},
- OperationTypeEnum.WEB_SEARCH: {
+ OperationTypeEnum.WEB_SEARCH_DATA: {
"aiPrompt": "Who works in valueon ag in switzerland?",
"resultType": "json"
},
diff --git a/tests/functional/test10_document_generation_formats.py b/tests/functional/test10_document_generation_formats.py
index 8d963643..9ce9b367 100644
--- a/tests/functional/test10_document_generation_formats.py
+++ b/tests/functional/test10_document_generation_formats.py
@@ -413,12 +413,11 @@ class DocumentGenerationFormatsTester10:
async def testAllFormats(self) -> Dict[str, Any]:
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
print("\n" + "="*80)
- print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
+ print("TESTING DOCUMENT GENERATION IN ALL FORMATS")
print("="*80)
- # Only test HTML format
- formats = ["html"]
- # formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats
+ # Test all document formats
+ formats = ["docx", "xlsx", "pptx", "pdf", "html"]
results = {}
for format in formats:
@@ -471,7 +470,7 @@ class DocumentGenerationFormatsTester10:
async def runTest(self):
"""Run the complete test."""
print("\n" + "="*80)
- print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
+ print("DOCUMENT GENERATION FORMATS TEST 10 - ALL FORMATS")
print("="*80)
try:
diff --git a/tests/unit/services/test_json_extraction_merging.py b/tests/unit/services/test_json_extraction_merging.py
new file mode 100644
index 00000000..07ecfa4b
--- /dev/null
+++ b/tests/unit/services/test_json_extraction_merging.py
@@ -0,0 +1,386 @@
+#!/usr/bin/env python3
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Test script for JSON extraction response detection and merging.
+Run: python gateway/tests/unit/services/test_json_extraction_merging.py
+"""
+
+import json
+import sys
+import os
+
+# Add gateway to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../..'))
+
+from modules.datamodels.datamodelExtraction import ContentPart
+from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
+
+
+def test_detects_json_with_code_fences():
+ """Test that JSON extraction responses with markdown code fences are detected"""
+ print("Test 1: Detecting JSON with code fences...")
+ service = ExtractionService(None)
+
+ content_part = ContentPart(
+ id="test1",
+ label="test1",
+ typeGroup="text",
+ mimeType="text/plain",
+ data='```json\n{"extracted_content": {"text": "Sample text", "tables": []}}\n```'
+ )
+
+ result = service._isJsonExtractionResponse([content_part])
+ assert result == True, "Should detect JSON with code fences"
+ print(" [PASS]")
+
+
+def test_detects_json_without_code_fences():
+ """Test that JSON extraction responses without code fences are detected"""
+ print("Test 2: Detecting JSON without code fences...")
+ service = ExtractionService(None)
+
+ content_part = ContentPart(
+ id="test2",
+ label="test2",
+ typeGroup="text",
+ mimeType="text/plain",
+ data='{"extracted_content": {"text": "Sample text", "tables": []}}'
+ )
+
+ result = service._isJsonExtractionResponse([content_part])
+ assert result == True, "Should detect JSON without code fences"
+ print(" [PASS]")
+
+
+def test_rejects_non_extraction_json():
+ """Test that regular JSON (without extracted_content) is rejected"""
+ print("Test 3: Rejecting non-extraction JSON...")
+ service = ExtractionService(None)
+
+ content_part = ContentPart(
+ id="test3",
+ label="test3",
+ typeGroup="text",
+ mimeType="text/plain",
+ data='{"documents": [{"sections": []}]}'
+ )
+
+ result = service._isJsonExtractionResponse([content_part])
+ assert result == False, "Should reject non-extraction JSON"
+ print(" [PASS]")
+
+
+def test_rejects_non_json_content():
+ """Test that non-JSON content is rejected"""
+ print("Test 4: Rejecting non-JSON content...")
+ service = ExtractionService(None)
+
+ content_part = ContentPart(
+ id="test4",
+ label="test4",
+ typeGroup="text",
+ mimeType="text/plain",
+ data="This is plain text, not JSON"
+ )
+
+ result = service._isJsonExtractionResponse([content_part])
+ assert result == False, "Should reject non-JSON content"
+ print(" [PASS]")
+
+
+def test_merges_tables_with_same_headers():
+ """Test that tables with identical headers are merged"""
+ print("Test 5: Merging tables with same headers...")
+ service = ExtractionService(None)
+
+ part1 = ContentPart(
+ id="test1",
+ label="test1",
+ typeGroup="table",
+ mimeType="application/json",
+ data='```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Amount"], "rows": [["Alice", "100"], ["Bob", "200"]]}]}}\n```'
+ )
+
+ part2 = ContentPart(
+ id="test2",
+ label="test2",
+ typeGroup="table",
+ mimeType="application/json",
+ data='```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Amount"], "rows": [["Charlie", "300"], ["Alice", "100"]]}]}}\n```'
+ )
+
+ merged = service._mergeJsonExtractionResponses([part1, part2])
+
+ # Should have one table group with merged rows
+ assert len(merged["extracted_content"]["tables"]) == 1, f"Should have one merged table, got {len(merged['extracted_content']['tables'])}"
+ table = merged["extracted_content"]["tables"][0]
+ assert table["headers"] == ["Name", "Amount"], f"Headers should match, got {table['headers']}"
+ # Should have 3 unique rows (Alice appears twice but should be deduplicated)
+ assert len(table["rows"]) == 3, f"Should have 3 unique rows, got {len(table['rows'])}"
+ assert ["Alice", "100"] in table["rows"], "Alice row should be present"
+ assert ["Bob", "200"] in table["rows"], "Bob row should be present"
+ assert ["Charlie", "300"] in table["rows"], "Charlie row should be present"
+ print(" [PASS]")
+
+
+def test_merges_multiple_json_blocks_separated_by_dash():
+ """Test that multiple JSON blocks separated by --- are merged"""
+ print("Test 6: Merging multiple JSON blocks separated by ---...")
+ service = ExtractionService(None)
+
+ # Create content part with multiple JSON blocks separated by ---
+ part1 = ContentPart(
+ id="test1",
+ label="test1",
+ typeGroup="table",
+ mimeType="application/json",
+ data='```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Amount"], "rows": [["Alice", "100"]]}]}}\n```\n---\n```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Amount"], "rows": [["Bob", "200"]]}]}}\n```'
+ )
+
+ merged = service._mergeJsonExtractionResponses([part1])
+
+ # Should have one table with merged rows from both JSON blocks
+ assert len(merged["extracted_content"]["tables"]) == 1, f"Should have one merged table, got {len(merged['extracted_content']['tables'])}"
+ table = merged["extracted_content"]["tables"][0]
+ assert table["headers"] == ["Name", "Amount"], f"Headers should match, got {table['headers']}"
+ assert len(table["rows"]) == 2, f"Should have 2 rows, got {len(table['rows'])}"
+ assert ["Alice", "100"] in table["rows"], "Alice row should be present"
+ assert ["Bob", "200"] in table["rows"], "Bob row should be present"
+ print(" [PASS]")
+
+
+def test_merges_text_content():
+ """Test that text content from multiple parts is merged"""
+ print("Test 7: Merging text content...")
+ service = ExtractionService(None)
+
+ part1 = ContentPart(
+ id="test1",
+ label="test1",
+ typeGroup="text",
+ mimeType="text/plain",
+ data='```json\n{"extracted_content": {"text": "First paragraph."}}\n```'
+ )
+
+ part2 = ContentPart(
+ id="test2",
+ label="test2",
+ typeGroup="text",
+ mimeType="text/plain",
+ data='```json\n{"extracted_content": {"text": "Second paragraph."}}\n```'
+ )
+
+ merged = service._mergeJsonExtractionResponses([part1, part2])
+
+ # Text should be concatenated with newlines
+ text = merged["extracted_content"]["text"]
+ assert "First paragraph." in text, "First paragraph should be present"
+ assert "Second paragraph." in text, "Second paragraph should be present"
+ print(" [PASS]")
+
+
+def test_merges_headings_and_lists():
+ """Test that headings and lists are merged"""
+ print("Test 8: Merging headings and lists...")
+ service = ExtractionService(None)
+
+ part1 = ContentPart(
+ id="test1",
+ label="test1",
+ typeGroup="text",
+ mimeType="text/plain",
+ data='```json\n{"extracted_content": {"headings": [{"level": 1, "text": "Title 1"}], "lists": [{"type": "bullet", "items": ["Item 1"]}]}}\n```'
+ )
+
+ part2 = ContentPart(
+ id="test2",
+ label="test2",
+ typeGroup="text",
+ mimeType="text/plain",
+ data='```json\n{"extracted_content": {"headings": [{"level": 2, "text": "Subtitle 1"}], "lists": [{"type": "bullet", "items": ["Item 2"]}]}}\n```'
+ )
+
+ merged = service._mergeJsonExtractionResponses([part1, part2])
+
+ # Should have 2 headings
+ assert len(merged["extracted_content"]["headings"]) == 2, f"Should have 2 headings, got {len(merged['extracted_content']['headings'])}"
+ assert merged["extracted_content"]["headings"][0]["text"] == "Title 1", "First heading should be Title 1"
+ assert merged["extracted_content"]["headings"][1]["text"] == "Subtitle 1", "Second heading should be Subtitle 1"
+
+ # Should have 2 lists
+ assert len(merged["extracted_content"]["lists"]) == 2, f"Should have 2 lists, got {len(merged['extracted_content']['lists'])}"
+ assert merged["extracted_content"]["lists"][0]["items"] == ["Item 1"], "First list should have Item 1"
+ assert merged["extracted_content"]["lists"][1]["items"] == ["Item 2"], "Second list should have Item 2"
+ print(" [PASS]")
+
+
+def test_handles_empty_content_parts():
+ """Test that empty content parts are handled gracefully"""
+ print("Test 9: Handling empty content parts...")
+ service = ExtractionService(None)
+
+ part1 = ContentPart(
+ id="test1",
+ label="test1",
+ typeGroup="text",
+ mimeType="text/plain",
+ data='```json\n{"extracted_content": {"text": "Some text"}}\n```'
+ )
+
+ part2 = ContentPart(
+ id="test2",
+ label="test2",
+ typeGroup="text",
+ mimeType="text/plain",
+ data="" # Empty part
+ )
+
+ merged = service._mergeJsonExtractionResponses([part1, part2])
+
+ # Should still have the text from part1
+ assert merged["extracted_content"]["text"] == "Some text", "Should have text from part1"
+ print(" [PASS]")
+
+
+def test_merges_tables_with_different_headers():
+ """Test that tables with different headers are kept separate"""
+ print("Test 10: Keeping tables with different headers separate...")
+ service = ExtractionService(None)
+
+ part1 = ContentPart(
+ id="test1",
+ label="test1",
+ typeGroup="table",
+ mimeType="application/json",
+ data='```json\n{"extracted_content": {"tables": [{"headers": ["Date", "Amount"], "rows": [["2024-01-01", "100"]]}]}}\n```'
+ )
+
+ part2 = ContentPart(
+ id="test2",
+ label="test2",
+ typeGroup="table",
+ mimeType="application/json",
+ data='```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Email"], "rows": [["Alice", "alice@example.com"]]}]}}\n```'
+ )
+
+ merged = service._mergeJsonExtractionResponses([part1, part2])
+
+ # Should have 2 separate tables (different headers)
+ assert len(merged["extracted_content"]["tables"]) == 2, f"Should have 2 separate tables, got {len(merged['extracted_content']['tables'])}"
+
+ # Check first table
+ table1 = merged["extracted_content"]["tables"][0]
+ assert table1["headers"] == ["Date", "Amount"], "First table should have Date/Amount headers"
+ assert len(table1["rows"]) == 1, "First table should have 1 row"
+
+ # Check second table
+ table2 = merged["extracted_content"]["tables"][1]
+ assert table2["headers"] == ["Name", "Email"], "Second table should have Name/Email headers"
+ assert len(table2["rows"]) == 1, "Second table should have 1 row"
+ print(" [PASS]")
+
+
+def test_real_world_scenario():
+ """Test with a realistic scenario similar to the debug file"""
+ print("Test 11: Real-world scenario (multiple documents, multiple JSON blocks)...")
+ service = ExtractionService(None)
+
+ # Simulate 3 documents, each with a table extraction response
+ part1 = ContentPart(
+ id="doc1",
+ label="doc1",
+ typeGroup="table",
+ mimeType="application/json",
+ data='```json\n{"extracted_content": {"tables": [{"headers": ["Transaction ID", "Date", "Amount"], "rows": [["TXN001", "2024-01-01", "100.00"], ["TXN002", "2024-01-02", "200.00"]]}]}}\n```'
+ )
+
+ part2 = ContentPart(
+ id="doc2",
+ label="doc2",
+ typeGroup="table",
+ mimeType="application/json",
+ data='```json\n{"extracted_content": {"tables": [{"headers": ["Transaction ID", "Date", "Amount"], "rows": [["TXN003", "2024-01-03", "300.00"], ["TXN001", "2024-01-01", "100.00"]]}]}}\n```'
+ )
+
+ # Part 3 has multiple JSON blocks separated by ---
+ part3 = ContentPart(
+ id="doc3",
+ label="doc3",
+ typeGroup="table",
+ mimeType="application/json",
+ data='```json\n{"extracted_content": {"tables": [{"headers": ["Transaction ID", "Date", "Amount"], "rows": [["TXN004", "2024-01-04", "400.00"]]}]}}\n```\n---\n```json\n{"extracted_content": {"tables": [{"headers": ["Transaction ID", "Date", "Amount"], "rows": [["TXN005", "2024-01-05", "500.00"]]}]}}\n```'
+ )
+
+ merged = service._mergeJsonExtractionResponses([part1, part2, part3])
+
+ # Should have one merged table with all unique transactions
+ assert len(merged["extracted_content"]["tables"]) == 1, f"Should have one merged table, got {len(merged['extracted_content']['tables'])}"
+ table = merged["extracted_content"]["tables"][0]
+ assert table["headers"] == ["Transaction ID", "Date", "Amount"], "Headers should match"
+
+ # Should have 5 unique rows (TXN001 appears twice but should be deduplicated)
+ assert len(table["rows"]) == 5, f"Should have 5 unique rows, got {len(table['rows'])}"
+
+ # Verify all transactions are present
+ transaction_ids = [row[0] for row in table["rows"]]
+ assert "TXN001" in transaction_ids, "TXN001 should be present"
+ assert "TXN002" in transaction_ids, "TXN002 should be present"
+ assert "TXN003" in transaction_ids, "TXN003 should be present"
+ assert "TXN004" in transaction_ids, "TXN004 should be present"
+ assert "TXN005" in transaction_ids, "TXN005 should be present"
+
+ # Verify TXN001 appears only once (deduplicated)
+ assert transaction_ids.count("TXN001") == 1, "TXN001 should appear only once (deduplicated)"
+
+ print(" [PASS]")
+
+
+def main():
+ """Run all tests"""
+ print("=" * 60)
+ print("Testing JSON Extraction Response Detection and Merging")
+ print("=" * 60)
+ print()
+
+ tests = [
+ test_detects_json_with_code_fences,
+ test_detects_json_without_code_fences,
+ test_rejects_non_extraction_json,
+ test_rejects_non_json_content,
+ test_merges_tables_with_same_headers,
+ test_merges_multiple_json_blocks_separated_by_dash,
+ test_merges_text_content,
+ test_merges_headings_and_lists,
+ test_handles_empty_content_parts,
+ test_merges_tables_with_different_headers,
+ test_real_world_scenario,
+ ]
+
+ passed = 0
+ failed = 0
+
+ for test in tests:
+ try:
+ test()
+ passed += 1
+ except AssertionError as e:
+ print(f" [FAIL] {e}")
+ failed += 1
+ except Exception as e:
+ print(f" [ERROR] {e}")
+ import traceback
+ traceback.print_exc()
+ failed += 1
+ print()
+
+ print("=" * 60)
+ print(f"Results: {passed} passed, {failed} failed")
+ print("=" * 60)
+
+ return 0 if failed == 0 else 1
+
+
+if __name__ == "__main__":
+ sys.exit(main())
+