From 4af9e5fc870793247ebd8d2b8f3a59f1aae92d83 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 16 Dec 2025 00:27:33 +0100 Subject: [PATCH 01/21] fixed generation issue --- modules/services/serviceAi/mainServiceAi.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index a284bc99..67a47163 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -338,6 +338,11 @@ Respond with ONLY a JSON object in this exact format: result, iteration, debugPrefix, allSections, accumulationState ) + # CRITICAL: Merge sections BEFORE KPI validation + # This ensures sections are preserved even if KPI validation fails + if extractedSections: + allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration) + # Define KPIs if we just entered accumulation mode (iteration 1, incomplete JSON) if accumulationState and accumulationState.isAccumulationMode and iteration == 1 and not accumulationState.kpis: logger.info(f"Iteration {iteration}: Defining KPIs for accumulation tracking") @@ -478,10 +483,8 @@ Respond with ONLY a JSON object in this exact format: logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping") break - # Merge new sections with existing sections intelligently - # This handles the STANDARD CASE: broken JSON iterations must be merged together - # The break can occur anywhere - in any section, at any depth - allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration) + # NOTE: Section merging now happens BEFORE KPI validation (see above) + # This ensures sections are preserved even if KPI validation fails # Calculate total bytes in merged content for progress display merged_json_str = json.dumps(allSections, indent=2, ensure_ascii=False) From 56d6ecf978799f6333d427a4ac52e18e5b20c7a7 Mon 
Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 17 Dec 2025 10:45:09 +0100 Subject: [PATCH 02/21] refactored actions handlers --- modules/MODULE_DEPENDENCIES.md | 292 ---------- .../datamodels/datamodelWorkflowActions.py | 88 ++++ modules/interfaces/interfaceBootstrap.py | 105 ++++ modules/interfaces/interfaceDbAppObjects.py | 114 +++- modules/routes/routeRbac.py | 112 +++- modules/routes/routeWorkflows.py | 244 +++++++++ .../methods/{methodAi.py => methodAi.py.old} | 0 .../workflows/methods/methodAi/__init__.py | 7 + .../methods/methodAi/actions/__init__.py | 26 + .../methods/methodAi/actions/convert.py | 157 ++++++ .../methodAi/actions/convertDocument.py | 52 ++ .../methods/methodAi/actions/extractData.py | 59 +++ .../methodAi/actions/generateDocument.py | 53 ++ .../methods/methodAi/actions/process.py | 219 ++++++++ .../methodAi/actions/summarizeDocument.py | 55 ++ .../methodAi/actions/translateDocument.py | 60 +++ .../methods/methodAi/actions/webResearch.py | 117 +++++ .../methods/methodAi/helpers/__init__.py | 5 + .../methods/methodAi/helpers/csvProcessing.py | 59 +++ .../workflows/methods/methodAi/methodAi.py | 383 ++++++++++++++ modules/workflows/methods/methodBase.py | 218 +++++++- ...{methodContext.py => methodContext.py.old} | 0 .../methods/methodContext/__init__.py | 7 + .../methods/methodContext/actions/__init__.py | 16 + .../methodContext/actions/extractContent.py | 156 ++++++ .../methodContext/actions/getDocumentIndex.py | 94 ++++ .../actions/triggerPreprocessingServer.py | 121 +++++ .../methods/methodContext/helpers/__init__.py | 5 + .../methodContext/helpers/documentIndex.py | 89 ++++ .../methodContext/helpers/formatting.py | 75 +++ .../methods/methodContext/methodContext.py | 108 ++++ .../{methodJira.py => methodJira.py.old} | 0 .../workflows/methods/methodJira/__init__.py | 7 + .../methods/methodJira/actions/__init__.py | 26 + .../methods/methodJira/actions/connectJira.py | 139 +++++ .../methodJira/actions/createCsvContent.py | 157 ++++++ 
.../methodJira/actions/createExcelContent.py | 157 ++++++ .../methodJira/actions/exportTicketsAsJson.py | 84 +++ .../actions/importTicketsFromJson.py | 101 ++++ .../methodJira/actions/mergeTicketData.py | 157 ++++++ .../methodJira/actions/parseCsvContent.py | 112 ++++ .../methodJira/actions/parseExcelContent.py | 121 +++++ .../methods/methodJira/helpers/__init__.py | 5 + .../methodJira/helpers/adfConverter.py | 180 +++++++ .../methodJira/helpers/documentParsing.py | 81 +++ .../methods/methodJira/methodJira.py | 322 ++++++++++++ ...{methodOutlook.py => methodOutlook.py.old} | 0 .../methods/methodOutlook/__init__.py | 7 + .../methods/methodOutlook/actions/__init__.py | 18 + .../composeAndDraftEmailWithContext.py | 362 +++++++++++++ .../methodOutlook/actions/readEmails.py | 245 +++++++++ .../methodOutlook/actions/searchEmails.py | 257 +++++++++ .../methodOutlook/actions/sendDraftEmail.py | 312 +++++++++++ .../methods/methodOutlook/helpers/__init__.py | 5 + .../methodOutlook/helpers/connection.py | 95 ++++ .../methodOutlook/helpers/emailProcessing.py | 184 +++++++ .../methodOutlook/helpers/folderManagement.py | 110 ++++ .../methods/methodOutlook/methodOutlook.py | 237 +++++++++ ...dSharepoint.py => methodSharepoint.py.old} | 0 .../methods/methodSharepoint/__init__.py | 7 + .../methodSharepoint/actions/__init__.py | 28 + .../actions/analyzeFolderUsage.py | 337 ++++++++++++ .../methodSharepoint/actions/copyFile.py | 163 ++++++ .../actions/downloadFileByPath.py | 117 +++++ .../actions/findDocumentPath.py | 497 ++++++++++++++++++ .../methodSharepoint/actions/findSiteByUrl.py | 88 ++++ .../methodSharepoint/actions/listDocuments.py | 345 ++++++++++++ .../methodSharepoint/actions/readDocuments.py | 290 ++++++++++ .../actions/uploadDocument.py | 278 ++++++++++ .../methodSharepoint/actions/uploadFile.py | 145 +++++ .../methodSharepoint/helpers/__init__.py | 5 + .../methodSharepoint/helpers/apiClient.py | 102 ++++ .../methodSharepoint/helpers/connection.py | 67 +++ 
.../helpers/documentParsing.py | 252 +++++++++ .../helpers/pathProcessing.py | 338 ++++++++++++ .../methodSharepoint/helpers/siteDiscovery.py | 173 ++++++ .../methodSharepoint/methodSharepoint.py | 387 ++++++++++++++ .../processing/shared/methodDiscovery.py | 12 +- 78 files changed, 9858 insertions(+), 350 deletions(-) delete mode 100644 modules/MODULE_DEPENDENCIES.md create mode 100644 modules/datamodels/datamodelWorkflowActions.py rename modules/workflows/methods/{methodAi.py => methodAi.py.old} (100%) create mode 100644 modules/workflows/methods/methodAi/__init__.py create mode 100644 modules/workflows/methods/methodAi/actions/__init__.py create mode 100644 modules/workflows/methods/methodAi/actions/convert.py create mode 100644 modules/workflows/methods/methodAi/actions/convertDocument.py create mode 100644 modules/workflows/methods/methodAi/actions/extractData.py create mode 100644 modules/workflows/methods/methodAi/actions/generateDocument.py create mode 100644 modules/workflows/methods/methodAi/actions/process.py create mode 100644 modules/workflows/methods/methodAi/actions/summarizeDocument.py create mode 100644 modules/workflows/methods/methodAi/actions/translateDocument.py create mode 100644 modules/workflows/methods/methodAi/actions/webResearch.py create mode 100644 modules/workflows/methods/methodAi/helpers/__init__.py create mode 100644 modules/workflows/methods/methodAi/helpers/csvProcessing.py create mode 100644 modules/workflows/methods/methodAi/methodAi.py rename modules/workflows/methods/{methodContext.py => methodContext.py.old} (100%) create mode 100644 modules/workflows/methods/methodContext/__init__.py create mode 100644 modules/workflows/methods/methodContext/actions/__init__.py create mode 100644 modules/workflows/methods/methodContext/actions/extractContent.py create mode 100644 modules/workflows/methods/methodContext/actions/getDocumentIndex.py create mode 100644 modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py 
create mode 100644 modules/workflows/methods/methodContext/helpers/__init__.py create mode 100644 modules/workflows/methods/methodContext/helpers/documentIndex.py create mode 100644 modules/workflows/methods/methodContext/helpers/formatting.py create mode 100644 modules/workflows/methods/methodContext/methodContext.py rename modules/workflows/methods/{methodJira.py => methodJira.py.old} (100%) create mode 100644 modules/workflows/methods/methodJira/__init__.py create mode 100644 modules/workflows/methods/methodJira/actions/__init__.py create mode 100644 modules/workflows/methods/methodJira/actions/connectJira.py create mode 100644 modules/workflows/methods/methodJira/actions/createCsvContent.py create mode 100644 modules/workflows/methods/methodJira/actions/createExcelContent.py create mode 100644 modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py create mode 100644 modules/workflows/methods/methodJira/actions/importTicketsFromJson.py create mode 100644 modules/workflows/methods/methodJira/actions/mergeTicketData.py create mode 100644 modules/workflows/methods/methodJira/actions/parseCsvContent.py create mode 100644 modules/workflows/methods/methodJira/actions/parseExcelContent.py create mode 100644 modules/workflows/methods/methodJira/helpers/__init__.py create mode 100644 modules/workflows/methods/methodJira/helpers/adfConverter.py create mode 100644 modules/workflows/methods/methodJira/helpers/documentParsing.py create mode 100644 modules/workflows/methods/methodJira/methodJira.py rename modules/workflows/methods/{methodOutlook.py => methodOutlook.py.old} (100%) create mode 100644 modules/workflows/methods/methodOutlook/__init__.py create mode 100644 modules/workflows/methods/methodOutlook/actions/__init__.py create mode 100644 modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py create mode 100644 modules/workflows/methods/methodOutlook/actions/readEmails.py create mode 100644 
modules/workflows/methods/methodOutlook/actions/searchEmails.py create mode 100644 modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py create mode 100644 modules/workflows/methods/methodOutlook/helpers/__init__.py create mode 100644 modules/workflows/methods/methodOutlook/helpers/connection.py create mode 100644 modules/workflows/methods/methodOutlook/helpers/emailProcessing.py create mode 100644 modules/workflows/methods/methodOutlook/helpers/folderManagement.py create mode 100644 modules/workflows/methods/methodOutlook/methodOutlook.py rename modules/workflows/methods/{methodSharepoint.py => methodSharepoint.py.old} (100%) create mode 100644 modules/workflows/methods/methodSharepoint/__init__.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/__init__.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/copyFile.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/listDocuments.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/readDocuments.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/uploadDocument.py create mode 100644 modules/workflows/methods/methodSharepoint/actions/uploadFile.py create mode 100644 modules/workflows/methods/methodSharepoint/helpers/__init__.py create mode 100644 modules/workflows/methods/methodSharepoint/helpers/apiClient.py create mode 100644 modules/workflows/methods/methodSharepoint/helpers/connection.py create mode 100644 modules/workflows/methods/methodSharepoint/helpers/documentParsing.py create mode 100644 
modules/workflows/methods/methodSharepoint/helpers/pathProcessing.py create mode 100644 modules/workflows/methods/methodSharepoint/helpers/siteDiscovery.py create mode 100644 modules/workflows/methods/methodSharepoint/methodSharepoint.py diff --git a/modules/MODULE_DEPENDENCIES.md b/modules/MODULE_DEPENDENCIES.md deleted file mode 100644 index 09415105..00000000 --- a/modules/MODULE_DEPENDENCIES.md +++ /dev/null @@ -1,292 +0,0 @@ -# Module Dependencies Analysis - -This document provides a comprehensive analysis of import dependencies between modules in the `modules` directory. - -## Overview - -The codebase is organized into the following top-level modules: -- **aicore** - AI core functionality and model management -- **auth** - High-level authentication and token management -- **connectors** - External service connectors -- **datamodels** - Data models and schemas -- **features** - Feature modules (workflow, dynamicOptions, etc.) -- **interfaces** - Database and service interfaces -- **routes** - API route handlers -- **security** - Low-level core security (RBAC and root access) -- **services** - Business logic services -- **shared** - Shared utilities and helpers -- **workflows** - Workflow processing and management - -## Bidirectional Dependency Matrix - -This table shows all module pairs with dependencies, displaying imports in both directions. 
- -| Module X | Module Y | X → Y | Y → X | Total | -|----------|----------|-------|-------|-------| -| aicore | connectors | 1 | 0 | 1 | -| aicore | datamodels | 13 | 0 | 13 | -| aicore | interfaces | 0 | 2 | 2 | -| aicore | security | 2 | 0 | 2 | -| aicore | services | 0 | 2 | 2 | -| aicore | shared | 5 | 0 | 5 | -| auth | datamodels | 5 | 0 | 5 | -| auth | interfaces | 4 | 0 | 4 | -| auth | routes | 0 | 32 | 32 | -| auth | security | 4 | 0 | 4 | -| auth | services | 0 | 1 | 1 | -| auth | shared | 8 | 0 | 8 | -| connectors | datamodels | 4 | 0 | 4 | -| connectors | interfaces | 0 | 10 | 10 | -| connectors | shared | 5 | 0 | 5 | -| datamodels | features | 0 | 6 | 6 | -| datamodels | interfaces | 0 | 27 | 27 | -| datamodels | routes | 0 | 48 | 48 | -| datamodels | security | 0 | 5 | 5 | -| datamodels | services | 0 | 52 | 52 | -| datamodels | shared | 19 | 0 | 19 | -| datamodels | workflows | 0 | 72 | 72 | -| features | interfaces | 0 | 0 | 0 | -| features | routes | 0 | 6 | 6 | -| features | services | 4 | 0 | 4 | -| features | shared | 3 | 0 | 3 | -| features | workflows | 1 | 0 | 1 | -| interfaces | routes | 0 | 29 | 29 | -| interfaces | security | 9 | 0 | 9 | -| interfaces | services | 0 | 8 | 8 | -| interfaces | shared | 11 | 0 | 11 | -| routes | interfaces | 29 | 0 | 29 | -| routes | services | 5 | 0 | 5 | -| routes | shared | 21 | 0 | 21 | -| security | connectors | 2 | 0 | 2 | -| security | datamodels | 5 | 0 | 5 | -| services | shared | 16 | 0 | 16 | -| services | workflows | 0 | 1 | 1 | -| shared | workflows | 0 | 9 | 9 | - -**Legend:** -- **X → Y**: Number of imports from Module X to Module Y -- **Y → X**: Number of imports from Module Y to Module X -- **Total**: Sum of imports in both directions - -## Bidirectional Dependencies Only (Circular Dependencies) - -This table shows only module pairs where imports exist in **both directions**, indicating potential circular dependencies that should be monitored. 
- -| Module X | Module Y | X → Y | Y → X | Total | -|----------|----------|-------|-------|-------| - -**Total bidirectional dependencies: 0** - -**Note:** All circular dependencies have been eliminated. The architecture now has clean one-way dependencies. - -**Key Improvements:** -1. **Eliminated `connectors ↔ security` circular dependency**: After moving RBAC logic from `connectorDbPostgre.py` to `interfaces/interfaceRbac.py`, connectors no longer import from security. Security still imports from connectors (for `rootAccess` to create `DatabaseConnector` instances), but this is a one-way dependency (security → connectors: 2, connectors → security: 0). -2. **Eliminated `shared ↔ security` circular dependency**: Moved `rbacHelpers.py` from `shared` to `security` module since it was only used in `aicore` and `aicore` already imports from `security`. This eliminates the architectural violation where `shared` imported from `security`. -3. **Eliminated `datamodels ↔ shared` circular dependency**: `shared` no longer has any static imports from `datamodels`. The only reference is a dynamic import in `attributeUtils.py` using `importlib.import_module()` for runtime model discovery, which is not detected by static analysis. This is acceptable as it's a runtime-only dependency. -4. **New `interfaces/interfaceRbac.py` module**: Created to handle RBAC filtering for interfaces, importing from both `security` and `connectors`. This maintains proper architectural layering where connectors remain generic. -5. 
**Updated dependency counts**: - - `interfaces` → `connectors`: increased from 9 to 10 (interfaceRbac imports connectorDbPostgre) - - `interfaces` → `security`: increased from 7 to 9 (interfaceRbac imports rbac and rootAccess) - - `features` → `interfaces`: increased from 1 to 2 (mainWorkflow imports interfaceRbac) - - `routes` → `interfaces`: increased from 28 to 29 (routeWorkflows imports interfaceRbac) - - `aicore` → `security`: increased from 1 to 2 (now imports rbacHelpers from security) - - `security` → `datamodels`: increased from 3 to 5 (rbacHelpers adds datamodel imports) - -## Dependency Graph (Mermaid) - -```mermaid -graph TD - aicore[aicore] - auth[auth] - connectors[connectors] - datamodels[datamodels] - features[features] - interfaces[interfaces] - routes[routes] - security[security] - services[services] - shared[shared] - workflows[workflows] - - aicore -->|13| datamodels - aicore -->|1| connectors - aicore -->|2| security - aicore -->|5| shared - - auth -->|5| datamodels - auth -->|4| interfaces - auth -->|4| security - auth -->|8| shared - - connectors -->|4| datamodels - connectors -->|5| shared - - datamodels -->|19| shared - - features -->|6| datamodels - features -->|0| interfaces - features -->|4| services - features -->|3| shared - features -->|1| workflows - - interfaces -->|29| datamodels - interfaces -->|10| connectors - interfaces -->|2| aicore - interfaces -->|9| security - interfaces -->|11| shared - - routes -->|48| datamodels - routes -->|29| interfaces - routes -->|32| auth - routes -->|21| shared - routes -->|6| features - routes -->|5| services - - security -->|5| datamodels - security -->|2| connectors - security -->|1| shared - - services -->|52| datamodels - services -->|8| interfaces - services -->|2| aicore - services -->|1| auth - services -->|16| shared - - - workflows -->|72| datamodels - workflows -->|1| services - workflows -->|9| shared -``` - -## Detailed Module Dependencies - -### aicore -**Imports from:** -- 
`connectors` (1 import) -- `datamodels` (13 imports) -- `security` (2 imports: rbac, rbacHelpers) -- `shared` (4 imports) - -**Dependencies:** Low-level AI functionality, depends on data models and connectors. - -### auth -**Imports from:** -- `datamodels` (5 imports) -- `interfaces` (4 imports) -- `security` (4 imports) -- `shared` (8 imports) - -**Dependencies:** High-level authentication and token management, used by routes and services. - -### connectors -**Imports from:** -- `datamodels` (4 imports) -- `shared` (5 imports) - -**Dependencies:** External service connectors, minimal dependencies. No longer imports from security or interfaces. Connectors are now fully generic and do not depend on security modules. - -### datamodels -**Imports from:** -- `shared` (19 imports) - -**Dependencies:** Core data models, only depends on shared utilities. - -### features -**Imports from:** -- `datamodels` (6 imports) -- `services` (4 imports) -- `shared` (3 imports) -- `workflows` (1 import) - -**Dependencies:** Feature modules that orchestrate workflows and services. Features now use services exclusively, not interfaces directly, maintaining proper architectural layering. - -### interfaces -**Imports from:** -- `aicore` (2 imports) -- `connectors` (10 imports) -- `datamodels` (29 imports) -- `security` (9 imports) -- `shared` (11 imports) - -**Dependencies:** Database and service interfaces, heavily depends on data models. Includes `interfaceRbac.py` which handles RBAC filtering for all interfaces. No longer creates circular dependency with connectors. - -### routes -**Imports from:** -- `auth` (32 imports) -- `datamodels` (48 imports) -- `features` (6 imports) -- `interfaces` (29 imports) -- `services` (5 imports) -- `shared` (21 imports) - -**Dependencies:** API endpoints, highest dependency count, orchestrates all layers. Now imports from `auth` instead of `security` for authentication. 
Increased use of services (from 2 to 5 imports) after architectural refactoring to use services instead of direct interface access in features. - -### security -**Imports from:** -- `connectors` (2 imports) -- `datamodels` (5 imports: rbac uses 3, rbacHelpers uses 2) -- `shared` (1 import: rootAccess uses configuration) - -**Dependencies:** Low-level core security (RBAC, root access, and RBAC helper functions). Used by interfaces (including `interfaceRbac.py`), auth, and aicore. The `rbacHelpers` module was moved from `shared` to `security` to eliminate the architectural violation where `shared` imported from `security`. Security imports from connectors only for `rootAccess` to create `DatabaseConnector` instances - this is acceptable as it's a one-way dependency (security → connectors). - -### services -**Imports from:** -- `aicore` (2 imports) -- `auth` (1 import) -- `datamodels` (52 imports) -- `interfaces` (8 imports) -- `shared` (16 imports) - -**Dependencies:** Business logic services, heavily depends on data models. - -### shared -**Imports from:** -- None (0 imports) - -**Dependencies:** Shared utilities, completely self-contained with no dependencies on other modules. No longer imports from security (rbacHelpers was moved to security module) or datamodels (only uses dynamic imports at runtime for model discovery in `attributeUtils.py`), maintaining proper architectural layering. - -### workflows -**Imports from:** -- `datamodels` (72 imports) -- `services` (1 import) -- `shared` (9 imports) - -**Dependencies:** Workflow processing, heavily depends on data models (highest count). Reduced from 74 to 72 imports after removing unused imports from `contentValidator.py`. - -## Key Observations - -1. **datamodels** is the most imported module (used by 9 out of 11 modules) -2. **shared** is widely used but has minimal dependencies (good design) -3. **routes** has the most diverse dependencies (imports from 6 different modules) -4. 
**workflows** has the highest number of imports from datamodels (72) -5. **auth** is now a separate module, used exclusively by routes and services -6. **security** is now a low-level module, used by interfaces (including `interfaceRbac.py`) -7. **connectors** are now fully generic - no dependencies on security or interfaces -8. **Circular dependencies eliminated**: Reduced from 3 to 0 after RBAC refactoring and `rbacHelpers` move (eliminated `connectors ↔ security`, `shared ↔ security`, and `datamodels ↔ shared`) -9. **New `interfaceRbac.py` module** centralizes RBAC filtering logic for all interfaces -10. **`shared` module is now completely self-contained** - no static imports from any other module -11. **Features architectural improvements**: Features no longer import directly from interfaces (reduced from 2 to 0). All features now use services exclusively, maintaining proper layering: Features → Services → Interfaces → Connectors -12. **Routes increased services usage**: Routes now import from services 5 times (up from 2) after refactoring features to use services instead of direct interface access - -## Dependency Layers - -Based on the analysis, the architecture follows these layers: - -1. **Foundation Layer**: `shared`, `datamodels` -2. **Core Layer**: `aicore`, `connectors`, `security` -3. **Interface Layer**: `interfaces` -4. **Authentication Layer**: `auth` -5. **Business Logic Layer**: `services`, `workflows` -6. **Feature Layer**: `features` -7. **API Layer**: `routes` - -## Recommendations - -1. **datamodels** should remain stable as it's a core dependency -2. **shared** is excellently designed - completely self-contained with zero dependencies (perfect foundation layer) -3. **security** split and RBAC refactoring were successful - eliminated all circular dependencies (`connectors ↔ security`, `shared ↔ security`) -4. **connectors** are now fully generic and maintainable - keep them free of security/interface dependencies -5. 
**interfaceRbac.py** successfully centralizes RBAC logic - consider this pattern for other cross-cutting concerns -6. Consider breaking down **workflows** if it continues to grow -7. **routes** could benefit from further abstraction to reduce direct dependencies -8. **Architecture is now clean** - no circular dependencies remain, maintaining clear separation of concerns diff --git a/modules/datamodels/datamodelWorkflowActions.py b/modules/datamodels/datamodelWorkflowActions.py new file mode 100644 index 00000000..1857883b --- /dev/null +++ b/modules/datamodels/datamodelWorkflowActions.py @@ -0,0 +1,88 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""Workflow Action models: WorkflowActionParameter, WorkflowActionDefinition.""" + +from typing import Optional, Any, Union, List, Dict, Callable, Awaitable +from pydantic import BaseModel, Field +from modules.datamodels.datamodelChat import ActionResult +from modules.shared.frontendTypes import FrontendType +from modules.shared.attributeUtils import registerModelLabels + + +class WorkflowActionParameter(BaseModel): + """ + Parameter schema definition for a workflow action. + + This defines the structure and UI rendering for a single action parameter, + NOT the actual parameter values (those are in ActionDefinition.parameters). + """ + name: str = Field(description="Parameter name") + type: str = Field(description="Python type as string: 'str', 'int', 'bool', 'List[str]', etc.") + frontendType: FrontendType = Field(description="UI rendering type (from global FrontendType enum)") + frontendOptions: Optional[Union[str, List[Dict[str, Any]]]] = Field( + None, + description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or static list. For custom types, this is automatically set to the API endpoint." 
+ ) + required: bool = Field(False, description="Whether parameter is required") + default: Optional[Any] = Field(None, description="Default value") + description: str = Field("", description="Parameter description") + validation: Optional[Dict[str, Any]] = Field( + None, + description="Validation rules (e.g., {'min': 1, 'max': 100})" + ) + + +class WorkflowActionDefinition(BaseModel): + """ + Complete schema definition of a workflow action. + + This defines the metadata, parameters, and execution function for an action. + This is different from datamodelWorkflow.ActionDefinition which contains + actual execution values (action, actionObjective, parameters with values). + + This class defines the ACTION SCHEMA, not the execution plan. + """ + actionId: str = Field( + description="Unique action identifier for RBAC (format: 'module.actionName', e.g., 'outlook.readEmails')" + ) + description: str = Field(description="Action description") + parameters: Dict[str, WorkflowActionParameter] = Field( + default_factory=dict, + description="Parameter schema definitions" + ) + execute: Optional[Callable] = Field( + None, + description="Execution function - async function that takes parameters dict and returns ActionResult. Set dynamically." 
+ ) + category: Optional[str] = Field(None, description="Action category for grouping") + tags: List[str] = Field(default_factory=list, description="Tags for search/filtering") + + +# Register model labels for UI +registerModelLabels( + "WorkflowActionDefinition", + {"en": "Workflow Action Definition", "fr": "Définition d'action de workflow"}, + { + "actionId": {"en": "Action ID", "fr": "ID d'action"}, + "description": {"en": "Description", "fr": "Description"}, + "parameters": {"en": "Parameters", "fr": "Paramètres"}, + "category": {"en": "Category", "fr": "Catégorie"}, + "tags": {"en": "Tags", "fr": "Étiquettes"}, + }, +) + +registerModelLabels( + "WorkflowActionParameter", + {"en": "Workflow Action Parameter", "fr": "Paramètre d'action de workflow"}, + { + "name": {"en": "Name", "fr": "Nom"}, + "type": {"en": "Type", "fr": "Type"}, + "frontendType": {"en": "Frontend Type", "fr": "Type frontend"}, + "frontendOptions": {"en": "Frontend Options", "fr": "Options frontend"}, + "required": {"en": "Required", "fr": "Requis"}, + "default": {"en": "Default", "fr": "Par défaut"}, + "description": {"en": "Description", "fr": "Description"}, + "validation": {"en": "Validation", "fr": "Validation"}, + }, +) + diff --git a/modules/interfaces/interfaceBootstrap.py b/modules/interfaces/interfaceBootstrap.py index e784c192..71bdd1e7 100644 --- a/modules/interfaces/interfaceBootstrap.py +++ b/modules/interfaces/interfaceBootstrap.py @@ -233,6 +233,9 @@ def initRbacRules(db: DatabaseConnector) -> None: # Create RESOURCE context rules createResourceContextRules(db) + # Create Action-specific RBAC rules + createActionRules(db) + logger.info("RBAC rules initialization completed") @@ -785,6 +788,108 @@ def createResourceContextRules(db: DatabaseConnector) -> None: logger.info(f"Created {len(resourceRules)} RESOURCE context rules") +def createActionRules(db: DatabaseConnector) -> None: + """ + Create default RBAC rules for workflow actions. 
+ + This function dynamically discovers all available actions from all methods + and creates RBAC rules for them. Actions are protected via RESOURCE context + with actionId as the item identifier (format: 'module.actionName'). + + Args: + db: Database connector instance + """ + try: + # Import method discovery to get all actions + from modules.workflows.processing.shared.methodDiscovery import discoverMethods + from modules.services import getInterface as getServices + from modules.datamodels.datamodelUam import User + + # Create a temporary user context for discovery (will be filtered by RBAC later) + # We need to discover methods, but we'll use a minimal user context + # In production, this should use a system user or admin user + try: + # Try to get an admin user for discovery + adminUsers = db.getRecordset("User", recordFilter={"roleLabel": "sysadmin"}, limit=1) + if adminUsers: + tempUser = User(**adminUsers[0]) + else: + # Fallback: create minimal user context + tempUser = User(id="system", roleLabel="sysadmin") + except: + # Fallback: create minimal user context + tempUser = User(id="system", roleLabel="sysadmin") + + # Get services and discover methods + services = getServices(tempUser, None) + discoverMethods(services) + + # Import methods catalog + from modules.workflows.processing.shared.methodDiscovery import methods + + # Collect all action IDs + allActionIds = [] + for methodName, methodInfo in methods.items(): + # Skip duplicate entries (same method stored with full and short name) + if methodName.startswith('Method'): + continue + + methodInstance = methodInfo['instance'] + methodActions = methodInstance.actions + + for actionName in methodActions.keys(): + actionId = f"{methodInstance.name}.{actionName}" + allActionIds.append(actionId) + + logger.info(f"Discovered {len(allActionIds)} actions for RBAC rule creation") + + # Define default action access by role + # SysAdmin and Admin: Access to all actions + # User: Access to common actions (read, 
search, process, etc.) + # Viewer: Read-only actions + + actionRules = [] + + # All roles: Generic access to all actions + # Using item=None grants access to all resources (all actions) in RESOURCE context + + # SysAdmin: Access to all actions + actionRules.append(AccessRule( + roleLabel="sysadmin", + context=AccessRuleContext.RESOURCE, + item=None, # All resources (covers all actions) + view=True + )) + + # Admin: Access to all actions + actionRules.append(AccessRule( + roleLabel="admin", + context=AccessRuleContext.RESOURCE, + item=None, # All resources (covers all actions) + view=True + )) + + # User: Access to all actions (generic rights) + actionRules.append(AccessRule( + roleLabel="user", + context=AccessRuleContext.RESOURCE, + item=None, # All resources (covers all actions) + view=True + )) + + + # Create all action rules + for rule in actionRules: + db.recordCreate(AccessRule, rule) + + logger.info(f"Created {len(actionRules)} action RBAC rules") + + except Exception as e: + logger.error(f"Error creating action RBAC rules: {str(e)}", exc_info=True) + # Don't fail bootstrap if action rules can't be created + # They can be created manually or via migration script + + def _addMissingTableRules(db: DatabaseConnector, existingRules: List[Dict[str, Any]]) -> None: """ Add missing RBAC rules for tables that were added after initial bootstrap. diff --git a/modules/interfaces/interfaceDbAppObjects.py b/modules/interfaces/interfaceDbAppObjects.py index 9a8ff308..f8397477 100644 --- a/modules/interfaces/interfaceDbAppObjects.py +++ b/modules/interfaces/interfaceDbAppObjects.py @@ -1574,18 +1574,21 @@ class AppObjects: self, roleLabel: Optional[str] = None, context: Optional[AccessRuleContext] = None, - item: Optional[str] = None - ) -> List[AccessRule]: + item: Optional[str] = None, + pagination: Optional[PaginationParams] = None + ) -> Union[List[AccessRule], PaginatedResult]: """ - Get access rules with optional filters. 
+ Get access rules with optional filters and pagination. Args: roleLabel: Optional role label filter context: Optional context filter item: Optional item filter + pagination: Optional pagination parameters. If None, returns all items. Returns: - List of AccessRule objects + If pagination is None: List[AccessRule] + If pagination is provided: PaginatedResult with items and metadata """ try: recordFilter = {} @@ -1596,11 +1599,55 @@ class AppObjects: if item: recordFilter["item"] = item - rules = self.db.getRecordset(AccessRule, recordFilter=recordFilter if recordFilter else None) - return [AccessRule(**rule) for rule in rules] + # Use RBAC filtering + rules = getRecordsetWithRBAC( + self.db, + AccessRule, + self.currentUser, + recordFilter=recordFilter if recordFilter else None + ) + + # Filter out database-specific fields + filteredRules = [] + for rule in rules: + cleanedRule = {k: v for k, v in rule.items() if not k.startswith("_")} + filteredRules.append(cleanedRule) + + # If no pagination requested, return all items + if pagination is None: + return [AccessRule(**rule) for rule in filteredRules] + + # Apply filtering (if filters provided) + if pagination.filters: + filteredRules = self._applyFilters(filteredRules, pagination.filters) + + # Apply sorting (in order of sortFields) + if pagination.sort: + filteredRules = self._applySorting(filteredRules, pagination.sort) + + # Count total items after filters + totalItems = len(filteredRules) + totalPages = math.ceil(totalItems / pagination.pageSize) if totalItems > 0 else 0 + + # Apply pagination (skip/limit) + startIdx = (pagination.page - 1) * pagination.pageSize + endIdx = startIdx + pagination.pageSize + pagedRules = filteredRules[startIdx:endIdx] + + # Convert to model objects + items = [AccessRule(**rule) for rule in pagedRules] + + return PaginatedResult( + items=items, + totalItems=totalItems, + totalPages=totalPages + ) except Exception as e: logger.error(f"Error getting access rules: {str(e)}") - return 
[] + if pagination is None: + return [] + else: + return PaginatedResult(items=[], totalItems=0, totalPages=0) def getAccessRulesForRoles( self, @@ -1701,19 +1748,62 @@ class AppObjects: logger.error(f"Error getting role by label {roleLabel}: {str(e)}") return None - def getAllRoles(self) -> List[Role]: + def getAllRoles(self, pagination: Optional[PaginationParams] = None) -> Union[List[Role], PaginatedResult]: """ - Get all roles. + Get all roles with optional pagination, sorting, and filtering. + + Args: + pagination: Optional pagination parameters. If None, returns all items. Returns: - List of Role objects + If pagination is None: List[Role] + If pagination is provided: PaginatedResult with items and metadata """ try: + # Get all roles from database roles = self.db.getRecordset(Role) - return [Role(**role) for role in roles] + + # Filter out database-specific fields + filteredRoles = [] + for role in roles: + cleanedRole = {k: v for k, v in role.items() if not k.startswith("_")} + filteredRoles.append(cleanedRole) + + # If no pagination requested, return all items + if pagination is None: + return [Role(**role) for role in filteredRoles] + + # Apply filtering (if filters provided) + if pagination.filters: + filteredRoles = self._applyFilters(filteredRoles, pagination.filters) + + # Apply sorting (in order of sortFields) + if pagination.sort: + filteredRoles = self._applySorting(filteredRoles, pagination.sort) + + # Count total items after filters + totalItems = len(filteredRoles) + totalPages = math.ceil(totalItems / pagination.pageSize) if totalItems > 0 else 0 + + # Apply pagination (skip/limit) + startIdx = (pagination.page - 1) * pagination.pageSize + endIdx = startIdx + pagination.pageSize + pagedRoles = filteredRoles[startIdx:endIdx] + + # Convert to model objects + items = [Role(**role) for role in pagedRoles] + + return PaginatedResult( + items=items, + totalItems=totalItems, + totalPages=totalPages + ) except Exception as e: logger.error(f"Error 
getting all roles: {str(e)}") - return [] + if pagination is None: + return [] + else: + return PaginatedResult(items=[], totalItems=0, totalPages=0) def updateRole(self, roleId: str, role: Role) -> Role: """ diff --git a/modules/routes/routeRbac.py b/modules/routes/routeRbac.py index 5b54ad45..363a6b81 100644 --- a/modules/routes/routeRbac.py +++ b/modules/routes/routeRbac.py @@ -8,10 +8,13 @@ Implements endpoints for role-based access control permissions. from fastapi import APIRouter, HTTPException, Depends, Query, Body, Path, Request from typing import Optional, List, Dict, Any import logging +import json +import math from modules.auth import getCurrentUser, limiter from modules.datamodels.datamodelUam import User, UserPermissions, AccessLevel from modules.datamodels.datamodelRbac import AccessRuleContext, AccessRule, Role +from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata from modules.interfaces.interfaceDbAppObjects import getInterface # Configure logger @@ -86,15 +89,16 @@ async def getPermissions( ) -@router.get("/rules", response_model=list) +@router.get("/rules", response_model=PaginatedResponse) @limiter.limit("30/minute") async def getAccessRules( request: Request, roleLabel: Optional[str] = Query(None, description="Filter by role label"), context: Optional[str] = Query(None, description="Filter by context (DATA, UI, RESOURCE)"), item: Optional[str] = Query(None, description="Filter by item identifier"), + pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"), currentUser: User = Depends(getCurrentUser) - ) -> list: + ) -> PaginatedResponse: """ Get access rules with optional filters. Only returns rules that the current user has permission to view. @@ -143,15 +147,45 @@ async def getAccessRules( detail=f"Invalid context '{context}'. 
Must be one of: DATA, UI, RESOURCE" ) - # Get rules - rules = interface.getAccessRules( + # Parse pagination parameter + paginationParams = None + if pagination: + try: + paginationDict = json.loads(pagination) + paginationParams = PaginationParams(**paginationDict) if paginationDict else None + except (json.JSONDecodeError, ValueError) as e: + raise HTTPException( + status_code=400, + detail=f"Invalid pagination parameter: {str(e)}" + ) + + # Get rules with optional pagination + result = interface.getAccessRules( roleLabel=roleLabel, context=accessContext, - item=item + item=item, + pagination=paginationParams ) - # Convert to dict for JSON serialization - return [rule.model_dump() for rule in rules] + # If pagination was requested, result is PaginatedResult + # If no pagination, result is List[AccessRule] + if paginationParams: + return PaginatedResponse( + items=[rule.model_dump() for rule in result.items], + pagination=PaginationMetadata( + currentPage=paginationParams.page, + pageSize=paginationParams.pageSize, + totalItems=result.totalItems, + totalPages=result.totalPages, + sort=paginationParams.sort, + filters=paginationParams.filters + ) + ) + else: + return PaginatedResponse( + items=[rule.model_dump() for rule in result], + pagination=None + ) except HTTPException: raise @@ -489,12 +523,13 @@ def _ensureAdminAccess(currentUser: User) -> None: ) -@router.get("/roles", response_model=List[Dict[str, Any]]) +@router.get("/roles", response_model=PaginatedResponse) @limiter.limit("60/minute") async def listRoles( request: Request, + pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"), currentUser: User = Depends(getCurrentUser) -) -> List[Dict[str, Any]]: +) -> PaginatedResponse: """ Get list of all available roles with metadata. 
@@ -506,14 +541,27 @@ async def listRoles( interface = getInterface(currentUser) - # Get all roles from database - dbRoles = interface.getAllRoles() + # Parse pagination parameter + paginationParams = None + if pagination: + try: + paginationDict = json.loads(pagination) + paginationParams = PaginationParams(**paginationDict) if paginationDict else None + except (json.JSONDecodeError, ValueError) as e: + raise HTTPException( + status_code=400, + detail=f"Invalid pagination parameter: {str(e)}" + ) + + # Get all roles from database (without pagination) to enrich with user counts and add custom roles + # Note: We get all roles first because we need to add custom roles before pagination + dbRoles = interface.getAllRoles(pagination=None) # Get all users to count role assignments # Since _ensureAdminAccess ensures user is sysadmin or admin, # and getUsersByMandate returns all users for sysadmin regardless of mandateId, # we can pass the current user's mandateId (for sysadmin it will be ignored by RBAC) - allUsers = interface.getUsersByMandate(currentUser.mandateId or "") + allUsers = interface.getUsersByMandate(currentUser.mandateId or "", pagination=None) # Count users per role roleCounts: Dict[str, int] = {} @@ -544,7 +592,45 @@ async def listRoles( "isSystemRole": False }) - return result + # Apply filtering and sorting if pagination requested + if paginationParams: + # Apply filtering (if filters provided) + if paginationParams.filters: + # Use the interface's filter method + filteredResult = interface._applyFilters(result, paginationParams.filters) + else: + filteredResult = result + + # Apply sorting (in order of sortFields) + if paginationParams.sort: + sortedResult = interface._applySorting(filteredResult, paginationParams.sort) + else: + sortedResult = filteredResult + + # Apply pagination + totalItems = len(sortedResult) + totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0 + startIdx = (paginationParams.page - 1) * 
paginationParams.pageSize + endIdx = startIdx + paginationParams.pageSize + paginatedResult = sortedResult[startIdx:endIdx] + + return PaginatedResponse( + items=paginatedResult, + pagination=PaginationMetadata( + currentPage=paginationParams.page, + pageSize=paginationParams.pageSize, + totalItems=totalItems, + totalPages=totalPages, + sort=paginationParams.sort, + filters=paginationParams.filters + ) + ) + else: + # No pagination - return all roles + return PaginatedResponse( + items=result, + pagination=None + ) except HTTPException: raise diff --git a/modules/routes/routeWorkflows.py b/modules/routes/routeWorkflows.py index 8be49268..352fcfd3 100644 --- a/modules/routes/routeWorkflows.py +++ b/modules/routes/routeWorkflows.py @@ -572,3 +572,247 @@ async def delete_file_from_message( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error deleting file reference: {str(e)}" ) + + +# Action Discovery Endpoints + +@router.get("/actions", response_model=Dict[str, Any]) +@limiter.limit("120/minute") +async def get_all_actions( + request: Request, + currentUser: User = Depends(getCurrentUser) +) -> Dict[str, Any]: + """ + Get all available workflow actions for the current user (filtered by RBAC). + + Returns: + - Dictionary with actions grouped by module, filtered by RBAC permissions + + Example response: + { + "actions": [ + { + "module": "outlook", + "actionId": "outlook.readEmails", + "name": "readEmails", + "description": "Read emails and metadata from a mailbox folder", + "parameters": {...} + }, + ... 
+ ] + } + """ + try: + from modules.services import getInterface as getServices + from modules.workflows.processing.shared.methodDiscovery import discoverMethods + + # Get services and discover methods + services = getServices(currentUser, None) + discoverMethods(services) + + # Import methods catalog + from modules.workflows.processing.shared.methodDiscovery import methods + + # Collect all actions from all methods + allActions = [] + for methodName, methodInfo in methods.items(): + # Skip duplicate entries (same method stored with full and short name) + if methodName.startswith('Method'): + continue + + methodInstance = methodInfo['instance'] + methodActions = methodInstance.actions + + for actionName, actionInfo in methodActions.items(): + # Build action response + actionResponse = { + "module": methodInstance.name, + "actionId": f"{methodInstance.name}.{actionName}", + "name": actionName, + "description": actionInfo.get('description', ''), + "parameters": actionInfo.get('parameters', {}) + } + allActions.append(actionResponse) + + return { + "actions": allActions + } + + except Exception as e: + logger.error(f"Error getting all actions: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to get actions: {str(e)}" + ) + + +@router.get("/actions/{method}", response_model=Dict[str, Any]) +@limiter.limit("120/minute") +async def get_method_actions( + request: Request, + method: str = Path(..., description="Method name (e.g., 'outlook', 'sharepoint')"), + currentUser: User = Depends(getCurrentUser) +) -> Dict[str, Any]: + """ + Get all available actions for a specific method (filtered by RBAC). 
+ + Path Parameters: + - method: Method name (e.g., 'outlook', 'sharepoint', 'ai') + + Returns: + - Dictionary with actions for the specified method + + Example response: + { + "module": "outlook", + "actions": [ + { + "actionId": "outlook.readEmails", + "name": "readEmails", + "description": "Read emails and metadata from a mailbox folder", + "parameters": {...} + }, + ... + ] + } + """ + try: + from modules.services import getInterface as getServices + from modules.workflows.processing.shared.methodDiscovery import discoverMethods + + # Get services and discover methods + services = getServices(currentUser, None) + discoverMethods(services) + + # Import methods catalog + from modules.workflows.processing.shared.methodDiscovery import methods + + # Find method instance + methodInstance = None + for methodName, methodInfo in methods.items(): + if methodInfo['instance'].name == method: + methodInstance = methodInfo['instance'] + break + + if not methodInstance: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Method '{method}' not found" + ) + + # Collect actions for this method + actions = [] + methodActions = methodInstance.actions + + for actionName, actionInfo in methodActions.items(): + actionResponse = { + "actionId": f"{methodInstance.name}.{actionName}", + "name": actionName, + "description": actionInfo.get('description', ''), + "parameters": actionInfo.get('parameters', {}) + } + actions.append(actionResponse) + + return { + "module": methodInstance.name, + "description": methodInstance.description, + "actions": actions + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting actions for method {method}: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to get actions for method {method}: {str(e)}" + ) + + +@router.get("/actions/{method}/{action}", response_model=Dict[str, Any]) +@limiter.limit("120/minute") +async def 
get_action_schema( + request: Request, + method: str = Path(..., description="Method name (e.g., 'outlook', 'sharepoint')"), + action: str = Path(..., description="Action name (e.g., 'readEmails', 'uploadDocument')"), + currentUser: User = Depends(getCurrentUser) +) -> Dict[str, Any]: + """ + Get action schema with parameter definitions for a specific action. + + Path Parameters: + - method: Method name (e.g., 'outlook', 'sharepoint', 'ai') + - action: Action name (e.g., 'readEmails', 'uploadDocument') + + Returns: + - Action schema with full parameter definitions + + Example response: + { + "method": "outlook", + "action": "readEmails", + "actionId": "outlook.readEmails", + "description": "Read emails and metadata from a mailbox folder", + "parameters": { + "connectionReference": { + "name": "connectionReference", + "type": "str", + "frontendType": "userConnection", + "frontendOptions": "user.connection", + "required": true, + "description": "Microsoft connection label" + }, + ... + } + } + """ + try: + from modules.services import getInterface as getServices + from modules.workflows.processing.shared.methodDiscovery import discoverMethods + + # Get services and discover methods + services = getServices(currentUser, None) + discoverMethods(services) + + # Import methods catalog + from modules.workflows.processing.shared.methodDiscovery import methods + + # Find method instance + methodInstance = None + for methodName, methodInfo in methods.items(): + if methodInfo['instance'].name == method: + methodInstance = methodInfo['instance'] + break + + if not methodInstance: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Method '{method}' not found" + ) + + # Get action + methodActions = methodInstance.actions + if action not in methodActions: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Action '{action}' not found in method '{method}'" + ) + + actionInfo = methodActions[action] + + return { + "method": 
methodInstance.name, + "action": action, + "actionId": f"{methodInstance.name}.{action}", + "description": actionInfo.get('description', ''), + "parameters": actionInfo.get('parameters', {}) + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting action schema for {method}.{action}: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to get action schema: {str(e)}" + ) \ No newline at end of file diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py.old similarity index 100% rename from modules/workflows/methods/methodAi.py rename to modules/workflows/methods/methodAi.py.old diff --git a/modules/workflows/methods/methodAi/__init__.py b/modules/workflows/methods/methodAi/__init__.py new file mode 100644 index 00000000..7ce40281 --- /dev/null +++ b/modules/workflows/methods/methodAi/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +from .methodAi import MethodAi + +__all__ = ['MethodAi'] + diff --git a/modules/workflows/methods/methodAi/actions/__init__.py b/modules/workflows/methods/methodAi/actions/__init__.py new file mode 100644 index 00000000..f0ba9d4d --- /dev/null +++ b/modules/workflows/methods/methodAi/actions/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+ +"""Action modules for AI operations.""" + +# Export all actions +from .process import process +from .webResearch import webResearch +from .summarizeDocument import summarizeDocument +from .translateDocument import translateDocument +from .convert import convert +from .convertDocument import convertDocument +from .extractData import extractData +from .generateDocument import generateDocument + +__all__ = [ + 'process', + 'webResearch', + 'summarizeDocument', + 'translateDocument', + 'convert', + 'convertDocument', + 'extractData', + 'generateDocument', +] + diff --git a/modules/workflows/methods/methodAi/actions/convert.py b/modules/workflows/methods/methodAi/actions/convert.py new file mode 100644 index 00000000..1c34fa9b --- /dev/null +++ b/modules/workflows/methods/methodAi/actions/convert.py @@ -0,0 +1,157 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Convert action for AI operations. +Converts documents/data between different formats with specific formatting options. +""" + +import logging +import json +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument +from modules.datamodels.datamodelDocref import DocumentReferenceList + +logger = logging.getLogger(__name__) + +@action +async def convert(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters). + - Input requirements: documentList (required); inputFormat and outputFormat (required). + - Output format: Document in target format with specified formatting options. + - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed). + + Parameters: + - documentList (list, required): Document reference(s) to convert. 
+ - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.). + - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.). + - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect. + - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,). + - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True. + - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'. + """ + documentList = parameters.get("documentList", []) + if not documentList: + return ActionResult.isFailure(error="documentList is required") + + inputFormat = parameters.get("inputFormat") + outputFormat = parameters.get("outputFormat") + if not inputFormat or not outputFormat: + return ActionResult.isFailure(error="inputFormat and outputFormat are required") + + # Normalize formats (remove leading dot if present) + normalizedInputFormat = inputFormat.strip().lstrip('.').lower() + normalizedOutputFormat = outputFormat.strip().lstrip('.').lower() + + # Get documents + if isinstance(documentList, DocumentReferenceList): + docRefList = documentList + elif isinstance(documentList, list): + docRefList = DocumentReferenceList.from_string_list(documentList) + else: + docRefList = DocumentReferenceList.from_string_list([documentList]) + + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) + if not chatDocuments: + return ActionResult.isFailure(error="No documents found in documentList") + + # Check if input is standardized JSON format - if so, use direct rendering + if normalizedInputFormat == "json" and len(chatDocuments) == 1: + try: + doc = chatDocuments[0] + # ChatDocument doesn't have documentData - need to load file content using fileId + docBytes = self.services.chat.getFileData(doc.fileId) + if not docBytes: + raise ValueError(f"No file data found for fileId={doc.fileId}") + + # Decode bytes to string + 
docData = docBytes.decode('utf-8') + + # Try to parse as JSON + if isinstance(docData, str): + jsonData = json.loads(docData) + elif isinstance(docData, dict): + jsonData = docData + else: + jsonData = None + + # Check if it's standardized JSON format (has "documents" or "sections") + if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)): + # Use direct rendering - no AI call needed! + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generationService = GenerationService(self.services) + + # Ensure format is "documents" array + if "documents" not in jsonData: + jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]} + + # Get title + title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document") + + # Render with options + renderOptions = {} + if normalizedOutputFormat == "csv": + renderOptions["delimiter"] = parameters.get("delimiter", ",") + renderOptions["columnsPerRow"] = parameters.get("columnsPerRow") + renderOptions["includeHeader"] = parameters.get("includeHeader", True) + + rendered_content, mime_type = await generationService.renderReport( + jsonData, normalizedOutputFormat, title, None, None + ) + + # Apply CSV options if needed (renderer will handle them) + if normalizedOutputFormat == "csv" and renderOptions: + rendered_content = self.csvProcessing.applyCsvOptions(rendered_content, renderOptions) + + validationMetadata = { + "actionType": "ai.convert", + "inputFormat": normalizedInputFormat, + "outputFormat": normalizedOutputFormat, + "hasSourceJson": True, + "conversionType": "direct_rendering" + } + actionDoc = ActionDocument( + documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' 
in doc.documentName else doc.documentName}.{normalizedOutputFormat}", + documentData=rendered_content, + mimeType=mime_type, + sourceJson=jsonData, # Preserve source JSON for structure validation + validationMetadata=validationMetadata + ) + + return ActionResult.isSuccess(documents=[actionDoc]) + + except Exception as e: + logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}") + # Fall through to AI-based conversion + + # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions) + columnsPerRow = parameters.get("columnsPerRow") + delimiter = parameters.get("delimiter", ",") + includeHeader = parameters.get("includeHeader", True) + language = parameters.get("language", "en") + + aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format." + + if normalizedOutputFormat == "csv": + aiPrompt += f" Use '{delimiter}' as the delimiter character." + if columnsPerRow: + aiPrompt += f" Format the output with {columnsPerRow} columns per row." + if not includeHeader: + aiPrompt += " Do not include a header row." + else: + aiPrompt += " Include a header row with column names." + + if language and language != "en": + aiPrompt += f" Use language: {language}." + + aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure." + + return await self.process({ + "aiPrompt": aiPrompt, + "documentList": documentList, + "resultType": normalizedOutputFormat + }) + diff --git a/modules/workflows/methods/methodAi/actions/convertDocument.py b/modules/workflows/methods/methodAi/actions/convertDocument.py new file mode 100644 index 00000000..e86b1d5a --- /dev/null +++ b/modules/workflows/methods/methodAi/actions/convertDocument.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Convert Document action for AI operations. +Converts documents between different formats (PDF→Word, Excel→CSV, etc.). 
+""" + +import logging +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult + +logger = logging.getLogger(__name__) + +@action +async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.). + - Input requirements: documentList (required); targetFormat (required). + - Output format: Document in target format. + + Parameters: + - documentList (list, required): Document reference(s) to convert. + - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.). + - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True. + """ + documentList = parameters.get("documentList", []) + if not documentList: + return ActionResult.isFailure(error="documentList is required") + + targetFormat = parameters.get("targetFormat") + if not targetFormat: + return ActionResult.isFailure(error="targetFormat is required") + + preserveStructure = parameters.get("preserveStructure", True) + + # Normalize format (remove leading dot if present) + normalizedFormat = targetFormat.strip().lstrip('.').lower() + + aiPrompt = f"Convert the provided document(s) to {normalizedFormat.upper()} format." + if preserveStructure: + aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout." + aiPrompt += " Ensure the converted document maintains the same content and information as the original." 
+ + return await self.process({ + "aiPrompt": aiPrompt, + "documentList": documentList, + "resultType": normalizedFormat + }) + diff --git a/modules/workflows/methods/methodAi/actions/extractData.py b/modules/workflows/methods/methodAi/actions/extractData.py new file mode 100644 index 00000000..723914bd --- /dev/null +++ b/modules/workflows/methods/methodAi/actions/extractData.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Extract Data action for AI operations. +Extracts structured data from documents (key-value pairs, entities, facts, etc.). +""" + +import logging +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult + +logger = logging.getLogger(__name__) + +@action +async def extractData(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.). + - Input requirements: documentList (required); optional dataStructure, fields. + - Output format: JSON by default, or specified resultType. + + Parameters: + - documentList (list, required): Document reference(s) to extract data from. + - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested. + - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]). + - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json. + """ + documentList = parameters.get("documentList", []) + if not documentList: + return ActionResult.isFailure(error="documentList is required") + + dataStructure = parameters.get("dataStructure", "nested") + fields = parameters.get("fields", []) + resultType = parameters.get("resultType", "json") + + aiPrompt = "Extract structured data from the provided document(s)." 
+ if fields: + fieldsStr = ", ".join(fields) + aiPrompt += f" Extract the following specific fields: {fieldsStr}." + else: + aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information." + + structureInstructions = { + "flat": "Use a flat key-value structure with simple properties.", + "nested": "Use a nested JSON structure with logical grouping of related data.", + "list": "Structure the data as a list/array of objects, one per entity or record." + } + aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}" + + aiPrompt += " Ensure all extracted data is accurate and complete." + + return await self.process({ + "aiPrompt": aiPrompt, + "documentList": documentList, + "resultType": resultType + }) + diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py new file mode 100644 index 00000000..5badc321 --- /dev/null +++ b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Generate Document action for AI operations. +Generates documents from scratch or based on templates/inputs. +""" + +import logging +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult + +logger = logging.getLogger(__name__) + +@action +async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Generate documents from scratch or based on templates/inputs. + - Input requirements: prompt or description (required); optional documentList (for templates/references). + - Output format: Document in specified format (default: docx). + + Parameters: + - prompt (str, required): Description of the document to generate. 
+ - documentList (list, optional): Template documents or reference documents to use as a guide. + - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc. + - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx. + """ + prompt = parameters.get("prompt") + if not prompt: + return ActionResult.isFailure(error="prompt is required") + + documentList = parameters.get("documentList", []) + documentType = parameters.get("documentType") + resultType = parameters.get("resultType", "docx") + + aiPrompt = f"Generate a document based on the following requirements: {prompt}" + if documentType: + aiPrompt += f" Document type: {documentType}." + if documentList: + aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style." + aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization." + + processParams = { + "aiPrompt": aiPrompt, + "resultType": resultType + } + if documentList: + processParams["documentList"] = documentList + + return await self.process(processParams) + diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py new file mode 100644 index 00000000..2468d949 --- /dev/null +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -0,0 +1,219 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Process action for AI operations. +Universal AI document processing action. 
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
    - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
    - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
    - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.

    Parameters:
    - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
    - documentList (list, optional): Document reference(s) in any format to use as input/context.
    - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.

    Also read from parameters when present: parentOperationId (parent progress
    operation), contentParts (pre-extracted content, skips extraction) and
    extractionOptions (overrides the default extraction settings).

    Returns:
    - Success ActionResult with one ActionDocument per document in the AI response,
      or a single document built from the response text when no documents came back.
    - Failure ActionResult when aiPrompt is missing or any step raises.
    """
    # Set only once the progress operation has really been opened, so the error
    # path never tries to finish an operation that was not started.
    operationId: Optional[str] = None
    progressStarted = False

    try:
        aiPrompt = parameters.get("aiPrompt")
        logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")

        # Validate before opening a progress operation: returning early after
        # progressLogStart would leave that operation open forever.
        if not aiPrompt:
            logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
            return ActionResult.isFailure(
                error="AI prompt is required"
            )

        # `or` so that an explicit None falls back to txt instead of becoming "none"
        resultType = parameters.get("resultType") or "txt"

        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_process_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Generate",
            "AI Processing",
            f"Format: {resultType}",
            parentOperationId=parentOperationId
        )
        progressStarted = True

        # Update progress - preparing parameters
        self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

        from modules.datamodels.datamodelDocref import DocumentReferenceList

        documentListParam = parameters.get("documentList")
        # Convert to DocumentReferenceList if needed
        if documentListParam is None:
            documentList = DocumentReferenceList(references=[])
        elif isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            # Unsupported type: continue without input documents rather than failing
            logger.error(f"Invalid documentList type: {type(documentListParam)}")
            documentList = DocumentReferenceList(references=[])

        # Determine output extension and default MIME type without duplicating service logic
        normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
        output_extension = f".{normalized_result_type}"
        output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
        logger.info(f"Using result type: {resultType} -> {output_extension}")

        # Phase 7.3: Extract content first if documents provided, then use contentParts
        # Check if contentParts are already provided (preferred path)
        contentParts: Optional[List[ContentPart]] = None
        if "contentParts" in parameters:
            contentParts = parameters.get("contentParts")
            if contentParts and not isinstance(contentParts, list):
                # Accept any container object exposing its parts via a `.parts` attribute
                if hasattr(contentParts, 'parts'):
                    contentParts = contentParts.parts
                else:
                    logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
                    contentParts = None

        # If contentParts not provided but documentList is, extract content first
        if not contentParts and documentList.references:
            self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")

            # Get ChatDocuments
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                logger.warning("No documents found in documentList")
            else:
                logger.info(f"Extracting content from {len(chatDocuments)} documents")

                # Prepare extraction options (use defaults if not provided)
                extractionOptions = parameters.get("extractionOptions")
                if not extractionOptions:
                    extractionOptions = ExtractionOptions(
                        prompt="Extract all content from the document",
                        mergeStrategy=MergeStrategy(
                            mergeType="concatenate",
                            groupBy="typeGroup",
                            orderBy="id"
                        ),
                        processDocumentsIndividually=True
                    )

                # Extract content using extraction service with hierarchical progress logging
                # Pass operationId for per-document progress tracking
                extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)

                # Combine all ContentParts from all extracted results
                contentParts = []
                for extracted in extractedResults:
                    if extracted.parts:
                        contentParts.extend(extracted.parts)

                logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")

        # Update progress - preparing AI call
        self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")

        # Build options with only resultFormat - let service layer handle all other parameters
        output_format = output_extension.replace('.', '') or 'txt'
        options = AiCallOptions(
            resultFormat=output_format
        )

        # Update progress - calling AI
        self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")

        # Use unified callAiContent method with contentParts (extraction is now separate)
        aiResponse = await self.services.ai.callAiContent(
            prompt=aiPrompt,
            options=options,
            contentParts=contentParts,  # Already extracted (or None if no documents)
            outputFormat=output_format,
            parentOperationId=operationId
        )

        # Update progress - processing result
        self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

        # Extract documents from AiResponse
        if aiResponse.documents and len(aiResponse.documents) > 0:
            final_documents = []
            for doc in aiResponse.documents:
                # A fresh metadata dict per document so later mutation of one
                # document's metadata cannot leak into the others.
                validationMetadata = {
                    "actionType": "ai.process",
                    "resultType": normalized_result_type,
                    "outputFormat": output_format,
                    "hasDocuments": True,
                    "documentCount": len(aiResponse.documents)
                }
                final_documents.append(ActionDocument(
                    documentName=doc.documentName,
                    documentData=doc.documentData,
                    mimeType=doc.mimeType or output_mime_type,
                    sourceJson=getattr(doc, 'sourceJson', None),  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                ))
        else:
            # Text response - create document from content
            extension = output_extension.lstrip('.')
            meaningful_name = self._generateMeaningfulFileName(
                base_name="ai",
                extension=extension,
                action_name="result"
            )
            validationMetadata = {
                "actionType": "ai.process",
                "resultType": normalized_result_type,
                "outputFormat": output_format,
                "hasDocuments": False,
                "contentType": "text"
            }
            final_documents = [ActionDocument(
                documentName=meaningful_name,
                documentData=aiResponse.content,
                mimeType=output_mime_type,
                validationMetadata=validationMetadata
            )]

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=final_documents)

    except Exception as e:
        # exc_info keeps the traceback, which str(e) alone loses
        logger.error(f"Error in AI processing: {str(e)}", exc_info=True)

        # Complete progress tracking with failure (best effort only)
        if progressStarted:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Don't fail on progress logging errors, but don't hide them either
                logger.warning(f"Could not finish progress log {operationId}: {finishError}")

        return ActionResult.isFailure(
            error=str(e)
        )
@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Summarize one or more documents, extracting key points and main ideas.
    - Input requirements: documentList (required); optional summaryLength, focus.
    - Output format: Text document with summary (default: txt, can be overridden with resultType).

    Parameters:
    - documentList (list, required): Document reference(s) to summarize.
    - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
    - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
    - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.

    Returns:
    - A failure ActionResult when documentList is missing or empty; otherwise
      whatever self.process returns for the assembled AI prompt.
    """
    documentList = parameters.get("documentList") or []
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    focus = parameters.get("focus")
    # `or` so explicit None values fall back to the defaults
    resultType = parameters.get("resultType") or "txt"

    lengthInstructions = {
        "brief": "Create a brief summary (2-3 paragraphs)",
        "medium": "Create a medium-length summary (comprehensive but concise)",
        "detailed": "Create a detailed summary covering all major points"
    }
    # str(...) guards against None / non-string values, which previously made
    # .lower() raise; unknown lengths fall back to "medium".
    summaryLength = str(parameters.get("summaryLength") or "medium").strip().lower()
    lengthInstruction = lengthInstructions.get(summaryLength, lengthInstructions["medium"])

    aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
    if focus:
        aiPrompt += f" Focus specifically on: {focus}."
    aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."

    processParams = {
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    }

    # process() reads parentOperationId from its parameters; forward it so the
    # progress entry of the delegated call stays attached to the caller's operation.
    parentOperationId = parameters.get("parentOperationId")
    if parentOperationId is not None:
        processParams["parentOperationId"] = parentOperationId

    return await self.process(processParams)
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research with two-step process: search for URLs, then crawl content.
    - Input requirements: prompt (required); optional urlList, country, language, researchDepth.
    - Output format: JSON with research results including URLs and content.

    Parameters:
    - prompt (str, required): Natural language research instruction.
    - urlList (list, optional): Specific URLs to crawl, if needed.
    - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
    - language (str, optional): Language code (lowercase, e.g., de, en, fr).
    - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.

    Returns:
    - Success ActionResult holding one JSON ActionDocument with the research result.
    - Failure ActionResult when prompt is missing or any step raises.
    """
    # Tracks whether a progress operation is currently open, so it is finished
    # exactly once: as success at the very end, or as failure in the error path.
    operationId = None
    progressOpen = False

    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # Read the optional parameters once; they feed both the service call
        # and the validation metadata below.
        urlList = parameters.get("urlList", [])
        country = parameters.get("country")
        language = parameters.get("language")
        researchDepth = parameters.get("researchDepth", "general")

        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parentOperationId
        )
        progressOpen = True

        # Call webcrawl service - service handles all AI intention analysis and processing
        result = await self.services.web.performWebResearch(
            prompt=prompt,
            urls=urlList,
            country=country,
            language=language,
            researchDepth=researchDepth,
            operationId=operationId
        )

        # Prefer the filename suggested in the research result, but only when it
        # is a short string of safe characters; otherwise use the generic name.
        meaningfulName = None
        suggestedFilename = result.get("suggested_filename")
        if suggestedFilename and isinstance(suggestedFilename, str):
            # Clean and validate filename
            cleaned = suggestedFilename.strip().strip('"\'')
            cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
            # Ensure it doesn't already have extension
            if cleaned.lower().endswith('.json'):
                cleaned = cleaned[:-5]
            # Validate: should be reasonable length and contain only safe characters
            if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
                meaningfulName = f"{cleaned}.json"

        if meaningfulName is None:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": urlList,
            "country": country,
            "language": language,
            "researchDepth": researchDepth,
            "resultFormat": "json"
        }
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=result,
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        # Complete progress tracking only after the document was built, so a
        # failure while building it is not reported as success first.
        self.services.chat.progressLogFinish(operationId, True)
        progressOpen = False

        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        # exc_info keeps the traceback, which str(e) alone loses
        logger.error(f"Error in web research: {str(e)}", exc_info=True)
        if progressOpen:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Best effort: a progress logging problem must not mask the real error
                logger.warning(f"Could not finish progress log {operationId}: {finishError}")
        return ActionResult.isFailure(error=str(e))
class CsvProcessingHelper:
    """Helper for CSV processing operations"""

    def __init__(self, methodInstance):
        """
        Initialize CSV processing helper.

        Args:
            methodInstance: Instance of MethodAi (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    def applyCsvOptions(self, csvContent: str, options: Dict[str, Any]) -> str:
        """
        Apply CSV processing options to CSV content.

        The content is parsed as standard CSV (comma delimiter, double-quote
        quoting) and re-emitted with the requested delimiter/quote character.
        Going through the csv module keeps quoted fields intact: a comma inside
        a quoted field is data, not a separator, and a field that already
        contains the new delimiter gets quoted in the output.

        Args:
            csvContent: CSV content as string
            options: Dictionary with CSV processing options. Recognized keys:
                "delimiter" and "quotechar"; missing or None values mean
                "keep the default" ("," and '"').

        Returns:
            Processed CSV content as string. Empty content, empty options, or
            options equal to the defaults return the input unchanged. Line
            ending style and the presence/absence of a trailing newline follow
            the input.
        """
        # Local imports: this block is the only user of csv/io in this module.
        import csv
        import io

        if not csvContent or not options:
            return csvContent

        delimiter = options.get("delimiter") or ","
        quotechar = options.get("quotechar") or '"'
        if delimiter == "," and quotechar == '"':
            return csvContent  # nothing to change

        lineEnding = "\r\n" if "\r\n" in csvContent else "\n"
        try:
            rows = list(csv.reader(io.StringIO(csvContent, newline="")))
            buffer = io.StringIO(newline="")
            writer = csv.writer(
                buffer,
                delimiter=delimiter,
                quotechar=quotechar,
                quoting=csv.QUOTE_MINIMAL,
                lineterminator=lineEnding,
            )
            writer.writerows(rows)
        except (csv.Error, TypeError, ValueError):
            # The csv module only accepts single-character delimiter/quotechar
            # values. Keep supporting anything else via plain text replacement.
            return self._replaceLiterally(csvContent, delimiter, quotechar)

        converted = buffer.getvalue()
        # csv.writer terminates every row; drop the last terminator when the
        # input did not end with a newline.
        if not csvContent.endswith(("\n", "\r")) and converted.endswith(lineEnding):
            converted = converted[:-len(lineEnding)]
        return converted

    @staticmethod
    def _replaceLiterally(csvContent: str, delimiter: str, quotechar: str) -> str:
        """Replace ',' and '"' textually, without any awareness of CSV quoting."""
        if delimiter != ",":
            csvContent = csvContent.replace(",", delimiter)
        if quotechar != '"':
            csvContent = csvContent.replace('"', quotechar)
        return csvContent
class MethodAi(MethodBase):
    """
    AI processing methods.

    Registers the AI workflow actions (process, webResearch, summarizeDocument,
    translateDocument, convert, convertDocument, extractData, generateDocument)
    under the method name "ai". Each action is described by a
    WorkflowActionDefinition carrying its actionId, a description, its
    parameter definitions and the bound function that executes it.
    """

    def __init__(self, services):
        """
        Build the action registry and bind the action functions to this instance.

        Args:
            services: Service container handed on to MethodBase.__init__ and
                made available to the helpers.

        Raises:
            ValueError: Raised by _validateActions() when an entry of _actions
                is not a WorkflowActionDefinition or lacks actionId/execute.
        """
        super().__init__(services)
        self.name = "ai"
        self.description = "AI processing methods"

        # Initialize helper modules
        self.csvProcessing = CsvProcessingHelper(self)

        # RBAC integration: action definitions, each with its actionId.
        # Keys are the action names; `execute` is the imported action function
        # bound to this instance via the descriptor protocol (__get__).
        self._actions = {
            "process": WorkflowActionDefinition(
                actionId="ai.process",
                description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt",
                parameters={
                    "aiPrompt": WorkflowActionParameter(
                        name="aiPrompt",
                        type="str",
                        frontendType=FrontendType.TEXTAREA,
                        required=True,
                        description="Instruction for the AI describing what processing to perform"
                    ),
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=False,
                        description="Document reference(s) in any format to use as input/context"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"],
                        required=False,
                        default="txt",
                        description="Output file extension. All output documents will use this format"
                    )
                },
                execute=process.__get__(self, self.__class__)
            ),
            "webResearch": WorkflowActionDefinition(
                actionId="ai.webResearch",
                description="Web research with two-step process: search for URLs, then crawl content",
                parameters={
                    "prompt": WorkflowActionParameter(
                        name="prompt",
                        type="str",
                        frontendType=FrontendType.TEXTAREA,
                        required=True,
                        description="Natural language research instruction"
                    ),
                    "urlList": WorkflowActionParameter(
                        name="urlList",
                        type="List[str]",
                        frontendType=FrontendType.MULTISELECT,
                        required=False,
                        description="Specific URLs to crawl, if needed"
                    ),
                    "country": WorkflowActionParameter(
                        name="country",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        description="Two-digit country code (lowercase, e.g., ch, us, de)"
                    ),
                    "language": WorkflowActionParameter(
                        name="language",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["de", "en", "fr", "it", "es"],
                        required=False,
                        description="Language code (lowercase, e.g., de, en, fr)"
                    ),
                    "researchDepth": WorkflowActionParameter(
                        name="researchDepth",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["fast", "general", "deep"],
                        required=False,
                        default="general",
                        description="Research depth"
                    )
                },
                execute=webResearch.__get__(self, self.__class__)
            ),
            "summarizeDocument": WorkflowActionDefinition(
                actionId="ai.summarizeDocument",
                description="Summarize one or more documents, extracting key points and main ideas",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to summarize"
                    ),
                    "summaryLength": WorkflowActionParameter(
                        name="summaryLength",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["brief", "medium", "detailed"],
                        required=False,
                        default="medium",
                        description="Desired summary length"
                    ),
                    "focus": WorkflowActionParameter(
                        name="focus",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        description="Specific aspect to focus on in the summary (e.g., financial data, key decisions)"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["txt", "md", "docx"],
                        required=False,
                        default="txt",
                        description="Output file extension"
                    )
                },
                execute=summarizeDocument.__get__(self, self.__class__)
            ),
            "translateDocument": WorkflowActionDefinition(
                actionId="ai.translateDocument",
                description="Translate documents to a target language while preserving formatting and structure",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to translate"
                    ),
                    "targetLanguage": WorkflowActionParameter(
                        name="targetLanguage",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=True,
                        description="Target language code or name (e.g., de, German, French, es)"
                    ),
                    "sourceLanguage": WorkflowActionParameter(
                        name="sourceLanguage",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        description="Source language if known (e.g., en, English). If not provided, AI will detect"
                    ),
                    "preserveFormatting": WorkflowActionParameter(
                        name="preserveFormatting",
                        type="bool",
                        frontendType=FrontendType.CHECKBOX,
                        required=False,
                        default=True,
                        description="Whether to preserve original formatting"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        description="Output file extension. If not specified, uses same format as input"
                    )
                },
                execute=translateDocument.__get__(self, self.__class__)
            ),
            "convert": WorkflowActionDefinition(
                actionId="ai.convert",
                description="Convert documents/data between different formats with specific formatting options",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to convert"
                    ),
                    "inputFormat": WorkflowActionParameter(
                        name="inputFormat",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["json", "csv", "xlsx", "txt"],
                        required=True,
                        description="Source format"
                    ),
                    "outputFormat": WorkflowActionParameter(
                        name="outputFormat",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["csv", "json", "xlsx", "txt"],
                        required=True,
                        description="Target format"
                    ),
                    "columnsPerRow": WorkflowActionParameter(
                        name="columnsPerRow",
                        type="int",
                        frontendType=FrontendType.NUMBER,
                        required=False,
                        description="For CSV output, number of columns per row. Default: auto-detect",
                        validation={"min": 1, "max": 100}
                    ),
                    "delimiter": WorkflowActionParameter(
                        name="delimiter",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        default=",",
                        description="For CSV output, delimiter character"
                    ),
                    "includeHeader": WorkflowActionParameter(
                        name="includeHeader",
                        type="bool",
                        frontendType=FrontendType.CHECKBOX,
                        required=False,
                        default=True,
                        description="For CSV output, whether to include header row"
                    ),
                    "language": WorkflowActionParameter(
                        name="language",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["de", "en", "fr"],
                        required=False,
                        default="en",
                        description="Language for output"
                    )
                },
                execute=convert.__get__(self, self.__class__)
            ),
            "convertDocument": WorkflowActionDefinition(
                actionId="ai.convertDocument",
                description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to convert"
                    ),
                    "targetFormat": WorkflowActionParameter(
                        name="targetFormat",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"],
                        required=True,
                        description="Target format extension"
                    ),
                    "preserveStructure": WorkflowActionParameter(
                        name="preserveStructure",
                        type="bool",
                        frontendType=FrontendType.CHECKBOX,
                        required=False,
                        default=True,
                        description="Whether to preserve document structure (headings, tables, etc.)"
                    )
                },
                execute=convertDocument.__get__(self, self.__class__)
            ),
            "extractData": WorkflowActionDefinition(
                actionId="ai.extractData",
                description="Extract structured data from documents (key-value pairs, entities, facts, etc.)",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to extract data from"
                    ),
                    "dataStructure": WorkflowActionParameter(
                        name="dataStructure",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["flat", "nested", "list"],
                        required=False,
                        default="nested",
                        description="Desired data structure"
                    ),
                    "fields": WorkflowActionParameter(
                        name="fields",
                        type="List[str]",
                        frontendType=FrontendType.MULTISELECT,
                        required=False,
                        description="Specific fields/properties to extract (e.g., [name, date, amount])"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["json", "csv", "xlsx"],
                        required=False,
                        default="json",
                        description="Output format"
                    )
                },
                execute=extractData.__get__(self, self.__class__)
            ),
            "generateDocument": WorkflowActionDefinition(
                actionId="ai.generateDocument",
                description="Generate documents from scratch or based on templates/inputs",
                parameters={
                    "prompt": WorkflowActionParameter(
                        name="prompt",
                        type="str",
                        frontendType=FrontendType.TEXTAREA,
                        required=True,
                        description="Description of the document to generate"
                    ),
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=False,
                        description="Template documents or reference documents to use as a guide"
                    ),
                    "documentType": WorkflowActionParameter(
                        name="documentType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["letter", "memo", "proposal", "contract", "report", "email"],
                        required=False,
                        description="Type of document"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["docx", "pdf", "txt", "md"],
                        required=False,
                        default="docx",
                        description="Output format"
                    )
                },
                execute=generateDocument.__get__(self, self.__class__)
            )
        }

        # Validate actions after definition; must come after the assignment
        # above because it inspects self._actions.
        self._validateActions()

        # Register actions as methods (optional, for direct access).
        # The action functions call each other through these attributes,
        # e.g. summarizeDocument delegates to self.process(...).
        self.process = process.__get__(self, self.__class__)
        self.webResearch = webResearch.__get__(self, self.__class__)
        self.summarizeDocument = summarizeDocument.__get__(self, self.__class__)
        self.translateDocument = translateDocument.__get__(self, self.__class__)
        self.convert = convert.__get__(self, self.__class__)
        self.convertDocument = convertDocument.__get__(self, self.__class__)
        self.extractData = extractData.__get__(self, self.__class__)
        self.generateDocument = generateDocument.__get__(self, self.__class__)

    def _format_timestamp_for_filename(self) -> str:
        """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
        # Uses the current time in UTC, not local time
        return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
defined""" + if not hasattr(self, '_actions') or not isinstance(self._actions, dict): + raise ValueError(f"Method {self.name} must define _actions dictionary in __init__") + + for actionName, actionDef in self._actions.items(): + if not isinstance(actionDef, WorkflowActionDefinition): + raise ValueError(f"Action '{actionName}' in {self.name} must be WorkflowActionDefinition instance") + + if not actionDef.actionId: + raise ValueError(f"Action '{actionName}' in {self.name} must have actionId") + + if not actionDef.execute: + raise ValueError(f"Action '{actionName}' in {self.name} must have execute function") + @property def actions(self) -> Dict[str, Dict[str, Any]]: - """Dynamically collect all actions decorated with @action in the class.""" - actions = {} - for attr_name in dir(self): - # Skip the actions property itself to avoid recursion - if attr_name == 'actions': - continue - try: - attr = getattr(self, attr_name) - if callable(attr) and getattr(attr, 'is_action', False): - sig = inspect.signature(attr) - params = {} - for param_name, param in sig.parameters.items(): - if param_name not in ['self', 'parameters']: - param_type = param.annotation if param.annotation != param.empty else Any - params[param_name] = { - 'type': param_type, - 'required': param.default == param.empty, - 'description': None, - 'default': param.default if param.default != param.empty else None - } - actions[attr_name] = { - 'description': attr.__doc__ or '', - 'parameters': params, - 'method': attr - } - except (AttributeError, RecursionError): - # Skip attributes that cause issues - continue - return actions + """ + Dynamically collect all actions from _actions dictionary. + Returns format for API/UI consumption. + + REQUIREMENT: Alle Actions müssen in _actions Dictionary definiert sein. + Actions ohne _actions Definition sind nicht verfügbar. 
+ """ + result = {} + + # Actions müssen in _actions Dictionary definiert sein + if not hasattr(self, '_actions') or not self._actions: + self.logger.error(f"Method {self.name} has no _actions dictionary defined. Actions will not be available.") + return result + + for actionName, actionDef in self._actions.items(): + # RBAC-Check: Prüfe ob Action für aktuellen User verfügbar ist + if not self._checkActionPermission(actionDef.actionId): + continue # Skip if user doesn't have permission + + # Konvertiere WorkflowActionDefinition zu System-Format + result[actionName] = { + 'description': actionDef.description, + 'parameters': self._convertParametersToSystemFormat(actionDef.parameters), + 'method': self._createActionWrapper(actionDef) + } + + return result + + def _checkActionPermission(self, actionId: str) -> bool: + """ + Check if current user has permission to execute this action. + Uses RBAC RESOURCE context. + + REQUIREMENT: RBAC-Service muss verfügbar sein. + """ + if not hasattr(self.services, 'rbac') or not self.services.rbac: + self.logger.error(f"RBAC service not available. Action {actionId} will be denied.") + return False + + currentUser = self.services.chat.getCurrentUser() + if not currentUser: + self.logger.warning(f"No current user found. 
Action {actionId} will be denied.") + return False + + # RBAC-Check: RESOURCE context, item = actionId + permissions = self.services.rbac.getUserPermissions( + user=currentUser, + context=AccessRuleContext.RESOURCE, + item=actionId + ) + + return permissions.view + + def _convertParametersToSystemFormat(self, parameters: Dict[str, WorkflowActionParameter]) -> Dict[str, Dict[str, Any]]: + """Convert WorkflowActionParameter dict to system format for API/UI consumption""" + result = {} + for paramName, param in parameters.items(): + result[paramName] = { + 'type': param.type, + 'required': param.required, + 'description': param.description, + 'default': param.default, + 'frontendType': param.frontendType.value, + 'frontendOptions': param.frontendOptions, + 'validation': param.validation + } + return result + + def _createActionWrapper(self, actionDef: WorkflowActionDefinition): + """Create wrapper function for action execution with parameter validation""" + async def wrapper(parameters: Dict[str, Any], *args, **kwargs): + # Parameter-Validierung basierend auf WorkflowActionParameter definitions + validatedParams = self._validateParameters(parameters, actionDef.parameters) + + # Execute action + return await actionDef.execute(validatedParams, *args, **kwargs) + + wrapper.is_action = True + return wrapper + + def _validateParameters(self, parameters: Dict[str, Any], paramDefs: Dict[str, WorkflowActionParameter]) -> Dict[str, Any]: + """Validate parameters against definitions""" + validated = {} + + for paramName, paramDef in paramDefs.items(): + value = parameters.get(paramName) + + # Check required + if paramDef.required and value is None: + raise ValueError(f"Required parameter '{paramName}' is missing") + + # Use default if not provided + if value is None and paramDef.default is not None: + value = paramDef.default + + # Type validation + if value is not None: + value = self._validateType(value, paramDef.type) + + # Custom validation rules + if paramDef.validation 
and value is not None: + self._applyValidationRules(value, paramDef.validation) + + validated[paramName] = value + + return validated + + def _validateType(self, value: Any, expectedType: str) -> Any: + """Validate and convert value to expected type""" + # Type validation logic + typeMap = { + 'str': str, + 'int': int, + 'float': float, + 'bool': bool, + 'list': list, + 'dict': dict, + } + + # Handle List[str], List[int], etc. + if expectedType.startswith('List['): + if not isinstance(value, list): + raise ValueError(f"Expected list for type '{expectedType}', got {type(value).__name__}") + # Extract inner type + innerType = expectedType[5:-1].strip() # Remove "List[" and "]" + if innerType in typeMap: + return [typeMap[innerType](v) for v in value] + return value + + # Handle Dict[str, Any], etc. + if expectedType.startswith('Dict['): + if not isinstance(value, dict): + raise ValueError(f"Expected dict for type '{expectedType}', got {type(value).__name__}") + return value + + # Handle simple types + if expectedType in typeMap: + expectedTypeClass = typeMap[expectedType] + if not isinstance(value, expectedTypeClass): + try: + return expectedTypeClass(value) + except (ValueError, TypeError) as e: + raise ValueError(f"Cannot convert {value} to {expectedType}: {str(e)}") + + return value + + def _applyValidationRules(self, value: Any, rules: Dict[str, Any]): + """Apply custom validation rules""" + if 'min' in rules: + if isinstance(value, (int, float)) and value < rules['min']: + raise ValueError(f"Value must be >= {rules['min']}") + elif isinstance(value, str) and len(value) < rules['min']: + raise ValueError(f"String length must be >= {rules['min']}") + + if 'max' in rules: + if isinstance(value, (int, float)) and value > rules['max']: + raise ValueError(f"Value must be <= {rules['max']}") + elif isinstance(value, str) and len(value) > rules['max']: + raise ValueError(f"String length must be <= {rules['max']}") + + if 'pattern' in rules: + import re + if not 
re.match(rules['pattern'], str(value)): + raise ValueError(f"Value does not match required pattern: {rules['pattern']}") def getActionSignature(self, actionName: str) -> str: """Get formatted action signature for AI prompt generation (detailed version)""" diff --git a/modules/workflows/methods/methodContext.py b/modules/workflows/methods/methodContext.py.old similarity index 100% rename from modules/workflows/methods/methodContext.py rename to modules/workflows/methods/methodContext.py.old diff --git a/modules/workflows/methods/methodContext/__init__.py b/modules/workflows/methods/methodContext/__init__.py new file mode 100644 index 00000000..8d6c7823 --- /dev/null +++ b/modules/workflows/methods/methodContext/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +from .methodContext import MethodContext + +__all__ = ['MethodContext'] + diff --git a/modules/workflows/methods/methodContext/actions/__init__.py b/modules/workflows/methods/methodContext/actions/__init__.py new file mode 100644 index 00000000..9059d6bc --- /dev/null +++ b/modules/workflows/methods/methodContext/actions/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +"""Action modules for Context operations.""" + +# Export all actions +from .getDocumentIndex import getDocumentIndex +from .extractContent import extractContent +from .triggerPreprocessingServer import triggerPreprocessingServer + +__all__ = [ + 'getDocumentIndex', + 'extractContent', + 'triggerPreprocessingServer', +] + diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py new file mode 100644 index 00000000..799ce61d --- /dev/null +++ b/modules/workflows/methods/methodContext/actions/extractContent.py @@ -0,0 +1,156 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Extract Content action for Context operations. 
@action
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Extract content from documents (separate from AI calls).

    This action performs pure content extraction without AI processing.
    The extracted ContentParts can then be used by subsequent AI processing actions.

    Parameters:
    - documentList (list, required): Document reference(s) to extract content from.
    - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).

    Returns:
    - ActionResult with ActionDocument containing ContentExtracted objects
    - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
    """
    # Bound before the try block so the failure handler below can never hit an
    # unbound name when the very first statements raise.
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"context_extract_{workflowId}_{int(time.time())}"

        # Extract documentList from parameters dict
        documentListParam = parameters.get("documentList")
        if not documentListParam:
            return ActionResult.isFailure(error="documentList is required")

        # Convert to DocumentReferenceList if needed
        if isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Extracting content from documents",
            "Content Extraction",
            f"Documents: {len(documentList.references)}",
            parentOperationId=parentOperationId
        )

        # Get ChatDocuments from documentList
        self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
        chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)

        if not chatDocuments:
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error="No documents found in documentList")

        logger.info(f"Extracting content from {len(chatDocuments)} documents")

        # Prepare extraction options
        self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
        extractionOptionsParam = parameters.get("extractionOptions")

        # Convert dict to ExtractionOptions object if needed; any other type
        # (or a missing value) falls through to the defaults below.
        extractionOptions = None
        if extractionOptionsParam:
            if isinstance(extractionOptionsParam, dict):
                extractionOptions = ExtractionOptions(**extractionOptionsParam)
            elif isinstance(extractionOptionsParam, ExtractionOptions):
                extractionOptions = extractionOptionsParam

        if not extractionOptions:
            # Default extraction options for pure content extraction (no AI processing)
            extractionOptions = ExtractionOptions(
                prompt="Extract all content from the document",
                mergeStrategy=MergeStrategy(
                    mergeType="concatenate",
                    groupBy="typeGroup",
                    orderBy="id"
                ),
                processDocumentsIndividually=True
            )

        # Call extraction service with hierarchical progress logging
        self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
        # Pass operationId for hierarchical per-document progress logging
        extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)

        # Build ActionDocuments from ContentExtracted results
        self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
        actionDocuments = []
        # Results are mapped back to the source documents by position.
        for i, extracted in enumerate(extractedResults):
            originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
            if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
                # Original filename (without extension) plus an "_extracted_<id>" suffix
                baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
                documentName = f"{baseName}_extracted_{extracted.id}.json"
            else:
                # Fallback to generic name with index
                documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"

            # Store ContentExtracted object in ActionDocument.documentData
            validationMetadata = {
                "actionType": "context.extractContent",
                "documentIndex": i,
                "extractedId": extracted.id,
                "partCount": len(extracted.parts) if extracted.parts else 0,
                "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
            }
            actionDoc = ActionDocument(
                documentName=documentName,
                documentData=extracted,  # ContentExtracted object
                mimeType="application/json",
                validationMetadata=validationMetadata
            )
            actionDocuments.append(actionDoc)

        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=actionDocuments)

    except Exception as e:
        logger.error(f"Error in content extraction: {str(e)}")

        # Complete progress tracking with failure. Best effort only: a problem
        # in progress logging must not mask the original error.
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as progressError:
                logger.debug(f"Could not finish progress log for {operationId}: {str(progressError)}")

        return ActionResult.isFailure(error=str(e))
@action
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
    - Input requirements: No input documents required. Optional resultType parameter.
    - Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.

    Parameters:
    - resultType (str, optional): Output format (json, txt, md). Default: json.
    """
    try:
        workflow = self.services.workflow
        if not workflow:
            return ActionResult.isFailure(
                error="No workflow available"
            )

        # Normalize the requested format; a missing, None or unsupported value
        # falls back to the documented default so content, file extension and
        # MIME type always agree.
        supportedTypes = ("json", "txt", "md")
        resultType = str(parameters.get("resultType") or "json").lower().strip().lstrip('.')
        if resultType not in supportedTypes:
            logger.warning(f"Unsupported resultType '{resultType}' for document index, using json")
            resultType = "json"

        # Get available documents index from chat service
        documentsIndex = self.services.chat.getAvailableDocuments(workflow)

        # Sentinel strings the chat service returns when nothing is available.
        noDocumentMarkers = (
            "No documents available",
            "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."
        )

        if not documentsIndex or documentsIndex in noDocumentMarkers:
            # Always build the empty structure: it is also needed for the
            # validation metadata below, whatever the output format is.
            indexData = {
                "workflowId": getattr(workflow, 'id', 'unknown'),
                "totalDocuments": 0,
                "rounds": [],
                "documentReferences": []
            }
            if resultType == "json":
                indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
            else:
                indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
        else:
            # Parse the document index string to extract structured information
            indexData = self.documentIndex.parseDocumentIndex(documentsIndex, workflow)

            if resultType == "json":
                indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
            elif resultType == "md":
                indexContent = self.formatting.formatAsMarkdown(indexData)
            else:  # txt
                indexContent = self.formatting.formatAsText(indexData, documentsIndex)

        # Generate meaningful filename
        workflowContext = self.services.chat.getWorkflowContext()
        filename = self._generateMeaningfulFileName(
            "document_index",
            resultType,
            workflowContext,
            "getDocumentIndex"
        )

        validationMetadata = {
            "actionType": "context.getDocumentIndex",
            "resultType": resultType,
            "workflowId": getattr(workflow, 'id', 'unknown'),
            "totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
        }

        # Create ActionDocument
        document = ActionDocument(
            documentName=filename,
            documentData=indexContent,
            mimeType="application/json" if resultType == "json" else "text/plain",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        logger.error(f"Error generating document index: {str(e)}")
        return ActionResult.isFailure(
            error=f"Failed to generate document index: {str(e)}"
        )
@action
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Trigger preprocessing server at customer tenant to update database with configuration.

    This action makes a POST request to the preprocessing server endpoint with the provided
    configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.

    Parameters:
    - endpoint (str, required): The full URL endpoint for the preprocessing server API.
    - configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
    - authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.

    Returns:
    - ActionResult with ActionDocument containing "ok" on success, or error message on failure.
    """
    # NOTE(review): both the target URL and the APP_CONFIG key come from the
    # caller, so any configured secret can be sent to any endpoint. Confirm
    # that callers are trusted or restrict the allowed keys/hosts.
    try:
        targetUrl = parameters.get("endpoint")
        if not targetUrl:
            return ActionResult.isFailure(error="endpoint parameter is required")

        rawConfig = parameters.get("configJson")
        if not rawConfig:
            return ActionResult.isFailure(error="configJson parameter is required")

        secretKeyName = parameters.get("authSecretConfigKey")
        if not secretKeyName:
            return ActionResult.isFailure(error="authSecretConfigKey parameter is required")

        # Accept the configuration either as a dict or as a JSON string.
        if isinstance(rawConfig, dict):
            payload = rawConfig
        elif isinstance(rawConfig, str):
            try:
                payload = json.loads(rawConfig)
            except json.JSONDecodeError as e:
                return ActionResult.isFailure(error=f"configJson is not valid JSON: {str(e)}")
        else:
            return ActionResult.isFailure(error=f"configJson must be a dict or JSON string, got {type(rawConfig)}")

        # Look up the authorization secret under the caller-provided config key.
        apiSecret = APP_CONFIG.get(secretKeyName)
        if not apiSecret:
            errorMsg = f"{secretKeyName} not found in APP_CONFIG"
            logger.error(errorMsg)
            return ActionResult.isFailure(error=errorMsg)

        requestHeaders = {
            "X-PP-API-Key": apiSecret,
            "Content-Type": "application/json"
        }

        # POST the configuration with an overall 60 second timeout.
        requestTimeout = aiohttp.ClientTimeout(total=60)
        async with aiohttp.ClientSession(timeout=requestTimeout) as session:
            async with session.post(targetUrl, headers=requestHeaders, json=payload) as reply:
                # Anything other than 200/201 is reported as a failure.
                if reply.status not in [200, 201]:
                    errorText = await reply.text()
                    errorMsg = f"Preprocessing server trigger failed: {reply.status} - {errorText}"
                    logger.error(errorMsg)
                    return ActionResult.isFailure(error=errorMsg)

                replyBody = await reply.text()
                logger.info(f"Preprocessing server trigger successful: {reply.status}")
                logger.debug(f"Response: {replyBody}")

                # Generate meaningful filename
                if hasattr(self.services, 'chat'):
                    workflowContext = self.services.chat.getWorkflowContext()
                else:
                    workflowContext = None
                filename = self._generateMeaningfulFileName(
                    "preprocessing_result",
                    "txt",
                    workflowContext,
                    "triggerPreprocessingServer"
                )

                validationMetadata = self._createValidationMetadata(
                    "triggerPreprocessingServer",
                    endpoint=targetUrl,
                    statusCode=reply.status,
                    responseText=replyBody
                )

                # The result document only signals success; the server reply
                # is kept in the validation metadata.
                okDocument = ActionDocument(
                    documentName=filename,
                    documentData="ok",
                    mimeType="text/plain",
                    validationMetadata=validationMetadata
                )
                return ActionResult.isSuccess(documents=[okDocument])

    except Exception as e:
        errorMsg = f"Error triggering preprocessing server: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
+""" + +import logging +from typing import Dict, Any +from datetime import datetime, UTC + +logger = logging.getLogger(__name__) + +class DocumentIndexHelper: + """Helper for document index operations""" + + def __init__(self, methodInstance): + """ + Initialize document index helper. + + Args: + methodInstance: Instance of MethodContext (for access to services) + """ + self.method = methodInstance + self.services = methodInstance.services + + def parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]: + """Parse the document index string into structured data.""" + try: + indexData = { + "workflowId": getattr(workflow, 'id', 'unknown'), + "generatedAt": datetime.now(UTC).isoformat(), + "totalDocuments": 0, + "rounds": [], + "documentReferences": [] + } + + # Extract document references from the index string + lines = documentsIndex.split('\n') + currentRound = None + currentDocList = None + + for line in lines: + line = line.strip() + if not line: + continue + + # Check for round headers + if "Current round documents:" in line: + currentRound = "current" + continue + elif "Past rounds documents:" in line: + currentRound = "past" + continue + + # Check for document list references (docList:...) + if line.startswith("- docList:"): + docListRef = line.replace("- docList:", "").strip() + currentDocList = { + "reference": docListRef, + "round": currentRound, + "documents": [] + } + indexData["rounds"].append(currentDocList) + continue + + # Check for individual document references (docItem:...) 
+ if line.startswith(" - docItem:") or line.startswith("- docItem:"): + docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip() + indexData["documentReferences"].append({ + "reference": docItemRef, + "round": currentRound, + "docList": currentDocList["reference"] if currentDocList else None + }) + indexData["totalDocuments"] += 1 + if currentDocList: + currentDocList["documents"].append(docItemRef) + + return indexData + + except Exception as e: + logger.error(f"Error parsing document index: {str(e)}") + return { + "workflowId": getattr(workflow, 'id', 'unknown'), + "error": f"Failed to parse document index: {str(e)}", + "rawIndex": documentsIndex + } + diff --git a/modules/workflows/methods/methodContext/helpers/formatting.py b/modules/workflows/methods/methodContext/helpers/formatting.py new file mode 100644 index 00000000..ac38fb86 --- /dev/null +++ b/modules/workflows/methods/methodContext/helpers/formatting.py @@ -0,0 +1,75 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Formatting helper for Context operations. +Handles formatting of document indexes in different formats. +""" + +import logging +from typing import Dict, Any + +logger = logging.getLogger(__name__) + +class FormattingHelper: + """Helper for formatting operations""" + + def __init__(self, methodInstance): + """ + Initialize formatting helper. 
class FormattingHelper:
    """Renders parsed document-index data as Markdown or plain text."""

    def __init__(self, methodInstance):
        """
        Initialize formatting helper.

        Args:
            methodInstance: Instance of MethodContext (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    def formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
        """
        Format document index as Markdown.

        Args:
            indexData: Parsed index with the optional keys ``workflowId``,
                ``generatedAt``, ``totalDocuments``, ``rounds`` and
                ``documentReferences``.

        Returns:
            The Markdown document, or a short Markdown error page if
            formatting fails.
        """
        try:
            parts = [
                "# Document Index\n\n",
                f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n",
                f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n",
                f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n",
            ]

            if indexData.get('rounds'):
                parts.append("## Documents by Round\n\n")
                for roundInfo in indexData['rounds']:
                    # The 'round' key may be present with value None (a document
                    # list seen before any round header), so a .get() default
                    # alone is not enough here.
                    roundLabel = str(roundInfo.get('round') or 'unknown').title()
                    parts.append(f"### {roundLabel} Round\n\n")
                    parts.append(f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n")
                    if roundInfo.get('documents'):
                        parts.append("**Documents:**\n\n")
                        parts.extend(f"- `{docRef}`\n" for docRef in roundInfo['documents'])
                        parts.append("\n")

            if indexData.get('documentReferences'):
                parts.append("## All Document References\n\n")
                parts.extend(
                    f"- `{docRef.get('reference', 'unknown')}`\n"
                    for docRef in indexData['documentReferences']
                )

            return "".join(parts)

        except Exception as e:
            logger.error(f"Error formatting as Markdown: {str(e)}")
            return f"# Document Index\n\nError formatting index: {str(e)}\n"

    def formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
        """
        Format document index as plain text.

        A short header built from ``indexData`` is followed by ``rawIndex``
        unchanged, because the raw index is already human-readable.

        Returns:
            The text document, or a short error text (including the raw
            index) if formatting fails.
        """
        try:
            header = [
                "Document Index\n",
                "=" * 50 + "\n\n",
                f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n",
                f"Generated At: {indexData.get('generatedAt', 'unknown')}\n",
                f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n",
            ]
            # Include the raw formatted index for readability
            return "".join(header) + rawIndex

        except Exception as e:
            logger.error(f"Error formatting as text: {str(e)}")
            return f"Document Index\n\nError formatting index: {str(e)}\n\nRaw index:\n{rawIndex}\n"
class MethodContext(MethodBase):
    """Context and workflow information methods."""

    def __init__(self, services):
        """
        Set up helpers and register the three context actions.

        Registers ``getDocumentIndex``, ``extractContent`` and
        ``triggerPreprocessingServer`` in ``self._actions`` (each with an
        ``actionId`` used for the RBAC integration), validates the registry
        and additionally exposes the actions as bound instance attributes.
        """
        super().__init__(services)
        self.name = "context"
        self.description = "Context and workflow information methods"

        # Helper modules
        self.documentIndex = DocumentIndexHelper(self)
        self.formatting = FormattingHelper(self)

        # Bind a module-level action function to this instance.
        def bind(actionFunction):
            return actionFunction.__get__(self, self.__class__)

        # --- context.getDocumentIndex -------------------------------------
        documentIndexParameters = {
            "resultType": WorkflowActionParameter(
                name="resultType",
                type="str",
                frontendType=FrontendType.SELECT,
                frontendOptions=["json", "txt", "md"],
                required=False,
                default="json",
                description="Output format"
            )
        }
        documentIndexAction = WorkflowActionDefinition(
            actionId="context.getDocumentIndex",
            description="Generate a comprehensive index of all documents available in the current workflow",
            parameters=documentIndexParameters,
            execute=bind(getDocumentIndex)
        )

        # --- context.extractContent ---------------------------------------
        extractContentParameters = {
            "documentList": WorkflowActionParameter(
                name="documentList",
                type="List[str]",
                frontendType=FrontendType.DOCUMENT_REFERENCE,
                required=True,
                description="Document reference(s) to extract content from"
            ),
            "extractionOptions": WorkflowActionParameter(
                name="extractionOptions",
                type="dict",
                frontendType=FrontendType.JSON,
                required=False,
                description="Extraction options (if not provided, defaults are used)"
            )
        }
        extractContentAction = WorkflowActionDefinition(
            actionId="context.extractContent",
            description="Extract content from documents (separate from AI calls)",
            parameters=extractContentParameters,
            execute=bind(extractContent)
        )

        # --- context.triggerPreprocessingServer ---------------------------
        # NOTE(review): configJson is declared as "str" although its
        # description allows a dict as well; confirm that type coercion in the
        # base class does not turn a dict into a non-JSON string.
        preprocessingParameters = {
            "endpoint": WorkflowActionParameter(
                name="endpoint",
                type="str",
                frontendType=FrontendType.TEXT,
                required=True,
                description="The full URL endpoint for the preprocessing server API"
            ),
            "configJson": WorkflowActionParameter(
                name="configJson",
                type="str",
                frontendType=FrontendType.JSON,
                required=True,
                description="Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string"
            ),
            "authSecretConfigKey": WorkflowActionParameter(
                name="authSecretConfigKey",
                type="str",
                frontendType=FrontendType.TEXT,
                required=True,
                description="The APP_CONFIG key name to retrieve the authorization secret from"
            )
        }
        preprocessingAction = WorkflowActionDefinition(
            actionId="context.triggerPreprocessingServer",
            description="Trigger preprocessing server at customer tenant to update database with configuration",
            parameters=preprocessingParameters,
            execute=bind(triggerPreprocessingServer)
        )

        # RBAC integration: action definitions keyed by action name
        self._actions = {
            "getDocumentIndex": documentIndexAction,
            "extractContent": extractContentAction,
            "triggerPreprocessingServer": preprocessingAction
        }

        # Validate actions after definition
        self._validateActions()

        # Register actions as methods (optional, for direct access)
        self.getDocumentIndex = bind(getDocumentIndex)
        self.extractContent = bind(extractContent)
        self.triggerPreprocessingServer = bind(triggerPreprocessingServer)
@action
async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Open a JIRA ticket interface and register it under a fresh connection ID.

    Parameters:
    - apiUsername (str, required): JIRA API username/email
    - apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
    - apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
    - projectCode (str, required): JIRA project code (e.g., "DCS")
    - issueType (str, required): JIRA issue type (e.g., "Task")
    - taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict

    Returns:
    - ActionResult with an ActionDocument (JSON) holding connectionId, apiUrl,
      projectCode and issueType. The API token itself is never written to the
      document. Any exception is converted into a failure result.
    """
    try:
        # All six parameters are mandatory; check them in declaration order.
        requiredNames = (
            "apiUsername",
            "apiTokenConfigKey",
            "apiUrl",
            "projectCode",
            "issueType",
            "taskSyncDefinition",
        )
        given = {}
        for requiredName in requiredNames:
            candidate = parameters.get(requiredName)
            if not candidate:
                return ActionResult.isFailure(error=f"{requiredName} parameter is required")
            given[requiredName] = candidate

        apiUsername = given["apiUsername"]
        apiTokenConfigKey = given["apiTokenConfigKey"]
        apiUrl = given["apiUrl"]
        projectCode = given["projectCode"]
        issueType = given["issueType"]
        rawDefinition = given["taskSyncDefinition"]

        # The field mapping may arrive as a dict or as JSON text.
        if isinstance(rawDefinition, dict):
            taskSyncDefinition = rawDefinition
        elif isinstance(rawDefinition, str):
            try:
                taskSyncDefinition = json.loads(rawDefinition)
            except json.JSONDecodeError as e:
                return ActionResult.isFailure(error=f"taskSyncDefinition is not valid JSON: {str(e)}")
        else:
            return ActionResult.isFailure(error=f"taskSyncDefinition must be a dict or JSON string, got {type(rawDefinition)}")

        # The secret is looked up by key so that it never travels through workflow parameters.
        apiToken = APP_CONFIG.get(apiTokenConfigKey)
        if not apiToken:
            errorMsg = f"{apiTokenConfigKey} not found in APP_CONFIG"
            logger.error(errorMsg)
            return ActionResult.isFailure(error=errorMsg)

        connectorParams = {
            "apiUsername": apiUsername,
            "apiToken": apiToken,
            "apiUrl": apiUrl,
            "projectCode": projectCode,
            "ticketType": issueType,
        }
        syncInterface = await self.services.ticket.connectTicket(
            taskSyncDefinition=taskSyncDefinition,
            connectorType="Jira",
            connectorParams=connectorParams,
        )

        # Remember the interface so later actions can find it by ID.
        connectionId = str(uuid.uuid4())
        self._connections[connectionId] = {
            "interface": syncInterface,
            "taskSyncDefinition": taskSyncDefinition,
            "apiUrl": apiUrl,
            "projectCode": projectCode,
        }

        logger.info(f"JIRA connection established: {connectionId} (Project: {projectCode})")

        if hasattr(self.services, 'chat'):
            workflowContext = self.services.chat.getWorkflowContext()
        else:
            workflowContext = None
        filename = self._generateMeaningfulFileName("jira_connection", "json", workflowContext, "connectJira")

        connectionInfo = {
            "connectionId": connectionId,
            "apiUrl": apiUrl,
            "projectCode": projectCode,
            "issueType": issueType,
        }

        validationMetadata = self._createValidationMetadata(
            "connectJira",
            connectionId=connectionId,
            apiUrl=apiUrl,
            projectCode=projectCode
        )

        resultDocument = ActionDocument(
            documentName=filename,
            documentData=json.dumps(connectionInfo, indent=2),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[resultDocument])

    except Exception as e:
        errorMsg = f"Error connecting to JIRA: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
def _resolveCsvColumns(columnsParam, taskSyncDefinitionParam, dataList):
    """Return the ordered column names for the export, or None if columnsParam has an unsupported type.

    Precedence: explicit columns, then the keys of taskSyncDefinition, then the
    keys of the first data row, then an empty list.
    """
    if columnsParam:
        if isinstance(columnsParam, str):
            text = columnsParam.strip()
            if text.startswith(('[', '{')):
                try:
                    parsed = json.loads(text)
                except json.JSONDecodeError:
                    parsed = None
                # A JSON object contributes its keys, a JSON array its items.
                if isinstance(parsed, (list, dict)):
                    return list(parsed)
            # Plain (or unparsable) text is treated as a comma-separated list.
            return [name.strip() for name in columnsParam.split(',') if name.strip()]
        if isinstance(columnsParam, (list, tuple)):
            return list(columnsParam)
        return None

    if taskSyncDefinitionParam:
        if isinstance(taskSyncDefinitionParam, str):
            taskSyncDefinition = json.loads(taskSyncDefinitionParam)
        else:
            taskSyncDefinition = taskSyncDefinitionParam
        return list(taskSyncDefinition.keys())

    if dataList:
        return list(dataList[0].keys())
    return []


@action
async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Create CSV content with two custom header rows followed by the column-name row and the data.

    Parameters:
    - data (str, required): Document reference containing data as JSON (array, or object with "data" field from mergeTicketData)
    - headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
    - columns (str or list, optional): Column names as list, JSON array string or comma-separated string
      (if not provided, extracted from taskSyncDefinition or from the first data row)
    - taskSyncDefinition (str or dict, optional): Field mapping definition (its keys become the columns if columns is not provided)

    Returns:
    - ActionResult with ActionDocument containing the UTF-8 CSV bytes, base64-encoded.
      Any exception is converted into a failure result.
    """
    try:
        dataParam = parameters.get("data")
        if not dataParam:
            return ActionResult.isFailure(error="data parameter is required")

        headersParam = parameters.get("headers")
        columnsParam = parameters.get("columns")
        taskSyncDefinitionParam = parameters.get("taskSyncDefinition")

        # Get data from document
        dataJson = self.documentParsing.parseJsonFromDocument(dataParam)
        if dataJson is None:
            return ActionResult.isFailure(error="Could not parse data from document reference")

        # Extract data array if wrapped in object
        if isinstance(dataJson, dict) and "data" in dataJson:
            dataList = dataJson["data"]
        elif isinstance(dataJson, list):
            dataList = dataJson
        else:
            return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
        if not isinstance(dataList, list):
            # e.g. {"data": null}: fail with a clear message instead of a TypeError further down.
            return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")

        # Get headers (either {"headers": {...}} or the header dict itself)
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if headersParam:
            headersJson = self.documentParsing.parseJsonFromDocument(headersParam)
            if isinstance(headersJson, dict) and headersJson:
                nested = headersJson.get("headers")
                headers = nested if isinstance(nested, dict) else headersJson

        # Get columns
        columns = _resolveCsvColumns(columnsParam, taskSyncDefinitionParam, dataList)
        if columns is None:
            return ActionResult.isFailure(
                error=f"columns must be a list or a JSON/comma-separated string, got {type(columnsParam)}"
            )

        # Create DataFrame restricted to (and ordered by) the requested columns
        if not dataList:
            df = pd.DataFrame(columns=columns)
        else:
            df = pd.DataFrame(dataList)
            for col in columns:
                if col not in df.columns:
                    df[col] = ""
            df = df[columns]

        # Clean data: missing values become "", line breaks are flattened to a literal \n.
        # NOTE(review): quotes are doubled here AND to_csv below writes with QUOTE_ALL, which
        # presumably doubles them a second time -- confirm this matches what the consumer expects.
        for column in df.columns:
            df[column] = df[column].astype("object").fillna("")
            df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False).str.replace('"', '""', regex=False)

        # Create headers with timestamp (second cell of header row 2, if that cell exists)
        timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
        header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
        header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
        if len(header2Row) > 1:
            header2Row[1] = timestamp

        # Pad the header rows with empty cells up to the table width.
        headerRow1 = pd.DataFrame([header1Row + [""] * (len(df.columns) - len(header1Row))], columns=df.columns)
        headerRow2 = pd.DataFrame([header2Row + [""] * (len(df.columns) - len(header2Row))], columns=df.columns)
        tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
        finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)

        # Convert to CSV bytes
        out = StringIO()
        finalDf.to_csv(out, index=False, header=False, quoting=csv_module.QUOTE_ALL, escapechar='\\')
        csvBytes = out.getvalue().encode('utf-8')

        logger.info(f"Created CSV content: {len(dataList)} rows, {len(columns)} columns")

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "ticket_sync",
            "csv",
            workflowContext,
            "createCsvContent"
        )

        validationMetadata = self._createValidationMetadata(
            "createCsvContent",
            rowCount=len(dataList),
            columnCount=len(columns)
        )

        # Store as base64 for document
        csvBase64 = base64.b64encode(csvBytes).decode('utf-8')

        document = ActionDocument(
            documentName=filename,
            documentData=csvBase64,
            mimeType="application/octet-stream",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        errorMsg = f"Error creating CSV content: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
def _resolveExcelColumns(columnsParam, taskSyncDefinitionParam, dataList):
    """Return the ordered column names for the export, or None if columnsParam has an unsupported type.

    Precedence: explicit columns, then the keys of taskSyncDefinition, then the
    keys of the first data row, then an empty list.
    """
    if columnsParam:
        if isinstance(columnsParam, str):
            text = columnsParam.strip()
            if text.startswith(('[', '{')):
                try:
                    parsed = json.loads(text)
                except json.JSONDecodeError:
                    parsed = None
                # A JSON object contributes its keys, a JSON array its items.
                if isinstance(parsed, (list, dict)):
                    return list(parsed)
            # Plain (or unparsable) text is treated as a comma-separated list.
            return [name.strip() for name in columnsParam.split(',') if name.strip()]
        if isinstance(columnsParam, (list, tuple)):
            return list(columnsParam)
        return None

    if taskSyncDefinitionParam:
        if isinstance(taskSyncDefinitionParam, str):
            taskSyncDefinition = json.loads(taskSyncDefinitionParam)
        else:
            taskSyncDefinition = taskSyncDefinitionParam
        return list(taskSyncDefinition.keys())

    if dataList:
        return list(dataList[0].keys())
    return []


@action
async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Create Excel (xlsx) content with two custom header rows followed by the column-name row and the data.

    Parameters:
    - data (str, required): Document reference containing data as JSON (array, or object with "data" field from mergeTicketData)
    - headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
    - columns (str or list, optional): Column names as list, JSON array string or comma-separated string
      (if not provided, extracted from taskSyncDefinition or from the first data row)
    - taskSyncDefinition (str or dict, optional): Field mapping definition (its keys become the columns if columns is not provided)

    Returns:
    - ActionResult with ActionDocument containing the workbook bytes, base64-encoded.
      Any exception is converted into a failure result.
    """
    try:
        dataParam = parameters.get("data")
        if not dataParam:
            return ActionResult.isFailure(error="data parameter is required")

        headersParam = parameters.get("headers")
        columnsParam = parameters.get("columns")
        taskSyncDefinitionParam = parameters.get("taskSyncDefinition")

        # Get data from document
        dataJson = self.documentParsing.parseJsonFromDocument(dataParam)
        if dataJson is None:
            return ActionResult.isFailure(error="Could not parse data from document reference")

        # Extract data array if wrapped in object
        if isinstance(dataJson, dict) and "data" in dataJson:
            dataList = dataJson["data"]
        elif isinstance(dataJson, list):
            dataList = dataJson
        else:
            return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
        if not isinstance(dataList, list):
            # e.g. {"data": null}: fail with a clear message instead of a TypeError further down.
            return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")

        # Get headers (either {"headers": {...}} or the header dict itself)
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if headersParam:
            headersJson = self.documentParsing.parseJsonFromDocument(headersParam)
            if isinstance(headersJson, dict) and headersJson:
                nested = headersJson.get("headers")
                headers = nested if isinstance(nested, dict) else headersJson

        # Get columns
        columns = _resolveExcelColumns(columnsParam, taskSyncDefinitionParam, dataList)
        if columns is None:
            return ActionResult.isFailure(
                error=f"columns must be a list or a JSON/comma-separated string, got {type(columnsParam)}"
            )

        # Create DataFrame restricted to (and ordered by) the requested columns
        if not dataList:
            df = pd.DataFrame(columns=columns)
        else:
            df = pd.DataFrame(dataList)
            for col in columns:
                if col not in df.columns:
                    df[col] = ""
            df = df[columns]

        # Clean data: missing values become "", line breaks are flattened to a literal \n.
        # NOTE(review): the quote doubling is CSV-style escaping; in an xlsx cell it shows up
        # verbatim -- confirm this is wanted for the Excel export.
        for column in df.columns:
            df[column] = df[column].astype("object").fillna("")
            df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False).str.replace('"', '""', regex=False)

        # Create headers with timestamp (second cell of header row 2, if that cell exists)
        timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
        header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
        header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
        if len(header2Row) > 1:
            header2Row[1] = timestamp

        # Pad the header rows with empty cells up to the table width.
        headerRow1 = pd.DataFrame([header1Row + [""] * (len(df.columns) - len(header1Row))], columns=df.columns)
        headerRow2 = pd.DataFrame([header2Row + [""] * (len(df.columns) - len(header2Row))], columns=df.columns)
        tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
        finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)

        # Convert to Excel bytes
        buf = BytesIO()
        finalDf.to_excel(buf, index=False, header=False, engine='openpyxl')
        excelBytes = buf.getvalue()

        logger.info(f"Created Excel content: {len(dataList)} rows, {len(columns)} columns")

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "ticket_sync",
            "xlsx",
            workflowContext,
            "createExcelContent"
        )

        validationMetadata = self._createValidationMetadata(
            "createExcelContent",
            rowCount=len(dataList),
            columnCount=len(columns)
        )

        # Store as base64 for document
        excelBase64 = base64.b64encode(excelBytes).decode('utf-8')

        document = ActionDocument(
            documentName=filename,
            documentData=excelBase64,
            mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        errorMsg = f"Error creating Excel content: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
@action
async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Export tickets from JIRA as JSON list.

    Parameters:
    - connectionId (str or dict, required): Connection ID from connectJira, either the ID itself,
      a document reference to the connectJira result, or the connection-info dict
    - taskSyncDefinition (str or dict, optional): Accepted for interface compatibility but not read
      here; the export uses the interface stored for the connection

    Returns:
    - ActionResult with ActionDocument containing list of tickets as JSON.
      Any exception is converted into a failure result.
    """
    try:
        connectionIdParam = parameters.get("connectionId")
        if not connectionIdParam:
            return ActionResult.isFailure(error="connectionId parameter is required")

        # Resolve the connection ID: document reference, raw ID, or connection-info dict.
        connectionId = None
        if isinstance(connectionIdParam, str):
            connectionInfo = self.documentParsing.parseJsonFromDocument(connectionIdParam)
            # Only a dict can carry the ID; any other parse result means the string is the ID itself.
            if isinstance(connectionInfo, dict) and "connectionId" in connectionInfo:
                connectionId = connectionInfo["connectionId"]
            else:
                connectionId = connectionIdParam
        elif isinstance(connectionIdParam, dict):
            connectionId = connectionIdParam.get("connectionId")

        if not connectionId or connectionId not in self._connections:
            return ActionResult.isFailure(error=f"Connection ID {connectionIdParam} not found. Ensure connectJira was called first.")

        connection = self._connections[connectionId]
        syncInterface = connection["interface"]

        # Export tickets
        dataList = await syncInterface.exportTicketsAsList()

        logger.info(f"Exported {len(dataList)} tickets from JIRA")

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "jira_tickets_export",
            "json",
            workflowContext,
            "exportTicketsAsJson"
        )

        validationMetadata = self._createValidationMetadata(
            "exportTicketsAsJson",
            connectionId=connectionId,
            ticketCount=len(dataList)
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(dataList, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        errorMsg = f"Error exporting tickets from JIRA: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
@action
async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Import ticket data from JSON back to JIRA.

    Parameters:
    - connectionId (str or dict, required): Connection ID from connectJira, either the ID itself,
      a document reference to the connectJira result, or the connection-info dict
    - ticketData (str, required): Document reference containing ticket data as JSON array, or as an
      object with a "data" array (the shape produced by mergeTicketData/parseCsvContent)
    - taskSyncDefinition (str or dict, optional): Accepted for interface compatibility but not read
      here; the import uses the interface stored for the connection

    Returns:
    - ActionResult with ActionDocument containing the number of rows handed to JIRA and the
      connection ID. Any exception is converted into a failure result.
    """
    try:
        connectionIdParam = parameters.get("connectionId")
        if not connectionIdParam:
            return ActionResult.isFailure(error="connectionId parameter is required")

        ticketDataParam = parameters.get("ticketData")
        if not ticketDataParam:
            return ActionResult.isFailure(error="ticketData parameter is required")

        # Resolve the connection ID: document reference, raw ID, or connection-info dict.
        connectionId = None
        if isinstance(connectionIdParam, str):
            connectionInfo = self.documentParsing.parseJsonFromDocument(connectionIdParam)
            # Only a dict can carry the ID; any other parse result means the string is the ID itself.
            if isinstance(connectionInfo, dict) and "connectionId" in connectionInfo:
                connectionId = connectionInfo["connectionId"]
            else:
                connectionId = connectionIdParam
        elif isinstance(connectionIdParam, dict):
            connectionId = connectionIdParam.get("connectionId")

        if not connectionId or connectionId not in self._connections:
            return ActionResult.isFailure(error=f"Connection ID {connectionIdParam} not found. Ensure connectJira was called first.")

        connection = self._connections[connectionId]
        syncInterface = connection["interface"]

        # Get ticket data from document
        ticketDataJson = self.documentParsing.parseJsonFromDocument(ticketDataParam)
        if ticketDataJson is None:
            return ActionResult.isFailure(error="Could not parse ticket data from document reference")

        # Sibling actions wrap their rows as {"data": [...]}; accept that shape as well as a bare list.
        if isinstance(ticketDataJson, dict) and isinstance(ticketDataJson.get("data"), list):
            ticketDataJson = ticketDataJson["data"]

        # Ensure it's a list
        if not isinstance(ticketDataJson, list):
            return ActionResult.isFailure(error="ticketData must be a JSON array")

        # Import tickets
        await syncInterface.importListToTickets(ticketDataJson)

        logger.info(f"Imported {len(ticketDataJson)} tickets to JIRA")

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "jira_import_result",
            "json",
            workflowContext,
            "importTicketsFromJson"
        )

        # The count is the number of rows submitted; the return value of the import call is not inspected.
        importResult = {
            "imported": len(ticketDataJson),
            "connectionId": connectionId,
        }

        validationMetadata = self._createValidationMetadata(
            "importTicketsFromJson",
            connectionId=connectionId,
            importedCount=len(ticketDataJson)
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(importResult, indent=2),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        errorMsg = f"Error importing tickets to JIRA: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
def _unwrapTicketRows(parsedJson):
    """Return the row list from parsed JSON (bare list or object with a "data" list), else None."""
    if isinstance(parsedJson, list):
        return parsedJson
    if isinstance(parsedJson, dict) and isinstance(parsedJson.get("data"), list):
        return parsedJson["data"]
    return None


def _truncateForLog(text, limit=100):
    """Shorten text to limit characters, marking the cut with an ellipsis."""
    return text[:limit] + "..." if len(text) > limit else text


@action
async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Merge JIRA export data with existing SharePoint data.

    Rows are matched on idField. For a matched row, every field whose mapping starts with
    'get' is overwritten with the JIRA value; unmatched JIRA rows are appended as new; existing
    rows without a JIRA counterpart are kept unchanged at the end.

    Parameters:
    - jiraData (str, required): Document reference containing JIRA ticket data as JSON array
      (or object with a "data" array)
    - existingData (str, required): Document reference containing existing SharePoint data as JSON array
      (or object with a "data" array); anything else is treated as "no existing data"
    - taskSyncDefinition (str or dict, required): Field mapping definition
    - idField (str, optional): Field name to use as ID for merging (default: "ID")

    Returns:
    - ActionResult with ActionDocument containing {"data": merged rows, "mergeDetails": counts and change log}.
      Any exception is converted into a failure result.
    """
    try:
        jiraDataParam = parameters.get("jiraData")
        if not jiraDataParam:
            return ActionResult.isFailure(error="jiraData parameter is required")

        existingDataParam = parameters.get("existingData")
        if not existingDataParam:
            return ActionResult.isFailure(error="existingData parameter is required")

        taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
        if not taskSyncDefinitionParam:
            return ActionResult.isFailure(error="taskSyncDefinition parameter is required")

        idField = parameters.get("idField", "ID")

        # Parse taskSyncDefinition
        if isinstance(taskSyncDefinitionParam, str):
            try:
                taskSyncDefinition = json.loads(taskSyncDefinitionParam)
            except json.JSONDecodeError as e:
                return ActionResult.isFailure(error=f"taskSyncDefinition is not valid JSON: {str(e)}")
        elif isinstance(taskSyncDefinitionParam, dict):
            taskSyncDefinition = taskSyncDefinitionParam
        else:
            return ActionResult.isFailure(error=f"taskSyncDefinition must be a dict or JSON string, got {type(taskSyncDefinitionParam)}")

        # Get data from documents
        jiraDataJson = _unwrapTicketRows(self.documentParsing.parseJsonFromDocument(jiraDataParam))
        if jiraDataJson is None:
            return ActionResult.isFailure(error="Could not parse jiraData as JSON array")

        existingDataJson = _unwrapTicketRows(self.documentParsing.parseJsonFromDocument(existingDataParam))
        if existingDataJson is None:
            # Empty existing data is OK
            existingDataJson = []

        # Perform merge
        existingLookup = {row.get(idField): row for row in existingDataJson if row.get(idField)}
        mergedData: List[dict] = []
        changes: List[str] = []
        updatedCount = addedCount = unchangedCount = 0

        for jiraRow in jiraDataJson:
            jiraId = jiraRow.get(idField)
            if jiraId and jiraId in existingLookup:
                # Remove the match from the lookup so it is not appended again as "remaining".
                existingRow = existingLookup.pop(jiraId).copy()
                rowChanges: List[str] = []

                for fieldName, fieldConfig in taskSyncDefinition.items():
                    # Only fields mapped with direction 'get' flow from JIRA into the sheet.
                    if not fieldConfig or fieldConfig[0] != 'get':
                        continue

                    rawNewValue = jiraRow.get(fieldName)
                    oldValue = "" if existingRow.get(fieldName) is None else str(existingRow.get(fieldName))

                    # Convert ADF data to readable text for comparison/logging. The check must run on
                    # the raw value: once stringified, a dict can no longer be recognised.
                    if isinstance(rawNewValue, dict) and rawNewValue.get("type") == "doc":
                        readable = self.adfConverter.convertAdfToText(rawNewValue)
                        newValue = "" if readable is None else str(readable)
                    else:
                        newValue = "" if rawNewValue is None else str(rawNewValue)

                    if oldValue != newValue:
                        # Truncate long values for logging
                        rowChanges.append(f"{fieldName}: '{_truncateForLog(oldValue)}' -> '{_truncateForLog(newValue)}'")

                    existingRow[fieldName] = rawNewValue

                mergedData.append(existingRow)
                if rowChanges:
                    updatedCount += 1
                    changes.append(f"Row ID {jiraId} updated: {', '.join(rowChanges)}")
                else:
                    unchangedCount += 1
            else:
                mergedData.append(jiraRow)
                addedCount += 1
                changes.append(f"Row ID {jiraId} added as new record")

        # Add remaining existing rows (no JIRA counterpart)
        for remaining in existingLookup.values():
            mergedData.append(remaining)
            unchangedCount += 1

        mergeDetails = {
            "updated": updatedCount,
            "added": addedCount,
            "unchanged": unchangedCount,
            "changes": changes
        }

        logger.info(f"Merged ticket data: {updatedCount} updated, {addedCount} added, {unchangedCount} unchanged")

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "merged_ticket_data",
            "json",
            workflowContext,
            "mergeTicketData"
        )

        result = {
            "data": mergedData,
            "mergeDetails": mergeDetails
        }

        validationMetadata = self._createValidationMetadata(
            "mergeTicketData",
            updated=updatedCount,
            added=addedCount,
            unchanged=unchangedCount
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        errorMsg = f"Error merging ticket data: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
@action
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Parse CSV content with custom headers.

    The first two physical lines of the file are treated as free-form
    "custom header" lines and returned verbatim; the tabular data is parsed
    by pandas after skipping ``skipRows`` lines.

    Parameters:
    - csvContent (str, required): Document reference containing CSV file content as bytes
    - skipRows (int, optional): Number of header rows to skip (default: 2)
    - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)

    Returns:
    - ActionResult with ActionDocument containing parsed data and headers as JSON
    """
    # Local import: only needed to name the quoting mode passed to pandas.
    import csv

    try:
        csvContentParam = parameters.get("csvContent")
        if not csvContentParam:
            return ActionResult.isFailure(error="csvContent parameter is required")

        skipRows = parameters.get("skipRows", 2)
        if isinstance(skipRows, str):
            # Workflow parameters may arrive as text; pandas needs a real int.
            skipRows = int(skipRows)
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Get CSV content from document
        csvBytes = self.documentParsing.getDocumentData(csvContentParam)
        if csvBytes is None:
            return ActionResult.isFailure(error="Could not get CSV content from document reference")

        # Convert to bytes if needed
        if isinstance(csvBytes, str):
            csvBytes = csvBytes.encode('utf-8')
        elif not isinstance(csvBytes, bytes):
            return ActionResult.isFailure(error="CSV content must be bytes or string")

        # Parse headers if hasCustomHeaders
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders:
            # 'utf-8-sig' drops a leading byte-order mark so it does not end up
            # inside header1; maxsplit=2 avoids splitting the whole file just
            # to read its first two lines.
            csvLines = csvBytes.decode('utf-8-sig').split('\n', 2)
            if len(csvLines) >= 2:
                headers["header1"] = csvLines[0].rstrip('\r\n')
                headers["header2"] = csvLines[1].rstrip('\r\n')

        # Parse CSV data
        df = pd.read_csv(
            io.BytesIO(csvBytes),
            skiprows=skipRows,
            quoting=csv.QUOTE_ALL,  # same value as the former literal 1
            escapechar='\\',
            on_bad_lines='skip',
            engine='python'
        )

        # Convert to dict records; missing cells become empty strings so the
        # result is JSON-serializable without NaN values.
        for column in df.columns:
            df[column] = df[column].astype('object').fillna('')
        data = df.to_dict(orient='records')

        logger.info(f"Parsed CSV: {len(data)} rows, {len(df.columns)} columns")

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_csv_data",
            "json",
            workflowContext,
            "parseCsvContent"
        )

        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(df.columns)
        }

        validationMetadata = self._createValidationMetadata(
            "parseCsvContent",
            rowCount=len(data),
            columnCount=len(df.columns),
            skipRows=skipRows
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        # Action boundary: report any failure as an ActionResult instead of raising.
        errorMsg = f"Error parsing CSV content: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
@action
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Parse Excel content with custom headers.

    With custom headers the sheet layout is: row 0 and row 1 are free-form
    header lines (returned as comma-joined strings), row 2 holds the table
    column names, and data starts at row ``skipRows``.

    Parameters:
    - excelContent (str, required): Document reference containing Excel file content as bytes
    - skipRows (int, optional): Number of header rows to skip (default: 3)
    - hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)

    Returns:
    - ActionResult with ActionDocument containing parsed data and headers as JSON
    """

    def _jsonDefault(value):
        """Serialize cell values the json module cannot encode natively."""
        # Date/time cells (datetime, date, time, pandas Timestamp) -> ISO 8601 text.
        if hasattr(value, "isoformat"):
            return value.isoformat()
        # numpy scalars -> plain Python numbers.
        if hasattr(value, "item"):
            return value.item()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    def _joinRow(row):
        """Join one sheet row into a comma-separated string, blanks for empty cells."""
        return ",".join([str(x) if pd.notna(x) else "" for x in row.tolist()])

    try:
        excelContentParam = parameters.get("excelContent")
        if not excelContentParam:
            return ActionResult.isFailure(error="excelContent parameter is required")

        skipRows = parameters.get("skipRows", 3)
        if isinstance(skipRows, str):
            # Workflow parameters may arrive as text; slicing needs a real int.
            skipRows = int(skipRows)
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Get Excel content from document
        excelBytes = self.documentParsing.getDocumentData(excelContentParam)
        if excelBytes is None:
            return ActionResult.isFailure(error="Could not get Excel content from document reference")

        # Convert to bytes if needed
        if isinstance(excelBytes, str):
            excelBytes = excelBytes.encode('latin-1')  # Excel might have binary data
        elif not isinstance(excelBytes, bytes):
            return ActionResult.isFailure(error="Excel content must be bytes or string")

        # Parse Excel without a header row so the custom header lines stay accessible
        df = pd.read_excel(BytesIO(excelBytes), engine='openpyxl', header=None)

        # Extract headers if hasCustomHeaders
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders and len(df) >= 3:
            dfData = df.iloc[skipRows:].copy()
            dfData.columns = df.iloc[2].tolist()

            headers = {
                "header1": _joinRow(df.iloc[0]),
                "header2": _joinRow(df.iloc[1]),
            }
        else:
            # No custom headers, use standard parsing
            if skipRows > 0:
                dfData = df.iloc[skipRows:].copy()
                if len(df) > skipRows:
                    # The last skipped row carries the column names.
                    dfData.columns = df.iloc[skipRows - 1]
            else:
                dfData = df.copy()

        # Reset index and clean data
        dfData = dfData.reset_index(drop=True)
        for column in dfData.columns:
            dfData[column] = dfData[column].astype('object').fillna('')

        data = dfData.to_dict(orient='records')

        logger.info(f"Parsed Excel: {len(data)} rows, {len(dfData.columns)} columns")

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_excel_data",
            "json",
            workflowContext,
            "parseExcelContent"
        )

        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(dfData.columns)
        }

        validationMetadata = self._createValidationMetadata(
            "parseExcelContent",
            rowCount=len(data),
            columnCount=len(dfData.columns),
            skipRows=skipRows
        )

        document = ActionDocument(
            documentName=filename,
            # default= lets date cells and numpy scalars serialize instead of
            # aborting the whole action with a TypeError.
            documentData=json.dumps(result, indent=2, ensure_ascii=False, default=_jsonDefault),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        # Action boundary: report any failure as an ActionResult instead of raising.
        errorMsg = f"Error parsing Excel content: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
class AdfConverterHelper:
    """Helper for ADF conversion operations"""

    # Node types rendered on their own line(s); siblings of these are
    # separated by a newline instead of being concatenated directly.
    _BLOCK_NODE_TYPES = frozenset({
        "paragraph", "heading", "codeBlock", "blockquote",
        "bulletList", "orderedList", "listItem", "table", "tableRow",
    })
    # Media nodes are skipped entirely.
    _SKIPPED_NODE_TYPES = frozenset({"mediaGroup", "media"})

    def __init__(self, methodInstance):
        """
        Initialize ADF converter helper.

        Args:
            methodInstance: Instance of MethodJira (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    def convertAdfToText(self, adfData):
        """Convert Atlassian Document Format (ADF) to plain text.

        Inline nodes are concatenated; block nodes (paragraphs, headings,
        list items, table rows, ...) are separated by newlines. Text marks
        are rendered Markdown-style, bullet items are prefixed with "- ",
        ordered items with "1. ", "2. ", ..., and table cells in a row are
        joined with " | ".

        Args:
            adfData: ADF object or None

        Returns:
            str: Plain text content. Empty string if the input is None,
            empty, or not a dict; ``str(adfData)`` if it is a dict whose
            type is not "doc".
        """
        if not adfData or not isinstance(adfData, dict):
            return ""

        if adfData.get("type") != "doc":
            return str(adfData)

        content = adfData.get("content", [])
        if not isinstance(content, list):
            return ""

        return self._renderNodes(content).strip()

    @staticmethod
    def _nodeAttrs(node):
        """Return the node's ``attrs`` mapping, or an empty dict if absent/malformed."""
        attrs = node.get("attrs")
        return attrs if isinstance(attrs, dict) else {}

    @classmethod
    def _applyMarks(cls, text, marks):
        """Wrap text in Markdown-style markers for each supported mark."""
        if not isinstance(marks, list):
            return text
        for mark in marks:
            if not isinstance(mark, dict):
                continue
            markType = mark.get("type")
            if markType == "strong":
                text = f"**{text}**"
            elif markType == "em":
                text = f"*{text}*"
            elif markType == "code":
                text = f"`{text}`"
            elif markType == "link":
                href = cls._nodeAttrs(mark).get("href", "")
                if href:
                    text = f"[{text}]({href})"
        return text

    def _renderNodes(self, nodes, listLevel=0, orderedStart=None):
        """Recursively render a list of ADF nodes to text.

        Args:
            nodes: List of ADF node dicts (non-dict entries are ignored).
            listLevel: Current list nesting depth (0 outside any list).
            orderedStart: First item number when rendering the direct
                children of an ordered list; None for bullet rendering.

        Returns:
            str: Rendered text for the given nodes.
        """
        rendered = []  # (text, isBlock) pairs in document order
        itemNumber = orderedStart

        for node in nodes:
            if not isinstance(node, dict):
                continue

            nodeType = node.get("type", "")
            if nodeType in self._SKIPPED_NODE_TYPES:
                continue

            children = node.get("content")
            if not isinstance(children, list):
                children = []

            if nodeType == "text":
                rawText = node.get("text", "")
                text = "" if rawText is None else str(rawText)
                text = self._applyMarks(text, node.get("marks"))
            elif nodeType == "hardBreak":
                text = "\n"
            elif nodeType == "bulletList":
                text = self._renderNodes(children, listLevel + 1)
            elif nodeType == "orderedList":
                start = self._nodeAttrs(node).get("order", 1)
                if not isinstance(start, int):
                    start = 1
                text = self._renderNodes(children, listLevel + 1, start)
            elif nodeType == "tableRow":
                cells = [self._renderNodes([cell], listLevel).strip() for cell in children]
                text = " | ".join(cell for cell in cells if cell)
            else:
                # Containers (paragraph, heading, listItem, table, tableCell, ...)
                # and unknown node types: render whatever children they have.
                text = self._renderNodes(children, listLevel)

            if nodeType not in ("text", "hardBreak") and not text.strip():
                # Containers without visible content produce no output.
                continue
            if not text:
                continue

            if nodeType == "listItem":
                # Top-level items are flush left; each nesting level adds two spaces.
                indent = "  " * max(listLevel - 1, 0)
                if itemNumber is None:
                    marker = "-"
                else:
                    marker = f"{itemNumber}."
                    itemNumber += 1
                text = f"{indent}{marker} {text}"
            elif nodeType == "heading":
                level = self._nodeAttrs(node).get("level", 1)
                if not isinstance(level, int) or level < 1:
                    level = 1
                text = "#" * level + " " + text
            elif nodeType == "codeBlock":
                text = f"```\n{text}\n```"
            elif nodeType == "blockquote":
                text = f"> {text}"

            rendered.append((text, nodeType in self._BLOCK_NODE_TYPES))

        # Join: a newline goes between two siblings when either one is a block
        # and the text so far does not already end with a line break.
        pieces = []
        previousWasBlock = False
        for text, isBlock in rendered:
            if pieces and (isBlock or previousWasBlock) and not pieces[-1].endswith("\n"):
                pieces.append("\n")
            pieces.append(text)
            previousWasBlock = isBlock

        return "".join(pieces)
class DocumentParsingHelper:
    """Helper for document parsing operations"""

    def __init__(self, methodInstance):
        """
        Initialize document parsing helper.

        Args:
            methodInstance: Instance of MethodJira (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    def getDocumentData(self, documentReference: str) -> Any:
        """
        Get document data from a document reference.

        Resolves the reference through the chat service, takes the first
        matching chat document and loads the file behind its ``fileId``.

        Args:
            documentReference: Document reference string

        Returns:
            Whatever ``services.chat.getFileData`` returns for the file, or
            None if no document matches, the document has no ``fileId``, or
            any step raises.
        """
        try:
            docList = DocumentReferenceList.from_string_list([documentReference])
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
            if not chatDocuments:
                return None

            doc = chatDocuments[0]
            fileId = getattr(doc, 'fileId', None)
            if not fileId:
                return None

            return self.services.chat.getFileData(fileId)
        except Exception as e:
            # Best-effort lookup: callers treat None as "document not available".
            logger.error(f"Error getting document data: {str(e)}")
            return None

    def parseJsonFromDocument(self, documentReference: str) -> Any:
        """
        Parse JSON content from a document reference.

        Args:
            documentReference: Document reference string

        Returns:
            The decoded JSON value (dict, list, or scalar - callers validate
            the shape they need), or None if the document is missing, empty,
            or not valid JSON.
        """
        try:
            fileData = self.getDocumentData(documentReference)
            if not fileData:
                return None

            # 'utf-8-sig' / lstrip remove a leading byte-order mark, which
            # json.loads rejects in text input.
            if isinstance(fileData, (bytes, bytearray)):
                jsonStr = bytes(fileData).decode('utf-8-sig')
            else:
                jsonStr = str(fileData).lstrip('\ufeff')

            # Parse JSON
            return json.loads(jsonStr)
        except Exception as e:
            # Best-effort parse: callers treat None as "could not parse".
            logger.error(f"Error parsing JSON from document: {str(e)}")
            return None
{} + + # Initialize helper modules + self.adfConverter = AdfConverterHelper(self) + self.documentParsing = DocumentParsingHelper(self) + + # RBAC-Integration: Action-Definitionen mit actionId + self._actions = { + "connectJira": WorkflowActionDefinition( + actionId="jira.connectJira", + description="Connect to JIRA instance and create ticket interface", + parameters={ + "apiUsername": WorkflowActionParameter( + name="apiUsername", + type="str", + frontendType=FrontendType.EMAIL, + required=True, + description="JIRA API username/email" + ), + "apiTokenConfigKey": WorkflowActionParameter( + name="apiTokenConfigKey", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="APP_CONFIG key name for JIRA API token" + ), + "apiUrl": WorkflowActionParameter( + name="apiUrl", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="JIRA instance URL (e.g., https://example.atlassian.net)" + ), + "projectCode": WorkflowActionParameter( + name="projectCode", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="JIRA project code (e.g., DCS)" + ), + "issueType": WorkflowActionParameter( + name="issueType", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="JIRA issue type (e.g., Task)" + ), + "taskSyncDefinition": WorkflowActionParameter( + name="taskSyncDefinition", + type="str", + frontendType=FrontendType.TEXTAREA, + required=True, + description="Field mapping definition as JSON string or dict" + ) + }, + execute=connectJira.__get__(self, self.__class__) + ), + "exportTicketsAsJson": WorkflowActionDefinition( + actionId="jira.exportTicketsAsJson", + description="Export tickets from JIRA as JSON list", + parameters={ + "connectionId": WorkflowActionParameter( + name="connectionId", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Connection ID from connectJira action result" + ), + "taskSyncDefinition": WorkflowActionParameter( + 
name="taskSyncDefinition", + type="str", + frontendType=FrontendType.TEXTAREA, + required=False, + description="Field mapping definition (if not provided, uses stored definition)" + ) + }, + execute=exportTicketsAsJson.__get__(self, self.__class__) + ), + "importTicketsFromJson": WorkflowActionDefinition( + actionId="jira.importTicketsFromJson", + description="Import ticket data from JSON back to JIRA", + parameters={ + "connectionId": WorkflowActionParameter( + name="connectionId", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Connection ID from connectJira action result" + ), + "ticketData": WorkflowActionParameter( + name="ticketData", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference containing ticket data as JSON" + ), + "taskSyncDefinition": WorkflowActionParameter( + name="taskSyncDefinition", + type="str", + frontendType=FrontendType.TEXTAREA, + required=False, + description="Field mapping definition (if not provided, uses stored definition)" + ) + }, + execute=importTicketsFromJson.__get__(self, self.__class__) + ), + "mergeTicketData": WorkflowActionDefinition( + actionId="jira.mergeTicketData", + description="Merge JIRA export data with existing SharePoint data", + parameters={ + "jiraData": WorkflowActionParameter( + name="jiraData", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference containing JIRA ticket data as JSON array" + ), + "existingData": WorkflowActionParameter( + name="existingData", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference containing existing SharePoint data as JSON array" + ), + "taskSyncDefinition": WorkflowActionParameter( + name="taskSyncDefinition", + type="str", + frontendType=FrontendType.TEXTAREA, + required=True, + description="Field mapping definition" + ), + "idField": WorkflowActionParameter( + 
name="idField", + type="str", + frontendType=FrontendType.TEXT, + required=False, + default="ID", + description="Field name to use as ID for merging" + ) + }, + execute=mergeTicketData.__get__(self, self.__class__) + ), + "parseCsvContent": WorkflowActionDefinition( + actionId="jira.parseCsvContent", + description="Parse CSV content with custom headers", + parameters={ + "csvContent": WorkflowActionParameter( + name="csvContent", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference containing CSV file content as bytes" + ), + "skipRows": WorkflowActionParameter( + name="skipRows", + type="int", + frontendType=FrontendType.NUMBER, + required=False, + default=2, + description="Number of header rows to skip", + validation={"min": 0, "max": 100} + ), + "hasCustomHeaders": WorkflowActionParameter( + name="hasCustomHeaders", + type="bool", + frontendType=FrontendType.CHECKBOX, + required=False, + default=True, + description="Whether CSV has custom header rows" + ) + }, + execute=parseCsvContent.__get__(self, self.__class__) + ), + "parseExcelContent": WorkflowActionDefinition( + actionId="jira.parseExcelContent", + description="Parse Excel content with custom headers", + parameters={ + "excelContent": WorkflowActionParameter( + name="excelContent", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference containing Excel file content as bytes" + ), + "skipRows": WorkflowActionParameter( + name="skipRows", + type="int", + frontendType=FrontendType.NUMBER, + required=False, + default=3, + description="Number of header rows to skip", + validation={"min": 0, "max": 100} + ), + "hasCustomHeaders": WorkflowActionParameter( + name="hasCustomHeaders", + type="bool", + frontendType=FrontendType.CHECKBOX, + required=False, + default=True, + description="Whether Excel has custom header rows" + ) + }, + execute=parseExcelContent.__get__(self, self.__class__) + ), + 
"createCsvContent": WorkflowActionDefinition( + actionId="jira.createCsvContent", + description="Create CSV content with custom headers", + parameters={ + "data": WorkflowActionParameter( + name="data", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference containing data as JSON (with data field from mergeTicketData)" + ), + "headers": WorkflowActionParameter( + name="headers", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=False, + description="Document reference containing headers JSON (from parseCsvContent/parseExcelContent)" + ), + "columns": WorkflowActionParameter( + name="columns", + type="List[str]", + frontendType=FrontendType.MULTISELECT, + required=False, + description="List of column names (if not provided, extracted from taskSyncDefinition or data)" + ), + "taskSyncDefinition": WorkflowActionParameter( + name="taskSyncDefinition", + type="str", + frontendType=FrontendType.TEXTAREA, + required=False, + description="Field mapping definition (used to extract column names if columns not provided)" + ) + }, + execute=createCsvContent.__get__(self, self.__class__) + ), + "createExcelContent": WorkflowActionDefinition( + actionId="jira.createExcelContent", + description="Create Excel content with custom headers", + parameters={ + "data": WorkflowActionParameter( + name="data", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference containing data as JSON (with data field from mergeTicketData)" + ), + "headers": WorkflowActionParameter( + name="headers", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=False, + description="Document reference containing headers JSON (from parseExcelContent)" + ), + "columns": WorkflowActionParameter( + name="columns", + type="List[str]", + frontendType=FrontendType.MULTISELECT, + required=False, + description="List of column names (if not provided, extracted 
from taskSyncDefinition or data)" + ), + "taskSyncDefinition": WorkflowActionParameter( + name="taskSyncDefinition", + type="str", + frontendType=FrontendType.TEXTAREA, + required=False, + description="Field mapping definition (used to extract column names if columns not provided)" + ) + }, + execute=createExcelContent.__get__(self, self.__class__) + ) + } + + # Validate actions after definition + self._validateActions() + + # Register actions as methods (optional, für direkten Zugriff) + self.connectJira = connectJira.__get__(self, self.__class__) + self.exportTicketsAsJson = exportTicketsAsJson.__get__(self, self.__class__) + self.importTicketsFromJson = importTicketsFromJson.__get__(self, self.__class__) + self.mergeTicketData = mergeTicketData.__get__(self, self.__class__) + self.parseCsvContent = parseCsvContent.__get__(self, self.__class__) + self.parseExcelContent = parseExcelContent.__get__(self, self.__class__) + self.createCsvContent = createCsvContent.__get__(self, self.__class__) + self.createExcelContent = createExcelContent.__get__(self, self.__class__) + diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py.old similarity index 100% rename from modules/workflows/methods/methodOutlook.py rename to modules/workflows/methods/methodOutlook.py.old diff --git a/modules/workflows/methods/methodOutlook/__init__.py b/modules/workflows/methods/methodOutlook/__init__.py new file mode 100644 index 00000000..c7653010 --- /dev/null +++ b/modules/workflows/methods/methodOutlook/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+ +from .methodOutlook import MethodOutlook + +__all__ = ['MethodOutlook'] + diff --git a/modules/workflows/methods/methodOutlook/actions/__init__.py b/modules/workflows/methods/methodOutlook/actions/__init__.py new file mode 100644 index 00000000..f62e3e0a --- /dev/null +++ b/modules/workflows/methods/methodOutlook/actions/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +"""Action modules for Outlook operations.""" + +# Export all actions +from .readEmails import readEmails +from .searchEmails import searchEmails +from .composeAndDraftEmailWithContext import composeAndDraftEmailWithContext +from .sendDraftEmail import sendDraftEmail + +__all__ = [ + 'readEmails', + 'searchEmails', + 'composeAndDraftEmailWithContext', + 'sendDraftEmail', +] + diff --git a/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py b/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py new file mode 100644 index 00000000..2bad3838 --- /dev/null +++ b/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py @@ -0,0 +1,362 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Compose And Draft Email With Context action for Outlook operations. +Composes email content using AI from context and optional documents, then creates a draft. +""" + +import logging +import json +import base64 +import requests +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument + +logger = logging.getLogger(__name__) + +@action +async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Compose email content using AI from context and optional documents, then create a draft. 
+ - Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength. + - Output format: JSON confirmation with AI-generated draft metadata. + + Parameters: + - connectionReference (str, required): Microsoft connection label. + - to (list, required): Recipient email addresses. + - context (str, required): Detailled context for composing the email. + - documentList (list, optional): Document references for context/attachments. + - cc (list, optional): CC recipients. + - bcc (list, optional): BCC recipients. + - emailStyle (str, optional): formal | casual | business. Default: business. + - maxLength (int, optional): Maximum length for generated content. Default: 1000. + """ + try: + connectionReference = parameters.get("connectionReference") + to = parameters.get("to") + context = parameters.get("context") + documentList = parameters.get("documentList", []) + cc = parameters.get("cc", []) + bcc = parameters.get("bcc", []) + emailStyle = parameters.get("emailStyle", "business") + maxLength = parameters.get("maxLength", 1000) + + if not connectionReference or not to or not context: + return ActionResult.isFailure(error="connectionReference, to, and context are required") + + # Convert single values to lists for all recipient parameters + if isinstance(to, str): + to = [to] + if isinstance(cc, str): + cc = [cc] + if isinstance(bcc, str): + bcc = [bcc] + if isinstance(documentList, str): + documentList = [documentList] + + # Get Microsoft connection + connection = self.connection.getMicrosoftConnection(connectionReference) + if not connection: + return ActionResult.isFailure(error="No valid Microsoft connection found") + + # Check permissions + permissions_ok = await self.connection.checkPermissions(connection) + if not permissions_ok: + return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations") + + # Prepare documents for AI processing + from 
modules.datamodels.datamodelDocref import DocumentReferenceList + chatDocuments = [] + if documentList: + # Convert to DocumentReferenceList if needed + if isinstance(documentList, DocumentReferenceList): + docRefList = documentList + elif isinstance(documentList, list): + docRefList = DocumentReferenceList.from_string_list(documentList) + elif isinstance(documentList, str): + docRefList = DocumentReferenceList.from_string_list([documentList]) + else: + docRefList = DocumentReferenceList(references=[]) + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) + + # Create AI prompt for email composition + # Build document reference list for AI with expanded list contents when possible + doc_references = documentList + doc_list_text = "" + if doc_references: + lines = ["Available_Document_References:"] + for ref in doc_references: + # Each item is a label: resolve to its document list and render contained items + from modules.datamodels.datamodelDocref import DocumentReferenceList + list_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([ref])) or [] + if list_docs: + for d in list_docs: + doc_ref_label = self.services.chat.getDocumentReferenceFromChatDocument(d) + lines.append(f"- {doc_ref_label}") + else: + lines.append(" - (no documents)") + doc_list_text = "\n" + "\n".join(lines) + else: + doc_list_text = "Available_Document_References: (No documents available for attachment)" + + # Escape only the user-controlled context to prevent prompt injection + escaped_context = context.replace('"', '\\"').replace('\n', '\\n').replace('\r', '\\r') + + ai_prompt = f"""Compose an email based on this context: +------- +{escaped_context} +------- + +Recipients: {to} +Style: {emailStyle} +Max length: {maxLength} characters +{doc_list_text} + +Based on the context, decide which documents to attach. + +CRITICAL: Use EXACT document references from Available_Document_References above. 
For individual documents: ALWAYS use docItem:: format (include filename) + +Return JSON: +{{ + "subject": "subject line", + "body": "email body (HTML allowed)", + "attachments": ["docItem::"] +}} +""" + + # Call AI service to generate email content + try: + ai_response = await self.services.ai.callAiPlanning( + prompt=ai_prompt, + placeholders=None, + debugType="email_composition" + ) + + # Parse AI response + try: + ai_content = ai_response + # Extract JSON from AI response + if "```json" in ai_content: + json_start = ai_content.find("```json") + 7 + json_end = ai_content.find("```", json_start) + json_content = ai_content[json_start:json_end].strip() + elif "{" in ai_content and "}" in ai_content: + json_start = ai_content.find("{") + json_end = ai_content.rfind("}") + 1 + json_content = ai_content[json_start:json_end] + else: + json_content = ai_content + + email_data = json.loads(json_content) + subject = email_data.get("subject", "") + body = email_data.get("body", "") + ai_attachments = email_data.get("attachments", []) + + if not subject or not body: + return ActionResult.isFailure(error="AI did not generate valid subject and body") + + # Use AI-selected attachments if provided, otherwise use all documents + normalized_ai_attachments = [] + if documentList: + try: + available_refs = [documentList] if isinstance(documentList, str) else documentList + from modules.datamodels.datamodelDocref import DocumentReferenceList + available_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(available_refs)) or [] + except Exception: + available_docs = [] + + # Normalize AI attachments to a list of strings + if isinstance(ai_attachments, str): + ai_attachments = [ai_attachments] + elif isinstance(ai_attachments, list): + ai_attachments = [a for a in ai_attachments if isinstance(a, str)] + + if ai_attachments: + try: + ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments + from 
modules.datamodels.datamodelDocref import DocumentReferenceList + ai_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(ai_refs)) or [] + except Exception: + ai_docs = [] + + # Intersect by document id + available_ids = {getattr(d, 'id', None) for d in available_docs} + selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids] + + if selected_docs: + # Map selected ChatDocuments back to docItem references (with full filename) + documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs] + # Normalize ai_attachments to full format for storage + normalized_ai_attachments = documentList.copy() + logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)") + else: + # No intersection; use all available documents + documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs] + normalized_ai_attachments = documentList.copy() + logger.warning("AI selected attachments not found in available documents, using all documents") + else: + # No AI selection; use all available documents + documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs] + normalized_ai_attachments = documentList.copy() + logger.warning("AI did not specify attachments, using all available documents") + else: + logger.info("No documents provided in documentList; skipping attachment processing") + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse AI response as JSON: {str(e)}") + logger.error(f"AI response content: {ai_response}") + return ActionResult.isFailure(error="AI response was not valid JSON format") + + except Exception as e: + logger.error(f"Error calling AI service: {str(e)}") + return ActionResult.isFailure(error=f"Failed to generate email content: {str(e)}") + + # Now create the email with AI-generated content + try: + graph_url = 
"https://graph.microsoft.com/v1.0" + headers = { + "Authorization": f"Bearer {connection['accessToken']}", + "Content-Type": "application/json" + } + + # Clean and format body content + cleaned_body = body.strip() + + # Check if body is already HTML + if cleaned_body.startswith('') or cleaned_body.startswith('') or '
' in cleaned_body: + html_body = cleaned_body + else: + # Convert plain text to proper HTML formatting + html_body = cleaned_body.replace('\n', '
') + html_body = f"{html_body}" + + # Build the email message + message = { + "subject": subject, + "body": { + "contentType": "HTML", + "content": html_body + }, + "toRecipients": [{"emailAddress": {"address": email}} for email in to], + "ccRecipients": [{"emailAddress": {"address": email}} for email in cc] if cc else [], + "bccRecipients": [{"emailAddress": {"address": email}} for email in bcc] if bcc else [] + } + + # Add documents as attachments if provided + if documentList: + message["attachments"] = [] + for attachment_ref in documentList: + # Get attachment document from service center + from modules.datamodels.datamodelDocref import DocumentReferenceList + attachment_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([attachment_ref])) + if attachment_docs: + for doc in attachment_docs: + file_id = getattr(doc, 'fileId', None) + if file_id: + try: + file_content = self.services.chat.getFileData(file_id) + if file_content: + if isinstance(file_content, bytes): + content_bytes = file_content + else: + content_bytes = str(file_content).encode('utf-8') + + base64_content = base64.b64encode(content_bytes).decode('utf-8') + + attachment = { + "@odata.type": "#microsoft.graph.fileAttachment", + "name": doc.fileName, + "contentType": doc.mimeType or "application/octet-stream", + "contentBytes": base64_content + } + message["attachments"].append(attachment) + except Exception as e: + logger.error(f"Error reading attachment file {doc.fileName}: {str(e)}") + + # Create the draft message + drafts_folder_id = self.folderManagement.getFolderId("Drafts", connection) + + if drafts_folder_id: + api_url = f"{graph_url}/me/mailFolders/{drafts_folder_id}/messages" + else: + api_url = f"{graph_url}/me/messages" + logger.warning("Could not find Drafts folder, creating draft in default location") + + response = requests.post(api_url, headers=headers, json=message) + + if response.status_code in [200, 201]: + draft_data = 
response.json() + draft_id = draft_data.get("id", "Unknown") + + # Create draft result data with full draft information + draftResultData = { + "status": "draft", + "message": "Email draft created successfully with AI-generated content", + "draftId": draft_id, + "folder": "Drafts (Entwürfe)", + "mailbox": connection.get('userEmail', 'Unknown'), + "subject": subject, + "body": body, + "recipients": to, + "cc": cc, + "bcc": bcc, + "attachments": len(documentList) if documentList else 0, + "aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents", + "aiGenerated": True, + "context": context, + "emailStyle": emailStyle, + "timestamp": self.services.utils.timestampGetUtc(), + "draftData": draft_data + } + + # Extract attachment filenames for validation metadata + attachmentFilenames = [] + attachmentReferences = [] + if documentList: + try: + from modules.datamodels.datamodelDocref import DocumentReferenceList + attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or [] + attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)] + # Store normalized document references (with filenames) - use normalized_ai_attachments if available + attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs] + except Exception: + pass + + # Create validation metadata for content validator + validationMetadata = { + "actionType": "outlook.composeAndDraftEmailWithContext", + "emailRecipients": to, + "emailCc": cc, + "emailBcc": bcc, + "emailSubject": subject, + "emailAttachments": attachmentFilenames, + "emailAttachmentReferences": attachmentReferences, + "emailAttachmentCount": len(attachmentFilenames), + "emailStyle": emailStyle, + "hasAttachments": len(attachmentFilenames) > 0 + } + + return ActionResult( + 
success=True, + documents=[ActionDocument( + documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json", + documentData=json.dumps(draftResultData, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + )] + ) + else: + logger.error(f"Failed to create draft. Status: {response.status_code}, Response: {response.text}") + return ActionResult.isFailure(error=f"Failed to create email draft: {response.status_code} - {response.text}") + + except Exception as e: + logger.error(f"Error creating email via Microsoft Graph API: {str(e)}") + return ActionResult.isFailure(error=f"Failed to create email: {str(e)}") + + except Exception as e: + logger.error(f"Error in composeAndDraftEmailWithContext: {str(e)}") + return ActionResult.isFailure(error=str(e)) + diff --git a/modules/workflows/methods/methodOutlook/actions/readEmails.py b/modules/workflows/methods/methodOutlook/actions/readEmails.py new file mode 100644 index 00000000..e698cb9f --- /dev/null +++ b/modules/workflows/methods/methodOutlook/actions/readEmails.py @@ -0,0 +1,245 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Read Emails action for Outlook operations. +Reads emails and metadata from a mailbox folder. +""" + +import logging +import time +import json +import requests +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument + +logger = logging.getLogger(__name__) + +@action +async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Read emails and metadata from a mailbox folder. + - Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType. + - Output format: JSON with emails and metadata. + + Parameters: + - connectionReference (str, required): Microsoft connection label. + - folder (str, optional): Folder to read from. Default: Inbox. 
+ - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000. + - filter (str, optional): Sender, query operators, or subject text. + - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json". + """ + operationId = None + try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"outlook_read_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Read Emails", + "Outlook Email Reading", + f"Folder: {parameters.get('folder', 'Inbox')}", + parentOperationId=parentOperationId + ) + + connectionReference = parameters.get("connectionReference") + folder = parameters.get("folder", "Inbox") + limit = parameters.get("limit", 10) + filter = parameters.get("filter") + outputMimeType = parameters.get("outputMimeType", "application/json") + + if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Connection reference is required") + + self.services.chat.progressLogUpdate(operationId, 0.2, "Validating parameters") + + # Validate limit parameter + if limit <= 0: + limit = 1000 + logger.warning(f"Invalid limit value ({limit}), using default value 1000") + + # Validate filter parameter if provided + if filter: + # Remove any potentially dangerous characters that could break the filter + filter = filter.strip() + if len(filter) > 100: + logger.warning(f"Filter too long ({len(filter)} chars), truncating to 100 characters") + filter = filter[:100] + + + # Get Microsoft connection + self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection") + connection = self.connection.getMicrosoftConnection(connectionReference) + if not connection: + 
self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Read emails using Microsoft Graph API + self.services.chat.progressLogUpdate(operationId, 0.4, "Reading emails from Microsoft Graph API") + try: + # Microsoft Graph API endpoint for messages + graph_url = "https://graph.microsoft.com/v1.0" + headers = { + "Authorization": f"Bearer {connection['accessToken']}", + "Content-Type": "application/json" + } + + # Get the folder ID for the specified folder + folder_id = self.folderManagement.getFolderId(folder, connection) + + if folder_id: + # Build the API request with folder ID + api_url = f"{graph_url}/me/mailFolders/{folder_id}/messages" + else: + # Fallback: use folder name directly (for well-known folders like "Inbox") + api_url = f"{graph_url}/me/mailFolders/{folder}/messages" + logger.warning(f"Could not find folder ID for '{folder}', using folder name directly") + params = { + "$top": limit, + "$orderby": "receivedDateTime desc" + } + + if filter: + # Build proper Graph API filter parameters + filter_params = self.emailProcessing.buildGraphFilter(filter) + params.update(filter_params) + + # If using $search, remove $orderby as they can't be combined + if "$search" in params: + params.pop("$orderby", None) + + # If using $filter with contains(), remove $orderby as they can't be combined + # Microsoft Graph API doesn't support contains() with orderby + if "$filter" in params and "contains(" in params["$filter"].lower(): + params.pop("$orderby", None) + + # Filter applied + + # Make the API call + + + response = requests.get(api_url, headers=headers, params=params) + + if response.status_code != 200: + logger.error(f"Graph API error: {response.status_code} - {response.text}") + logger.error(f"Request URL: {response.url}") + logger.error(f"Request headers: {headers}") + logger.error(f"Request params: {params}") + + response.raise_for_status() 
+ + self.services.chat.progressLogUpdate(operationId, 0.7, "Processing email data") + emails_data = response.json() + email_data = { + "emails": emails_data.get("value", []), + "count": len(emails_data.get("value", [])), + "folder": folder, + "filter": filter, + "apiMetadata": { + "@odata.context": emails_data.get("@odata.context"), + "@odata.count": emails_data.get("@odata.count"), + "@odata.nextLink": emails_data.get("@odata.nextLink") + } + } + + + + except ImportError: + logger.error("requests module not available") + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="requests module not available") + except requests.exceptions.HTTPError as e: + if e.response.status_code == 400: + logger.error(f"Bad Request (400) - Invalid filter or parameter: {e.response.text}") + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=f"Invalid filter syntax. Please check your filter parameter. Error: {e.response.text}") + elif e.response.status_code == 401: + logger.error("Unauthorized (401) - Access token may be expired or invalid") + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Authentication failed. 
Please check your connection and try again.") + elif e.response.status_code == 403: + logger.error("Forbidden (403) - Insufficient permissions to access emails") + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Insufficient permissions to read emails from this folder.") + else: + logger.error(f"HTTP Error {e.response.status_code}: {e.response.text}") + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=f"HTTP Error {e.response.status_code}: {e.response.text}") + except Exception as e: + logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}") + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}") + + # Determine output format based on MIME type + mime_type_mapping = { + "application/json": ".json", + "text/plain": ".txt", + "text/csv": ".csv" + } + output_extension = mime_type_mapping.get(outputMimeType, ".json") + output_mime_type = outputMimeType + logger.info(f"Using output format: {output_extension} ({output_mime_type})") + + + + # Create result data as JSON string + result_data = { + "connectionReference": connectionReference, + "folder": folder, + "limit": limit, + "filter": filter, + "emails": email_data, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": self.services.utils.timestampGetUtc() + } + + validationMetadata = { + "actionType": "outlook.readEmails", + "connectionReference": connectionReference, + "folder": folder, + "limit": limit, + "filter": filter, + "emailCount": email_data.get("count", 0), + "outputMimeType": outputMimeType + } + + self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {email_data.get('count', 0)} emails") + self.services.chat.progressLogFinish(operationId, True) + + return ActionResult.isSuccess( + 
documents=[ActionDocument( + documentName=f"outlook_emails_{self._format_timestamp_for_filename()}.json", + documentData=json.dumps(result_data, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + )] + ) + + except Exception as e: + logger.error(f"Error reading emails: {str(e)}") + if operationId: + try: + self.services.chat.progressLogFinish(operationId, False) + except: + pass # Don't fail on progress logging errors + return ActionResult.isFailure( + error=str(e) + ) + diff --git a/modules/workflows/methods/methodOutlook/actions/searchEmails.py b/modules/workflows/methods/methodOutlook/actions/searchEmails.py new file mode 100644 index 00000000..72830caf --- /dev/null +++ b/modules/workflows/methods/methodOutlook/actions/searchEmails.py @@ -0,0 +1,257 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Search Emails action for Outlook operations. +Searches emails by query and returns matching items with metadata. +""" + +import logging +import json +import requests +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument + +logger = logging.getLogger(__name__) + +@action +async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Search emails by query and return matching items with metadata. + - Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType. + - Output format: JSON with search results and metadata. + + Parameters: + - connectionReference (str, required): Microsoft connection label. + - query (str, required): Search expression. + - folder (str, optional): Folder scope or All. Default: All. + - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000. + - outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". 
Default: "application/json". + """ + try: + connectionReference = parameters.get("connectionReference") + query = parameters.get("query") + folder = parameters.get("folder", "All") + limit = parameters.get("limit", 1000) + outputMimeType = parameters.get("outputMimeType", "application/json") + + # Validate parameters + if not connectionReference: + return ActionResult.isFailure(error="Connection reference is required") + + # Validate limit parameter + if limit <= 0: + limit = 1000 + logger.warning(f"Invalid limit value ({limit}), using default value 1000") + + if not query or not query.strip(): + return ActionResult.isFailure(error="Search query is required and cannot be empty") + + # Check if this is a folder specification query + if query.strip().lower().startswith('folder:'): + folder_name = query.strip()[7:].strip() # Remove "folder:" prefix + if not folder_name: + return ActionResult.isFailure(error="Invalid folder specification. Use format 'folder:FolderName'") + logger.info(f"Search query is a folder specification: {folder_name}") + + # Validate limit + try: + limit = int(limit) + if limit <= 0: + limit = 1000 + logger.warning(f"Invalid limit value (<=0), using default value 1000") + elif limit > 1000: # Microsoft Graph API has limits + limit = 1000 + logger.warning(f"Limit {limit} exceeds maximum (1000), using 1000") + except (ValueError, TypeError): + limit = 1000 + logger.warning(f"Invalid limit value, using default value 1000") + + # Get Microsoft connection + connection = self.connection.getMicrosoftConnection(connectionReference) + if not connection: + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Search emails using Microsoft Graph API + try: + # Microsoft Graph API endpoint for searching messages + graph_url = "https://graph.microsoft.com/v1.0" + headers = { + "Authorization": f"Bearer {connection['accessToken']}", + "Content-Type": "application/json" + } + + # Get the folder ID 
for the specified folder if needed + folder_id = None + if folder and folder.lower() != "all": + folder_id = self.folderManagement.getFolderId(folder, connection) + if folder_id: + logger.debug(f"Found folder ID for '{folder}': {folder_id}") + else: + logger.warning(f"Could not find folder ID for '{folder}', using folder name directly") + + # Build the search API request + api_url = f"{graph_url}/me/messages" + params = self.emailProcessing.buildSearchParameters(query, folder_id or folder, limit) + + # Log search parameters for debugging + logger.debug(f"Search query: '{query}'") + logger.debug(f"Search folder: '{folder}'") + logger.debug(f"Search parameters: {params}") + logger.debug(f"API URL: {api_url}") + + # Make the API call + response = requests.get(api_url, headers=headers, params=params) + + # Log response details for debugging + + + if response.status_code != 200: + # Log detailed error information + try: + error_data = response.json() + logger.error(f"Microsoft Graph API error: {response.status_code} - {error_data}") + except: + logger.error(f"Microsoft Graph API error: {response.status_code} - {response.text}") + + # Check for specific error types and provide helpful messages + if response.status_code == 400: + logger.error("Bad Request (400) - Check search query format and parameters") + logger.error(f"Search query: '{query}'") + logger.error(f"Search parameters: {params}") + logger.error(f"API URL: {api_url}") + elif response.status_code == 401: + logger.error("Unauthorized (401) - Check access token and permissions") + elif response.status_code == 403: + logger.error("Forbidden (403) - Check API permissions and scopes") + elif response.status_code == 429: + logger.error("Too Many Requests (429) - Rate limit exceeded") + + raise Exception(f"Microsoft Graph API returned {response.status_code}: {response.text}") + + response.raise_for_status() + + search_data = response.json() + emails = search_data.get("value", []) + + + + # Apply folder filtering if 
needed and we used $search + if folder and folder.lower() != "all" and "$search" in params: + # Get the actual folder ID for proper filtering + folder_id = self.folderManagement.getFolderId(folder, connection) + + if folder_id: + # Filter results by folder ID + filtered_emails = [] + for email in emails: + if email.get("parentFolderId") == folder_id: + filtered_emails.append(email) + emails = filtered_emails + logger.debug(f"Applied folder filtering: {len(filtered_emails)} emails found in folder {folder}") + else: + # Fallback: try to filter by folder name (less reliable) + filtered_emails = [] + for email in emails: + # Check if email has folder information + if hasattr(email, 'parentFolderId') and email.get('parentFolderId'): + if email.get('parentFolderId') == folder: + filtered_emails.append(email) + else: + # If no folder info, include the email (less strict filtering) + filtered_emails.append(email) + + emails = filtered_emails + logger.debug(f"Applied fallback folder filtering: {len(filtered_emails)} emails found in folder {folder}") + + # Special handling for folder specification queries + if query.strip().lower().startswith('folder:'): + folder_name = query.strip()[7:].strip() + folder_id = self.folderManagement.getFolderId(folder_name, connection) + if folder_id: + # Filter results to only include emails from the specified folder + filtered_emails = [] + for email in emails: + if email.get("parentFolderId") == folder_id: + filtered_emails.append(email) + emails = filtered_emails + logger.debug(f"Applied folder specification filtering: {len(filtered_emails)} emails found in folder {folder_name}") + else: + logger.warning(f"Could not find folder ID for folder specification: {folder_name}") + + + search_result = { + "query": query, + "results": emails, + "count": len(emails), + "folder": folder, + "limit": limit, + "apiMetadata": { + "@odata.context": search_data.get("@odata.context"), + "@odata.count": search_data.get("@odata.count"), + "@odata.nextLink": 
search_data.get("@odata.nextLink") + }, + "searchParams": params + } + + + + except ImportError: + logger.error("requests module not available") + return ActionResult.isFailure(error="requests module not available") + except Exception as e: + logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}") + return ActionResult.isFailure(error=f"Failed to search emails: {str(e)}") + + # Determine output format based on MIME type + mime_type_mapping = { + "application/json": ".json", + "text/plain": ".txt", + "text/csv": ".csv" + } + output_extension = mime_type_mapping.get(outputMimeType, ".json") + output_mime_type = outputMimeType + logger.info(f"Using output format: {output_extension} ({output_mime_type})") + + + + result_data = { + "connectionReference": connectionReference, + "query": query, + "folder": folder, + "limit": limit, + "searchResults": search_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": self.services.utils.timestampGetUtc() + } + + validationMetadata = { + "actionType": "outlook.searchEmails", + "connectionReference": connectionReference, + "query": query, + "folder": folder, + "limit": limit, + "resultCount": search_result.get("count", 0), + "outputMimeType": outputMimeType + } + + return ActionResult( + success=True, + documents=[ActionDocument( + documentName=f"outlook_email_search_{self._format_timestamp_for_filename()}.json", + documentData=json.dumps(result_data, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + )] + ) + + except Exception as e: + logger.error(f"Error searching emails: {str(e)}") + return ActionResult.isFailure(error=str(e)) + diff --git a/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py b/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py new file mode 100644 index 00000000..ffae4c8d --- /dev/null +++ 
b/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py @@ -0,0 +1,312 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Send Draft Email action for Outlook operations. +Sends draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext. +""" + +import logging +import time +import json +import requests +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument + +logger = logging.getLogger(__name__) + +@action +async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext. + - Input requirements: connectionReference (required); documentList with draft email JSON documents (required). + - Output format: JSON confirmation with sent mail metadata for all emails. + + Parameters: + - connectionReference (str, required): Microsoft connection label. + - documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function). 
+ """ + operationId = None + try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"outlook_send_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Send Draft Email", + "Outlook Email Sending", + f"Processing {len(parameters.get('documentList', []))} draft(s)", + parentOperationId=parentOperationId + ) + + connectionReference = parameters.get("connectionReference") + documentList = parameters.get("documentList", []) + + if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Connection reference is required") + + if not documentList: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="documentList is required and cannot be empty") + + # Convert single value to list if needed + if isinstance(documentList, str): + documentList = [documentList] + + # Get Microsoft connection + self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection") + connection = self.connection.getMicrosoftConnection(connectionReference) + if not connection: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Check permissions + self.services.chat.progressLogUpdate(operationId, 0.3, "Checking permissions") + permissions_ok = await self.connection.checkPermissions(connection) + if not permissions_ok: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations") + + # Read draft email JSON documents from documentList + self.services.chat.progressLogUpdate(operationId, 0.4, 
"Reading draft email documents") + draftEmails = [] + for docRef in documentList: + try: + # Get documents from document reference + from modules.datamodels.datamodelDocref import DocumentReferenceList + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([docRef])) + if not chatDocuments: + logger.warning(f"No documents found for reference: {docRef}") + continue + + # Process each document in the reference + for doc in chatDocuments: + try: + # Read file data + fileId = getattr(doc, 'fileId', None) + if not fileId: + logger.warning(f"Document {doc.fileName} has no fileId") + continue + + fileData = self.services.chat.getFileData(fileId) + if not fileData: + logger.warning(f"No file data found for document: {doc.fileName}") + continue + + # Parse JSON content + if isinstance(fileData, bytes): + jsonContent = fileData.decode('utf-8') + else: + jsonContent = str(fileData) + + # Parse JSON - handle both direct JSON and JSON wrapped in documentData + try: + draftEmailData = json.loads(jsonContent) + + # If the JSON contains a 'documentData' field, extract it + if isinstance(draftEmailData, dict) and 'documentData' in draftEmailData: + documentDataStr = draftEmailData['documentData'] + if isinstance(documentDataStr, str): + draftEmailData = json.loads(documentDataStr) + + # Validate draft email structure + if not isinstance(draftEmailData, dict): + logger.warning(f"Document {doc.fileName} does not contain a valid draft email JSON object") + continue + + draftId = draftEmailData.get("draftId") + if not draftId: + logger.warning(f"Document {doc.fileName} does not contain 'draftId' field") + continue + + draftEmails.append({ + "draftEmailJson": draftEmailData, + "draftId": draftId, + "sourceDocument": doc.fileName, + "sourceReference": docRef + }) + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON from document {doc.fileName}: {str(e)}") + continue + + except Exception as e: + 
logger.error(f"Error processing document {doc.fileName}: {str(e)}") + continue + + except Exception as e: + logger.error(f"Error reading documents from reference {docRef}: {str(e)}") + continue + + if not draftEmails: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid draft email JSON documents found in documentList") + + self.services.chat.progressLogUpdate(operationId, 0.6, f"Found {len(draftEmails)} draft email(s) to send") + + # Send all draft emails + graph_url = "https://graph.microsoft.com/v1.0" + headers = { + "Authorization": f"Bearer {connection['accessToken']}", + "Content-Type": "application/json" + } + + sentResults = [] + failedResults = [] + + self.services.chat.progressLogUpdate(operationId, 0.7, "Sending emails") + for idx, draftEmail in enumerate(draftEmails): + draftEmailJson = draftEmail["draftEmailJson"] + draftId = draftEmail["draftId"] + sourceDocument = draftEmail["sourceDocument"] + + try: + send_url = f"{graph_url}/me/messages/{draftId}/send" + sendResponse = requests.post(send_url, headers=headers) + + # Extract email details from draft JSON for confirmation + subject = draftEmailJson.get("subject", "Unknown") + recipients = draftEmailJson.get("recipients", []) + cc = draftEmailJson.get("cc", []) + bcc = draftEmailJson.get("bcc", []) + attachmentsCount = draftEmailJson.get("attachments", 0) + + if sendResponse.status_code in [200, 202, 204]: + sentResults.append({ + "status": "sent", + "message": "Email sent successfully", + "draftId": draftId, + "subject": subject, + "recipients": recipients, + "cc": cc, + "bcc": bcc, + "attachments": attachmentsCount, + "sentTimestamp": self.services.utils.timestampGetUtc(), + "sourceDocument": sourceDocument + }) + logger.info(f"Email sent successfully. 
Draft ID: {draftId}, Subject: {subject}") + self.services.chat.progressLogUpdate(operationId, 0.7 + (idx + 1) * 0.2 / len(draftEmails), f"Sent {idx + 1}/{len(draftEmails)}: {subject}") + else: + errorResult = { + "status": "error", + "message": "Failed to send draft email", + "draftId": draftId, + "subject": subject, + "recipients": recipients, + "sendError": { + "statusCode": sendResponse.status_code, + "response": sendResponse.text + }, + "sentTimestamp": self.services.utils.timestampGetUtc(), + "sourceDocument": sourceDocument + } + failedResults.append(errorResult) + logger.error(f"Failed to send email. Draft ID: {draftId}, Status: {sendResponse.status_code}, Response: {sendResponse.text}") + + except Exception as e: + errorResult = { + "status": "error", + "message": f"Exception while sending draft email: {str(e)}", + "draftId": draftId, + "subject": draftEmailJson.get("subject", "Unknown"), + "recipients": draftEmailJson.get("recipients", []), + "exception": str(e), + "sentTimestamp": self.services.utils.timestampGetUtc(), + "sourceDocument": sourceDocument + } + failedResults.append(errorResult) + logger.error(f"Error sending draft email {draftId}: {str(e)}") + + # Build result summary + totalEmails = len(draftEmails) + successfulEmails = len(sentResults) + failedEmails = len(failedResults) + + resultData = { + "totalEmails": totalEmails, + "successfulEmails": successfulEmails, + "failedEmails": failedEmails, + "sentResults": sentResults, + "failedResults": failedResults, + "timestamp": self.services.utils.timestampGetUtc() + } + + # Determine overall success status + self.services.chat.progressLogUpdate(operationId, 0.9, f"Sent {successfulEmails}/{totalEmails} email(s)") + if successfulEmails == 0: + self.services.chat.progressLogFinish(operationId, False) + validationMetadata = { + "actionType": "outlook.sendDraftEmail", + "connectionReference": connectionReference, + "totalEmails": totalEmails, + "successfulEmails": successfulEmails, + "failedEmails": 
failedEmails, + "status": "all_failed" + } + return ActionResult.isFailure( + error=f"Failed to send all {totalEmails} email(s)", + documents=[ActionDocument( + documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json", + documentData=json.dumps(resultData, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + )] + ) + elif failedEmails > 0: + # Partial success + logger.warning(f"Sent {successfulEmails} out of {totalEmails} emails. {failedEmails} failed.") + validationMetadata = { + "actionType": "outlook.sendDraftEmail", + "connectionReference": connectionReference, + "totalEmails": totalEmails, + "successfulEmails": successfulEmails, + "failedEmails": failedEmails, + "status": "partial_success" + } + self.services.chat.progressLogFinish(operationId, True) + return ActionResult( + success=True, + documents=[ActionDocument( + documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json", + documentData=json.dumps(resultData, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + )] + ) + else: + # All successful + logger.info(f"Successfully sent all {totalEmails} email(s)") + validationMetadata = { + "actionType": "outlook.sendDraftEmail", + "connectionReference": connectionReference, + "totalEmails": totalEmails, + "successfulEmails": successfulEmails, + "failedEmails": failedEmails, + "status": "all_successful" + } + self.services.chat.progressLogFinish(operationId, True) + return ActionResult( + success=True, + documents=[ActionDocument( + documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json", + documentData=json.dumps(resultData, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + )] + ) + + except ImportError: + logger.error("requests module not available") + return ActionResult.isFailure(error="requests module not available") + except Exception as e: + logger.error(f"Error in sendDraftEmail: 
class ConnectionHelper:
    """Helper for Microsoft connection management in Outlook operations.

    Resolves a connection reference into an access token via the chat
    service and probes Microsoft Graph to confirm the token can reach the
    user's mail folders.
    """

    # Upper bound (seconds) for a single Microsoft Graph HTTP call. Without a
    # timeout, requests waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, methodInstance):
        """
        Initialize connection helper.

        Args:
            methodInstance: Instance of MethodOutlook (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    def getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
        """
        Resolve a connection reference into Microsoft connection details.

        Args:
            connectionReference: Reference passed to the chat service to look
                up the user connection.

        Returns:
            A dict with the keys ``id``, ``accessToken``, ``refreshToken`` and
            ``scopes``, or None when the connection is missing, has no fresh
            token, is not active, or any error occurs (errors are logged, not
            raised).
        """
        try:
            logger.debug(f"Getting Microsoft connection for reference: {connectionReference}")

            # Get the connection from the service
            userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference)
            if not userConnection:
                logger.error(f"Connection not found: {connectionReference}")
                return None

            logger.debug(f"Found connection: {userConnection.id}, status: {userConnection.status.value}, authority: {userConnection.authority.value}")

            # Get a fresh token for this connection
            token = self.services.chat.getFreshConnectionToken(userConnection.id)
            if not token:
                logger.error(f"Fresh token not found for connection: {userConnection.id}")
                logger.debug(f"Connection details: {userConnection}")
                return None

            logger.debug(f"Fresh token retrieved for connection {userConnection.id}")

            # Check if connection is active
            if userConnection.status.value != "active":
                logger.error(f"Connection is not active: {userConnection.id}, status: {userConnection.status.value}")
                return None

            return {
                "id": userConnection.id,
                "accessToken": token.tokenAccess,
                "refreshToken": token.tokenRefresh,
                # Hard-coded list; it is not read back from the token itself.
                "scopes": ["Mail.ReadWrite", "Mail.Send", "Mail.ReadWrite.Shared", "User.Read"]  # Valid Microsoft Graph API scopes
            }
        except Exception as e:
            logger.error(f"Error getting Microsoft connection: {str(e)}")
            return None

    async def checkPermissions(self, connection: Dict[str, Any]) -> bool:
        """
        Check if the connection has the necessary permissions for Outlook operations.

        The check is a GET on ``/me/mailFolders``; only HTTP 200 counts as
        success.

        Args:
            connection: Connection dict; its ``accessToken`` entry is used as
                the bearer token.

        Returns:
            True when Graph answers 200, False for any other status, a
            timeout, or any other error (errors are logged, not raised).
        """
        try:
            graph_url = "https://graph.microsoft.com/v1.0"
            headers = {
                "Authorization": f"Bearer {connection['accessToken']}",
                "Content-Type": "application/json"
            }

            # Test permissions by trying to access the user's mail folder.
            # NOTE(review): requests.get is synchronous, so this coroutine does
            # not yield while the request is in flight; the timeout bounds it.
            test_url = f"{graph_url}/me/mailFolders"
            response = requests.get(
                test_url,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT_SECONDS
            )

            if response.status_code == 200:
                return True
            elif response.status_code == 403:
                logger.error("Permission denied - connection lacks necessary mail permissions")
                logger.error("Required scopes: Mail.ReadWrite, Mail.Send, Mail.ReadWrite.Shared")
                return False
            else:
                logger.warning(f"Permission check returned status {response.status_code}")
                return False

        except Exception as e:
            logger.error(f"Error checking permissions: {str(e)}")
            return False
class EmailProcessingHelper:
    """Helper for email search and processing operations.

    Turns user-supplied search text into Microsoft Graph query parameters
    (``$top``, ``$filter``, ``$search``, ``$orderby``).
    """

    # Operator prefixes that mark a query as advanced search syntax. Both the
    # singular and plural attachment forms are recognized.
    _SEARCH_OPERATORS = ('from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:')

    # Basic subject searches are truncated to this many characters to keep
    # the generated filter simple.
    _MAX_BASIC_QUERY_LENGTH = 50

    def __init__(self, methodInstance):
        """
        Initialize email processing helper.

        Args:
            methodInstance: Instance of MethodOutlook (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    @staticmethod
    def _escapeODataString(value: str) -> str:
        """Escape a value for use inside a single-quoted OData string literal."""
        # OData represents a literal single quote as two single quotes.
        return value.replace("'", "''")

    def sanitizeSearchQuery(self, query: str) -> str:
        """
        Sanitize a search query before it is turned into Graph parameters.

        Args:
            query: Raw query text; may be empty or None.

        Returns:
            - "" for an empty query.
            - The stripped query unchanged when it is a ``folder:<name>``
              specification with a non-empty name.
            - The query with double quotes removed when it contains a search
              operator such as ``from:`` or ``subject:``.
            - Otherwise the query with backslashes, single quotes and double
              quotes removed.
        """
        if not query:
            return ""

        clean_query = query.strip()

        # Folder specifications are passed through untouched.
        if clean_query.lower().startswith('folder:') and clean_query[7:].strip():
            return clean_query

        # Remove any double quotes that might cause issues
        clean_query = clean_query.replace('"', '')

        lowered = clean_query.lower()
        if any(op in lowered for op in self._SEARCH_OPERATORS):
            # Advanced search query: keep the operator syntax intact.
            return clean_query

        # Basic text search: drop characters that would break the quoted
        # literal inside contains(...).
        return re.sub(r'[\\\'"]', '', clean_query)

    def buildSearchParameters(self, query: str, folder: str, limit: int) -> Dict[str, Any]:
        """
        Build search parameters for Microsoft Graph API.

        ``$search`` is never combined with ``$filter`` or ``$orderby``; for
        advanced queries the caller has to apply folder filtering to the
        results afterwards.

        Args:
            query: Search text, ``folder:<name>`` specification, operator
                query, ``*`` wildcard, or empty.
            folder: Folder to restrict to; empty or "all" (any case) means no
                folder restriction.
            limit: Value for ``$top``.

        Returns:
            Dict of query parameters. Always contains ``$top``; contains
            either ``$search`` or (optionally) ``$filter`` plus ``$orderby``.
        """
        params: Dict[str, Any] = {"$top": limit}
        order_by = "receivedDateTime desc"

        # Folder restriction shared by every non-$search branch. Well-known
        # and custom folder names are interpolated the same way; resolving a
        # custom name to an ID is left to the calling method.
        folder_filter = None
        if folder and folder.lower() != "all":
            folder_filter = f"parentFolderId eq '{self._escapeODataString(folder)}'"

        if not query or not query.strip():
            # No query specified, just get emails from folder
            if folder_filter:
                params["$filter"] = folder_filter
            params["$orderby"] = order_by
            return params

        clean_query = self.sanitizeSearchQuery(query)

        # "folder:<name>" is a folder specification, not a text search.
        if clean_query.lower().startswith('folder:'):
            folder_name = clean_query[7:].strip()  # Remove "folder:" prefix
            if folder_name:
                params["$filter"] = f"parentFolderId eq '{self._escapeODataString(folder_name)}'"
                params["$orderby"] = order_by
                return params

        lowered = clean_query.lower()
        if any(op in lowered for op in self._SEARCH_OPERATORS):
            # Advanced query: use $search only. No $orderby and no folder
            # filter are added here; folder filtering happens after the call.
            params["$search"] = f'"{clean_query}"'
            return params

        # Basic text search on the subject. Keep the filter simple to avoid
        # the "InefficientFilter" error.
        if len(clean_query) > self._MAX_BASIC_QUERY_LENGTH:
            clean_query = clean_query[:self._MAX_BASIC_QUERY_LENGTH]

        conditions = []
        if clean_query not in ("*", ""):
            # sanitizeSearchQuery already removed quotes from basic text.
            conditions.append(f"contains(subject,'{clean_query}')")
        if folder_filter:
            conditions.append(folder_filter)
        if conditions:
            params["$filter"] = " and ".join(conditions)

        params["$orderby"] = order_by
        return params

    def buildGraphFilter(self, filter_text: str) -> Dict[str, str]:
        """
        Build Microsoft Graph API filter parameters based on filter text.

        Args:
            filter_text (str): The filter text to process

        Returns:
            Dict[str, str]: Empty dict for empty/whitespace-only text or a
            ``folder:<name>`` specification (handled by the caller);
            otherwise a dict with either ``$filter`` or ``$search``.
        """
        if not filter_text:
            return {}

        filter_text = filter_text.strip()
        if not filter_text:
            # Whitespace-only input must not become contains(subject,'').
            return {}

        lowered = filter_text.lower()

        # Folder specifications are left to the main method.
        if lowered.startswith('folder:') and filter_text[7:].strip():
            return {}

        # Operator queries (from:, to:, subject:, ...) take precedence over
        # the email-address heuristic below.
        if lowered.startswith(self._SEARCH_OPERATORS):
            return {"$search": f'"{filter_text}"'}

        # A bare email address filters on the sender. The sender address is
        # addressed as from/emailAddress/address.
        if '@' in filter_text and '.' in filter_text and ' ' not in filter_text:
            return {"$filter": f"from/emailAddress/address eq '{self._escapeODataString(filter_text)}'"}

        # Text that already looks like an OData expression is passed through.
        if any(op in lowered for op in [' eq ', ' ne ', ' gt ', ' lt ', ' ge ', ' le ', ' and ', ' or ']):
            return {"$filter": filter_text}

        # Anything else is treated as subject text.
        return {"$filter": f"contains(subject,'{self._escapeODataString(filter_text)}')"}
class FolderManagementHelper:
    """Helper for folder management operations.

    Resolves mail folder display names to IDs (and back) through the
    Microsoft Graph ``/me/mailFolders`` endpoints.
    """

    # Upper bound (seconds) for a single Microsoft Graph HTTP call.
    REQUEST_TIMEOUT_SECONDS = 30

    # Alternate display names tried, as substrings, when no folder matches
    # the requested name exactly. Keys are the lower-cased requested names.
    _FOLDER_NAME_VARIATIONS = {
        "drafts": ("drafts", "draft", "entwürfe", "entwurf", "brouillons", "brouillon"),
        "sent items": ("sent items", "sent", "gesendete elemente", "éléments envoyés"),
    }

    def __init__(self, methodInstance):
        """
        Initialize folder management helper.

        Args:
            methodInstance: Instance of MethodOutlook (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    def getFolderId(self, folder_name: str, connection: Dict[str, Any]) -> Optional[str]:
        """
        Get the folder ID for a given folder display name.

        An exact, case-insensitive display-name match wins. If there is none
        and the name is "drafts" or "sent items", the first folder whose
        display name contains a known variation is returned.

        Args:
            folder_name: Display name to look for.
            connection: Connection dict; its ``accessToken`` entry is used as
                the bearer token.

        Returns:
            The folder ID, or None when the folder is not found, the folder
            list cannot be retrieved, or any error occurs (logged as a
            warning, not raised).
        """
        try:
            graph_url = "https://graph.microsoft.com/v1.0"
            headers = {
                "Authorization": f"Bearer {connection['accessToken']}",
                "Content-Type": "application/json"
            }

            # The folder listing is paged; follow @odata.nextLink so folders
            # beyond the first page can be matched as well.
            all_folders = []
            next_url = f"{graph_url}/me/mailFolders"
            while next_url:
                response = requests.get(
                    next_url,
                    headers=headers,
                    timeout=self.REQUEST_TIMEOUT_SECONDS
                )
                if response.status_code != 200:
                    logger.warning(f"Could not retrieve folders: {response.status_code}")
                    if not all_folders:
                        return None
                    # Best effort: search the pages fetched so far.
                    break

                folders_data = response.json()
                all_folders.extend(folders_data.get("value", []))
                next_url = folders_data.get("@odata.nextLink")

            wanted = folder_name.lower()

            # Try exact match first
            for folder in all_folders:
                if (folder.get("displayName") or "").lower() == wanted:
                    return folder.get("id")

            # Then try the known variations for this folder, if any
            variations = self._FOLDER_NAME_VARIATIONS.get(wanted, ())
            for folder in all_folders:
                folder_display_name = (folder.get("displayName") or "").lower()
                if any(variation in folder_display_name for variation in variations):
                    return folder.get("id")

            logger.warning(f"Folder '{folder_name}' not found. Available folders: {[f.get('displayName', 'Unknown') for f in all_folders]}")
            return None

        except Exception as e:
            logger.warning(f"Error getting folder ID for '{folder_name}': {str(e)}")
            return None

    def getFolderNameById(self, folder_id: str, connection: Dict[str, Any]) -> str:
        """
        Get the folder display name for a given folder ID.

        Args:
            folder_id: Mail folder ID.
            connection: Connection dict; its ``accessToken`` entry is used as
                the bearer token.

        Returns:
            The folder's ``displayName``; ``folder_id`` itself when the name
            is missing, the request fails, or any error occurs.
        """
        try:
            graph_url = "https://graph.microsoft.com/v1.0"
            headers = {
                "Authorization": f"Bearer {connection['accessToken']}",
                "Content-Type": "application/json"
            }

            # Get folder by ID
            api_url = f"{graph_url}/me/mailFolders/{folder_id}"
            response = requests.get(
                api_url,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT_SECONDS
            )

            if response.status_code == 200:
                folder_data = response.json()
                return folder_data.get("displayName", folder_id)
            else:
                logger.warning(f"Could not retrieve folder name for ID {folder_id}: {response.status_code}")
                return folder_id

        except Exception as e:
            logger.warning(f"Error getting folder name for ID '{folder_id}': {str(e)}")
            return folder_id
class MethodOutlook(MethodBase):
    """Outlook method implementation for email operations"""

    def __init__(self, services):
        """Set up helpers, declare the four Outlook actions and expose them as methods."""
        super().__init__(services)
        self.name = "outlook"
        self.description = "Handle Microsoft Outlook email operations"

        # Helper modules
        self.connection = ConnectionHelper(self)
        self.emailProcessing = EmailProcessingHelper(self)
        self.folderManagement = FolderManagementHelper(self)

        def bind(actionFunction):
            # Bind a module-level action function to this instance.
            return actionFunction.__get__(self, self.__class__)

        # Factories for parameters shared by several actions; every call
        # returns a fresh WorkflowActionParameter.
        def connectionParam():
            return WorkflowActionParameter(
                name="connectionReference",
                type="str",
                frontendType=FrontendType.USER_CONNECTION,
                required=True,
                description="Microsoft connection label"
            )

        def folderParam(default, description):
            return WorkflowActionParameter(
                name="folder",
                type="str",
                frontendType=FrontendType.SELECT,
                frontendOptions="outlook.folder",
                required=False,
                default=default,
                description=description
            )

        def limitParam():
            return WorkflowActionParameter(
                name="limit",
                type="int",
                frontendType=FrontendType.NUMBER,
                required=False,
                default=1000,
                description="Maximum items to return",
                validation={"min": 1, "max": 10000}
            )

        def outputMimeTypeParam():
            return WorkflowActionParameter(
                name="outputMimeType",
                type="str",
                frontendType=FrontendType.SELECT,
                frontendOptions=["application/json", "text/plain", "text/csv"],
                required=False,
                default="application/json",
                description="MIME type for output file"
            )

        def addressListParam(name, required, description):
            return WorkflowActionParameter(
                name=name,
                type="List[str]",
                frontendType=FrontendType.MULTISELECT,
                required=required,
                description=description
            )

        def documentListParam(required, description):
            return WorkflowActionParameter(
                name="documentList",
                type="List[str]",
                frontendType=FrontendType.DOCUMENT_REFERENCE,
                required=required,
                description=description
            )

        # RBAC integration: action definitions keyed by name, each with an actionId
        readEmailsDefinition = WorkflowActionDefinition(
            actionId="outlook.readEmails",
            description="Read emails and metadata from a mailbox folder",
            parameters={
                "connectionReference": connectionParam(),
                "folder": folderParam("Inbox", "Folder to read from"),
                "limit": limitParam(),
                "filter": WorkflowActionParameter(
                    name="filter",
                    type="str",
                    frontendType=FrontendType.TEXT,
                    required=False,
                    description="Sender, query operators, or subject text"
                ),
                "outputMimeType": outputMimeTypeParam()
            },
            execute=bind(readEmails)
        )

        searchEmailsDefinition = WorkflowActionDefinition(
            actionId="outlook.searchEmails",
            description="Search emails by query and return matching items with metadata",
            parameters={
                "connectionReference": connectionParam(),
                "query": WorkflowActionParameter(
                    name="query",
                    type="str",
                    frontendType=FrontendType.TEXT,
                    required=True,
                    description="Search expression"
                ),
                "folder": folderParam("All", "Folder scope or All"),
                "limit": limitParam(),
                "outputMimeType": outputMimeTypeParam()
            },
            execute=bind(searchEmails)
        )

        composeDefinition = WorkflowActionDefinition(
            actionId="outlook.composeAndDraftEmailWithContext",
            description="Compose email content using AI from context and optional documents, then create a draft",
            parameters={
                "connectionReference": connectionParam(),
                "to": addressListParam("to", True, "Recipient email addresses"),
                "context": WorkflowActionParameter(
                    name="context",
                    type="str",
                    frontendType=FrontendType.TEXTAREA,
                    required=True,
                    description="Detailed context for composing the email"
                ),
                "documentList": documentListParam(False, "Document references for context/attachments"),
                "cc": addressListParam("cc", False, "CC recipients"),
                "bcc": addressListParam("bcc", False, "BCC recipients"),
                "emailStyle": WorkflowActionParameter(
                    name="emailStyle",
                    type="str",
                    frontendType=FrontendType.SELECT,
                    frontendOptions=["formal", "casual", "business"],
                    required=False,
                    default="business",
                    description="Email style: formal, casual, or business"
                ),
                "maxLength": WorkflowActionParameter(
                    name="maxLength",
                    type="int",
                    frontendType=FrontendType.NUMBER,
                    required=False,
                    default=1000,
                    description="Maximum length for generated content",
                    validation={"min": 100, "max": 10000}
                )
            },
            execute=bind(composeAndDraftEmailWithContext)
        )

        sendDraftDefinition = WorkflowActionDefinition(
            actionId="outlook.sendDraftEmail",
            description="Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext",
            parameters={
                "connectionReference": connectionParam(),
                "documentList": documentListParam(
                    True,
                    "Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function)"
                )
            },
            execute=bind(sendDraftEmail)
        )

        self._actions = {
            "readEmails": readEmailsDefinition,
            "searchEmails": searchEmailsDefinition,
            "composeAndDraftEmailWithContext": composeDefinition,
            "sendDraftEmail": sendDraftDefinition
        }

        # Validate actions after definition
        self._validateActions()

        # Register actions as methods (optional, for direct access)
        self.readEmails = bind(readEmails)
        self.searchEmails = bind(searchEmails)
        self.composeAndDraftEmailWithContext = bind(composeAndDraftEmailWithContext)
        self.sendDraftEmail = bind(sendDraftEmail)

    def _format_timestamp_for_filename(self) -> str:
        """Return the current UTC time formatted as YYYYMMDD-hhmmss for filenames."""
        currentUtc = datetime.now(UTC)
        return currentUtc.strftime("%Y%m%d-%H%M%S")
+ +from .methodSharepoint import MethodSharepoint + +__all__ = ['MethodSharepoint'] + diff --git a/modules/workflows/methods/methodSharepoint/actions/__init__.py b/modules/workflows/methods/methodSharepoint/actions/__init__.py new file mode 100644 index 00000000..6975f8af --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/actions/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +"""Action modules for SharePoint operations.""" + +# Export all actions +from .findDocumentPath import findDocumentPath +from .readDocuments import readDocuments +from .uploadDocument import uploadDocument +from .listDocuments import listDocuments +from .analyzeFolderUsage import analyzeFolderUsage +from .findSiteByUrl import findSiteByUrl +from .downloadFileByPath import downloadFileByPath +from .copyFile import copyFile +from .uploadFile import uploadFile + +__all__ = [ + 'findDocumentPath', + 'readDocuments', + 'uploadDocument', + 'listDocuments', + 'analyzeFolderUsage', + 'findSiteByUrl', + 'downloadFileByPath', + 'copyFile', + 'uploadFile', +] + diff --git a/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py b/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py new file mode 100644 index 00000000..075c8b96 --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py @@ -0,0 +1,337 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Analyze Folder Usage action for SharePoint operations. +Analyzes usage intensity of folders and files in SharePoint. 
+""" + +import logging +import time +import json +from datetime import datetime, timezone, timedelta +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument + +logger = logging.getLogger(__name__) + +@action +async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Analyze usage intensity of folders and files in SharePoint. + - Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval. + - Output format: JSON with usage analytics grouped by time intervals. + + Parameters: + - connectionReference (str, required): Microsoft connection label. + - documentList (list, required): Document list reference(s) containing findDocumentPath result. + - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago. + - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time. + - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day". 
+ """ + operationId = None + try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Analyze Folder Usage", + "SharePoint Analytics", + "Processing document list", + parentOperationId=parentOperationId + ) + + connectionReference = parameters.get("connectionReference") + documentList = parameters.get("documentList") + pathQuery = parameters.get("pathQuery") + if isinstance(documentList, str): + documentList = [documentList] + startDateTime = parameters.get("startDateTime") + endDateTime = parameters.get("endDateTime") + interval = parameters.get("interval", "day") + + if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Connection reference is required") + + # Require either documentList or pathQuery + if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"): + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList or pathQuery is required") + + # Resolve folder/item information from documentList or pathQuery + siteId = None + driveId = None + itemId = None + folderPath = None + folderName = None + foundDocuments = None + + if documentList: + foundDocuments, sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) + + if not foundDocuments: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No documents found in documentList") + + # Get 
siteId from first document (all should be from same site) + firstItem = foundDocuments[0] + siteId = firstItem.get("siteId") + if not siteId: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Site ID missing from documentList") + + # Get drive ID (needed for analytics) + driveId = await self.services.sharepoint.getDriveId(siteId) + if not driveId: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Could not determine drive ID for the site") + + # If no items from documentList, try pathQuery fallback + if not foundDocuments and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": + sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) + + if sites: + siteId = sites[0].get("id") + # Parse pathQuery to find the folder/item + pathQueryParsed, fileQuery, searchType, searchOptions = self.pathProcessing.parseSearchQuery(pathQuery) + + # Extract folder path from pathQuery + folderPath = '/' + if pathQueryParsed and pathQueryParsed.startswith('/sites/'): + parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQueryParsed) + if parsedPath: + innerPath = parsedPath.get("innerPath", "") + folderPath = '/' + innerPath if innerPath else '/' + elif pathQueryParsed: + folderPath = pathQueryParsed + + # Get drive ID + driveId = await self.services.sharepoint.getDriveId(siteId) + if not driveId: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Could not determine drive ID for the site") + + # Get folder/item by path + folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/')) + if not folderInfo: + if operationId: + self.services.chat.progressLogFinish(operationId, 
False) + return ActionResult.isFailure(error=f"Folder or file not found at path: {folderPath}") + + # Add pathQuery item to foundDocuments for processing + foundDocuments = [{ + "id": folderInfo.get("id"), + "name": folderInfo.get("name", ""), + "type": "folder" if folderInfo.get("folder") else "file", + "siteId": siteId, + "fullPath": folderPath, + "webUrl": folderInfo.get("webUrl", "") + }] + + if not siteId or not driveId: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.") + + self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection") + # Get Microsoft connection + connection = self.connection.getMicrosoftConnection(connectionReference) + if not connection: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Set access token + if not self.services.sharepoint.setAccessTokenFromConnection(connection): + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Failed to set SharePoint access token") + + # Process all items from documentList or pathQuery + # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage") + itemsToAnalyze = [] + if foundDocuments: + for item in foundDocuments: + itemId = item.get("id") + itemType = item.get("type", "").lower() + + # Only process folders, skip files and site-level items + if itemId and itemType == "folder": + itemsToAnalyze.append({ + "id": itemId, + "name": item.get("name", ""), + "type": itemType, + "path": item.get("fullPath", ""), + "webUrl": item.get("webUrl", "") + }) + + if not itemsToAnalyze: + if operationId: + 
self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.") + + self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {len(itemsToAnalyze)} folder(s)") + + # Analyze each item + allAnalytics = [] + totalActivities = 0 + uniqueUsers = set() + activityTypes = {} + + # Compute actual date range values (getFolderUsageAnalytics will set defaults if None) + # We need to compute them here to store in output, since getFolderUsageAnalytics modifies them + actualStartDateTime = startDateTime + actualEndDateTime = endDateTime + if not actualEndDateTime: + actualEndDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + if not actualStartDateTime: + startDate = datetime.now(timezone.utc) - timedelta(days=30) + actualStartDateTime = startDate.isoformat().replace('+00:00', 'Z') + + for idx, item in enumerate(itemsToAnalyze): + progress = 0.4 + (idx / len(itemsToAnalyze)) * 0.5 + self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{len(itemsToAnalyze)})") + + # Get usage analytics for this folder + analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics( + siteId=siteId, + driveId=driveId, + itemId=item["id"], + startDateTime=startDateTime, + endDateTime=endDateTime, + interval=interval + ) + + if "error" in analyticsResult: + logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}") + # Continue with other items even if one fails + itemAnalytics = { + "itemId": item["id"], + "itemName": item["name"], + "itemType": item["type"], + "itemPath": item["path"], + "error": analyticsResult.get("error", "Unknown error") + } + else: + # Process analytics for this item + itemActivities = 0 + itemUsers = set() + itemActivityTypes = {} + + if "value" in analyticsResult: + for intervalData in 
analyticsResult["value"]: + activities = intervalData.get("activities", []) + for activity in activities: + itemActivities += 1 + totalActivities += 1 + + action = activity.get("action", {}) + actionType = action.get("verb", "unknown") + itemActivityTypes[actionType] = itemActivityTypes.get(actionType, 0) + 1 + activityTypes[actionType] = activityTypes.get(actionType, 0) + 1 + + actor = activity.get("actor", {}) + userPrincipalName = actor.get("userPrincipalName", "") + if userPrincipalName: + itemUsers.add(userPrincipalName) + uniqueUsers.add(userPrincipalName) + + itemAnalytics = { + "itemId": item["id"], + "itemName": item["name"], + "itemType": item["type"], + "itemPath": item["path"], + "webUrl": item["webUrl"], + "analytics": analyticsResult, + "summary": { + "totalActivities": itemActivities, + "uniqueUsers": len(itemUsers), + "activityTypes": itemActivityTypes + } + } + + # Include note if analytics are not available + if "note" in analyticsResult: + itemAnalytics["note"] = analyticsResult["note"] + + allAnalytics.append(itemAnalytics) + + self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data") + + # Process and format analytics data + resultData = { + "siteId": siteId, + "driveId": driveId, + "startDateTime": actualStartDateTime, # Store computed date range (not None) + "endDateTime": actualEndDateTime, # Store computed date range (not None) + "interval": interval, + "itemsAnalyzed": len(itemsToAnalyze), + "foldersAnalyzed": len([item for item in allAnalytics if item.get("itemType") == "folder"]), + "items": allAnalytics, + "summary": { + "totalActivities": totalActivities, + "uniqueUsers": len(uniqueUsers), + "activityTypes": activityTypes + }, + "note": f"Analyzed {len(itemsToAnalyze)} folder(s) from {actualStartDateTime} to {actualEndDateTime}. " + + f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)." 
+ + (f" Note: {len([item for item in allAnalytics if 'error' in item])} folder(s) had errors or no analytics data available." if any('error' in item for item in allAnalytics) else ""), + "timestamp": self.services.utils.timestampGetUtc() + } + + self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {len(itemsToAnalyze)} folder(s)") + + validationMetadata = { + "actionType": "sharepoint.analyzeFolderUsage", + "itemsAnalyzed": len(itemsToAnalyze), + "interval": interval, + "totalActivities": totalActivities, + "uniqueUsers": len(uniqueUsers) + } + + self.services.chat.progressLogFinish(operationId, True) + return ActionResult( + success=True, + documents=[ + ActionDocument( + documentName=self._generateMeaningfulFileName("sharepoint_usage_analysis", "json", None, "analyzeFolderUsage"), + documentData=json.dumps(resultData, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + ) + ] + ) + + except Exception as e: + logger.error(f"Error analyzing folder usage: {str(e)}") + if operationId: + try: + self.services.chat.progressLogFinish(operationId, False) + except: + pass + return ActionResult( + success=False, + error=str(e) + ) + diff --git a/modules/workflows/methods/methodSharepoint/actions/copyFile.py b/modules/workflows/methods/methodSharepoint/actions/copyFile.py new file mode 100644 index 00000000..1b6d821d --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/actions/copyFile.py @@ -0,0 +1,163 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Copy File action for SharePoint operations. +Copies file within SharePoint. 
# --- modules/workflows/methods/methodSharepoint/actions/copyFile.py ---

@action
async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Copy file within SharePoint.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
    - sourceFolder (str, required): Source folder path relative to site root
    - sourceFile (str, required): Source file name
    - destFolder (str, required): Destination folder path relative to site root
    - destFile (str, required): Destination file name

    Returns:
    - ActionResult with ActionDocument containing copy result (JSON).
      An error whose text contains "itemNotFound" or "404" is reported as a
      success with "skipped": true instead of a failure. Every other error is
      converted into a failure result; exceptions are not propagated.
    """

    def buildResultDocument(payload: Dict[str, Any], **metadata: Any) -> ActionResult:
        # Single place that wraps a JSON payload into the success result,
        # shared by the normal path and the "file not found -> skipped" path.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "file_copy_result",
            "json",
            workflowContext,
            "copyFile"
        )
        validationMetadata = self._createValidationMetadata("copyFile", **metadata)
        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(payload, indent=2),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[document])

    try:
        # Validate in a fixed order so the first missing parameter is the one reported.
        requiredNames = (
            "connectionReference",
            "siteId",
            "sourceFolder",
            "sourceFile",
            "destFolder",
            "destFile",
        )
        for requiredName in requiredNames:
            if not parameters.get(requiredName):
                return ActionResult.isFailure(error=f"{requiredName} parameter is required")

        connectionReference = parameters["connectionReference"]
        siteIdParam = parameters["siteId"]
        sourceFolder = parameters["sourceFolder"]
        sourceFile = parameters["sourceFile"]
        destFolder = parameters["destFolder"]
        destFile = parameters["destFile"]

        # siteId may be a document reference whose JSON payload carries the real
        # site ID under "id"; otherwise the parameter itself is used as the ID.
        siteId = siteIdParam
        if isinstance(siteIdParam, str):
            from modules.datamodels.datamodelDocref import DocumentReferenceList
            try:
                docList = DocumentReferenceList.from_string_list([siteIdParam])
                chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
                if chatDocuments:
                    siteInfoJson = json.loads(chatDocuments[0].documentData)
                    siteId = siteInfoJson.get("id") or siteIdParam
            except Exception as refError:
                # Best effort: a value that is not a resolvable document reference
                # is treated as a literal site ID. Log instead of silently hiding it.
                logger.debug(f"siteId is not a resolvable document reference, using it as site ID: {refError}")
                siteId = siteIdParam

        # Get Microsoft connection
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

        # Apply the connection's token to the SharePoint service before calling it,
        # mirroring what analyzeFolderUsage does with the same service.
        # NOTE(review): confirm no caller relies on a previously configured token.
        if not self.services.sharepoint.setAccessTokenFromConnection(connection):
            return ActionResult.isFailure(error="Failed to set SharePoint access token")

        # Copy file
        await self.services.sharepoint.copyFileAsync(
            siteId=siteId,
            sourceFolder=sourceFolder,
            sourceFile=sourceFile,
            destFolder=destFolder,
            destFile=destFile
        )

        sourcePath = f"{sourceFolder}/{sourceFile}"
        destPath = f"{destFolder}/{destFile}"
        logger.info(f"Copied file in SharePoint: {sourcePath} -> {destPath}")

        return buildResultDocument(
            {
                "success": True,
                "siteId": siteId,
                "sourcePath": sourcePath,
                "destPath": destPath
            },
            siteId=siteId,
            sourcePath=sourcePath,
            destPath=destPath
        )

    except Exception as e:
        # Handle file not found gracefully: a missing source is reported as a
        # skipped (but successful) step rather than a failed action.
        errorText = str(e)
        if "itemNotFound" in errorText or "404" in errorText:
            logger.warning(f"File not found for copy: {parameters.get('sourceFolder')}/{parameters.get('sourceFile')}")
            return buildResultDocument(
                {
                    "success": True,
                    "skipped": True,
                    "reason": "File not found (may not exist yet)"
                },
                skipped=True
            )

        errorMsg = f"Error copying file in SharePoint: {errorText}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)


# --- modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py ---

@action
async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Download file from SharePoint by exact file path.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
    - filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")

    Returns:
    - ActionResult with ActionDocument containing file content as base64-encoded bytes.
      Missing parameters, a missing connection, a missing file and any raised
      error are all reported as failure results; exceptions are not propagated.
    """
    try:
        # Validate in a fixed order so the first missing parameter is the one reported.
        for requiredName in ("connectionReference", "siteId", "filePath"):
            if not parameters.get(requiredName):
                return ActionResult.isFailure(error=f"{requiredName} parameter is required")

        connectionReference = parameters["connectionReference"]
        siteIdParam = parameters["siteId"]
        filePath = parameters["filePath"]

        # siteId may be a document reference whose JSON payload carries the real
        # site ID under "id"; otherwise the parameter itself is used as the ID.
        siteId = siteIdParam
        if isinstance(siteIdParam, str):
            from modules.datamodels.datamodelDocref import DocumentReferenceList
            try:
                docList = DocumentReferenceList.from_string_list([siteIdParam])
                chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
                if chatDocuments:
                    siteInfoJson = json.loads(chatDocuments[0].documentData)
                    siteId = siteInfoJson.get("id") or siteIdParam
            except Exception as refError:
                # Best effort: a value that is not a resolvable document reference
                # is treated as a literal site ID. Log instead of silently hiding it.
                logger.debug(f"siteId is not a resolvable document reference, using it as site ID: {refError}")
                siteId = siteIdParam

        # Get Microsoft connection
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

        # Apply the connection's token to the SharePoint service before calling it,
        # mirroring what analyzeFolderUsage does with the same service.
        # NOTE(review): confirm no caller relies on a previously configured token.
        if not self.services.sharepoint.setAccessTokenFromConnection(connection):
            return ActionResult.isFailure(error="Failed to set SharePoint access token")

        # Download file
        fileContent = await self.services.sharepoint.downloadFileByPath(
            siteId=siteId,
            filePath=filePath
        )

        if fileContent is None:
            return ActionResult.isFailure(error=f"File not found or could not be downloaded: {filePath}")

        logger.info(f"Downloaded file from SharePoint: {filePath} ({len(fileContent)} bytes)")

        # Derive the document name from the last path component. splitext keeps
        # inner dots ("report.final.xlsx" -> "report.final" + "xlsx"); names
        # without an extension fall back to "bin".
        fileName = os.path.basename(filePath) or "downloaded_file"
        baseName, extension = os.path.splitext(fileName)
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            baseName,
            extension.lstrip('.') or "bin",
            workflowContext,
            "downloadFileByPath"
        )

        # Encode as base64 so the binary payload can travel as text
        fileBase64 = base64.b64encode(fileContent).decode('utf-8')

        validationMetadata = self._createValidationMetadata(
            "downloadFileByPath",
            siteId=siteId,
            filePath=filePath,
            fileSize=len(fileContent)
        )

        document = ActionDocument(
            documentName=filename,
            documentData=fileBase64,
            mimeType="application/octet-stream",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        errorMsg = f"Error downloading file from SharePoint: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
+""" + +import logging +import time +import json +import urllib.parse +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument + +logger = logging.getLogger(__name__) + +@action +async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Find documents and folders by name/path across sites. + - Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults. + - Output format: JSON with found items and paths. + + Parameters: + - connectionReference (str, required): Microsoft connection label. + - site (str, optional): Site hint. + - searchQuery (str, required): Search terms or path. + - maxResults (int, optional): Maximum items to return. Default: 1000. + """ + operationId = None + try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"sharepoint_find_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Find Document Path", + "SharePoint Search", + f"Query: {parameters.get('searchQuery', '*')}", + parentOperationId=parentOperationId + ) + + connectionReference = parameters.get("connectionReference") + site = parameters.get("site") + searchQuery = parameters.get("searchQuery", "*") + maxResults = parameters.get("maxResults", 1000) + + if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Connection reference is required") + + # Parse searchQuery to extract path, search terms, search type, and options + pathQuery, fileQuery, searchType, searchOptions = self.pathProcessing.parseSearchQuery(searchQuery) + logger.debug(f"Parsed searchQuery '{searchQuery}' -> 
pathQuery='{pathQuery}', fileQuery='{fileQuery}', searchType='{searchType}'") + + self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection") + connection = self.connection.getMicrosoftConnection(connectionReference) + if not connection: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Extract site name from pathQuery if it contains Microsoft-standard path (/sites/SiteName/...) + siteFromPath = None + directSite = None + if pathQuery and pathQuery.startswith('/sites/'): + parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery) + if parsedPath: + siteFromPath = parsedPath.get("siteName") + logger.info(f"Extracted site from Microsoft-standard pathQuery '{pathQuery}': '{siteFromPath}'") + + # Try to get site directly by path (optimization - no need to load all 60 sites) + directSite = await self.siteDiscovery.getSiteByStandardPath(siteFromPath) + if directSite: + logger.info(f"Got site directly by standard path - no need to discover all sites") + sites = [directSite] + else: + logger.warning(f"Could not get site directly, falling back to site discovery") + directSite = None + else: + logger.warning(f"Failed to parse site from standard pathQuery '{pathQuery}'") + + # If we didn't get the site directly, use discovery and filtering + if not directSite: + # Determine which site hint to use (priority: site parameter > site from pathQuery > site_hint from searchOptions) + siteHintToUse = site or siteFromPath or searchOptions.get("site_hint") + + # Discover SharePoint sites - use targeted approach when site hint is available + self.services.chat.progressLogUpdate(operationId, 0.3, "Discovering SharePoint sites") + if siteHintToUse: + # When site hint is available, discover all sites first, then filter + allSites = await self.siteDiscovery.discoverSharePointSites() + if not allSites: + if 
operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No SharePoint sites found or accessible") + + sites = self.siteDiscovery.filterSitesByHint(allSites, siteHintToUse) + logger.info(f"Filtered sites by site hint '{siteHintToUse}' -> {len(sites)} sites") + if not sites: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=f"No SharePoint sites found matching '{siteHintToUse}'") + else: + # No site hint - discover all sites + sites = await self.siteDiscovery.discoverSharePointSites() + if not sites: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No SharePoint sites found or accessible") + + # Resolve path query into search paths + searchPaths = self.pathProcessing.resolvePathQuery(pathQuery) + + self.services.chat.progressLogUpdate(operationId, 0.5, f"Searching across {len(sites)} site(s)") + + try: + # Search across all discovered sites + foundDocuments = [] + allSitesSearched = [] + + # Handle different search approaches based on search type + if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*": + # Use unified search for folders - this is global and searches all sites + try: + + # Use Microsoft Graph Search API syntax (simple term search only) + terms = [t for t in fileQuery.split() if t.strip()] + + if len(terms) > 1: + # Multiple terms: search for ALL terms (AND) - more specific results + queryString = " AND ".join(terms) + else: + # Single term: search for the term + queryString = terms[0] if terms else fileQuery + logger.info(f"Using unified search for folders: {queryString}") + + payload = { + "requests": [ + { + "entityTypes": ["driveItem"], + "query": {"queryString": queryString}, + "from": 0, + "size": 50 + } + ] + } + logger.info(f"Using unified search API for folders with queryString: {queryString}") + + # Use global 
search endpoint (site-specific search not available) + unifiedResult = await self.apiClient.makeGraphApiCall( + "search/query", + method="POST", + data=json.dumps(payload).encode("utf-8") + ) + + if "error" in unifiedResult: + logger.warning(f"Unified search failed: {unifiedResult['error']}") + items = [] + else: + # Flatten hits -> driveItem resources + items = [] + for container in (unifiedResult.get("value", []) or []): + for hitsContainer in (container.get("hitsContainers", []) or []): + for hit in (hitsContainer.get("hits", []) or []): + resource = hit.get("resource") + if resource: + items.append(resource) + + logger.info(f"Unified search returned {len(items)} items (pre-filter)") + + # Apply our improved folder detection logic + folderItems = [] + for item in items: + resource = item + + # Use the same detection logic as our test + isFolder = self.services.sharepoint.detectFolderType(resource) + + if isFolder: + folderItems.append(item) + + items = folderItems + logger.info(f"Filtered to {len(items)} folders using improved detection logic") + + # Process unified search results - extract site information from webUrl + for item in items: + itemName = item.get("name", "") + webUrl = item.get("webUrl", "") + + # Extract site information from webUrl + siteName = "Unknown Site" + siteId = "unknown" + + if webUrl and '/sites/' in webUrl: + try: + # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/... 
+ urlParts = webUrl.split('/sites/') + if len(urlParts) > 1: + sitePath = urlParts[1].split('/')[0] + # Find matching site from discovered sites + # First try to match by site name (URL path) + for site in sites: + if site.get("name") == sitePath: + siteName = site.get("displayName", sitePath) + siteId = site.get("id", "unknown") + break + else: + # If no match by name, try to match by displayName + for site in sites: + if site.get("displayName") == sitePath: + siteName = site.get("displayName", sitePath) + siteId = site.get("id", "unknown") + break + else: + # If no exact match, use the site path as site name + siteName = sitePath + # Try to find a site with similar name + for site in sites: + if sitePath.lower() in site.get("name", "").lower() or sitePath.lower() in site.get("displayName", "").lower(): + siteName = site.get("displayName", sitePath) + siteId = site.get("id", "unknown") + break + except Exception as e: + logger.warning(f"Error extracting site info from URL {webUrl}: {e}") + + # Use improved folder detection logic + isFolder = self.services.sharepoint.detectFolderType(item) + itemType = "folder" if isFolder else "file" + itemPath = item.get("parentReference", {}).get("path", "") + logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'") + + # Simple filtering like test file - just check search type + if searchType == "files" and isFolder: + continue # Skip folders when searching for files + elif searchType == "folders" and not isFolder: + continue # Skip files when searching for folders + + # Simple approach like test file - no complex filtering + logger.debug(f"Item '{itemName}' found - adding to results") + + # Create result with full path information for proper action chaining + parentPath = item.get("parentReference", {}).get("path", "") + + # Extract the full SharePoint path from webUrl or parentReference + fullPath = "" + if webUrl: + # Extract path from webUrl: 
https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung + if '/sites/' in webUrl: + pathPart = webUrl.split('/sites/')[1] + # Decode URL encoding and convert to backslash format + decodedPath = urllib.parse.unquote(pathPart) + fullPath = "\\" + decodedPath.replace('/', '\\') + elif parentPath: + # Use parentReference path if available + fullPath = parentPath.replace('/', '\\') + + docInfo = { + "id": item.get("id"), + "name": item.get("name"), + "type": "folder" if isFolder else "file", + "siteName": siteName, + "siteId": siteId, + "webUrl": webUrl, + "fullPath": fullPath, + "parentPath": parentPath + } + + foundDocuments.append(docInfo) + + logger.info(f"Found {len(foundDocuments)} documents from unified search") + + except Exception as e: + logger.error(f"Error performing unified folder search: {str(e)}") + # Fallback to site-by-site search + pass + + # If no unified search was performed or it failed, fall back to site-by-site search + if not foundDocuments: + # Use simple approach like test file - no complex filtering + siteScopedSites = sites + + for site in siteScopedSites: + siteId = site["id"] + siteName = site["displayName"] + siteUrl = site["webUrl"] + + logger.info(f"Searching in site: {siteName} ({siteUrl})") + + # Check if pathQuery contains a specific folder path (not just /sites/SiteName) + folderPath = None + if pathQuery and pathQuery.startswith('/sites/'): + parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery) + if parsedPath: + innerPath = parsedPath.get("innerPath", "") + if innerPath and innerPath.strip(): + # Remove leading slash if present + folderPath = innerPath.lstrip('/') + + # Generic approach: Try to find the folder, if it fails, remove first segment + # This works for all languages because we test the actual API response + # In SharePoint Graph API, /drive/root already points to the default document library, + # so library names in paths should be 
removed + pathSegments = [s for s in folderPath.split('/') if s.strip()] + if len(pathSegments) > 1: + # Try with first segment removed (first segment is likely the document library) + testPath = '/'.join(pathSegments[1:]) + # Quick test: try to get folder info (this is fast and doesn't require full search) + testEndpoint = f"sites/{siteId}/drive/root:/{urllib.parse.quote(testPath, safe='')}:" + testResult = await self.apiClient.makeGraphApiCall(testEndpoint) + if testResult and "error" not in testResult: + # Path without first segment works - first segment was likely the document library + folderPath = testPath + logger.info(f"Removed document library name '{pathSegments[0]}' from folder path (tested via API)") + else: + # Keep original path - first segment is not a document library + logger.info(f"Keeping original folder path '{folderPath}' (first segment is not a document library)") + elif len(pathSegments) == 1: + # Only one segment - likely the document library itself, use root + folderPath = None + logger.info(f"Only one segment '{pathSegments[0]}' found, likely document library - using root") + + if folderPath: + logger.info(f"Extracted folder path from pathQuery: '{folderPath}'") + else: + logger.info(f"Folder path resolved to root (only document library in path)") + + # Use Microsoft Graph API for this specific site + # Handle empty or wildcard queries + if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*": + # For wildcard/empty queries, list all items + if folderPath: + # List items in specific folder + encodedPath = urllib.parse.quote(folderPath, safe='') + endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/children" + logger.info(f"Listing items in folder: '{folderPath}'") + else: + # List all items in the drive root + endpoint = f"sites/{siteId}/drive/root/children" + + # Make the API call to list items + listResult = await self.apiClient.makeGraphApiCall(endpoint) + if "error" in listResult: + logger.warning(f"List failed for 
site {siteName}: {listResult['error']}") + continue + # Process list results for this site + items = listResult.get("value", []) + logger.info(f"Retrieved {len(items)} items from site {siteName}") + else: + # For files, use regular search API + # Clean the query: remove path-like syntax and invalid KQL syntax + searchQueryCleaned = self.pathProcessing.cleanSearchQuery(fileQuery) + # URL-encode the query parameter + encodedQuery = urllib.parse.quote(searchQueryCleaned, safe='') + + if folderPath: + # Search in specific folder + encodedPath = urllib.parse.quote(folderPath, safe='') + endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/search(q='{encodedQuery}')" + logger.info(f"Searching in folder '{folderPath}' with query: '{searchQueryCleaned}' (encoded: '{encodedQuery}')") + else: + # Search in drive root + endpoint = f"sites/{siteId}/drive/root/search(q='{encodedQuery}')" + logger.info(f"Using search API for files with query: '{searchQueryCleaned}' (encoded: '{encodedQuery}')") + + # Make the search API call (files) + searchResult = await self.apiClient.makeGraphApiCall(endpoint) + if "error" in searchResult: + logger.warning(f"Search failed for site {siteName}: {searchResult['error']}") + continue + # Process search results for this site (files) + items = searchResult.get("value", []) + logger.info(f"Retrieved {len(items)} items from site {siteName}") + + siteDocuments = [] + + for item in items: + itemName = item.get("name", "") + + # Use improved folder detection logic + isFolder = self.services.sharepoint.detectFolderType(item) + + itemType = "folder" if isFolder else "file" + itemPath = item.get("parentReference", {}).get("path", "") + logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'") + + # Simple filtering like test file - just check search type + if searchType == "files" and isFolder: + continue # Skip folders when searching for files + elif searchType == "folders" and not isFolder: + continue # Skip files when searching for 
folders + + # Simple approach like test file - no complex filtering + logger.debug(f"Item '{itemName}' found - adding to results") + + # Create result with full path information for proper action chaining + webUrl = item.get("webUrl", "") + parentPath = item.get("parentReference", {}).get("path", "") + + # Extract the full SharePoint path from webUrl or parentReference + fullPath = "" + if webUrl: + # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung + if '/sites/' in webUrl: + pathPart = webUrl.split('/sites/')[1] + # Decode URL encoding and convert to backslash format + decodedPath = urllib.parse.unquote(pathPart) + fullPath = "\\" + decodedPath.replace('/', '\\') + elif parentPath: + # Use parentReference path if available + fullPath = parentPath.replace('/', '\\') + + docInfo = { + "id": item.get("id"), + "name": item.get("name"), + "type": "folder" if isFolder else "file", + "siteName": siteName, + "siteId": siteId, + "webUrl": webUrl, + "fullPath": fullPath, + "parentPath": parentPath + } + + siteDocuments.append(docInfo) + + foundDocuments.extend(siteDocuments) + allSitesSearched.append({ + "siteName": siteName, + "siteUrl": siteUrl, + "siteId": siteId, + "documentsFound": len(siteDocuments) + }) + + logger.info(f"Found {len(siteDocuments)} documents in site {siteName}") + + # Limit total results to maxResults + if len(foundDocuments) > maxResults: + foundDocuments = foundDocuments[:maxResults] + logger.info(f"Limited results to {maxResults} items") + + self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {len(foundDocuments)} document(s)") + + resultData = { + "searchQuery": searchQuery, + "totalResults": len(foundDocuments), + "maxResults": maxResults, + "foundDocuments": foundDocuments, + "timestamp": self.services.utils.timestampGetUtc() + } + + except Exception as e: + logger.error(f"Error searching SharePoint: {str(e)}") + if operationId: + 
@action
async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Find SharePoint site by hostname and site path.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
    - sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")

    Returns:
    - ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
    """
    try:
        # All three parameters are mandatory. They are checked in a fixed order
        # so the first missing one is reported, with the same wording for each.
        for requiredName in ("connectionReference", "hostname", "sitePath"):
            if not parameters.get(requiredName):
                return ActionResult.isFailure(error=f"{requiredName} parameter is required")

        connectionReference = parameters["connectionReference"]
        hostname = parameters["hostname"]
        sitePath = parameters["sitePath"]

        # The connection is only checked for truthiness here; the lookup below
        # goes through self.services.sharepoint and does not receive it.
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return ActionResult.isFailure(
                error="No valid Microsoft connection found for the provided connection reference"
            )

        # sitePath is forwarded untouched; both "Name" and "/sites/Name" are
        # documented as valid inputs.
        siteInfo = await self.services.sharepoint.findSiteByUrl(
            hostname=hostname,
            sitePath=sitePath
        )

        if not siteInfo:
            # Normalise only for display: a sitePath that already carries the
            # "/sites/" prefix used to be reported as ".../sites//sites/Name".
            displayPath = str(sitePath).strip("/")
            if displayPath.lower().startswith("sites/"):
                displayPath = displayPath[len("sites/"):]
            return ActionResult.isFailure(error=f"Site not found: {hostname}:/sites/{displayPath}")

        logger.info(f"Found SharePoint site: {siteInfo.get('displayName')} (ID: {siteInfo.get('id')})")

        # Generate filename (the workflow context is optional)
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "sharepoint_site",
            "json",
            workflowContext,
            "findSiteByUrl"
        )

        validationMetadata = self._createValidationMetadata(
            "findSiteByUrl",
            hostname=hostname,
            sitePath=sitePath,
            siteId=siteInfo.get("id")
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(siteInfo, indent=2),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        # Action boundary: convert any failure into a failed ActionResult
        # instead of letting it propagate into the workflow engine.
        errorMsg = f"Error finding SharePoint site: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
def _folderChildrenEndpoint(siteId: str, folderPath: str) -> str:
    """Return the Graph endpoint that lists the children of folderPath in the site's default drive."""
    if folderPath in ("/", "", "*"):
        # Root folder
        return f"sites/{siteId}/drive/root/children"
    if folderPath.startswith('01'):
        # Values starting with "01" are treated as drive item IDs (e.g. 01PPXICCB...)
        return f"sites/{siteId}/drive/items/{folderPath}/children"
    # Path addressing: drop the leading slash and URL-encode everything except
    # the path separators (spaces and special characters need encoding).
    folderPathEncoded = urllib.parse.quote(folderPath.lstrip('/'), safe='/')
    return f"sites/{siteId}/drive/root:/{folderPathEncoded}:/children"


def _describeDriveItem(
    item: Dict[str, Any],
    isFolder: bool,
    siteName: str,
    siteUrl: str,
    parentPath: Any = None,
    includeChildCount: bool = False,
) -> Dict[str, Any]:
    """Flatten a Graph drive item into the dict emitted in the listDocuments result.

    Keys are inserted in the same order as before so the serialized JSON is
    unchanged: "parentPath" (subfolder entries only) sits between "type" and
    "siteName"; "childCount" is only added for top-level folder entries.
    """
    info: Dict[str, Any] = {
        "id": item.get("id"),
        "name": item.get("name"),
        "size": item.get("size", 0),
        "createdDateTime": item.get("createdDateTime"),
        "lastModifiedDateTime": item.get("lastModifiedDateTime"),
        "webUrl": item.get("webUrl"),
        "type": "folder" if isFolder else "file",
    }
    if parentPath is not None:
        info["parentPath"] = parentPath
    info["siteName"] = siteName
    info["siteUrl"] = siteUrl

    # File-specific information
    if "file" in item:
        info["mimeType"] = item["file"].get("mimeType")
        info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")

    # Folder-specific information
    if includeChildCount and "folder" in item:
        info["childCount"] = item["folder"].get("childCount", 0)

    return info


async def _listFolderInSite(self, site: Dict[str, Any], folderPath: str, includeSubfolders: bool) -> Any:
    """List one folder in one site.

    Returns the per-site result dict, or None when the Graph call for the
    folder itself reports an error (the caller then skips this site).
    """
    siteId = site["id"]
    siteName = site["displayName"]
    siteUrl = site["webUrl"]

    logger.info(f"Listing folder {folderPath} in site: {siteName}")

    apiResult = await self.apiClient.makeGraphApiCall(_folderChildrenEndpoint(siteId, folderPath))
    if "error" in apiResult:
        logger.warning(f"Failed to list folder {folderPath} in site {siteName}: {apiResult['error']}")
        return None

    processedItems = [
        _describeDriveItem(
            item,
            self.services.sharepoint.detectFolderType(item),
            siteName,
            siteUrl,
            includeChildCount=True,
        )
        for item in apiResult.get("value", [])
    ]

    # Optionally append the contents of direct subfolders (one level, NO recursion).
    if includeSubfolders:
        maxSubfolders = 10  # Cap on Graph calls per listed folder
        folderItems = [entry for entry in processedItems if entry["type"] == "folder"]
        logger.info(f"Including subfolders - processing {len(folderItems)} folders")

        # folderItems is a snapshot taken before anything is appended, so the
        # entries added below are never expanded themselves.
        expandedFolders = folderItems[:maxSubfolders]
        for folderItem in expandedFolders:
            subfolderPath = f"{folderPath.rstrip('/')}/{folderItem['name']}"
            subfolderEndpoint = f"sites/{siteId}/drive/items/{folderItem['id']}/children"

            logger.debug(f"Getting contents of subfolder: {folderItem['name']}")
            subfolderResult = await self.apiClient.makeGraphApiCall(subfolderEndpoint)
            if "error" in subfolderResult:
                logger.warning(f"Failed to get contents of subfolder {folderItem['name']}: {subfolderResult.get('error')}")
                continue

            subfolderItems = subfolderResult.get("value", [])
            logger.debug(f"Found {len(subfolderItems)} items in subfolder {folderItem['name']}")
            for subfolderItem in subfolderItems:
                processedItems.append(
                    _describeDriveItem(
                        subfolderItem,
                        self.services.sharepoint.detectFolderType(subfolderItem),
                        siteName,
                        siteUrl,
                        parentPath=subfolderPath,
                    )
                )

        # Warn only when folders were actually left out (previously this also
        # fired for any trailing file once the limit had been reached).
        if len(folderItems) > maxSubfolders:
            logger.warning(f"Reached maximum subfolder limit ({maxSubfolders}), skipping remaining folders")

        logger.info(f"Processed {len(expandedFolders)} subfolders, total items: {len(processedItems)}")

    return {
        "siteName": siteName,
        "siteUrl": siteUrl,
        "itemCount": len(processedItems),
        "items": processedItems
    }


@action
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: List documents and folders in SharePoint paths across sites.
    - Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional).
    - Output format: JSON with folder items and metadata.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - documentList (list, required): Document list reference(s) containing findDocumentPath result.
    - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
    """
    # NOTE(review): the docstring above is kept verbatim because it may be
    # consumed by the action framework - verify before rewording. The code also
    # accepts an optional "pathQuery" parameter as an alternative to documentList.
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_list_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "List Documents",
            "SharePoint Listing",
            "Processing document list",
            parentOperationId=parameters.get('parentOperationId')
        )

        def fail(message: str) -> ActionResult:
            # Close the progress entry before reporting a handled failure.
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery", "*")
        if isinstance(documentList, str):
            documentList = [documentList]
        includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX

        # An empty or wildcard pathQuery counts as "not provided"
        hasPathQuery = bool(pathQuery and pathQuery.strip() not in ("", "*"))

        if not connectionReference:
            return fail("Connection reference is required")

        # Require either documentList or pathQuery
        if not documentList and not hasPathQuery:
            return fail("Either documentList or pathQuery is required")

        # Parse documentList to extract folder path and site information.
        # Skipped when only pathQuery was supplied, so the parser is never
        # handed None (readDocuments guards its parser call the same way).
        listQuery = None
        sites = None
        if documentList:
            listQuery, sites, _, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
            if errorMsg:
                return fail(errorMsg)

        # If no folder path found from documentList, use pathQuery if provided
        if not listQuery and hasPathQuery:
            listQuery = pathQuery
            logger.info(f"Using pathQuery for list query: {listQuery}")
            # Resolve sites from pathQuery
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

        # Validate required parameters
        if not listQuery:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")

        if not sites:
            return fail("Site information missing. Cannot determine target site for list operation.")

        # Get connection
        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
        logger.debug(f"Connection ID: {connection['id']}")

        self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path")

        # Parse listQuery; only the path component is used by this action.
        # pathQuery is rebound on purpose: the parsed value is what gets reported.
        pathQuery, _fileQuery, _searchType, _searchOptions = self.pathProcessing.parseSearchQuery(listQuery)

        if listQuery.startswith('01'):
            # Direct folder ID (e.g. 01PPXICCB...) - use it directly
            folderPaths = [listQuery]
            logger.info(f"Using direct folder ID: {listQuery}")
        else:
            # Remove site prefix from pathQuery before resolving (it's only for site filtering)
            # Microsoft-standard path: /sites/SiteName/Path -> /Path
            pathQueryForResolve = pathQuery
            if pathQuery.startswith('/sites/'):
                parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery)
                innerPath = parsedPath.get("innerPath", "") if parsedPath else ""
                pathQueryForResolve = '/' + innerPath if innerPath else '/'

            # In the Graph API /drive/root already points to the default document
            # library, so a leading library name must not be part of the path.
            pathSegments = [segment for segment in pathQueryForResolve.split('/') if segment.strip()]
            if len(pathSegments) > 1:
                # NOTE(review): the first segment is assumed to be a library name and
                # dropped unconditionally; the original path is only logged, there is
                # no fallback to it if the shortened path does not exist.
                originalPath = pathQueryForResolve
                pathQueryForResolve = '/' + '/'.join(pathSegments[1:])
                logger.info(f"Removed first path segment (potential library name), path changed from '{originalPath}' to '{pathQueryForResolve}'")
            elif len(pathSegments) == 1:
                # Only one segment - if it looks like a library name, use root
                firstSegmentLower = pathSegments[0].lower()
                libraryIndicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
                if any(indicator in firstSegmentLower for indicator in libraryIndicators):
                    pathQueryForResolve = '/'
                    logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")

            # Resolve path query into folder paths
            folderPaths = self.pathProcessing.resolvePathQuery(pathQueryForResolve)
            logger.info(f"Resolved folder paths: {folderPaths}")

        # Process each folder path across all sites
        listResults = []

        self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)")

        for folderPath in folderPaths:
            try:
                folderResults = []
                for site in sites:
                    siteResult = await _listFolderInSite(self, site, folderPath, includeSubfolders)
                    if siteResult is not None:
                        folderResults.append(siteResult)

                listResults.append({
                    "folderPath": folderPath,
                    "sitesProcessed": len(folderResults),
                    "siteResults": folderResults
                })

            except Exception as e:
                # One failing folder must not abort the remaining ones; record
                # the error for this folder and carry on.
                logger.error(f"Error listing folder {folderPath}: {str(e)}")
                listResults.append({
                    "folderPath": folderPath,
                    "error": str(e),
                    "sitesProcessed": 0,
                    "siteResults": []
                })

        # Create result data
        totalItems = sum(
            len(siteResult.get("items", []))
            for result in listResults
            for siteResult in result.get("siteResults", [])
        )

        resultData = {
            "listQuery": listQuery,
            "pathQuery": pathQuery,
            "totalItems": totalItems,
            "foldersProcessed": len(listResults),
            "listResults": listResults,
            "includeSubfolders": includeSubfolders,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s) in {len(listResults)} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.listDocuments",
            "listQuery": listQuery,
            "totalItems": totalItems,
            "foldersProcessed": len(listResults),
            "includeSubfolders": includeSubfolders
        }

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_list", "json", None, "listDocuments"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        logger.error(f"Error listing SharePoint documents: {str(e)}")
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Best effort: a failing progress log must not mask the real error.
                logger.debug("Could not close progress log for %s", operationId, exc_info=True)
        return ActionResult(
            success=False,
            error=str(e)
        )
def _resolveMimeType(fileName: Any, reportedMimeType: Any) -> str:
    """Return the MIME type reported by Graph, else guess it from the file extension."""
    import mimetypes  # local import: the module's import block is not touched by this change

    if reportedMimeType:
        return reportedMimeType
    # guess_type matches extensions case-insensitively and covers far more
    # types than the previous hard-coded .pdf/.txt/.json checks.
    guessedType, _encoding = mimetypes.guess_type(fileName or "")
    return guessedType or "application/octet-stream"


def _buildReadActionDocument(resultItem: Dict[str, Any], includeMetadata: bool) -> ActionDocument:
    """Convert one read result into an ActionDocument.

    - bytes content   -> Base64 string in documentData (contentType "binary")
    - other content   -> stored as text (contentType "text")
    - no content      -> JSON with the file's metadata (contentType "metadata_only")
    """
    fileContent = resultItem.get("content")
    fileName = resultItem.get("fileName", f"file_{resultItem.get('fileId')}")
    mimeType = _resolveMimeType(fileName, resultItem.get("mimeType"))

    isBinary = bool(fileContent) and isinstance(fileContent, (bytes, bytearray))
    if isBinary:
        # For binary files (PDFs, etc.), store Base64-encoded content directly
        contentType = "binary"
        documentData = base64.b64encode(fileContent).decode('utf-8')
    elif fileContent:
        # Text content - store directly in documentData
        contentType = "text"
        documentData = fileContent if isinstance(fileContent, str) else str(fileContent)
    else:
        # No content - store metadata only
        contentType = "metadata_only"
        docData = {
            "fileName": fileName,
            "sharepointFileId": resultItem.get("sharepointFileId"),
            "siteName": resultItem.get("siteName"),
            "siteUrl": resultItem.get("siteUrl"),
            "size": resultItem.get("size"),
            "createdDateTime": resultItem.get("createdDateTime"),
            "lastModifiedDateTime": resultItem.get("lastModifiedDateTime"),
            "webUrl": resultItem.get("webUrl")
        }
        if resultItem.get("metadata"):
            docData["metadata"] = resultItem["metadata"]
        documentData = json.dumps(docData, indent=2)

    # Key order is kept as before: "size" (binary only) precedes "includeMetadata".
    validationMetadata = {
        "actionType": "sharepoint.readDocuments",
        "fileName": fileName,
        "sharepointFileId": resultItem.get("sharepointFileId"),
        "siteName": resultItem.get("siteName"),
        "mimeType": mimeType,
        "contentType": contentType,
    }
    if isBinary:
        validationMetadata["size"] = len(fileContent)
    validationMetadata["includeMetadata"] = includeMetadata

    if isBinary:
        logger.info(f"Stored binary file {fileName} ({len(fileContent)} bytes) as Base64 in ActionDocument")

    return ActionDocument(
        documentName=fileName,
        documentData=documentData,
        mimeType=mimeType,
        validationMetadata=validationMetadata
    )


@action
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Read documents from SharePoint and extract content/metadata.
    - Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
    - Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
    - Binary files (PDFs, etc.) are Base64-encoded in documentData.
    - Text files are stored as plain text in documentData.
    - Returns ActionResult with documents list for template processing.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
    - pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
    - includeMetadata (bool, optional): Include metadata. Default: True.

    Returns:
    - ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
      - documentName: File name
      - documentData: Base64-encoded content (binary files) or plain text (text files)
      - mimeType: MIME type (e.g., application/pdf, text/plain)
    """
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_read_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Read Documents",
            "SharePoint Document Reading",
            "Processing document list",
            parentOperationId=parameters.get('parentOperationId')
        )

        def fail(message: str) -> ActionResult:
            # Close the progress entry before reporting a handled failure.
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        documentList = parameters.get("documentList")
        # Accept a single reference as well as a list, like listDocuments/uploadDocument
        if isinstance(documentList, str):
            documentList = [documentList]
        pathQuery = parameters.get("pathQuery", "*")
        connectionReference = parameters.get("connectionReference")
        includeMetadata = parameters.get("includeMetadata", True)

        # An empty or wildcard pathQuery counts as "not provided"
        hasPathQuery = bool(pathQuery and pathQuery.strip() not in ("", "*"))

        # Validate connection reference
        if not connectionReference:
            return fail("Connection reference is required")

        # Require either documentList or pathQuery
        if not documentList and not hasPathQuery:
            return fail("Either documentList or pathQuery is required")

        # Get connection first
        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        # Parse documentList to extract foundDocuments and site information
        sharePointFileIds = None
        sites = None

        if documentList:
            foundDocuments, sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList)
            if errorMsg:
                return fail(errorMsg)

            if foundDocuments:
                # Keep only file entries that actually carry an ID; an entry
                # without one would produce a ".../items/None" request.
                sharePointFileIds = [
                    doc.get("id")
                    for doc in foundDocuments
                    if doc.get("type") == "file" and doc.get("id")
                ]
                if not sharePointFileIds:
                    return fail("No files found in documentList from findDocumentPath result")
                logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from documentList")

        # If we have SharePoint file IDs from documentList (findDocumentPath result), read them directly
        if sharePointFileIds and sites:
            # NOTE(review): every file is read through the first site only; a
            # documentList spanning several sites would need per-file site IDs.
            site = sites[0]
            siteId = site['id']
            fileCount = len(sharePointFileIds)
            readResults = []

            self.services.chat.progressLogUpdate(operationId, 0.5, f"Reading {fileCount} file(s) from SharePoint")
            for idx, fileId in enumerate(sharePointFileIds):
                try:
                    self.services.chat.progressLogUpdate(operationId, 0.5 + (idx * 0.3 / fileCount), f"Reading file {idx + 1}/{fileCount}")

                    # Get file info from SharePoint
                    fileInfo = await self.apiClient.makeGraphApiCall(f"sites/{siteId}/drive/items/{fileId}")
                    if "error" in fileInfo:
                        logger.warning(f"Failed to get file info for {fileId}: {fileInfo['error']}")
                        continue

                    # Get file content using SharePoint service (handles binary data correctly)
                    fileName = fileInfo.get("name", f"file_{fileId}")
                    fileContent = await self.services.sharepoint.downloadFile(siteId, fileId)

                    resultItem = {
                        "fileId": fileId,
                        "fileName": fileName,
                        "sharepointFileId": fileId,
                        "siteName": site['displayName'],
                        "siteUrl": site['webUrl'],
                        "size": fileInfo.get("size", 0),
                        "createdDateTime": fileInfo.get("createdDateTime"),
                        "lastModifiedDateTime": fileInfo.get("lastModifiedDateTime"),
                        "webUrl": fileInfo.get("webUrl"),
                        # Always keep the Graph MIME type so the document's
                        # mimeType does not depend on includeMetadata.
                        "mimeType": fileInfo.get("file", {}).get("mimeType"),
                    }

                    # Add content if available
                    if fileContent:
                        resultItem["content"] = fileContent

                    # Add metadata if requested
                    if includeMetadata:
                        resultItem["metadata"] = {
                            "mimeType": fileInfo.get("file", {}).get("mimeType"),
                            "downloadUrl": fileInfo.get("@microsoft.graph.downloadUrl"),
                            "createdBy": fileInfo.get("createdBy", {}),
                            "lastModifiedBy": fileInfo.get("lastModifiedBy", {}),
                            "parentReference": fileInfo.get("parentReference", {})
                        }

                    readResults.append(resultItem)
                except Exception as e:
                    # One unreadable file must not abort the remaining ones.
                    logger.error(f"Error reading file {fileId}: {str(e)}")
                    continue

            if not readResults:
                return fail("No files could be read from documentList")

            # Convert read results to ActionDocument objects
            self.services.chat.progressLogUpdate(operationId, 0.8, f"Processing {len(readResults)} document(s)")
            actionDocuments = [
                _buildReadActionDocument(resultItem, includeMetadata)
                for resultItem in readResults
            ]

            # Return success with action documents
            self.services.chat.progressLogUpdate(operationId, 0.9, f"Read {len(actionDocuments)} document(s)")
            self.services.chat.progressLogFinish(operationId, True)
            return ActionResult.isSuccess(documents=actionDocuments)

        # If no sites from documentList, try pathQuery fallback
        if not sites and hasPathQuery:
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

        # If still no sites, return error
        if not sites:
            return fail("Either documentList must contain findDocumentPath result with file information, or pathQuery must be provided. Use findDocumentPath first to get file paths, or provide pathQuery directly.")

        # NOTE(review): this point IS reachable - whenever sites are known but no
        # file IDs were extracted, e.g. a pathQuery-only call. Reading files
        # straight from a pathQuery is not implemented here, so it fails.
        return fail("Unexpected error: could not process documentList or pathQuery")
    except Exception as e:
        logger.error(f"Error reading SharePoint documents: {str(e)}")
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Don't fail on progress logging errors
                logger.debug("Could not close progress log for %s", operationId, exc_info=True)
        return ActionResult(
            success=False,
            error=str(e)
        )
+ - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath). + """ + operationId = None + try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Upload Document", + "SharePoint Upload", + "Processing document list", + parentOperationId=parentOperationId + ) + + connectionReference = parameters.get("connectionReference") + documentList = parameters.get("documentList") + pathQuery = parameters.get("pathQuery") + if isinstance(documentList, str): + documentList = [documentList] + + if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Connection reference is required") + + if not documentList: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Document list is required") + + # Parse documentList to extract folder path and site information + uploadPath, sites, filesToUpload, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) + + # If no folder path found from documentList, use pathQuery if provided + if not uploadPath and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": + uploadPath = pathQuery + logger.info(f"Using pathQuery for upload path: {uploadPath}") + # Resolve sites from pathQuery + sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, 
False) + return ActionResult.isFailure(error=errorMsg) + + # Validate required parameters + if not uploadPath: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.") + + if not sites: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Site information missing. Cannot determine target site for upload.") + + if not filesToUpload: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No files to upload found in documentList.") + + # Get connection + self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection") + connection = self.connection.getMicrosoftConnection(connectionReference) + if not connection: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Process upload paths + uploadPaths = [] + if uploadPath.startswith('01PPXICCB') or uploadPath.startswith('01'): + # It's a folder ID - use it directly + uploadPaths = [uploadPath] + logger.info(f"Using folder ID directly for upload: {uploadPath}") + else: + # It's a path - resolve it normally + uploadPaths = self.pathProcessing.resolvePathQuery(uploadPath) + + # Process each document upload + uploadResults = [] + + # Extract file names from documents + fileNames = [doc.fileName for doc in filesToUpload] + logger.info(f"Using file names from documentList: {fileNames}") + + self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {len(filesToUpload)} document(s)") + + for i, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)): + try: + fileId = 
chatDocument.fileId + fileData = self.services.chat.getFileData(fileId) + + if not fileData: + logger.warning(f"File data not found for fileId: {fileId}") + uploadResults.append({ + "fileName": fileName, + "fileId": fileId, + "error": "File data not found", + "uploadStatus": "failed" + }) + continue + + # Upload to the first available site (or could be made configurable) + uploadSuccessful = False + + for site in sites: + siteId = site["id"] + siteName = site["displayName"] + siteUrl = site["webUrl"] + + # Use the first upload path or default to Documents + uploadPath = uploadPaths[0] if uploadPaths else "/Documents" + + # Handle wildcard paths - replace with default Documents folder + if uploadPath == "*": + uploadPath = "/Documents" + logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload") + + # Check if uploadPath is a folder ID or a regular path + if uploadPath.startswith('01PPXICCB') or uploadPath.startswith('01'): + # It's a folder ID - use the folder-specific upload endpoint + uploadEndpoint = f"sites/{siteId}/drive/items/{uploadPath}:/{fileName}:/content" + logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}") + else: + # It's a regular path - use the root-based upload endpoint + uploadPath = uploadPath.rstrip('/') + '/' + fileName + uploadPathClean = uploadPath.lstrip('/') + uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content" + logger.info(f"Using path-based upload endpoint: {uploadEndpoint}") + + # Upload endpoint for small files (< 4MB) + if len(fileData) < 4 * 1024 * 1024: # 4MB + + # Upload the file + uploadResult = await self.apiClient.makeGraphApiCall( + uploadEndpoint, + method="PUT", + data=fileData + ) + + if "error" not in uploadResult: + uploadResults.append({ + "fileName": fileName, + "fileId": fileId, + "uploadStatus": "success", + "siteName": siteName, + "siteUrl": siteUrl, + "uploadPath": uploadPath, + "uploadEndpoint": uploadEndpoint, + "sharepointFileId": 
uploadResult.get("id"), + "webUrl": uploadResult.get("webUrl"), + "size": uploadResult.get("size"), + "createdDateTime": uploadResult.get("createdDateTime") + }) + uploadSuccessful = True + break + else: + logger.warning(f"Upload failed to site {siteName}: {uploadResult['error']}") + else: + # For large files, we would need to implement resumable upload + logger.warning(f"File too large ({len(fileData)} bytes) for site {siteName}") + continue + + if not uploadSuccessful: + uploadResults.append({ + "fileName": fileName, + "fileId": fileId, + "error": f"File too large ({len(fileData)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).", + "uploadStatus": "failed" + }) + + except Exception as e: + logger.error(f"Error uploading document {fileName}: {str(e)}") + uploadResults.append({ + "fileName": fileName, + "fileId": fileId, + "error": str(e), + "uploadStatus": "failed" + }) + + # Update progress for each file + self.services.chat.progressLogUpdate(operationId, 0.5 + (i * 0.4 / len(filesToUpload)), f"Uploaded {i + 1}/{len(filesToUpload)} file(s)") + + # Create result data + resultData = { + "connectionReference": connectionReference, + "uploadPath": uploadPath, + "documentList": documentList, + "fileNames": fileNames, + "sitesAvailable": len(sites), + "uploadResults": uploadResults, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": self.services.utils.timestampGetUtc() + } + + # Use default JSON format for output + outputExtension = ".json" # Default + outputMimeType = "application/json" # Default + + validationMetadata = { + "actionType": "sharepoint.uploadDocument", + "connectionReference": connectionReference, + "uploadPath": uploadPath, + "fileNames": fileNames, + "uploadCount": len(uploadResults), + "successfulUploads": len([r for r in uploadResults if r.get("uploadStatus") == "success"]), + "failedUploads": len([r for r in 
uploadResults if r.get("uploadStatus") == "failed"]) + } + + successfulUploads = len([r for r in uploadResults if r.get("uploadStatus") == "success"]) + self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)") + self.services.chat.progressLogFinish(operationId, successfulUploads > 0) + + return ActionResult( + success=True, + documents=[ + ActionDocument( + documentName=self._generateMeaningfulFileName("sharepoint_upload", "json", None, "uploadDocument"), + documentData=json.dumps(resultData, indent=2), + mimeType=outputMimeType, + validationMetadata=validationMetadata + ) + ] + ) + + except Exception as e: + logger.error(f"Error uploading to SharePoint: {str(e)}") + if operationId: + try: + self.services.chat.progressLogFinish(operationId, False) + except: + pass + return ActionResult( + success=False, + error=str(e) + ) + diff --git a/modules/workflows/methods/methodSharepoint/actions/uploadFile.py b/modules/workflows/methods/methodSharepoint/actions/uploadFile.py new file mode 100644 index 00000000..3d8a9499 --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/actions/uploadFile.py @@ -0,0 +1,145 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Upload File action for SharePoint operations. +Uploads raw file content (bytes) to SharePoint. +""" + +import logging +import json +import base64 +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument + +logger = logging.getLogger(__name__) + +@action +async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Upload raw file content (bytes) to SharePoint. + + Parameters: + - connectionReference (str, required): Microsoft connection label. 
+ - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info + - folderPath (str, required): Folder path relative to site root + - fileName (str, required): File name + - content (str, required): Document reference containing file content as base64-encoded bytes + + Returns: + - ActionResult with ActionDocument containing upload result + """ + try: + connectionReference = parameters.get("connectionReference") + if not connectionReference: + return ActionResult.isFailure(error="connectionReference parameter is required") + + siteIdParam = parameters.get("siteId") + if not siteIdParam: + return ActionResult.isFailure(error="siteId parameter is required") + + folderPath = parameters.get("folderPath") + if not folderPath: + return ActionResult.isFailure(error="folderPath parameter is required") + + fileName = parameters.get("fileName") + if not fileName: + return ActionResult.isFailure(error="fileName parameter is required") + + contentParam = parameters.get("content") + if not contentParam: + return ActionResult.isFailure(error="content parameter is required") + + # Extract siteId from document if it's a reference + siteId = None + if isinstance(siteIdParam, str): + from modules.datamodels.datamodelDocref import DocumentReferenceList + try: + docList = DocumentReferenceList.from_string_list([siteIdParam]) + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList) + if chatDocuments and len(chatDocuments) > 0: + siteInfoJson = json.loads(chatDocuments[0].documentData) + siteId = siteInfoJson.get("id") + except: + pass + + if not siteId: + siteId = siteIdParam + else: + siteId = siteIdParam + + if not siteId: + return ActionResult.isFailure(error="Could not extract siteId from parameter") + + # Get file content from document + from modules.datamodels.datamodelDocref import DocumentReferenceList + docList = DocumentReferenceList.from_string_list([contentParam] if isinstance(contentParam, str) 
else contentParam) + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList) + if not chatDocuments or len(chatDocuments) == 0: + return ActionResult.isFailure(error="Could not get file content from document reference") + + fileContentBase64 = chatDocuments[0].documentData + + # Decode base64 + try: + fileContent = base64.b64decode(fileContentBase64) + except Exception as e: + return ActionResult.isFailure(error=f"Could not decode base64 file content: {str(e)}") + + # Get Microsoft connection + connection = self.connection.getMicrosoftConnection(connectionReference) + if not connection: + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Upload file + uploadResult = await self.services.sharepoint.uploadFile( + siteId=siteId, + folderPath=folderPath, + fileName=fileName, + content=fileContent + ) + + if "error" in uploadResult: + return ActionResult.isFailure(error=f"Upload failed: {uploadResult['error']}") + + logger.info(f"Uploaded file to SharePoint: {folderPath}/{fileName} ({len(fileContent)} bytes)") + + # Generate filename + workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None + filename = self._generateMeaningfulFileName( + "file_upload_result", + "json", + workflowContext, + "uploadFile" + ) + + result = { + "success": True, + "siteId": siteId, + "filePath": f"{folderPath}/{fileName}", + "fileSize": len(fileContent), + "uploadResult": uploadResult + } + + validationMetadata = self._createValidationMetadata( + "uploadFile", + siteId=siteId, + filePath=f"{folderPath}/{fileName}", + fileSize=len(fileContent) + ) + + document = ActionDocument( + documentName=filename, + documentData=json.dumps(result, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + ) + + return ActionResult.isSuccess(documents=[document]) + + except Exception as e: + errorMsg = f"Error uploading file to SharePoint: 
{str(e)}" + logger.error(errorMsg) + return ActionResult.isFailure(error=errorMsg) + diff --git a/modules/workflows/methods/methodSharepoint/helpers/__init__.py b/modules/workflows/methods/methodSharepoint/helpers/__init__.py new file mode 100644 index 00000000..cc1293b3 --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/helpers/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +"""Helper modules for SharePoint method operations.""" + diff --git a/modules/workflows/methods/methodSharepoint/helpers/apiClient.py b/modules/workflows/methods/methodSharepoint/helpers/apiClient.py new file mode 100644 index 00000000..7cead7ef --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/helpers/apiClient.py @@ -0,0 +1,102 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +API Client helper for SharePoint operations. +Handles Microsoft Graph API calls with timeout and error handling. +""" + +import logging +import aiohttp +import asyncio +from typing import Dict, Any + +logger = logging.getLogger(__name__) + +class ApiClientHelper: + """Helper for Microsoft Graph API calls""" + + def __init__(self, methodInstance): + """ + Initialize API client helper. + + Args: + methodInstance: Instance of MethodSharepoint (for access to services) + """ + self.method = methodInstance + self.services = methodInstance.services + + async def makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]: + """ + Make a Microsoft Graph API call with timeout and detailed logging. 
+ + Args: + endpoint: API endpoint (without base URL) + method: HTTP method (GET, POST, PUT) + data: Optional request body data (bytes) + + Returns: + Dict with API response or error information + """ + try: + if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.accessToken: + return {"error": "SharePoint service not configured with access token"} + + headers = { + "Authorization": f"Bearer {self.services.sharepoint._target.accessToken}", + "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json" + } + + url = f"https://graph.microsoft.com/v1.0/{endpoint}" + logger.info(f"Making Graph API call: {method} {url}") + + # Set timeout to 30 seconds + timeout = aiohttp.ClientTimeout(total=30) + + async with aiohttp.ClientSession(timeout=timeout) as session: + if method == "GET": + logger.debug(f"Starting GET request to {url}") + async with session.get(url, headers=headers) as response: + logger.info(f"Graph API response: {response.status}") + if response.status == 200: + result = await response.json() + logger.debug(f"Graph API success: {len(str(result))} characters response") + return result + else: + errorText = await response.text() + logger.error(f"Graph API call failed: {response.status} - {errorText}") + return {"error": f"API call failed: {response.status} - {errorText}"} + + elif method == "PUT": + logger.debug(f"Starting PUT request to {url}") + async with session.put(url, headers=headers, data=data) as response: + logger.info(f"Graph API response: {response.status}") + if response.status in [200, 201]: + result = await response.json() + logger.debug(f"Graph API success: {len(str(result))} characters response") + return result + else: + errorText = await response.text() + logger.error(f"Graph API call failed: {response.status} - {errorText}") + return {"error": f"API call failed: {response.status} - {errorText}"} + + elif method == "POST": + 
logger.debug(f"Starting POST request to {url}") + async with session.post(url, headers=headers, data=data) as response: + logger.info(f"Graph API response: {response.status}") + if response.status in [200, 201]: + result = await response.json() + logger.debug(f"Graph API success: {len(str(result))} characters response") + return result + else: + errorText = await response.text() + logger.error(f"Graph API call failed: {response.status} - {errorText}") + return {"error": f"API call failed: {response.status} - {errorText}"} + + except asyncio.TimeoutError: + logger.error(f"Graph API call timed out after 30 seconds: {endpoint}") + return {"error": f"API call timed out after 30 seconds: {endpoint}"} + except Exception as e: + logger.error(f"Error making Graph API call: {str(e)}") + return {"error": f"Error making Graph API call: {str(e)}"} + diff --git a/modules/workflows/methods/methodSharepoint/helpers/connection.py b/modules/workflows/methods/methodSharepoint/helpers/connection.py new file mode 100644 index 00000000..3c2ce16d --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/helpers/connection.py @@ -0,0 +1,67 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Connection helper for SharePoint operations. +Handles Microsoft connection management and SharePoint service configuration. +""" + +import logging +from typing import Dict, Any, Optional + +logger = logging.getLogger(__name__) + +class ConnectionHelper: + """Helper for Microsoft connection management in SharePoint operations""" + + def __init__(self, methodInstance): + """ + Initialize connection helper. + + Args: + methodInstance: Instance of MethodSharepoint (for access to services) + """ + self.method = methodInstance + self.services = methodInstance.services + + def getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: + """ + Get Microsoft connection from connection reference and configure SharePoint service. 
+ + Args: + connectionReference: Connection reference string + + Returns: + Dict with connection info or None if failed + """ + try: + userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference) + if not userConnection: + logger.warning(f"No user connection found for reference: {connectionReference}") + return None + + if userConnection.authority.value != "msft": + logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})") + return None + + # Check if connection is active or pending (pending means OAuth in progress) + if userConnection.status.value not in ["active", "pending"]: + logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}") + return None + + # Configure SharePoint service with the UserConnection + if not self.services.sharepoint.setAccessTokenFromConnection(userConnection): + logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}") + return None + + logger.info(f"Successfully configured SharePoint service with Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}") + + return { + "id": userConnection.id, + "userConnection": userConnection, + "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"] # SharePoint scopes + } + except Exception as e: + logger.error(f"Error getting Microsoft connection: {str(e)}") + return None + diff --git a/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py b/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py new file mode 100644 index 00000000..138e2ea3 --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py @@ -0,0 +1,252 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Document Parsing helper for SharePoint operations. 
+Handles parsing of document lists and extracting found documents and site information. +""" + +import logging +import json +from typing import Dict, Any, List, Optional + +logger = logging.getLogger(__name__) + +class DocumentParsingHelper: + """Helper for parsing document lists and extracting document information""" + + def __init__(self, methodInstance): + """ + Initialize document parsing helper. + + Args: + methodInstance: Instance of MethodSharepoint (for access to services) + """ + self.method = methodInstance + self.services = methodInstance.services + + async def parseDocumentListForFoundDocuments(self, documentList: Any) -> tuple[Optional[List[Dict[str, Any]]], Optional[List[Dict[str, Any]]], Optional[str]]: + """ + Parse documentList to extract foundDocuments and site information. + + Parameters: + documentList: Document list (can be list, DocumentReferenceList, or string) + + Returns: + tuple: (foundDocuments, sites, errorMessage) + - foundDocuments: List of found documents from findDocumentPath result + - sites: List of site dictionaries with id, displayName, webUrl + - errorMessage: Error message if parsing failed, None otherwise + """ + try: + if isinstance(documentList, str): + documentList = [documentList] + + # Resolve documentList to get actual documents + from modules.datamodels.datamodelDocref import DocumentReferenceList + if isinstance(documentList, DocumentReferenceList): + docRefList = documentList + elif isinstance(documentList, list): + docRefList = DocumentReferenceList.from_string_list(documentList) + else: + docRefList = DocumentReferenceList(references=[]) + + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) + if not chatDocuments: + return None, None, "No documents found for the provided document list" + + firstDocument = chatDocuments[0] + fileData = self.services.chat.getFileData(firstDocument.fileId) + if not fileData: + return None, None, None # No fileData, but not an error (might be regular file) 
+ + try: + resultData = json.loads(fileData) + foundDocuments = resultData.get("foundDocuments", []) + + # If no foundDocuments, check if it's a listDocuments result (has listResults) + if not foundDocuments and "listResults" in resultData: + logger.info(f"documentList contains listResults from listDocuments, converting to foundDocuments format") + listResults = resultData.get("listResults", []) + foundDocuments = [] + siteIdFromList = None + siteNameFromList = None + + for listResult in listResults: + siteResults = listResult.get("siteResults", []) + for siteResult in siteResults: + items = siteResult.get("items", []) + # Extract site info from first item if available + if items and not siteIdFromList: + siteNameFromList = items[0].get("siteName") + + for item in items: + # Convert listDocuments item format to foundDocuments format + if item.get("type") == "file": + foundDoc = { + "id": item.get("id"), + "name": item.get("name"), + "type": "file", + "siteName": item.get("siteName"), + "siteId": None, # Will be determined from site discovery + "webUrl": item.get("webUrl"), + "fullPath": item.get("webUrl", ""), + "parentPath": item.get("parentPath", "") + } + foundDocuments.append(foundDoc) + + # Discover sites to get siteId if we have siteName + if foundDocuments and siteNameFromList and not siteIdFromList: + logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'") + allSites = await self.method.siteDiscovery.discoverSharePointSites() + matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteNameFromList) + if matchingSites: + siteIdFromList = matchingSites[0].get("id") + # Update all foundDocuments with siteId + for doc in foundDocuments: + doc["siteId"] = siteIdFromList + logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'") + + logger.info(f"Converted {len(foundDocuments)} files from listResults format") + + if not foundDocuments: + return None, None, None # No foundDocuments, but not an error + + # 
Extract site information from foundDocuments + firstDoc = foundDocuments[0] + siteName = firstDoc.get("siteName") + siteId = firstDoc.get("siteId") + + # If siteId is missing (from listDocuments conversion), discover sites to find it + if siteName and not siteId: + logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'") + allSites = await self.method.siteDiscovery.discoverSharePointSites() + matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName) + if matchingSites: + siteId = matchingSites[0].get("id") + logger.info(f"Found siteId '{siteId}' for site '{siteName}'") + + sites = None + if siteName and siteId: + sites = [{ + "id": siteId, + "displayName": siteName, + "webUrl": firstDoc.get("webUrl", "") + }] + logger.info(f"Using specific site from documentList: {siteName} (ID: {siteId})") + elif siteName: + # Try to get site by name + allSites = await self.method.siteDiscovery.discoverSharePointSites() + matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName) + if matchingSites: + sites = [{ + "id": matchingSites[0].get("id"), + "displayName": siteName, + "webUrl": matchingSites[0].get("webUrl", "") + }] + logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})") + else: + return None, None, f"Site '{siteName}' not found. Cannot determine target site." + else: + return None, None, "Site information missing from documentList. Cannot determine target site." 
+ + return foundDocuments, sites, None + + except json.JSONDecodeError as e: + return None, None, f"Invalid JSON in documentList: {str(e)}" + except Exception as e: + return None, None, f"Error processing documentList: {str(e)}" + + except Exception as e: + logger.error(f"Error parsing documentList: {str(e)}") + return None, None, f"Error parsing documentList: {str(e)}" + + async def parseDocumentListForFolder(self, documentList: Any) -> tuple[Optional[str], Optional[List[Dict[str, Any]]], Optional[List], Optional[str]]: + """ + Parse documentList to extract folder path, site information, and files to upload. + + Parameters: + documentList: Document list (can be list, DocumentReferenceList, or string) + + Returns: + tuple: (folderPath, sites, filesToUpload, errorMessage) + - folderPath: Folder path from findDocumentPath result (or None) + - sites: List of site dictionaries with id, displayName, webUrl + - filesToUpload: List of ChatDocument objects to upload (or None) + - errorMessage: Error message if parsing failed, None otherwise + """ + try: + if isinstance(documentList, str): + documentList = [documentList] + + # Resolve documentList to get actual documents + from modules.datamodels.datamodelDocref import DocumentReferenceList + if isinstance(documentList, DocumentReferenceList): + docRefList = documentList + elif isinstance(documentList, list): + docRefList = DocumentReferenceList.from_string_list(documentList) + else: + docRefList = DocumentReferenceList(references=[]) + + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) + if not chatDocuments: + return None, None, None, "No documents found for the provided document list" + + # Check if first document is a findDocumentPath result (has foundDocuments) + firstDocument = chatDocuments[0] + fileData = self.services.chat.getFileData(firstDocument.fileId) + + folderPath = None + sites = None + filesToUpload = None + + if fileData: + try: + resultData = json.loads(fileData) + 
foundDocuments = resultData.get("foundDocuments", []) + + if foundDocuments: + # Extract folder path from first found document + firstDoc = foundDocuments[0] + parentPath = firstDoc.get("parentPath", "") + if parentPath: + folderPath = parentPath + + # Extract site information + siteName = firstDoc.get("siteName") + siteId = firstDoc.get("siteId") + + if siteName and siteId: + sites = [{ + "id": siteId, + "displayName": siteName, + "webUrl": firstDoc.get("webUrl", "") + }] + elif siteName: + # Discover sites to find siteId + allSites = await self.method.siteDiscovery.discoverSharePointSites() + matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName) + if matchingSites: + sites = [{ + "id": matchingSites[0].get("id"), + "displayName": siteName, + "webUrl": matchingSites[0].get("webUrl", "") + }] + + # For uploadDocument: filesToUpload are the chatDocuments themselves + # (they contain the files to upload) + filesToUpload = chatDocuments + + except json.JSONDecodeError: + # Not a findDocumentPath result - treat as regular files to upload + filesToUpload = chatDocuments + else: + # No fileData - treat as regular files to upload + filesToUpload = chatDocuments + + return folderPath, sites, filesToUpload, None + + except Exception as e: + logger.error(f"Error parsing documentList for folder: {str(e)}") + return None, None, None, f"Error parsing documentList for folder: {str(e)}" + diff --git a/modules/workflows/methods/methodSharepoint/helpers/pathProcessing.py b/modules/workflows/methods/methodSharepoint/helpers/pathProcessing.py new file mode 100644 index 00000000..3e1a94f1 --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/helpers/pathProcessing.py @@ -0,0 +1,338 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Path Processing helper for SharePoint operations. +Handles search query parsing, path resolution, and query cleaning. 
+""" + +import logging +import re +from typing import List, Optional, Dict, Any + +logger = logging.getLogger(__name__) + +class PathProcessingHelper: + """Helper for path and query processing""" + + def __init__(self, methodInstance): + """ + Initialize path processing helper. + + Args: + methodInstance: Instance of MethodSharepoint (for access to services) + """ + self.method = methodInstance + self.services = methodInstance.services + + def parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]: + """ + Parse searchQuery to extract path, search terms, search type, and search options. + + CRITICAL: NEVER convert words to paths! Words stay as search terms. + - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson") + - "root, gose" → fileQuery="root, gose" (NOT "/root/gose") + - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject" + + Parameters: + searchQuery (str): Enhanced search query with options: + - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={} + - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={} + - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={} + - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={} + - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={} + - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={} + - "exact:\"Operations 2025\"" -> exact phrase matching + - "regex:^Operations.*2025$" -> regex pattern matching + - "case:DELTA" -> case-sensitive search + - "and:DELTA AND 2025 Mars AND Group" -> all AND terms must be present + + Returns: + tuple[str, str, str, dict]: (pathQuery, fileQuery, searchType, searchOptions) + """ + try: + if not searchQuery or not searchQuery.strip() or searchQuery.strip() == "*": + return "*", "*", "all", 
{} + + searchQuery = searchQuery.strip() + searchOptions = {} + + # CRITICAL: Do NOT convert space-separated or comma-separated words to paths! + # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson" + # "root, gose" should stay as "root, gose", NOT "/root/gose" + + # Check for search type specification (files:, folders:, all:) FIRST + searchType = "all" # Default + if searchQuery.startswith(("files:", "folders:", "all:")): + typeParts = searchQuery.split(':', 1) + searchType = typeParts[0].strip() + searchQuery = typeParts[1].strip() + + # Extract optional site hint tokens: support "site=Name" or leading "site:Name" + def _extractSiteHint(q: str) -> tuple[str, Optional[str]]: + try: + qStrip = q.strip() + # Leading form: site:KM LayerFinance ... + if qStrip.lower().startswith("site:"): + after = qStrip[5:].lstrip() + # site name until next space or end + if ' ' in after: + siteName, rest = after.split(' ', 1) + else: + siteName, rest = after, '' + return rest.strip(), siteName.strip() + # Inline key=value form anywhere + m = re.search(r"\bsite=([^;\s]+)", qStrip, flags=re.IGNORECASE) + if m: + siteName = m.group(1).strip() + # remove the token from query + qNew = re.sub(r"\bsite=[^;\s]+;?", "", qStrip, flags=re.IGNORECASE).strip() + return qNew, siteName + except Exception: + pass + return q, None + + searchQuery, extractedSite = _extractSiteHint(searchQuery) + if extractedSite: + searchOptions["site_hint"] = extractedSite + logger.info(f"Extracted site hint: '{extractedSite}'") + + # Extract name="..." 
if present (for quoted multi-word names) + nameMatch = re.search(r"name=\"([^\"]+)\"", searchQuery) + if nameMatch: + searchQuery = nameMatch.group(1) + logger.info(f"Extracted name from quotes: '{searchQuery}'") + + # Check for search mode specification (exact:, regex:, case:, and:) + if searchQuery.startswith(("exact:", "regex:", "case:", "and:")): + modeParts = searchQuery.split(':', 1) + mode = modeParts[0].strip() + searchQuery = modeParts[1].strip() + + if mode == "exact": + searchOptions["exact_match"] = True + # Remove quotes if present + if searchQuery.startswith('"') and searchQuery.endswith('"'): + searchQuery = searchQuery[1:-1] + elif mode == "regex": + searchOptions["regex_match"] = True + elif mode == "case": + searchOptions["case_sensitive"] = True + elif mode == "and": + searchOptions["and_terms"] = True + + # Check if it contains path:search format + # Microsoft-standard paths: /sites/SiteName/Path:files:.pdf + if ':' in searchQuery: + # For Microsoft-standard paths (/sites/...), find the colon that separates path from search + if searchQuery.startswith('/sites/'): + # Find the colon that separates path from search (after the full path) + # Look for pattern: /sites/SiteName/Path/...:files:.pdf + # We need to find the colon that's followed by search type or file extension + colonPositions = [] + for i, char in enumerate(searchQuery): + if char == ':': + colonPositions.append(i) + + # If we have colons, find the one that's followed by search type or file extension + splitPos = None + if colonPositions: + for pos in colonPositions: + afterColon = searchQuery[pos+1:pos+10].strip().lower() + # Check if this colon is followed by search type or looks like a file extension + if afterColon.startswith(('files:', 'folders:', 'all:', '.')) or afterColon == '': + splitPos = pos + break + + # If no clear split found, use the last colon + if splitPos is None and colonPositions: + splitPos = colonPositions[-1] + + if splitPos: + pathPart = 
searchQuery[:splitPos].strip() + searchPart = searchQuery[splitPos+1:].strip() + else: + # Fallback: split on first colon + parts = searchQuery.split(':', 1) + pathPart = parts[0].strip() + searchPart = parts[1].strip() + else: + # Regular path:search format - split on first colon + parts = searchQuery.split(':', 1) + pathPart = parts[0].strip() + searchPart = parts[1].strip() + + # Check if searchPart starts with search type (files:, folders:, all:) + if searchPart.startswith(("files:", "folders:", "all:")): + typeParts = searchPart.split(':', 1) + searchType = typeParts[0].strip() # Update searchType + searchPart = typeParts[1].strip() if len(typeParts) > 1 else "" + + # Handle path part + if not pathPart or pathPart == "*": + pathQuery = "*" + elif pathPart.startswith('/'): + pathQuery = pathPart + else: + pathQuery = f"/Documents/{pathPart}" + + # Handle search part + if not searchPart or searchPart == "*": + fileQuery = "*" + else: + fileQuery = searchPart + + return pathQuery, fileQuery, searchType, searchOptions + + # No colon - check if it looks like a path + elif searchQuery.startswith('/'): + # It's a path only + return searchQuery, "*", searchType, searchOptions + + else: + # It's a search term only - keep words as-is, do NOT convert to paths + # "root document lesson" stays as "root document lesson" + # "root, gose" stays as "root, gose" + return "*", searchQuery, searchType, searchOptions + + except Exception as e: + logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}") + raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}") + + def resolvePathQuery(self, pathQuery: str) -> List[str]: + """ + Resolve pathQuery into a list of search paths for SharePoint operations. 
+ + Parameters: + pathQuery (str): Query string that can contain: + - Direct paths (e.g., "/Documents/Project1") + - Wildcards (e.g., "/Documents/*") + - Multiple paths separated by semicolons (e.g., "/Docs; /Files") + - Single word relative paths (e.g., "Project1" -> resolved to default folder) + - Empty string or "*" for global search + - Space-separated words are treated as search terms, NOT folder paths + + Returns: + List[str]: List of resolved paths + """ + try: + if not pathQuery or not pathQuery.strip() or pathQuery.strip() == "*": + return ["*"] # Global search across all sites + + # Split by semicolon to handle multiple paths + rawPaths = [path.strip() for path in pathQuery.split(';') if path.strip()] + resolvedPaths = [] + + for rawPath in rawPaths: + # Handle wildcards - return as-is + if '*' in rawPath: + resolvedPaths.append(rawPath) + # Handle absolute paths + elif rawPath.startswith('/'): + resolvedPaths.append(rawPath) + # Handle single word relative paths - prepend default folder + # BUT NOT space-separated words (those are search terms, not paths) + elif ' ' not in rawPath: + resolvedPaths.append(f"/Documents/{rawPath}") + else: + # Check if this looks like a path (has path separators) or search terms + if '\\' in rawPath or '/' in rawPath: + # This looks like a path with spaces in folder names - treat as valid path + resolvedPaths.append(rawPath) + logger.info(f"Path with spaces '{rawPath}' treated as valid folder path") + else: + # Space-separated words without path separators are search terms + # Return as "*" to search globally + logger.info(f"Space-separated words '{rawPath}' treated as search terms, not folder path") + resolvedPaths.append("*") + + # Remove duplicates while preserving order + seen = set() + uniquePaths = [] + for path in resolvedPaths: + if path not in seen: + seen.add(path) + uniquePaths.append(path) + + logger.info(f"Resolved pathQuery '{pathQuery}' to {len(uniquePaths)} paths: {uniquePaths}") + return uniquePaths + + 
except Exception as e: + logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}") + raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}") + + def cleanSearchQuery(self, query: str) -> str: + """ + Clean search query to make it compatible with Graph API KQL syntax. + Removes path-like syntax and invalid KQL constructs. + + Parameters: + query (str): Raw search query that may contain paths and invalid syntax + + Returns: + str: Cleaned query suitable for Graph API search endpoint + """ + if not query or not query.strip(): + return "" + + query = query.strip() + + # Handle patterns like: "Company Share/Freigegebene Dokumente/.../expenses:files:.pdf" + # Extract the search term and file extension + + # First, extract file extension if present (format: :files:.pdf or just .pdf at the end) + fileExtension = "" + if ':files:' in query.lower() or ':folders:' in query.lower(): + # Extract extension after the type filter + extMatch = re.search(r':(?:files|folders):(\.\w+)', query, re.IGNORECASE) + if extMatch: + fileExtension = extMatch.group(1) + # Remove the type filter part + query = re.sub(r':(?:files|folders):\.?\w*', '', query, flags=re.IGNORECASE) + elif query.endswith(('.pdf', '.doc', '.docx', '.xls', '.xlsx', '.txt', '.csv', '.ppt', '.pptx')): + # Extract extension from end + extMatch = re.search(r'(\.\w+)$', query) + if extMatch: + fileExtension = extMatch.group(1) + query = query[:-len(fileExtension)] + + # Extract search term: get the last segment after the last slash (filename part) + queryNormalized = query.replace('\\', '/') + if '/' in queryNormalized: + # Extract the last segment (usually the filename/search term) + lastSegment = queryNormalized.split('/')[-1] + # Remove any remaining colons or type filters + if ':' in lastSegment: + lastSegment = lastSegment.split(':')[0] + searchTerm = lastSegment.strip() + else: + # No path separators, use the query as-is but remove type filters + if ':' in query: + searchTerm = 
query.split(':')[0].strip() + else: + searchTerm = query.strip() + + # Remove any remaining type filters or invalid syntax + searchTerm = re.sub(r':(?:files|folders|all):?', '', searchTerm, flags=re.IGNORECASE) + searchTerm = searchTerm.strip() + + # If we have a file extension, include it in the search term + # Note: Graph API search endpoint may not support filetype: syntax + # So we include the extension as part of the search term or filter results after + if fileExtension: + extWithoutDot = fileExtension.lstrip('.') + # Try simple approach: add extension as search term + # If this doesn't work, we'll filter results after search + if searchTerm: + # Include extension in search - Graph API will search in filename + searchTerm = f"{searchTerm} {extWithoutDot}" + else: + searchTerm = extWithoutDot + + # Final cleanup: remove any remaining invalid characters for KQL + # Keep alphanumeric, spaces, hyphens, underscores, dots, and common search operators + searchTerm = re.sub(r'[^\w\s\-\.\*]', ' ', searchTerm) + searchTerm = ' '.join(searchTerm.split()) # Normalize whitespace + + return searchTerm if searchTerm else "*" + diff --git a/modules/workflows/methods/methodSharepoint/helpers/siteDiscovery.py b/modules/workflows/methods/methodSharepoint/helpers/siteDiscovery.py new file mode 100644 index 00000000..f59de8f7 --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/helpers/siteDiscovery.py @@ -0,0 +1,173 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Site Discovery helper for SharePoint operations. +Handles SharePoint site discovery, filtering, and resolution. +""" + +import logging +import urllib.parse +from typing import Dict, Any, List, Optional + +logger = logging.getLogger(__name__) + +class SiteDiscoveryHelper: + """Helper for SharePoint site discovery and resolution""" + + def __init__(self, methodInstance): + """ + Initialize site discovery helper. 
+ + Args: + methodInstance: Instance of MethodSharepoint (for access to services) + """ + self.method = methodInstance + self.services = methodInstance.services + + async def discoverSharePointSites(self, limit: Optional[int] = None) -> List[Dict[str, Any]]: + """ + Discover SharePoint sites accessible to the user via Microsoft Graph API. + + Args: + limit: Optional limit on number of sites to return + + Returns: + List of site information dictionaries + """ + try: + # Query Microsoft Graph to get sites the user has access to + endpoint = "sites?search=*" + if limit: + endpoint += f"&$top={limit}" + + result = await self.method.apiClient.makeGraphApiCall(endpoint) + + if "error" in result: + logger.error(f"Error discovering SharePoint sites: {result['error']}") + return [] + + sites = result.get("value", []) + if limit: + sites = sites[:limit] + + logger.info(f"Discovered {len(sites)} SharePoint sites" + (f" (limited to {limit})" if limit else "")) + + # Process and return site information + processedSites = [] + for site in sites: + siteInfo = { + "id": site.get("id"), + "displayName": site.get("displayName"), + "name": site.get("name"), + "webUrl": site.get("webUrl"), + "description": site.get("description"), + "createdDateTime": site.get("createdDateTime"), + "lastModifiedDateTime": site.get("lastModifiedDateTime") + } + processedSites.append(siteInfo) + logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}") + + return processedSites + + except Exception as e: + logger.error(f"Error discovering SharePoint sites: {str(e)}") + return [] + + def extractHostnameFromWebUrl(self, webUrl: str) -> Optional[str]: + """Extract hostname from SharePoint webUrl (e.g., https://pcuster.sharepoint.com)""" + try: + if not webUrl: + return None + parsed = urllib.parse.urlparse(webUrl) + return parsed.hostname + except Exception as e: + logger.error(f"Error extracting hostname from webUrl '{webUrl}': {str(e)}") + return None + + def 
extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]: + """ + Extract site name from Microsoft-standard server-relative path. + Delegates to SharePoint service. + """ + return self.services.sharepoint.extractSiteFromStandardPath(pathQuery) + + async def getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]: + """ + Get SharePoint site directly by Microsoft-standard path. + Delegates to SharePoint service. + """ + return await self.services.sharepoint.getSiteByStandardPath(sitePath) + + def filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]: + """ + Filter discovered sites by a human-entered site hint. + Delegates to SharePoint service. + """ + return self.services.sharepoint.filterSitesByHint(sites, siteHint) + + async def getSiteId(self, hostname: str, sitePath: str) -> str: + """ + Get SharePoint site ID from hostname and site path. + + Args: + hostname: SharePoint hostname + sitePath: Site path + + Returns: + Site ID string + """ + try: + endpoint = f"sites/{hostname}:/{sitePath}" + result = await self.method.apiClient.makeGraphApiCall(endpoint) + + if "error" in result: + logger.error(f"Error getting site ID: {result['error']}") + return "" + + return result.get("id", "") + except Exception as e: + logger.error(f"Error getting site ID: {str(e)}") + return "" + + async def resolveSitesFromPathQuery(self, pathQuery: str) -> tuple[List[Dict[str, Any]], Optional[str]]: + """ + Resolve sites from pathQuery using SharePoint service helper methods. 
+ + Args: + pathQuery: Path query string + + Returns: + Tuple of (sites list, error message) + """ + try: + # Validate pathQuery format + isValid, errorMsg = self.services.sharepoint.validatePathQuery(pathQuery) + if not isValid: + return [], errorMsg + + # Resolve sites using service helper + sites = await self.services.sharepoint.resolveSitesFromPathQuery(pathQuery) + if not sites: + return [], "No SharePoint sites found or accessible" + + return sites, None + except Exception as e: + logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}") + return [], f"Error resolving sites from pathQuery: {str(e)}" + + def parseSiteUrl(self, siteUrl: str) -> Dict[str, str]: + """Parse SharePoint site URL to extract hostname and site path""" + try: + parsed = urllib.parse.urlparse(siteUrl) + hostname = parsed.hostname + path = parsed.path.strip('/') + + return { + "hostname": hostname, + "sitePath": path + } + except Exception as e: + logger.error(f"Error parsing site URL {siteUrl}: {str(e)}") + return {"hostname": "", "sitePath": ""} + diff --git a/modules/workflows/methods/methodSharepoint/methodSharepoint.py b/modules/workflows/methods/methodSharepoint/methodSharepoint.py new file mode 100644 index 00000000..299d3fed --- /dev/null +++ b/modules/workflows/methods/methodSharepoint/methodSharepoint.py @@ -0,0 +1,387 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +SharePoint operations method module. +Handles SharePoint document operations using the SharePoint service. 
+""" + +import logging +from modules.workflows.methods.methodBase import MethodBase +from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter +from modules.shared.frontendTypes import FrontendType + +# Import helpers +from .helpers.connection import ConnectionHelper +from .helpers.siteDiscovery import SiteDiscoveryHelper +from .helpers.documentParsing import DocumentParsingHelper +from .helpers.pathProcessing import PathProcessingHelper +from .helpers.apiClient import ApiClientHelper + +# Import actions +from .actions.findDocumentPath import findDocumentPath +from .actions.readDocuments import readDocuments +from .actions.uploadDocument import uploadDocument +from .actions.listDocuments import listDocuments +from .actions.analyzeFolderUsage import analyzeFolderUsage +from .actions.findSiteByUrl import findSiteByUrl +from .actions.downloadFileByPath import downloadFileByPath +from .actions.copyFile import copyFile +from .actions.uploadFile import uploadFile + +logger = logging.getLogger(__name__) + +class MethodSharepoint(MethodBase): + """SharePoint operations methods.""" + + def __init__(self, services): + super().__init__(services) + self.name = "sharepoint" + self.description = "SharePoint operations methods" + + # Initialize helper modules + self.connection = ConnectionHelper(self) + self.siteDiscovery = SiteDiscoveryHelper(self) + self.documentParsing = DocumentParsingHelper(self) + self.pathProcessing = PathProcessingHelper(self) + self.apiClient = ApiClientHelper(self) + + # RBAC-Integration: Action-Definitionen mit actionId + self._actions = { + "findDocumentPath": WorkflowActionDefinition( + actionId="sharepoint.findDocumentPath", + description="Find documents and folders by name/path across sites", + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection 
label" + ), + "site": WorkflowActionParameter( + name="site", + type="str", + frontendType=FrontendType.TEXT, + required=False, + description="Site hint" + ), + "searchQuery": WorkflowActionParameter( + name="searchQuery", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Search terms or path" + ), + "maxResults": WorkflowActionParameter( + name="maxResults", + type="int", + frontendType=FrontendType.NUMBER, + required=False, + default=1000, + description="Maximum items to return", + validation={"min": 1, "max": 10000} + ) + }, + execute=findDocumentPath.__get__(self, self.__class__) + ), + "readDocuments": WorkflowActionDefinition( + actionId="sharepoint.readDocuments", + description="Read documents from SharePoint and extract content/metadata", + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label" + ), + "documentList": WorkflowActionParameter( + name="documentList", + type="List[str]", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=False, + description="Document list reference(s) containing findDocumentPath result" + ), + "pathQuery": WorkflowActionParameter( + name="pathQuery", + type="str", + frontendType=FrontendType.TEXT, + required=False, + description="Direct path query if no documentList (e.g., /sites/SiteName/FolderPath)" + ), + "includeMetadata": WorkflowActionParameter( + name="includeMetadata", + type="bool", + frontendType=FrontendType.CHECKBOX, + required=False, + default=True, + description="Include metadata" + ) + }, + execute=readDocuments.__get__(self, self.__class__) + ), + "uploadDocument": WorkflowActionDefinition( + actionId="sharepoint.uploadDocument", + description="Upload documents to SharePoint", + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + 
frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label" + ), + "documentList": WorkflowActionParameter( + name="documentList", + type="List[str]", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference(s) to upload. File names are taken from the documents" + ), + "pathQuery": WorkflowActionParameter( + name="pathQuery", + type="str", + frontendType=FrontendType.TEXT, + required=False, + description="Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath)" + ) + }, + execute=uploadDocument.__get__(self, self.__class__) + ), + "listDocuments": WorkflowActionDefinition( + actionId="sharepoint.listDocuments", + description="List documents and folders in SharePoint paths across sites", + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label" + ), + "documentList": WorkflowActionParameter( + name="documentList", + type="List[str]", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document list reference(s) containing findDocumentPath result" + ), + "includeSubfolders": WorkflowActionParameter( + name="includeSubfolders", + type="bool", + frontendType=FrontendType.CHECKBOX, + required=False, + default=False, + description="Include one level of subfolders" + ) + }, + execute=listDocuments.__get__(self, self.__class__) + ), + "analyzeFolderUsage": WorkflowActionDefinition( + actionId="sharepoint.analyzeFolderUsage", + description="Analyze usage intensity of folders and files in SharePoint", + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label" + ), + "documentList": 
WorkflowActionParameter( + name="documentList", + type="List[str]", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document list reference(s) containing findDocumentPath result" + ), + "startDateTime": WorkflowActionParameter( + name="startDateTime", + type="str", + frontendType=FrontendType.DATETIME, + required=False, + description="Start date/time in ISO format (e.g., 2025-11-01T00:00:00Z). Default: 30 days ago" + ), + "endDateTime": WorkflowActionParameter( + name="endDateTime", + type="str", + frontendType=FrontendType.DATETIME, + required=False, + description="End date/time in ISO format (e.g., 2025-11-30T23:59:59Z). Default: current time" + ), + "interval": WorkflowActionParameter( + name="interval", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["day", "week", "month"], + required=False, + default="day", + description="Time interval for grouping activities" + ) + }, + execute=analyzeFolderUsage.__get__(self, self.__class__) + ), + "findSiteByUrl": WorkflowActionDefinition( + actionId="sharepoint.findSiteByUrl", + description="Find SharePoint site by hostname and site path", + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label" + ), + "hostname": WorkflowActionParameter( + name="hostname", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="SharePoint hostname (e.g., example.sharepoint.com)" + ), + "sitePath": WorkflowActionParameter( + name="sitePath", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Site path (e.g., SteeringBPM or /sites/SteeringBPM)" + ) + }, + execute=findSiteByUrl.__get__(self, self.__class__) + ), + "downloadFileByPath": WorkflowActionDefinition( + actionId="sharepoint.downloadFileByPath", + description="Download file from SharePoint by exact file path", + 
parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label" + ), + "siteId": WorkflowActionParameter( + name="siteId", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="SharePoint site ID (from findSiteByUrl result) or document reference containing site info" + ), + "filePath": WorkflowActionParameter( + name="filePath", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Full file path relative to site root (e.g., /General/50 Docs hosted by SELISE/file.xlsx)" + ) + }, + execute=downloadFileByPath.__get__(self, self.__class__) + ), + "copyFile": WorkflowActionDefinition( + actionId="sharepoint.copyFile", + description="Copy file within SharePoint", + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label" + ), + "siteId": WorkflowActionParameter( + name="siteId", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="SharePoint site ID (from findSiteByUrl result) or document reference containing site info" + ), + "sourceFolder": WorkflowActionParameter( + name="sourceFolder", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Source folder path relative to site root" + ), + "sourceFile": WorkflowActionParameter( + name="sourceFile", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Source file name" + ), + "destFolder": WorkflowActionParameter( + name="destFolder", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Destination folder path relative to site root" + ), + "destFile": WorkflowActionParameter( + name="destFile", + type="str", + frontendType=FrontendType.TEXT, + required=True, + 
description="Destination file name" + ) + }, + execute=copyFile.__get__(self, self.__class__) + ), + "uploadFile": WorkflowActionDefinition( + actionId="sharepoint.uploadFile", + description="Upload raw file content (bytes) to SharePoint", + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label" + ), + "siteId": WorkflowActionParameter( + name="siteId", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="SharePoint site ID (from findSiteByUrl result) or document reference containing site info" + ), + "folderPath": WorkflowActionParameter( + name="folderPath", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="Folder path relative to site root" + ), + "fileName": WorkflowActionParameter( + name="fileName", + type="str", + frontendType=FrontendType.TEXT, + required=True, + description="File name" + ), + "content": WorkflowActionParameter( + name="content", + type="str", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference containing file content as base64-encoded bytes" + ) + }, + execute=uploadFile.__get__(self, self.__class__) + ) + } + + # Validate actions after definition + self._validateActions() + + # Register actions as methods (optional, für direkten Zugriff) + self.findDocumentPath = findDocumentPath.__get__(self, self.__class__) + self.readDocuments = readDocuments.__get__(self, self.__class__) + self.uploadDocument = uploadDocument.__get__(self, self.__class__) + self.listDocuments = listDocuments.__get__(self, self.__class__) + self.analyzeFolderUsage = analyzeFolderUsage.__get__(self, self.__class__) + self.findSiteByUrl = findSiteByUrl.__get__(self, self.__class__) + self.downloadFileByPath = downloadFileByPath.__get__(self, self.__class__) + self.copyFile = copyFile.__get__(self, self.__class__) + 
self.uploadFile = uploadFile.__get__(self, self.__class__) + diff --git a/modules/workflows/processing/shared/methodDiscovery.py b/modules/workflows/processing/shared/methodDiscovery.py index 02c584cc..30708010 100644 --- a/modules/workflows/processing/shared/methodDiscovery.py +++ b/modules/workflows/processing/shared/methodDiscovery.py @@ -27,12 +27,16 @@ def discoverMethods(serviceCenter): # Import the methods package methodsPackage = importlib.import_module('modules.workflows.methods') - # Discover all modules in the package + # Discover all modules and packages in the methods package for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__): - if not isPkg and name.startswith('method'): + if name.startswith('method'): try: - # Import the module - module = importlib.import_module(f'modules.workflows.methods.{name}') + if isPkg: + # Package (folder) - import __init__.py which exports the Method class + module = importlib.import_module(f'modules.workflows.methods.{name}') + else: + # Module (file) - import directly + module = importlib.import_module(f'modules.workflows.methods.{name}') # Find all classes in the module that inherit from MethodBase for itemName, item in inspect.getmembers(module): From b2196bc6a357a09be02e1c879666745613a00c10 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 17 Dec 2025 22:03:58 +0100 Subject: [PATCH 03/21] neutralization integrated into extractor --- .../methods/methodContext/actions/__init__.py | 2 + .../methodContext/actions/extractContent.py | 71 ++++- .../methodContext/actions/neutralizeData.py | 256 ++++++++++++++++++ .../methods/methodContext/methodContext.py | 16 ++ modules/workflows/workflowManager.py | 108 ++------ 5 files changed, 364 insertions(+), 89 deletions(-) create mode 100644 modules/workflows/methods/methodContext/actions/neutralizeData.py diff --git a/modules/workflows/methods/methodContext/actions/__init__.py b/modules/workflows/methods/methodContext/actions/__init__.py index 9059d6bc..1750882e 
100644 --- a/modules/workflows/methods/methodContext/actions/__init__.py +++ b/modules/workflows/methods/methodContext/actions/__init__.py @@ -6,11 +6,13 @@ # Export all actions from .getDocumentIndex import getDocumentIndex from .extractContent import extractContent +from .neutralizeData import neutralizeData from .triggerPreprocessingServer import triggerPreprocessingServer __all__ = [ 'getDocumentIndex', 'extractContent', + 'neutralizeData', 'triggerPreprocessingServer', ] diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py index 799ce61d..8c5fd5fb 100644 --- a/modules/workflows/methods/methodContext/actions/extractContent.py +++ b/modules/workflows/methods/methodContext/actions/extractContent.py @@ -12,7 +12,7 @@ from typing import Dict, Any from modules.workflows.methods.methodBase import action from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelDocref import DocumentReferenceList -from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy +from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentExtracted, ContentPart logger = logging.getLogger(__name__) @@ -108,6 +108,74 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: # Pass operationId for hierarchical per-document progress logging extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId) + # Check if neutralization is enabled and should be applied automatically + neutralizationEnabled = False + try: + config = self.services.neutralization.getConfig() + neutralizationEnabled = config and config.enabled + except Exception as e: + logger.debug(f"Could not check neutralization config: {str(e)}") + + # Neutralize extracted data if enabled (for dynamic mode: after extraction, before AI processing) + if 
neutralizationEnabled: + self.services.chat.progressLogUpdate(operationId, 0.7, "Neutralizing extracted data") + logger.info("Neutralization enabled - neutralizing extracted content data") + + # Neutralize each ContentExtracted result + for extracted in extractedResults: + if extracted.parts: + neutralizedParts = [] + for part in extracted.parts: + if not isinstance(part, ContentPart): + # Try to parse as ContentPart if it's a dict + if isinstance(part, dict): + try: + part = ContentPart(**part) + except Exception as e: + logger.warning(f"Could not parse ContentPart: {str(e)}") + neutralizedParts.append(part) + continue + else: + neutralizedParts.append(part) + continue + + # Neutralize the data field if it contains text + if part.data: + try: + # Call neutralization service + neutralizationResult = self.services.neutralization.processText(part.data) + + if neutralizationResult and 'neutralized_text' in neutralizationResult: + # Replace data with neutralized text + neutralizedData = neutralizationResult['neutralized_text'] + + # Create new ContentPart with neutralized data + neutralizedPart = ContentPart( + id=part.id, + parentId=part.parentId, + label=part.label, + typeGroup=part.typeGroup, + mimeType=part.mimeType, + data=neutralizedData, + metadata=part.metadata.copy() if part.metadata else {} + ) + neutralizedParts.append(neutralizedPart) + else: + # Neutralization failed, use original part + logger.warning(f"Neutralization did not return neutralized_text for part {part.id}") + neutralizedParts.append(part) + except Exception as e: + logger.error(f"Error neutralizing part {part.id}: {str(e)}") + # On error, use original part + neutralizedParts.append(part) + else: + # No data to neutralize, keep original part + neutralizedParts.append(part) + + # Update extracted result with neutralized parts + extracted.parts = neutralizedParts + logger.info(f"Neutralized {len(neutralizedParts)} content parts") + # Build ActionDocuments from ContentExtracted results 
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents") actionDocuments = [] @@ -129,6 +197,7 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: "documentIndex": i, "extractedId": extracted.id, "partCount": len(extracted.parts) if extracted.parts else 0, + "neutralized": neutralizationEnabled, "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None } actionDoc = ActionDocument( diff --git a/modules/workflows/methods/methodContext/actions/neutralizeData.py b/modules/workflows/methods/methodContext/actions/neutralizeData.py new file mode 100644 index 00000000..240fe6b1 --- /dev/null +++ b/modules/workflows/methods/methodContext/actions/neutralizeData.py @@ -0,0 +1,256 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +""" +Neutralize Data action for Context operations. +Neutralizes extracted content data from ContentExtracted documents. +""" + +import logging +import time +from typing import Dict, Any +from modules.workflows.methods.methodBase import action +from modules.datamodels.datamodelChat import ActionResult, ActionDocument +from modules.datamodels.datamodelDocref import DocumentReferenceList +from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart + +logger = logging.getLogger(__name__) + +@action +async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Neutralize data from ContentExtracted documents. + + This action takes documents containing ContentExtracted objects (from extractContent) + and neutralizes the text data in ContentPart.data fields. + + Parameters: + - documentList (list, required): Document reference(s) containing ContentExtracted objects. 
+ + Returns: + - ActionResult with ActionDocument containing neutralized ContentExtracted objects + """ + try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"context_neutralize_{workflowId}_{int(time.time())}" + + # Check if neutralization is enabled + neutralizationEnabled = False + try: + config = self.services.neutralization.getConfig() + neutralizationEnabled = config and config.enabled + except Exception as e: + logger.debug(f"Could not check neutralization config: {str(e)}") + + if not neutralizationEnabled: + logger.info("Neutralization is not enabled, returning documents unchanged") + # Return original documents if neutralization is disabled + # Get documents from documentList + documentListParam = parameters.get("documentList") + if not documentListParam: + return ActionResult.isFailure(error="documentList is required") + + # Convert to DocumentReferenceList if needed + if isinstance(documentListParam, DocumentReferenceList): + documentList = documentListParam + elif isinstance(documentListParam, str): + documentList = DocumentReferenceList.from_string_list([documentListParam]) + elif isinstance(documentListParam, list): + documentList = DocumentReferenceList.from_string_list(documentListParam) + else: + return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}") + + # Get ChatDocuments from documentList + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) + if not chatDocuments: + return ActionResult.isFailure(error="No documents found in documentList") + + # Return original documents as ActionDocuments + actionDocuments = [] + for chatDoc in chatDocuments: + # Extract ContentExtracted from documentData if available + if hasattr(chatDoc, 'documentData') and chatDoc.documentData: + actionDoc = ActionDocument( + documentName=getattr(chatDoc, 'fileName', 'unknown'), + 
documentData=chatDoc.documentData, + mimeType=getattr(chatDoc, 'mimeType', 'application/json'), + validationMetadata={ + "actionType": "context.neutralizeData", + "neutralized": False, + "reason": "Neutralization disabled" + } + ) + actionDocuments.append(actionDoc) + + return ActionResult.isSuccess(documents=actionDocuments) + + # Extract documentList from parameters dict + documentListParam = parameters.get("documentList") + if not documentListParam: + return ActionResult.isFailure(error="documentList is required") + + # Convert to DocumentReferenceList if needed + if isinstance(documentListParam, DocumentReferenceList): + documentList = documentListParam + elif isinstance(documentListParam, str): + documentList = DocumentReferenceList.from_string_list([documentListParam]) + elif isinstance(documentListParam, list): + documentList = DocumentReferenceList.from_string_list(documentListParam) + else: + return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}") + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Neutralizing data from documents", + "Data Neutralization", + f"Documents: {len(documentList.references)}", + parentOperationId=parentOperationId + ) + + # Get ChatDocuments from documentList + self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents") + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) + + if not chatDocuments: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No documents found in documentList") + + logger.info(f"Neutralizing data from {len(chatDocuments)} documents") + + # Process each document + self.services.chat.progressLogUpdate(operationId, 0.3, "Processing documents") + actionDocuments = [] + + for i, chatDoc in enumerate(chatDocuments): + try: + # Extract ContentExtracted from documentData + if not 
hasattr(chatDoc, 'documentData') or not chatDoc.documentData: + logger.warning(f"Document {i+1} has no documentData, skipping") + continue + + documentData = chatDoc.documentData + + # Check if it's a ContentExtracted object + if isinstance(documentData, ContentExtracted): + contentExtracted = documentData + elif isinstance(documentData, dict): + # Try to parse as ContentExtracted + try: + contentExtracted = ContentExtracted(**documentData) + except Exception as e: + logger.warning(f"Document {i+1} documentData is not ContentExtracted: {str(e)}") + continue + else: + logger.warning(f"Document {i+1} documentData is not ContentExtracted or dict") + continue + + # Neutralize each ContentPart's data field + neutralizedParts = [] + for part in contentExtracted.parts: + if not isinstance(part, ContentPart): + # Try to parse as ContentPart + if isinstance(part, dict): + try: + part = ContentPart(**part) + except Exception as e: + logger.warning(f"Could not parse ContentPart: {str(e)}") + neutralizedParts.append(part) + continue + else: + neutralizedParts.append(part) + continue + + # Neutralize the data field if it contains text + if part.data: + try: + self.services.chat.progressLogUpdate( + operationId, + 0.3 + (i / len(chatDocuments)) * 0.6, + f"Neutralizing part {len(neutralizedParts) + 1} of document {i+1}" + ) + + # Call neutralization service + neutralizationResult = self.services.neutralization.processText(part.data) + + if neutralizationResult and 'neutralized_text' in neutralizationResult: + # Replace data with neutralized text + neutralizedData = neutralizationResult['neutralized_text'] + + # Create new ContentPart with neutralized data + neutralizedPart = ContentPart( + id=part.id, + parentId=part.parentId, + label=part.label, + typeGroup=part.typeGroup, + mimeType=part.mimeType, + data=neutralizedData, + metadata=part.metadata.copy() if part.metadata else {} + ) + neutralizedParts.append(neutralizedPart) + else: + # Neutralization failed, use original part + 
logger.warning(f"Neutralization did not return neutralized_text for part {part.id}") + neutralizedParts.append(part) + except Exception as e: + logger.error(f"Error neutralizing part {part.id}: {str(e)}") + # On error, use original part + neutralizedParts.append(part) + else: + # No data to neutralize, keep original part + neutralizedParts.append(part) + + # Create neutralized ContentExtracted object + neutralizedContentExtracted = ContentExtracted( + id=contentExtracted.id, + parts=neutralizedParts, + summary=contentExtracted.summary + ) + + # Create ActionDocument + originalFileName = getattr(chatDoc, 'fileName', f"document_{i+1}.json") + baseName = originalFileName.rsplit('.', 1)[0] if '.' in originalFileName else originalFileName + documentName = f"{baseName}_neutralized_{contentExtracted.id}.json" + + validationMetadata = { + "actionType": "context.neutralizeData", + "documentIndex": i, + "extractedId": contentExtracted.id, + "partCount": len(neutralizedParts), + "neutralized": True, + "originalFileName": originalFileName + } + + actionDoc = ActionDocument( + documentName=documentName, + documentData=neutralizedContentExtracted, + mimeType="application/json", + validationMetadata=validationMetadata + ) + actionDocuments.append(actionDoc) + + except Exception as e: + logger.error(f"Error processing document {i+1}: {str(e)}") + # Continue with other documents + continue + + if not actionDocuments: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid ContentExtracted documents found to neutralize") + + self.services.chat.progressLogFinish(operationId, True) + + return ActionResult.isSuccess(documents=actionDocuments) + + except Exception as e: + logger.error(f"Error in data neutralization: {str(e)}") + + # Complete progress tracking with failure + try: + self.services.chat.progressLogFinish(operationId, False) + except: + pass # Don't fail on progress logging errors + + return 
ActionResult.isFailure(error=str(e)) diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py index 5481f70b..a635764f 100644 --- a/modules/workflows/methods/methodContext/methodContext.py +++ b/modules/workflows/methods/methodContext/methodContext.py @@ -13,6 +13,7 @@ from .helpers.formatting import FormattingHelper # Import actions from .actions.getDocumentIndex import getDocumentIndex from .actions.extractContent import extractContent +from .actions.neutralizeData import neutralizeData from .actions.triggerPreprocessingServer import triggerPreprocessingServer logger = logging.getLogger(__name__) @@ -68,6 +69,20 @@ class MethodContext(MethodBase): }, execute=extractContent.__get__(self, self.__class__) ), + "neutralizeData": WorkflowActionDefinition( + actionId="context.neutralizeData", + description="Neutralize extracted data from ContentExtracted documents (for use after extractContent)", + parameters={ + "documentList": WorkflowActionParameter( + name="documentList", + type="List[str]", + frontendType=FrontendType.DOCUMENT_REFERENCE, + required=True, + description="Document reference(s) containing ContentExtracted objects to neutralize" + ) + }, + execute=neutralizeData.__get__(self, self.__class__) + ), "triggerPreprocessingServer": WorkflowActionDefinition( actionId="context.triggerPreprocessingServer", description="Trigger preprocessing server at customer tenant to update database with configuration", @@ -104,5 +119,6 @@ class MethodContext(MethodBase): # Register actions as methods (optional, für direkten Zugriff) self.getDocumentIndex = getDocumentIndex.__get__(self, self.__class__) self.extractContent = extractContent.__get__(self, self.__class__) + self.neutralizeData = neutralizeData.__get__(self, self.__class__) self.triggerPreprocessingServer = triggerPreprocessingServer.__get__(self, self.__class__) diff --git a/modules/workflows/workflowManager.py 
b/modules/workflows/workflowManager.py index 1906c0f6..987f46bf 100644 --- a/modules/workflows/workflowManager.py +++ b/modules/workflows/workflowManager.py @@ -960,17 +960,13 @@ class WorkflowManager: async def _processFileIds(self, fileIds: List[str], messageId: str = None) -> List[ChatDocument]: """Process file IDs from existing files and return ChatDocument objects. - If neutralization is enabled, files are neutralized and new files are created with neutralized content. - If neutralization fails, the document is not included and an error is logged to ChatLog.""" - documents = [] - # Check if neutralization is enabled - neutralizationEnabled = False - try: - config = self.services.neutralization.getConfig() - neutralizationEnabled = config and config.enabled - except Exception as e: - logger.debug(f"Could not check neutralization config: {str(e)}") + NOTE: Neutralization is NOT performed here. For dynamic workflows, neutralization + should happen AFTER content extraction (in extractContent action) to neutralize + extracted data (ContentPart.data), not ChatDocuments. This ensures neutralization + happens after extraction but before AI processing. + """ + documents = [] workflow = self.services.workflow @@ -984,87 +980,23 @@ class WorkflowManager: originalFileName = fileInfo.get("fileName", "unknown") originalMimeType = fileInfo.get("mimeType", "application/octet-stream") - fileIdToUse = fileId - fileNameToUse = originalFileName fileSizeToUse = fileInfo.get("size", 0) - neutralizationFailed = False - # Neutralize file if enabled - if neutralizationEnabled: - try: - # Neutralize the file using the neutralization service - neutralizationResult = self.services.neutralization.processFile(fileId) - - # Check if file is binary (not neutralized) - if neutralizationResult.get('is_binary', False): - # Binary file - log INFO and use original file - infoMsg = f"File '{originalFileName}' (MIME type: {neutralizationResult.get('mime_type', 'unknown')}) is a binary file. 
Binary file neutralization will be implemented in the future. Using original file without neutralization." - logger.info(infoMsg) - self.services.chat.storeLog(workflow, { - "message": infoMsg, - "type": "info", - "status": "running", - "progress": 50 - }) - # Use original file (fileIdToUse already set to fileId) - elif neutralizationResult and 'neutralized_text' in neutralizationResult: - neutralizedText = neutralizationResult['neutralized_text'] - - # Create new file with neutralized content - neutralizedFileName = neutralizationResult.get('neutralized_file_name', f"neutralized_{originalFileName}") - neutralizedContentBytes = neutralizedText.encode('utf-8') - - # Create file in component storage - neutralizedFileItem = self.services.interfaceDbComponent.createFile( - name=neutralizedFileName, - mimeType=originalMimeType, - content=neutralizedContentBytes - ) - # Persist file data - self.services.interfaceDbComponent.createFileData(neutralizedFileItem.id, neutralizedContentBytes) - - # Use the neutralized file ID and actual size - fileIdToUse = neutralizedFileItem.id - fileNameToUse = neutralizedFileName - fileSizeToUse = len(neutralizedContentBytes) - - logger.info(f"Neutralized file {fileId} -> {fileIdToUse} ({fileNameToUse})") - else: - neutralizationFailed = True - errorMsg = f"Neutralization did not return neutralized_text for file '{originalFileName}' (ID: {fileId})" - logger.warning(errorMsg) - self.services.chat.storeLog(workflow, { - "message": errorMsg, - "type": "error", - "status": "error", - "progress": -1 - }) - except Exception as e: - neutralizationFailed = True - errorMsg = f"Failed to neutralize file '{originalFileName}' (ID: {fileId}): {str(e)}" - logger.error(errorMsg) - self.services.chat.storeLog(workflow, { - "message": errorMsg, - "type": "error", - "status": "error", - "progress": -1 - }) + # NOTE: Neutralization removed from here - it should happen in extractContent action + # after content extraction but before AI processing (for dynamic 
workflows) + # This ensures we neutralize extracted data (ContentPart.data), not ChatDocuments - # Only skip document if neutralization failed (not for binary files) - if not neutralizationFailed: - # Create document with file ID (neutralized or original) - document = ChatDocument( - id=str(uuid.uuid4()), - messageId=messageId or "", - fileId=fileIdToUse, - fileName=fileNameToUse, - fileSize=fileSizeToUse, - mimeType=originalMimeType - ) - documents.append(document) - logger.info(f"Processed file ID {fileId} -> {document.fileName} (using fileId: {fileIdToUse})") - else: - logger.warning(f"Skipping document for file ID {fileId} due to neutralization failure") + # Create document with original file ID (no neutralization) + document = ChatDocument( + id=str(uuid.uuid4()), + messageId=messageId or "", + fileId=fileId, + fileName=originalFileName, + fileSize=fileSizeToUse, + mimeType=originalMimeType + ) + documents.append(document) + logger.info(f"Processed file ID {fileId} -> {document.fileName}") except Exception as e: errorMsg = f"Error processing file ID {fileId}: {str(e)}" logger.error(errorMsg) From 982932d2f5903b8a71c6dd67d30ac71e3ec75f52 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 23 Dec 2025 00:34:15 +0100 Subject: [PATCH 04/21] enhanced document generation with images --- modules/datamodels/datamodelJson.py | 95 +- .../datamodels/datamodelWorkflowActions.py | 4 +- modules/services/__init__.py | 3 + modules/services/serviceAi/mainServiceAi.py | 99 +- .../mainServiceGeneration.py | 119 +- .../renderers/rendererBaseTemplate.py | 55 +- .../renderers/rendererCsv.py | 14 +- .../renderers/rendererDocx.py | 14 +- .../renderers/rendererHtml.py | 204 +++- .../renderers/rendererImage.py | 4 +- .../renderers/rendererMarkdown.py | 14 +- .../renderers/rendererPdf.py | 96 +- .../renderers/rendererPptx.py | 304 +++-- .../renderers/rendererText.py | 12 +- .../renderers/rendererXlsx.py | 81 +- .../serviceGeneration/subContentGenerator.py | 840 +++++++++++++ 
.../serviceGeneration/subContentIntegrator.py | 167 +++ .../subDocumentPurposeAnalyzer.py | 316 +++++ .../subStructureGenerator.py | 488 ++++++++ modules/shared/jsonUtils.py | 29 +- .../methods/methodAi/actions/convert.py | 2 +- .../methodAi/actions/generateDocument.py | 388 +++++- .../workflows/methods/methodAi/methodAi.py | 7 +- modules/workflows/methods/methodBase.py | 43 +- .../processing/adaptive/contentValidator.py | 62 +- .../processing/core/actionExecutor.py | 84 +- .../workflows/processing/modes/modeDynamic.py | 14 +- .../ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md | 354 ++++++ ...ONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md | 459 +++++++ ...DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md | 1067 +++++++++++++++++ ...N_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md | 398 ++++++ .../shared/RENDERING_ISSUE_ANALYSIS.md | 238 ++++ .../processing/shared/methodDiscovery.py | 22 +- .../processing/shared/placeholderFactory.py | 13 + .../shared/promptGenerationActionsDynamic.py | 6 + .../workflows/processing/workflowProcessor.py | 9 +- modules/workflows/workflowManager.py | 141 ++- requirements.txt | 3 + .../test09_document_generation_formats.py | 410 +++++++ 39 files changed, 6236 insertions(+), 442 deletions(-) create mode 100644 modules/services/serviceGeneration/subContentGenerator.py create mode 100644 modules/services/serviceGeneration/subContentIntegrator.py create mode 100644 modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py create mode 100644 modules/services/serviceGeneration/subStructureGenerator.py create mode 100644 modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md create mode 100644 modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md create mode 100644 modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md create mode 100644 modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md create mode 100644 
modules/workflows/processing/shared/RENDERING_ISSUE_ANALYSIS.md create mode 100644 tests/functional/test09_document_generation_formats.py diff --git a/modules/datamodels/datamodelJson.py b/modules/datamodels/datamodelJson.py index 8ead97e1..784cc042 100644 --- a/modules/datamodels/datamodelJson.py +++ b/modules/datamodels/datamodelJson.py @@ -19,12 +19,14 @@ supportedSectionTypes: List[str] = [ ] # Canonical JSON template used for AI generation (documents array + sections) -# Rendering pipelines can select the first document and read its sections. +# This template is used for STRUCTURE generation - sections have empty elements arrays. +# For content generation, elements arrays will be populated later. jsonTemplateDocument: str = """{ "metadata": { "split_strategy": "single_document", "source_documents": [], - "extraction_method": "ai_generation" + "extraction_method": "ai_generation", + "title": "{{DOCUMENT_TITLE}}" }, "documents": [ { @@ -33,56 +35,77 @@ jsonTemplateDocument: str = """{ "filename": "document.json", "sections": [ { - "id": "section_heading_example", + "id": "section_heading_main_title", "content_type": "heading", - "elements": [ - {"level": 1, "text": "Heading Text"} - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Main document title heading", + "order": 1, + "elements": [] }, { - "id": "section_paragraph_example", + "id": "section_paragraph_introduction", "content_type": "paragraph", - "elements": [ - {"text": "Paragraph text content"} - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Introduction paragraph", + "order": 2, + "elements": [] + }, + { + "id": "section_heading_section_1", + "content_type": "heading", + "complexity": "simple", + "generation_hint": "Section heading for topic 1", + "order": 3, + "elements": [] + }, + { + "id": "section_paragraph_section_1", + "content_type": "paragraph", + "complexity": "simple", + "generation_hint": "Content paragraph for section 1", + "order": 4, + "elements": [] }, { 
"id": "section_bullet_list_example", "content_type": "bullet_list", - "elements": [ - { - "items": ["Item 1", "Item 2"] - } - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Bullet list items", + "order": 5, + "elements": [] + }, + { + "id": "section_image_example", + "content_type": "image", + "complexity": "complex", + "generation_hint": "Illustration for document", + "image_prompt": "A detailed description for image generation", + "order": 6, + "elements": [] }, { "id": "section_table_example", "content_type": "table", - "elements": [ - { - "headers": ["Column 1", "Column 2"], - "rows": [ - ["Row 1 Col 1", "Row 1 Col 2"], - ["Row 2 Col 1", "Row 2 Col 2"] - ], - "caption": "Table caption" - } - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Data table with relevant information", + "order": 7, + "elements": [] }, { "id": "section_code_example", "content_type": "code_block", - "elements": [ - { - "code": "function example() { return true; }", - "language": "javascript" - } - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Code example or snippet", + "order": 8, + "elements": [] + }, + { + "id": "section_paragraph_conclusion", + "content_type": "paragraph", + "complexity": "simple", + "generation_hint": "Conclusion paragraph", + "order": 9, + "elements": [] } ] } diff --git a/modules/datamodels/datamodelWorkflowActions.py b/modules/datamodels/datamodelWorkflowActions.py index 1857883b..a3812955 100644 --- a/modules/datamodels/datamodelWorkflowActions.py +++ b/modules/datamodels/datamodelWorkflowActions.py @@ -19,9 +19,9 @@ class WorkflowActionParameter(BaseModel): name: str = Field(description="Parameter name") type: str = Field(description="Python type as string: 'str', 'int', 'bool', 'List[str]', etc.") frontendType: FrontendType = Field(description="UI rendering type (from global FrontendType enum)") - frontendOptions: Optional[Union[str, List[Dict[str, Any]]]] = Field( + frontendOptions: Optional[Union[str, 
List[str]]] = Field( None, - description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or static list. For custom types, this is automatically set to the API endpoint." + description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or list of strings (e.g., ['txt', 'json']). For custom types, this is automatically set to the API endpoint." ) required: bool = Field(False, description="Whether parameter is required") default: Optional[Any] = Field(None, description="Default value") diff --git a/modules/services/__init__.py b/modules/services/__init__.py index 6edfe13b..32e0cb3f 100644 --- a/modules/services/__init__.py +++ b/modules/services/__init__.py @@ -57,6 +57,9 @@ class Services: from modules.interfaces.interfaceDbComponentObjects import getInterface as getComponentInterface self.interfaceDbComponent = getComponentInterface(user) + # Expose RBAC directly on services for convenience + self.rbac = self.interfaceDbApp.rbac if self.interfaceDbApp else None + # Initialize service packages from .serviceExtraction.mainServiceExtraction import ExtractionService diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 67a47163..648e922c 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -1206,37 +1206,74 @@ If no trackable items can be identified, return: {{"kpis": []}} else: content_for_generation = None - self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt + # Detect if this is a section generation prompt (not full document generation) + # Section prompts contain "SECTION TO GENERATE" marker + isSectionGeneration = "SECTION TO GENERATE" in prompt or "CRITICAL: Return ONLY a JSON object with an \"elements\" array" in prompt - generation_prompt = 
await buildGenerationPrompt( - outputFormat, prompt, title, content_for_generation, None, self.services - ) - - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": content_for_generation, - "services": self.services - } - - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - # Extract user prompt from promptArgs for task completion analysis - userPrompt = None - if promptArgs: - userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") - - # Track generation progress - the looping function will update with byte progress - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId, - userPrompt=userPrompt - ) + if isSectionGeneration: + # For section generation, use the prompt directly without wrapping + # Section prompts are already complete and should not be wrapped in document generation template + logger.debug("Detected section generation prompt - skipping document generation wrapper") + generation_prompt = prompt + + # Call AI directly without looping (sections are simple, single-call) + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for section generation") + request = AiCallRequest( + prompt=generation_prompt, + context="", + options=options + ) + response = await self.callAi(request) + generated_json = response.content if response and response.content else "" + + # For section generation, return the raw JSON content directly + # No rendering needed - sections are just JSON elements + self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Section content generated") + self.services.chat.progressLogFinish(aiOperationId, True) + + metadata = AiResponseMetadata( + title=title or "Section Content", + operationType=opType.value if opType else None + ) + + return AiResponse( + content=generated_json, + metadata=metadata, + 
documents=[] + ) + else: + # Full document generation - use the wrapper + self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") + from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt + + generation_prompt = await buildGenerationPrompt( + outputFormat, prompt, title, content_for_generation, None, self.services + ) + + promptArgs = { + "outputFormat": outputFormat, + "userPrompt": prompt, + "title": title, + "extracted_content": content_for_generation, + "services": self.services + } + + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") + # Extract user prompt from promptArgs for task completion analysis + userPrompt = None + if promptArgs: + userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") + + # Track generation progress - the looping function will update with byte progress + generated_json = await self._callAiWithLooping( + generation_prompt, + options, + "document_generation", + buildGenerationPrompt, + promptArgs, + aiOperationId, + userPrompt=userPrompt + ) # Calculate final size for completion message finalSize = len(generated_json.encode('utf-8')) if generated_json else 0 @@ -1291,7 +1328,7 @@ If no trackable items can be identified, return: {{"kpis": []}} from modules.services.serviceGeneration.mainServiceGeneration import GenerationService generationService = GenerationService(self.services) self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format") - rendered_content, mime_type = await generationService.renderReport( + rendered_content, mime_type, _images = await generationService.renderReport( generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self ) self.services.chat.progressLogFinish(renderOperationId, True) diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py 
index cb1d6f9f..5b518afa 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -82,14 +82,62 @@ class GenerationService: documentData = doc_data['content'] mimeType = doc_data['mimeType'] - # Convert document data to string content - content = convertDocumentDataToString(documentData, getFileExtension(documentName)) + # Handle binary data (images, PDFs, Office docs) differently from text + # Check if this is a binary MIME type + binaryMimeTypes = { + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/pdf", + "image/png", "image/jpeg", "image/jpg", "image/gif", "image/webp", "image/bmp", "image/svg+xml", + } - # Skip empty or minimal content - minimalContentPatterns = ['{}', '[]', 'null', '""', "''"] - if not content or content.strip() == "" or content.strip() in minimalContentPatterns: - logger.warning(f"Empty or minimal content for document {documentName}, skipping") - continue + isBinaryMimeType = mimeType in binaryMimeTypes + base64encoded = False + content = None + + if isBinaryMimeType: + # For binary data, handle bytes vs base64 string vs regular string + if isinstance(documentData, bytes): + # Already bytes - encode to base64 string for storage + import base64 + content = base64.b64encode(documentData).decode('utf-8') + base64encoded = True + elif isinstance(documentData, str): + # Check if it's already valid base64 + import base64 + try: + # Try to decode to verify it's base64 + base64.b64decode(documentData, validate=True) + # Valid base64 - use as is + content = documentData + base64encoded = True + except Exception: + # Not valid base64 - might be raw string, try encoding + try: + content = base64.b64encode(documentData.encode('utf-8')).decode('utf-8') + base64encoded = True + 
except Exception: + logger.warning(f"Could not process binary data for {documentName}, skipping") + continue + else: + # Other types - convert to string then base64 + import base64 + try: + content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8') + base64encoded = True + except Exception: + logger.warning(f"Could not encode binary data for {documentName}, skipping") + continue + else: + # Text data - convert to string + content = convertDocumentDataToString(documentData, getFileExtension(documentName)) + + # Skip empty or minimal content + minimalContentPatterns = ['{}', '[]', 'null', '""', "''"] + if not content or content.strip() == "" or content.strip() in minimalContentPatterns: + logger.warning(f"Empty or minimal content for document {documentName}, skipping") + continue # Normalize file extension based on mime type if missing or incorrect try: @@ -102,6 +150,13 @@ class GenerationService: "text/markdown": ".md", "text/plain": ".txt", "application/json": ".json", + "image/png": ".png", + "image/jpeg": ".jpg", + "image/jpg": ".jpg", + "image/gif": ".gif", + "image/webp": ".webp", + "image/bmp": ".bmp", + "image/svg+xml": ".svg", } expectedExt = mime_to_ext.get(mimeType) if expectedExt: @@ -114,20 +169,6 @@ class GenerationService: except Exception: pass - # Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text - base64encoded = False - try: - binaryMimeTypes = { - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "application/vnd.openxmlformats-officedocument.presentationml.presentation", - "application/pdf", - } - if isinstance(documentData, str) and mimeType in binaryMimeTypes: - base64encoded = True - except Exception: - base64encoded = False - # Create document with file in one step using interfaces directly document = self._createDocument( fileName=documentName, @@ -278,7 +319,7 @@ class GenerationService: 'workflowId': 
'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]: """ Render extracted JSON content to the specified output format. Always uses unified "documents" array format. @@ -291,7 +332,8 @@ class GenerationService: aiService: AI service instance for generation prompt creation Returns: - tuple: (rendered_content, mime_type) + tuple: (rendered_content, mime_type, images_list) + images_list: List of image dicts with base64Data, altText, caption, etc. """ try: # Validate JSON input @@ -311,12 +353,10 @@ class GenerationService: if "sections" not in single_doc: raise ValueError("Document must contain 'sections' field") - # Create content for single document renderer - contentToRender = { - "sections": single_doc["sections"], - "metadata": extractedContent.get("metadata", {}), - "continuation": extractedContent.get("continuation", None) - } + # Pass standardized schema to renderer (maintains architecture) + # Renderer should extract sections from documents array according to standardized schema + # Standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + contentToRender = extractedContent # Pass full standardized schema # Get the appropriate renderer for the format renderer = self._getFormatRenderer(outputFormat) @@ -324,9 +364,15 @@ class GenerationService: raise ValueError(f"Unsupported output format: {outputFormat}") # Render the JSON content directly (AI generation handled by main service) + # Renderer receives standardized schema and extracts what it needs renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService) - return renderedContent, mimeType + # Get images from renderer if available + images = [] + if hasattr(renderer, 
'getRenderedImages'): + images = renderer.getRenderedImages() + + return renderedContent, mimeType, images except Exception as e: logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}") @@ -353,14 +399,21 @@ class GenerationService: def _getFormatRenderer(self, output_format: str): """Get the appropriate renderer for the specified format using auto-discovery.""" try: - from .renderers.registry import getRenderer + from .renderers.registry import getRenderer, getSupportedFormats renderer = getRenderer(output_format, services=self.services) if renderer: return renderer + # Log available formats for debugging + availableFormats = getSupportedFormats() + logger.error( + f"No renderer found for format '{output_format}'. " + f"Available formats: {availableFormats}" + ) + # Fallback to text renderer if no specific renderer found - logger.warning(f"No renderer found for format {output_format}, falling back to text") + logger.warning(f"Falling back to text renderer for format {output_format}") fallbackRenderer = getRenderer('text', services=self.services) if fallbackRenderer: return fallbackRenderer @@ -370,4 +423,6 @@ class GenerationService: except Exception as e: logger.error(f"Error getting renderer for {output_format}: {str(e)}") + import traceback + logger.debug(traceback.format_exc()) return None \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 1f013457..491c1d06 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -66,12 +66,34 @@ class BaseRenderer(ABC): pass def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: - """Extract sections from report data.""" - return reportData.get('sections', []) + """ + Extract sections from standardized schema: {metadata: {...}, documents: 
[{sections: [...]}]} + """ + if "documents" not in reportData: + raise ValueError("Report data must follow standardized schema with 'documents' array") + + documents = reportData.get("documents", []) + if not isinstance(documents, list) or len(documents) == 0: + raise ValueError("Standardized schema must contain at least one document in 'documents' array") + + firstDoc = documents[0] + if not isinstance(firstDoc, dict) or "sections" not in firstDoc: + raise ValueError("Document in standardized schema must contain 'sections' field") + + return firstDoc.get("sections", []) def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]: - """Extract metadata from report data.""" - return reportData.get('metadata', {}) + """ + Extract metadata from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + """ + if "metadata" not in reportData: + raise ValueError("Report data must follow standardized schema with 'metadata' field") + + metadata = reportData.get("metadata", {}) + if not isinstance(metadata, dict): + raise ValueError("Metadata in standardized schema must be a dictionary") + + return metadata def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str: """Get title from report data or use fallback.""" @@ -79,14 +101,33 @@ class BaseRenderer(ABC): return metadata.get('title', fallbackTitle) def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool: - """Validate that JSON content has the expected structure.""" + """ + Validate that JSON content follows standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + """ if not isinstance(jsonContent, dict): return False - if "sections" not in jsonContent: + # Validate metadata field exists + if "metadata" not in jsonContent: return False - sections = jsonContent.get("sections", []) + if not isinstance(jsonContent.get("metadata"), dict): + return False + + # Validate documents array exists and is not empty + if "documents" not in jsonContent: + 
return False + + documents = jsonContent.get("documents", []) + if not isinstance(documents, list) or len(documents) == 0: + return False + + # Validate first document has sections + firstDoc = documents[0] + if not isinstance(firstDoc, dict) or "sections" not in firstDoc: + return False + + sections = firstDoc.get("sections", []) if not isinstance(sections, list): return False diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py index d0a3ec04..c18d7481 100644 --- a/modules/services/serviceGeneration/renderers/rendererCsv.py +++ b/modules/services/serviceGeneration/renderers/rendererCsv.py @@ -41,15 +41,16 @@ class RendererCsv(BaseRenderer): async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate CSV content from structured JSON document.""" try: - # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = jsonContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Generate CSV content csvRows = [] @@ -60,7 +61,6 @@ class RendererCsv(BaseRenderer): csvRows.append([]) # Empty row # Process each section in order - sections = jsonContent.get("sections", []) for section in sections: sectionCsv = self._renderJsonSectionToCsv(section) if 
sectionCsv: diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index f33b898d..48fb94f1 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -71,22 +71,22 @@ class RendererDocx(BaseRenderer): self._setupBasicDocumentStyles(doc) self._setupDocumentStyles(doc, styleSet) - # Validate JSON structure - if not isinstance(json_content, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(json_content): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in json_content: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(json_content) + metadata = self._extractMetadata(json_content) # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + document_title = metadata.get("title", title) # Add document title using Title style if document_title: doc.add_paragraph(document_title, style='Title') # Process each section in order - sections = json_content.get("sections", []) for section in sections: self._renderJsonSection(doc, section, styleSet) diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index bc15917d..163690d3 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -28,14 +28,25 @@ class RendererHtml(BaseRenderer): async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = 
None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to HTML format using AI-analyzed styling.""" try: + # Extract images first + images = self._extractImages(extractedContent) + + # Store images in instance for later retrieval + self._renderedImages = images + # Generate HTML using AI-analyzed styling htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) + # Replace base64 data URIs with relative file paths if images exist + if images: + htmlContent = self._replaceImageDataUris(htmlContent, images) + return htmlContent, "text/html" except Exception as e: self.logger.error(f"Error rendering HTML: {str(e)}") # Return minimal HTML fallback + self._renderedImages = [] # Initialize empty list on error return f"{title}

{title}

Error rendering report: {str(e)}

", "text/html" async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: @@ -45,14 +56,15 @@ class RendererHtml(BaseRenderer): styles = await self._getStyleSet(userPrompt, aiService) # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = jsonContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Build HTML document htmlParts = [] @@ -77,7 +89,6 @@ class RendererHtml(BaseRenderer): htmlParts.append('
') # Process each section - sections = jsonContent.get("sections", []) for section in sections: sectionHtml = self._renderJsonSection(section, styles) if sectionHtml: @@ -377,12 +388,15 @@ class RendererHtml(BaseRenderer): def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON heading to HTML using AI-generated styles.""" try: - # Normalize non-dict inputs - if isinstance(headingData, str): + # Normalize inputs - headingData is typically a list of elements from _getSectionData + if isinstance(headingData, list): + # Extract first element from elements array + if headingData and len(headingData) > 0: + headingData = headingData[0] if isinstance(headingData[0], dict) else {} + else: + return "" + elif isinstance(headingData, str): headingData = {"text": headingData, "level": 2} - elif isinstance(headingData, list): - # Render a list as bullet list under a default heading label - return self._renderJsonBulletList({"items": headingData}, styles) elif not isinstance(headingData, dict): return "" @@ -402,21 +416,28 @@ class RendererHtml(BaseRenderer): def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON paragraph to HTML using AI-generated styles.""" try: - # Normalize non-dict inputs - if isinstance(paragraphData, str): - paragraphData = {"text": paragraphData} - elif isinstance(paragraphData, list): - # Treat list as bullet list paragraph - return self._renderJsonBulletList({"items": paragraphData}, styles) - elif not isinstance(paragraphData, dict): + # Normalize inputs - paragraphData is typically a list of elements from _getSectionData + if isinstance(paragraphData, list): + # Extract text from all paragraph elements + texts = [] + for el in paragraphData: + if isinstance(el, dict) and "text" in el: + texts.append(el["text"]) + elif isinstance(el, str): + texts.append(el) + if texts: + # Join multiple paragraphs with

tags + return '\n'.join(f'

{text}

' for text in texts) + return "" + elif isinstance(paragraphData, str): + return f'

{paragraphData}

' + elif isinstance(paragraphData, dict): + text = paragraphData.get("text", "") + if text: + return f'

{text}

' + return "" + else: return "" - - text = paragraphData.get("text", "") - - if text: - return f'

{text}

' - - return "" except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") @@ -441,16 +462,145 @@ class RendererHtml(BaseRenderer): return "" def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a JSON image to HTML.""" + """Render a JSON image to HTML with placeholder for later replacement.""" try: base64Data = imageData.get("base64Data", "") altText = imageData.get("altText", "Image") + caption = imageData.get("caption", "") if base64Data: - return f'{altText}' + # Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris + # Include a marker so we can find and replace it + imageMarker = f"" + imgTag = f'{altText}' + + if caption: + return f'{imageMarker}
{imgTag}
{caption}
' + else: + return f'{imageMarker}{imgTag}' return "" except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") return f'
[Image: {imageData.get("altText", "Image")}]
' + + def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Extract all images from JSON structure. + + Returns: + List of image data dictionaries with base64Data, altText, caption, sectionId + """ + images = [] + + try: + # Extract from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + documents = jsonContent.get("documents", []) + if not documents or not isinstance(documents, list): + return images + + for doc in documents: + if not isinstance(doc, dict): + continue + sections = doc.get("sections", []) + for section in sections: + if section.get("content_type") == "image": + elements = section.get("elements", []) + for element in elements: + base64Data = element.get("base64Data", "") + + # If base64Data not found, try extracting from url data URI + if not base64Data: + url = element.get("url", "") + if url.startswith("data:image/"): + # Extract base64 from data URI: data:image/png;base64, + import re + match = re.match(r'data:image/[^;]+;base64,(.+)', url) + if match: + base64Data = match.group(1) + + if base64Data: + sectionId = section.get("id", "unknown") + # Generate filename from section ID + filename = f"{sectionId}.png" + # Clean filename (remove invalid characters) + filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename) + + images.append({ + "base64Data": base64Data, + "altText": element.get("altText", "Image"), + "caption": element.get("caption"), + "sectionId": sectionId, + "filename": filename + }) + self.logger.debug(f"Extracted image from section {sectionId}: {filename}") + + self.logger.info(f"Extracted {len(images)} image(s) from JSON structure") + return images + + except Exception as e: + self.logger.warning(f"Error extracting images: {str(e)}") + return [] + + def _replaceImageDataUris(self, htmlContent: str, images: List[Dict[str, Any]]) -> str: + """ + Replace base64 data URIs in HTML with relative file paths. 
+ + Args: + htmlContent: HTML content with data URIs + images: List of image data dictionaries + + Returns: + HTML content with relative file paths + """ + try: + import base64 + import re + + # Find all image data URIs in HTML + dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)' + + def replaceDataUri(match): + base64Data = match.group(1) + + # Find matching image in images list + matchingImage = None + for img in images: + if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]): + matchingImage = img + break + + if matchingImage: + # Use filename from image data (generated from section ID) + filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png") + + # Replace with relative path + altText = matchingImage.get("altText", "Image") + caption = matchingImage.get("caption", "") + + if caption: + return f'
{altText}
{caption}
' + else: + return f'{altText}' + else: + # Keep original if no match found + return match.group(0) + + # Replace all data URIs + updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent) + + return updatedHtml + + except Exception as e: + self.logger.warning(f"Error replacing image data URIs: {str(e)}") + return htmlContent # Return original if replacement fails + + def getRenderedImages(self) -> List[Dict[str, Any]]: + """ + Get images that were extracted during rendering. + Returns list of image dicts with base64Data, altText, caption, and filename. + """ + if not hasattr(self, '_renderedImages'): + return [] + return self._renderedImages diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py index 53392d07..7ea450b2 100644 --- a/modules/services/serviceGeneration/renderers/rendererImage.py +++ b/modules/services/serviceGeneration/renderers/rendererImage.py @@ -123,7 +123,7 @@ class RendererImage(BaseRenderer): promptParts.append(f"Document Title: {title}") # Analyze content and create visual description - sections = extractedContent.get("sections", []) + sections = self._extractSections(extractedContent) contentDescription = self._analyzeContentForVisualDescription(sections) if contentDescription: @@ -286,7 +286,7 @@ Return only the compressed prompt, no explanations. 
styleElements.append("corporate, professional design") # Analyze content type for additional style hints - sections = extractedContent.get("sections", []) + sections = self._extractSections(extractedContent) hasTables = any(self._getSectionType(s) == "table" for s in sections) hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections) hasCode = any(self._getSectionType(s) == "code_block" for s in sections) diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index b07c8d51..3c9569e9 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -41,15 +41,16 @@ class RendererMarkdown(BaseRenderer): def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate markdown content from structured JSON document.""" try: - # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = jsonContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Build markdown content markdownParts = [] @@ -59,7 +60,6 @@ class RendererMarkdown(BaseRenderer): markdownParts.append("") # Process each section - sections = jsonContent.get("sections", []) for 
section in sections: sectionMarkdown = self._renderJsonSection(section) if sectionMarkdown: diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index 9d0e483d..1cfcfad7 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -65,14 +65,15 @@ class RendererPdf(BaseRenderer): styles = await self._getStyleSet(userPrompt, aiService) # Validate JSON structure - if not isinstance(json_content, dict): - raise ValueError("JSON content must be a dictionary") + if not self._validateJsonStructure(json_content): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in json_content: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(json_content) + metadata = self._extractMetadata(json_content) # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + document_title = metadata.get("title", title) # Make title shorter to prevent wrapping/overlapping if len(document_title) > 40: @@ -102,8 +103,7 @@ class RendererPdf(BaseRenderer): story.append(Spacer(1, 30)) # Add spacing before page break story.append(PageBreak()) - # Process each section - sections = json_content.get("sections", []) + # Process each section (sections already extracted above) self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER") for i, section in enumerate(sections): self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER") @@ -505,7 +505,7 @@ class RendererPdf(BaseRenderer): except Exception as e: 
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}") - return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))] + return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))] def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON table to PDF elements using AI-generated styles.""" @@ -555,9 +555,9 @@ class RendererPdf(BaseRenderer): elements = [] for item in items: if isinstance(item, str): - elements.append(Paragraph(f"• {item}", self._create_normal_style(styles))) + elements.append(Paragraph(f"• {item}", self._createNormalStyle(styles))) elif isinstance(item, dict) and "text" in item: - elements.append(Paragraph(f"• {item['text']}", self._create_normal_style(styles))) + elements.append(Paragraph(f"• {item['text']}", self._createNormalStyle(styles))) if elements: elements.append(Spacer(1, bullet_style_def.get("space_after", 3))) @@ -637,16 +637,84 @@ class RendererPdf(BaseRenderer): return [] def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: - """Render a JSON image to PDF elements.""" + """Render a JSON image to PDF elements using reportlab.""" try: base64_data = image_data.get("base64Data", "") alt_text = image_data.get("altText", "Image") + caption = image_data.get("caption", "") - if base64_data: - # For now, just add a placeholder since reportlab image handling is complex + # If base64Data not found, try extracting from url data URI + if not base64_data: + url = image_data.get("url", "") + if url.startswith("data:image/"): + # Extract base64 from data URI: data:image/png;base64, + import re + match = re.match(r'data:image/[^;]+;base64,(.+)', url) + if match: + base64_data = match.group(1) + + if not base64_data: return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))] - return [] + try: + from reportlab.platypus import Image as ReportLabImage 
+ from reportlab.lib.units import inch + import base64 + import io + + # Decode base64 image data + imageBytes = base64.b64decode(base64_data) + imageStream = io.BytesIO(imageBytes) + + # Create reportlab Image element + # Try to get image dimensions from PIL + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgWidth, imgHeight = pilImage.size + + # Scale to fit page (max width 6 inches, maintain aspect ratio) + maxWidth = 6 * inch + if imgWidth > maxWidth: + scale = maxWidth / imgWidth + imgWidth = maxWidth + imgHeight = imgHeight * scale + else: + imgWidth = imgWidth * (inch / 72) # Convert pixels to inches (assuming 72 DPI) + imgHeight = imgHeight * (inch / 72) + + # Reset stream for reportlab + imageStream.seek(0) + except Exception: + # Fallback: use default size + imgWidth = 4 * inch + imgHeight = 3 * inch + imageStream.seek(0) + + # Create reportlab Image + reportlabImage = ReportLabImage(imageStream, width=imgWidth, height=imgHeight) + + elements = [reportlabImage] + + # Add caption if available + if caption: + captionStyle = self._createNormalStyle(styles) + captionStyle.fontSize = 10 + captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666")) + elements.append(Paragraph(f"{caption}", captionStyle)) + elif alt_text and alt_text != "Image": + # Use alt text as caption if no caption provided + captionStyle = self._createNormalStyle(styles) + captionStyle.fontSize = 10 + captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666")) + elements.append(Paragraph(f"Figure: {alt_text}", captionStyle)) + + return elements + + except Exception as imgError: + self.logger.warning(f"Error embedding image in PDF: {str(imgError)}") + # Fallback to placeholder + return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))] except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") diff --git 
a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index ac04ea90..f7b65eb1 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -66,6 +66,9 @@ class RendererPptx(BaseRenderer): # Debug: Show first 200 chars of content logger.info(f"JSON content preview: {str(extractedContent)[:200]}...") + # Store prs reference for image methods + self._currentPresentation = prs + for i, slide_data in enumerate(slidesData): logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars") # Debug: Show slide content preview @@ -75,6 +78,9 @@ class RendererPptx(BaseRenderer): else: logger.warning(f" ⚠️ Slide {i+1} has NO content!") + # Check if slide has images + hasImages = slide_data.get("images") and len(slide_data.get("images", [])) > 0 + # Create slide with appropriate layout based on content slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles) slide_layout = prs.slide_layouts[slideLayoutIndex] @@ -92,67 +98,71 @@ class RendererPptx(BaseRenderer): title_color = self._get_safe_color(title_style.get("color", (31, 78, 121))) title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) - # Set content with AI-generated styling - content_shape = slide.placeholders[1] - content_text = slide_data.get("content", "") + # Handle images first (if present) + if hasImages: + self._addImagesToSlide(slide, slide_data.get("images", []), styles) - # Format content text with AI styles - text_frame = content_shape.text_frame - text_frame.clear() - - # Split content into paragraphs - paragraphs = content_text.split('\n\n') - - for i, paragraph in enumerate(paragraphs): - if paragraph.strip(): - if i == 0: - p = text_frame.paragraphs[0] - else: - p = text_frame.add_paragraph() - - p.text = paragraph.strip() - - # Apply AI-generated styling based on content 
type - if paragraph.startswith('#'): - # Header - p.text = paragraph.lstrip('#').strip() - heading_style = styles.get("heading", {}) - p.font.size = Pt(heading_style.get("font_size", 32)) - p.font.bold = heading_style.get("bold", True) - heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*heading_color) - elif paragraph.startswith('##'): - # Subheader - p.text = paragraph.lstrip('#').strip() - subheading_style = styles.get("subheading", {}) - p.font.size = Pt(subheading_style.get("font_size", 24)) - p.font.bold = subheading_style.get("bold", True) - subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79))) - p.font.color.rgb = RGBColor(*subheading_color) - elif paragraph.startswith('*') and paragraph.endswith('*'): - # Bold text - p.text = paragraph.strip('*') - paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = True - paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*paragraph_color) - else: - # Regular text - paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = paragraph_style.get("bold", False) - paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*paragraph_color) - - # Apply alignment - align = paragraph_style.get("align", "left") - if align == "center": - p.alignment = PP_ALIGN.CENTER - elif align == "right": - p.alignment = PP_ALIGN.RIGHT - else: - p.alignment = PP_ALIGN.LEFT + # Set content with AI-generated styling (if not image-only slide) + if slide_content or not hasImages: + content_shape = slide.placeholders[1] + + # Format content text with AI styles + text_frame = content_shape.text_frame + text_frame.clear() + + # Split content into paragraphs + paragraphs = slide_content.split('\n\n') + + for paraIdx, 
paragraph in enumerate(paragraphs): + if paragraph.strip(): + if paraIdx == 0: + p = text_frame.paragraphs[0] + else: + p = text_frame.add_paragraph() + + p.text = paragraph.strip() + + # Apply AI-generated styling based on content type + if paragraph.startswith('#'): + # Header + p.text = paragraph.lstrip('#').strip() + heading_style = styles.get("heading", {}) + p.font.size = Pt(heading_style.get("font_size", 32)) + p.font.bold = heading_style.get("bold", True) + heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47))) + p.font.color.rgb = RGBColor(*heading_color) + elif paragraph.startswith('##'): + # Subheader + p.text = paragraph.lstrip('#').strip() + subheading_style = styles.get("subheading", {}) + p.font.size = Pt(subheading_style.get("font_size", 24)) + p.font.bold = subheading_style.get("bold", True) + subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79))) + p.font.color.rgb = RGBColor(*subheading_color) + elif paragraph.startswith('*') and paragraph.endswith('*'): + # Bold text + p.text = paragraph.strip('*') + paragraph_style = styles.get("paragraph", {}) + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = True + paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) + p.font.color.rgb = RGBColor(*paragraph_color) + else: + # Regular text + paragraph_style = styles.get("paragraph", {}) + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = paragraph_style.get("bold", False) + paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) + p.font.color.rgb = RGBColor(*paragraph_color) + + # Apply alignment + align = paragraph_style.get("align", "left") + if align == "center": + p.alignment = PP_ALIGN.CENTER + elif align == "right": + p.alignment = PP_ALIGN.RIGHT + else: + p.alignment = PP_ALIGN.LEFT # If no slides were created, create a default slide if not slidesData: @@ -568,15 +578,16 @@ JSON ONLY. 
NO OTHER TEXT.""" slides = [] try: - # Validate JSON structure - if not isinstance(json_content, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(json_content): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in json_content: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(json_content) + metadata = self._extractMetadata(json_content) # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + document_title = metadata.get("title", title) # Create title slide slides.append({ @@ -585,7 +596,6 @@ JSON ONLY. NO OTHER TEXT.""" }) # Process sections into slides based on content and user intent - sections = json_content.get("sections", []) slides.extend(self._createSlidesFromSections(sections, styles)) # If no content slides were created, create a default content slide @@ -624,6 +634,24 @@ JSON ONLY. NO OTHER TEXT.""" content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Handle image sections specially + if content_type == "image": + # Extract image data + images = [] + for element in elements: + if element.get("base64Data"): + images.append({ + "base64Data": element.get("base64Data"), + "altText": element.get("altText", "Image"), + "caption": element.get("caption") + }) + + return { + "title": section_title or element.get("altText", "Image"), + "content": "", # No text content for image slides + "images": images + } + # Build slide content based on section type content_parts = [] @@ -645,7 +673,8 @@ JSON ONLY. 
NO OTHER TEXT.""" return { "title": section_title, - "content": slide_content + "content": slide_content, + "images": [] # No images for non-image sections } except Exception as e: @@ -835,7 +864,8 @@ JSON ONLY. NO OTHER TEXT.""" if current_slide_content: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content) + "content": "\n\n".join(current_slide_content), + "images": [] }) current_slide_content = [] @@ -844,6 +874,31 @@ JSON ONLY. NO OTHER TEXT.""" if isinstance(element, dict) and "text" in element: current_slide_title = element.get("text", "Untitled Section") break + elif section_type == "image": + # Create separate slide for image + if current_slide_content: + slides.append({ + "title": current_slide_title, + "content": "\n\n".join(current_slide_content), + "images": [] + }) + current_slide_content = [] + + # Extract image data + imageData = [] + for element in elements: + if element.get("base64Data"): + imageData.append({ + "base64Data": element.get("base64Data"), + "altText": element.get("altText", "Image"), + "caption": element.get("caption") + }) + + slides.append({ + "title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"), + "content": "", + "images": imageData + }) else: # Add content to current slide formatted_content = self._formatSectionContent(section) @@ -854,7 +909,8 @@ JSON ONLY. NO OTHER TEXT.""" if current_slide_content: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content) + "content": "\n\n".join(current_slide_content), + "images": [] }) return slides @@ -869,6 +925,10 @@ JSON ONLY. NO OTHER TEXT.""" content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Image sections return empty content (handled separately) + if content_type == "image": + return "" + # Process each element in the section content_parts = [] for element in elements: @@ -891,6 +951,110 @@ JSON ONLY. 
NO OTHER TEXT.""" logger.warning(f"Error formatting section content: {str(e)}") return "" + def _addImagesToSlide(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any]) -> None: + """Add images to a PowerPoint slide.""" + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + import base64 + import io + + if not images: + return + + # Get slide dimensions from presentation + if hasattr(self, '_currentPresentation'): + prs = self._currentPresentation + else: + prs = slide.presentation + slideWidth = prs.slide_width + slideHeight = prs.slide_height + titleHeight = Inches(1.5) # Approximate title height + + # Available area for images + availableWidth = slideWidth - Inches(1) # Margins + availableHeight = slideHeight - titleHeight - Inches(1) # Title + margins + + # Position images + if len(images) == 1: + # Single image: center it + img = images[0] + base64Data = img.get("base64Data") + if base64Data: + imageBytes = base64.b64decode(base64Data) + imageStream = io.BytesIO(imageBytes) + + # Get image dimensions + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgWidth, imgHeight = pilImage.size + + # Scale to fit available space (max 80% of slide) + maxWidth = availableWidth * 0.8 + maxHeight = availableHeight * 0.8 + + scale = min(maxWidth / imgWidth, maxHeight / imgHeight, 1.0) + finalWidth = imgWidth * scale + finalHeight = imgHeight * scale + + # Center image + left = (slideWidth - finalWidth) / 2 + top = titleHeight + (availableHeight - finalHeight) / 2 + + imageStream.seek(0) + except Exception: + # Fallback: use default size + finalWidth = Inches(6) + finalHeight = Inches(4.5) + left = (slideWidth - finalWidth) / 2 + top = titleHeight + Inches(1) + imageStream.seek(0) + + # Add image to slide + slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) + + # Add caption if available + caption = img.get("caption") or img.get("altText") + if caption and caption != 
"Image": + # Add text box below image + captionTop = top + finalHeight + Inches(0.2) + captionBox = slide.shapes.add_textbox( + Inches(1), + captionTop, + slideWidth - Inches(2), + Inches(0.5) + ) + captionFrame = captionBox.text_frame + captionFrame.text = caption + captionFrame.paragraphs[0].font.size = Pt(12) + captionFrame.paragraphs[0].font.italic = True + captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER + else: + # Multiple images: arrange in grid + cols = 2 if len(images) <= 4 else 3 + rows = (len(images) + cols - 1) // cols + + imgWidth = (availableWidth - Inches(0.5) * (cols - 1)) / cols + imgHeight = (availableHeight - Inches(0.5) * (rows - 1)) / rows + + for idx, img in enumerate(images): + base64Data = img.get("base64Data") + if base64Data: + row = idx // cols + col = idx % cols + + imageBytes = base64.b64decode(base64Data) + imageStream = io.BytesIO(imageBytes) + + left = Inches(0.5) + col * (imgWidth + Inches(0.5)) + top = titleHeight + Inches(0.5) + row * (imgHeight + Inches(0.5)) + + slide.shapes.add_picture(imageStream, left, top, width=imgWidth, height=imgHeight) + + except Exception as e: + logger.warning(f"Error adding images to slide: {str(e)}") + def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" from datetime import datetime, UTC diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index ceb1c638..56d4af61 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -64,14 +64,15 @@ class RendererText(BaseRenderer): """Generate text content from structured JSON document.""" try: # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: 
{...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = jsonContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Build text content textParts = [] @@ -82,7 +83,6 @@ class RendererText(BaseRenderer): textParts.append("") # Process each section - sections = jsonContent.get("sections", []) for section in sections: sectionText = self._renderJsonSection(section) if sectionText: diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index fadecd88..2ebe11c2 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -451,7 +451,7 @@ class RendererXlsx(BaseRenderer): def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]: """Generate sheet names based on actual content structure.""" - sections = jsonContent.get("sections", []) + sections = self._extractSections(jsonContent) # If no sections, create a single sheet if not sections: @@ -496,7 +496,7 @@ class RendererXlsx(BaseRenderer): if not sheetNames: return - sections = jsonContent.get("sections", []) + sections = self._extractSections(jsonContent) tableSections = [s for s in sections if s.get("content_type") == "table"] if len(tableSections) > 1: @@ -607,7 +607,7 @@ class RendererXlsx(BaseRenderer): row += 1 # Content overview - sections = jsonContent.get("sections", []) + sections = self._extractSections(jsonContent) sheet[f'A{row + 1}'] = "Content Overview:" sheet[f'A{row + 1}'].font = Font(bold=True) @@ -640,7 +640,7 @@ class RendererXlsx(BaseRenderer): def 
_populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]): """Populate additional sheets based on content types.""" try: - sections = jsonContent.get("sections", []) + sections = self._extractSections(jsonContent) for sheetName in sheetNames: if sheetName not in sheets: @@ -692,12 +692,14 @@ class RendererXlsx(BaseRenderer): for element in elements: if section_type == "table": startRow = self._addTableToExcel(sheet, element, styles, startRow) - elif section_type == "list": + elif section_type == "bullet_list" or section_type == "list": startRow = self._addListToExcel(sheet, element, styles, startRow) elif section_type == "paragraph": startRow = self._addParagraphToExcel(sheet, element, styles, startRow) elif section_type == "heading": startRow = self._addHeadingToExcel(sheet, element, styles, startRow) + elif section_type == "image": + startRow = self._addImageToExcel(sheet, element, styles, startRow) else: startRow = self._addParagraphToExcel(sheet, element, styles, startRow) @@ -807,6 +809,75 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not add heading to Excel: {str(e)}") return startRow + 1 + + def _addImageToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: + """Add an image element to Excel sheet using openpyxl.""" + try: + base64Data = element.get("base64Data", "") + altText = element.get("altText", "Image") + caption = element.get("caption", "") + + if not base64Data: + # No image data - add placeholder text + sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]") + return startRow + 1 + + try: + from openpyxl.drawing.image import Image as OpenpyxlImage + import base64 + import io + + # Decode base64 image data + imageBytes = base64.b64decode(base64Data) + imageStream = io.BytesIO(imageBytes) + + # Create openpyxl Image + img = OpenpyxlImage(imageStream) + + # Set image size (max width 6 
inches, maintain aspect ratio) + maxWidth = 400 # pixels (approximately 6 inches at 72 DPI) + if img.width > maxWidth: + scale = maxWidth / img.width + img.width = maxWidth + img.height = int(img.height * scale) + + # Anchor image to cell (A column, current row) + img.anchor = f'A{startRow}' + + # Add image to sheet + sheet.add_image(img) + + # Calculate height needed for image (approximate) + # Excel row height is in points (1/72 inch), image height is in pixels + # Assuming 72 DPI: pixels = points + imageHeightPoints = img.height / 1.33 # Approximate conversion + sheet.row_dimensions[startRow].height = max(15, imageHeightPoints) # Min 15 points + + # Add caption below image if available + if caption: + startRow += 1 + sheet.cell(row=startRow, column=1, value=caption) + sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10) + sheet.cell(row=startRow, column=1).alignment = Alignment(horizontal="left") + elif altText and altText != "Image": + startRow += 1 + sheet.cell(row=startRow, column=1, value=f"Figure: {altText}") + sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10) + + return startRow + 1 + + except ImportError: + self.logger.warning("openpyxl.drawing.image not available, using placeholder") + sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]") + return startRow + 1 + except Exception as imgError: + self.logger.warning(f"Error embedding image in Excel: {str(imgError)}") + sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]") + return startRow + 1 + + except Exception as e: + self.logger.warning(f"Could not add image to Excel: {str(e)}") + return startRow + 1 def _formatTimestamp(self) -> str: """Format current timestamp for document generation.""" diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py new file mode 100644 index 00000000..1b1f64a9 --- /dev/null +++ b/modules/services/serviceGeneration/subContentGenerator.py @@ -0,0 
+1,840 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Content Generator for hierarchical document generation. +Generates content for each section in the document structure. +""" + +import logging +import asyncio +from typing import Dict, Any, Optional, List, Callable +from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator + +logger = logging.getLogger(__name__) + + +class ContentGenerator: + """Generates content for document sections""" + + def __init__(self, services: Any): + self.services = services + self.integrator = ContentIntegrator(services) + + async def generateContent( + self, + structure: Dict[str, Any], + cachedContent: Optional[Dict[str, Any]] = None, + userPrompt: str = "", + progressCallback: Optional[Callable] = None, + parallelGeneration: bool = True, + batchSize: int = 10 + ) -> Dict[str, Any]: + """ + Generate content for all sections in structure. + + Args: + structure: Document structure from Phase 1 + cachedContent: Extracted content cache + userPrompt: Original user prompt + progressCallback: Function to call for progress updates + parallelGeneration: Enable parallel section generation + batchSize: Number of sections to process in parallel + + Returns: + Complete document structure with populated elements + """ + try: + documents = structure.get("documents", []) + + if not documents: + logger.warning("No documents found in structure") + return structure + + allGeneratedSections = [] + totalSectionsAcrossDocs = 0 + + # Count total sections for progress tracking + for doc in documents: + totalSectionsAcrossDocs += len(doc.get("sections", [])) + + if progressCallback: + progressCallback(0, totalSectionsAcrossDocs, "Starting content generation...") + + currentSectionIndex = 0 + + for docIdx, doc in enumerate(documents): + sections = doc.get("sections", []) + totalSections = len(sections) + + if totalSections == 0: + continue + + # Determine if parallel generation is beneficial + # Use 
sequential if only 1 section or if sections depend on each other + useParallel = parallelGeneration and totalSections > 1 + + # Count images - if many images, parallel is still beneficial but slower + imageCount = sum(1 for s in sections if s.get("content_type") == "image") + + if progressCallback and docIdx > 0: + progressCallback( + currentSectionIndex, + totalSectionsAcrossDocs, + f"Processing document {docIdx + 1}/{len(documents)}..." + ) + + if useParallel: + # Generate in batches for parallel processing + generatedSections = await self._generateSectionsParallel( + sections=sections, + cachedContent=cachedContent, + userPrompt=userPrompt, + documentMetadata=structure.get("metadata", {}), + progressCallback=lambda idx, total, msg: progressCallback( + currentSectionIndex + idx, + totalSectionsAcrossDocs, + msg + ) if progressCallback else None, + batchSize=batchSize + ) + else: + # Generate sequentially (better for context-dependent sections) + generatedSections = await self._generateSectionsSequential( + sections=sections, + cachedContent=cachedContent, + userPrompt=userPrompt, + documentMetadata=structure.get("metadata", {}), + progressCallback=lambda idx, total, msg: progressCallback( + currentSectionIndex + idx, + totalSectionsAcrossDocs, + msg + ) if progressCallback else None + ) + + allGeneratedSections.extend(generatedSections) + currentSectionIndex += totalSections + + if progressCallback: + progressCallback( + totalSectionsAcrossDocs, + totalSectionsAcrossDocs, + "Content generation complete" + ) + + # Integrate generated content into structure + completeStructure = self.integrator.integrateContent( + structure=structure, + generatedSections=allGeneratedSections + ) + + return completeStructure + + except Exception as e: + logger.error(f"Error generating content: {str(e)}") + raise + + async def _generateSectionsSequential( + self, + sections: List[Dict[str, Any]], + cachedContent: Optional[Dict[str, Any]], + userPrompt: str, + documentMetadata: 
Dict[str, Any], + progressCallback: Optional[Callable] = None + ) -> List[Dict[str, Any]]: + """ + Generate sections sequentially with enhanced progress tracking. + Uses previous sections for context continuity. + """ + generatedSections = [] + previousSections = [] + totalSections = len(sections) + + for idx, section in enumerate(sections): + try: + contentType = section.get("content_type", "content") + sectionId = section.get("id", f"section_{idx}") + + # Enhanced progress message + if contentType == "image": + message = f"Generating image: {section.get('generation_hint', 'Image')[:50]}..." + elif contentType == "heading": + message = f"Generating heading..." + elif contentType == "paragraph": + message = f"Generating paragraph..." + else: + message = f"Generating {contentType}..." + + if progressCallback: + progressCallback( + idx + 1, + totalSections, + message + ) + + context = { + "userPrompt": userPrompt, + "cachedContent": cachedContent, + "previousSections": previousSections.copy(), + "targetSection": section, + "documentMetadata": documentMetadata, + "operationId": None + } + + generated = await self._generateSectionContent(section, context) + generatedSections.append(generated) + previousSections.append(generated) + + # Log success + if contentType == "image": + logger.info(f"Successfully generated image for section {sectionId}") + elif not generated.get("error"): + logger.debug(f"Successfully generated {contentType} for section {sectionId}") + + except Exception as e: + logger.error(f"Error generating section {section.get('id')}: {str(e)}") + errorSection = self.integrator.createErrorSection(section, str(e)) + generatedSections.append(errorSection) + previousSections.append(errorSection) + + return generatedSections + + async def _generateSectionsParallel( + self, + sections: List[Dict[str, Any]], + cachedContent: Optional[Dict[str, Any]], + userPrompt: str, + documentMetadata: Dict[str, Any], + progressCallback: Optional[Callable] = None, + batchSize: 
int = 10 + ) -> List[Dict[str, Any]]: + """ + Generate sections in parallel batches with enhanced progress tracking. + + Args: + sections: List of sections to generate + cachedContent: Extracted content cache + userPrompt: Original user prompt + documentMetadata: Document metadata + progressCallback: Progress callback function + batchSize: Number of sections to process in parallel per batch + + Returns: + List of generated sections + """ + generatedSections = [] + totalSections = len(sections) + + if totalSections == 0: + return [] + + # Adjust batch size based on section types (images take longer) + imageCount = sum(1 for s in sections if s.get("content_type") == "image") + if imageCount > 0: + # Reduce batch size if many images (images are slower) + adjustedBatchSize = min(batchSize, max(3, batchSize - imageCount // 2)) + else: + adjustedBatchSize = batchSize + + # Process in batches + totalBatches = (totalSections + adjustedBatchSize - 1) // adjustedBatchSize + accumulatedPreviousSections = [] # Track sections from previous batches + + for batchNum, batchStart in enumerate(range(0, totalSections, adjustedBatchSize)): + batch = sections[batchStart:batchStart + adjustedBatchSize] + batchEnd = min(batchStart + adjustedBatchSize, totalSections) + + if progressCallback: + progressCallback( + batchStart, + totalSections, + f"Processing batch {batchNum + 1}/{totalBatches} ({len(batch)} sections)..." + ) + + async def generateWithProgress(section: Dict[str, Any], globalIndex: int, localIndex: int, batchPreviousSections: List[Dict[str, Any]]): + try: + contentType = section.get("content_type", "content") + sectionId = section.get("id", f"section_{globalIndex}") + + # Enhanced progress message based on content type + if contentType == "image": + message = f"Generating image: {section.get('generation_hint', 'Image')[:50]}..." + elif contentType == "heading": + message = f"Generating heading..." + elif contentType == "paragraph": + message = f"Generating paragraph..." 
+ else: + message = f"Generating {contentType}..." + + if progressCallback: + progressCallback( + globalIndex + 1, + totalSections, + message + ) + + context = { + "userPrompt": userPrompt, + "cachedContent": cachedContent, + "previousSections": batchPreviousSections.copy(), # Include sections from previous batches + "targetSection": section, + "documentMetadata": documentMetadata, + "operationId": None # Can be set if needed for nested progress + } + + result = await self._generateSectionContent(section, context) + + # Log success + if contentType == "image": + logger.info(f"Successfully generated image for section {sectionId}") + elif not result.get("error"): + logger.debug(f"Successfully generated {contentType} for section {sectionId}") + + return result + + except Exception as e: + logger.error(f"Error generating section {section.get('id')}: {str(e)}") + return self.integrator.createErrorSection(section, str(e)) + + # Generate batch in parallel + # Pass accumulated previous sections to each task in this batch + batchTasks = [ + generateWithProgress(section, batchStart + idx, idx, accumulatedPreviousSections) + for idx, section in enumerate(batch) + ] + + batchResults = await asyncio.gather( + *batchTasks, + return_exceptions=True + ) + + # Handle exceptions and collect results + for idx, result in enumerate(batchResults): + if isinstance(result, Exception): + logger.error(f"Error in parallel generation batch {batchNum + 1}: {str(result)}") + errorSection = self.integrator.createErrorSection(batch[idx], str(result)) + generatedSections.append(errorSection) + accumulatedPreviousSections.append(errorSection) # Add to accumulated for next batch + else: + generatedSections.append(result) + accumulatedPreviousSections.append(result) # Add to accumulated for next batch + + # Update progress after batch completion + if progressCallback: + progressCallback( + batchEnd, + totalSections, + f"Completed batch {batchNum + 1}/{totalBatches}" + ) + + return generatedSections + 
+ async def _generateSectionContent( + self, + section: Dict[str, Any], + context: Dict[str, Any] + ) -> Dict[str, Any]: + """ + Generate content for a single section. + + Args: + section: Section to generate content for + context: Generation context + + Returns: + Section with populated elements array + """ + try: + contentType = section.get("content_type", "") + complexity = section.get("complexity", "simple") + + if contentType == "image": + return await self._generateImageSection(section, context) + elif complexity == "complex": + return await self._generateComplexTextSection(section, context) + else: + return await self._generateSimpleSection(section, context) + + except Exception as e: + logger.error(f"Error generating section {section.get('id')}: {str(e)}") + return self.integrator.createErrorSection(section, str(e)) + + async def _generateSimpleSection( + self, + section: Dict[str, Any], + context: Dict[str, Any] + ) -> Dict[str, Any]: + """Generate content for simple section (heading, paragraph)""" + try: + contentType = section.get("content_type", "") + generationHint = section.get("generation_hint", "") + + # Create section-specific prompt + sectionPrompt = self._createSectionPrompt(section, context) + + # Debug: Log section generation prompt + if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): + sectionId = section.get('id', 'unknown') + contentType = section.get('content_type', 'unknown') + try: + self.services.utils.writeDebugFile( + sectionPrompt, + f"document_generation_section_{sectionId}_{contentType}_prompt" + ) + except Exception as e: + logger.debug(f"Could not write debug file for section prompt: {e}") + + # Call AI to generate content + from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum + + options = AiCallOptions( + operationType=OperationTypeEnum.DATA_GENERATE, + resultFormat="json" + ) + + aiResponse = await self.services.ai.callAiContent( + 
prompt=sectionPrompt, + options=options, + outputFormat="json" + ) + + # Debug: Log section generation response (always log, even if empty) + sectionId = section.get('id', 'unknown') + contentType = section.get('content_type', 'unknown') + + if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): + try: + responseContent = '' + if aiResponse: + if hasattr(aiResponse, 'content') and aiResponse.content: + responseContent = aiResponse.content + elif hasattr(aiResponse, 'documents') and aiResponse.documents: + responseContent = f"[Response has {len(aiResponse.documents)} documents]" + else: + responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]" + else: + responseContent = '[No response object]' + + self.services.utils.writeDebugFile( + responseContent, + f"document_generation_section_{sectionId}_{contentType}_response" + ) + logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)") + except Exception as e: + logger.warning(f"Could not write debug file for section response: {e}") + import traceback + logger.debug(traceback.format_exc()) + + if not aiResponse or not aiResponse.content: + logger.error(f"AI section generation returned empty response for section {sectionId}") + logger.error(f"Response object: {aiResponse}, has content: {hasattr(aiResponse, 'content') if aiResponse else False}") + raise ValueError("AI section generation returned empty response") + + # Extract JSON elements + rawContent = aiResponse.content if aiResponse and aiResponse.content else "" + if not rawContent or not rawContent.strip(): + logger.error(f"AI section generation returned empty response for section {sectionId}") + logger.error(f"Response object: {aiResponse}, content length: {len(rawContent) if rawContent else 0}") + raise ValueError("AI section generation returned empty response") + + extractedJson = self.services.utils.jsonExtractString(rawContent) + if not 
extractedJson or not extractedJson.strip(): + logger.error(f"No JSON found in AI response for section {sectionId}") + logger.error(f"Raw response (first 1000 chars): {rawContent[:1000]}") + logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}") + raise ValueError("No JSON found in AI section response") + + import json + try: + elementsData = json.loads(extractedJson) + logger.debug(f"Parsed JSON for section {section.get('id')}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}") + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON from AI response for section {section.get('id')}") + logger.error(f"JSON decode error: {str(e)}") + logger.error(f"Extracted JSON length: {len(extractedJson)} chars") + logger.error(f"Extracted JSON (first 1000 chars): {extractedJson[:1000]}") + if len(extractedJson) > 1000: + logger.error(f"Extracted JSON (last 500 chars): {extractedJson[-500:]}") + logger.error(f"Raw AI response length: {len(rawContent)} chars") + logger.error(f"Raw AI response (first 1000 chars): {rawContent[:1000] if rawContent else 'None'}") + + # Try to recover from truncated JSON if it looks like it was cut off + if "Expecting" in str(e) and ("delimiter" in str(e) or "value" in str(e)): + # Check if JSON starts correctly but is truncated + if extractedJson.strip().startswith('{"elements"'): + logger.warning(f"JSON appears truncated, attempting recovery...") + # Use closeJsonStructures which handles unterminated strings properly + try: + from modules.shared.jsonUtils import closeJsonStructures + recoveredJson = closeJsonStructures(extractedJson) + + logger.info(f"Attempting to parse recovered JSON (closed structures)") + logger.debug(f"Recovered JSON length: {len(recoveredJson)} chars (original: {len(extractedJson)} chars)") + + elementsData = json.loads(recoveredJson) + logger.info(f"Successfully recovered JSON for section {section.get('id')}") 
+ except (json.JSONDecodeError, ValueError) as recoveryError: + logger.error(f"JSON recovery failed: {str(recoveryError)}") + logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if 'recoveredJson' in locals() else 'N/A'}") + # Check if raw response might be truncated + if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted + logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)") + logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits") + raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}") + else: + raise ValueError(f"Invalid JSON in AI response: {str(e)}") + else: + raise ValueError(f"Invalid JSON in AI response: {str(e)}") + + # Extract elements array - handle various response formats + elements = None + + if isinstance(elementsData, dict): + # Try to find elements in various possible locations + if "elements" in elementsData: + elements = elementsData["elements"] + elif "content" in elementsData and isinstance(elementsData["content"], list): + # Some models return {"content": [...]} + elements = elementsData["content"] + elif "data" in elementsData and isinstance(elementsData["data"], list): + # Some models return {"data": [...]} + elements = elementsData["data"] + elif len(elementsData) == 1: + # Single key dict - might be the elements directly + firstValue = list(elementsData.values())[0] + if isinstance(firstValue, list): + elements = firstValue + else: + # Try to convert entire dict to a single element + logger.warning(f"AI returned dict without 'elements' key, attempting to convert: {list(elementsData.keys())}") + # For heading/paragraph, create element from dict + if contentType == "heading": + text = elementsData.get("text") or elementsData.get("heading") or str(elementsData) + level = elementsData.get("level", 1) + elements = [{"level": level, "text": text}] + elif contentType == "paragraph": + text = 
async def _generateImageSection(
    self,
    section: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Fill an image section with exactly one image element.

    Two modes, selected by section["image_source"] (default "generate"):
    - "existing": copy the image whose "id" equals
      section["image_reference_id"] out of
      context["cachedContent"]["imageDocuments"].
    - anything else: generate a new image through
      self.services.ai.callAiContent, prompted by section["image_prompt"]
      or, when that is missing, by a prompt derived from
      section["generation_hint"].

    Args:
        section: Section dict; mutated in place ("elements" is overwritten).
        context: Generation context; only "cachedContent" is read here.

    Returns:
        The same section dict with "elements" populated.

    Raises:
        ValueError: missing image reference, unknown image document, image
            document without data, missing prompt/hint, or an AI response
            that carries no image data.
        Exception: anything raised by the AI call is logged and re-raised.
    """
    try:
        # "metadata" may be present but None, so normalise before reading it.
        caption = (section.get("metadata") or {}).get("caption")

        # Check if this is an existing image to include
        imageSource = section.get("image_source", "generate")

        if imageSource == "existing":
            imageRefId = section.get("image_reference_id")
            if not imageRefId:
                raise ValueError(f"Image section {section.get('id')} has image_source='existing' but no image_reference_id")

            # cachedContent is optional in the context and may be None.
            cachedContent = context.get("cachedContent") or {}
            imageDocuments = cachedContent.get("imageDocuments") or []

            # Find the image document
            imageDoc = next((img for img in imageDocuments if img.get("id") == imageRefId), None)
            if not imageDoc:
                raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments")

            # Fail here rather than emitting an element with no image payload,
            # mirroring the "no data" check of the generation path below.
            existingData = imageDoc.get("base64Data")
            if not existingData:
                raise ValueError(f"Image document {imageRefId} has no base64Data")

            section["elements"] = [{
                "base64Data": existingData,
                "altText": imageDoc.get("altText", section.get("generation_hint", "Image")),
                "mimeType": imageDoc.get("mimeType", "image/png"),
                "caption": caption
            }]

            logger.info(f"Successfully included existing image {imageRefId} for section {section.get('id')}")
            return section

        # Generate new image
        imagePrompt = section.get("image_prompt")
        if not imagePrompt:
            # Try to create from generation_hint
            generationHint = section.get("generation_hint", "")
            if generationHint:
                imagePrompt = f"Create a professional illustration: {generationHint}"
            else:
                raise ValueError(f"Image section {section.get('id')} missing image_prompt and generation_hint")

        # Imported lazily, as in the rest of this module.
        from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage

        promptModel = AiCallPromptImage(
            prompt=imagePrompt,
            size="1024x1024",
            quality="standard",
            style="vivid"
        )
        promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)

        options = AiCallOptions(
            operationType=OperationTypeEnum.IMAGE_GENERATE,
            resultFormat="base64"
        )

        logger.info(f"Starting image generation for section {section.get('id')}: {imagePrompt[:100]}...")

        aiResponse = await self.services.ai.callAiContent(
            prompt=promptJson,
            options=options,
            outputFormat="base64"
        )

        # Preferred location of the image payload: first returned document.
        base64Data = None
        if aiResponse and aiResponse.documents and len(aiResponse.documents) > 0:
            base64Data = aiResponse.documents[0].documentData
            logger.debug(f"Image data extracted from documents: {len(base64Data) if base64Data else 0} chars")

        # Fallback: check content field (might be base64 string)
        if not base64Data and aiResponse and aiResponse.content:
            base64Data = aiResponse.content
            logger.debug(f"Image data extracted from content: {len(base64Data) if base64Data else 0} chars")

        if not base64Data:
            raise ValueError("Image generation returned no data")

        # Cheap sanity check on the first 100 characters only; a failure is
        # logged but not fatal because the renderer gets the final say.
        try:
            import base64
            base64.b64decode(base64Data[:100], validate=True)
        except Exception as e:
            logger.warning(f"Image data may not be valid base64: {str(e)}")

        altText = section.get("generation_hint", "Image")
        if not altText or altText == "Image":
            # Hint is absent or generic: fall back to the (length-limited)
            # image prompt. "or" guards against an explicit None value.
            altText = (section.get("image_prompt") or "Image")[:100]

        section["elements"] = [{
            "url": f"data:image/png;base64,{base64Data}",
            "base64Data": base64Data,
            "altText": altText,
            "caption": caption
        }]

        logger.info(f"Successfully generated image for section {section.get('id')}")
        return section

    except Exception as e:
        logger.error(f"Error generating image section: {str(e)}")
        raise
generation""" + contentType = section.get("content_type", "") + generationHint = section.get("generation_hint", "") + userPrompt = context.get("userPrompt", "") + cachedContent = context.get("cachedContent") + previousSections = context.get("previousSections", []) + documentMetadata = context.get("documentMetadata", {}) + + # Get user language + userLanguage = self._getUserLanguage() + + # Format cached content + cachedContentText = "" + if cachedContent and cachedContent.get("extractedContent"): + cachedContentText = self._formatCachedContent(cachedContent) + + # Format previous sections for context + previousSectionsText = "" + if previousSections: + formattedSections = [] + for s in previousSections[-10:]: # Last 10 sections for context (increased from 5) + prevContentType = s.get('content_type', 'unknown') # Use different variable name to avoid shadowing + order = s.get('order', 0) + hint = s.get('generation_hint', '') + elements = s.get('elements', []) + + # Extract actual content from elements + contentPreview = "" + if elements: + if prevContentType == "heading": + # Extract heading text + for elem in elements: + if isinstance(elem, dict) and "text" in elem: + contentPreview = f": \"{elem['text']}\"" + break + elif prevContentType == "paragraph": + # Extract paragraph text (first 100 chars) + for elem in elements: + if isinstance(elem, dict) and "text" in elem: + text = elem['text'] + contentPreview = f": \"{text[:100]}{'...' if len(text) > 100 else ''}\"" + break + elif prevContentType == "bullet_list": + # Extract bullet items + for elem in elements: + if isinstance(elem, dict) and "items" in elem: + items = elem['items'] + if items: + contentPreview = f": {items[:3]}{'...' 
if len(items) > 3 else ''}" + break + + formattedSections.append( + f"- Section {order} ({prevContentType}){contentPreview}" + ) + previousSectionsText = "\n".join(formattedSections) + + prompt = f"""{'='*80} +SECTION TO GENERATE: +{'='*80} +Type: {contentType} +Hint: {generationHint} +{'='*80} + +CONTEXT: +- User Request: {userPrompt} +- Previous Sections: {len(previousSections)} sections already generated +- Document Title: {documentMetadata.get('title', 'Unknown')} + +{'='*80} +PREVIOUS SECTIONS (for continuity): +{'='*80} +{previousSectionsText if previousSectionsText else "This is the first section."} +{'='*80} + +{'='*80} +EXTRACTED CONTENT (if available): +{'='*80} +{cachedContentText if cachedContentText else "None"} +{'='*80} + +TASK: Generate content for this section ONLY. + +INSTRUCTIONS: +1. Generate content appropriate for section type: {contentType} +2. Use the generation hint: {generationHint} +3. Consider previous sections for continuity +4. Use extracted content if relevant +5. All content must be in the language '{userLanguage}' + +6. CRITICAL: Return ONLY a JSON object with an "elements" array. DO NOT return a full document structure. 
+ +REQUIRED FORMAT - Return ONLY this structure: + +For heading: +{{"elements": [{{"level": 1, "text": "Heading Text"}}]}} + +For paragraph: +{{"elements": [{{"text": "Paragraph text content"}}]}} + +For table: +{{"elements": [{{"headers": ["Col1", "Col2"], "rows": [["Row1", "Row2"]]}}]}} + +For bullet_list: +{{"elements": [{{"items": ["Item 1", "Item 2"]}}]}} + +For code_block: +{{"elements": [{{"code": "code content here", "language": "python"}}]}} + +CRITICAL RULES: +- Return ONLY {{"elements": [...]}} - nothing else +- DO NOT include "metadata", "documents", "sections", or any other fields +- DO NOT return a full document structure +- DO NOT add explanatory text before or after the JSON +- The response must start with {{"elements": and end with }} +- This is a SINGLE SECTION, not a full document +""" + return prompt + + def _formatCachedContent(self, cachedContent: Dict[str, Any]) -> str: + """Format cached content for prompt inclusion""" + try: + extractedContent = cachedContent.get("extractedContent", []) + if not extractedContent: + return "No content extracted." + + formattedParts = [] + for extracted in extractedContent: + if hasattr(extracted, 'parts'): + for part in extracted.parts: + if hasattr(part, 'content'): + formattedParts.append(part.content) + elif isinstance(extracted, dict): + formattedParts.append(str(extracted)) + else: + formattedParts.append(str(extracted)) + + return "\n\n".join(formattedParts) if formattedParts else "No content extracted." + + except Exception as e: + logger.warning(f"Error formatting cached content: {str(e)}") + return "Error formatting cached content." 
+ + def _getUserLanguage(self) -> str: + """Get user language for document generation""" + try: + if self.services: + if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage: + return self.services.currentUserLanguage + elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'): + return self.services.user.language + except Exception: + pass + return 'en' # Default fallback + diff --git a/modules/services/serviceGeneration/subContentIntegrator.py b/modules/services/serviceGeneration/subContentIntegrator.py new file mode 100644 index 00000000..7bee437e --- /dev/null +++ b/modules/services/serviceGeneration/subContentIntegrator.py @@ -0,0 +1,167 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Content Integrator for hierarchical document generation. +Merges generated content into document structure and validates completeness. +""" + +import logging +from typing import Dict, Any, List, Tuple + +logger = logging.getLogger(__name__) + + +class ContentIntegrator: + """Integrates generated content into document structure""" + + def __init__(self, services: Any = None): + self.services = services + + def integrateContent( + self, + structure: Dict[str, Any], + generatedSections: List[Dict[str, Any]] + ) -> Dict[str, Any]: + """ + Merge generated sections into document structure. 
def validateCompleteness(
    self,
    document: Dict[str, Any]
) -> Tuple[bool, List[str]]:
    """
    Validate that all sections have content.

    A section counts as filled when at least one of its elements is a dict
    with a truthy "text", "base64Data", "headers", "items" or "code" value.
    Sections flagged with "error" are never reported. Elements that are
    not dicts are treated as carrying no recognised content, so the
    section is reported by id instead of aborting the whole validation.

    Args:
        document: Document structure to validate

    Returns:
        (is_complete, list_of_missing_sections); on an unexpected failure
        (False, ["Validation error: ..."]).
    """
    contentKeys = ("text", "base64Data", "headers", "items", "code")
    missingSections = []

    try:
        for doc in document.get("documents", []):
            for section in doc.get("sections", []):
                # Error sections are placeholders and exempt from the check.
                if section.get("error"):
                    continue

                elements = section.get("elements") or []
                hasContent = any(
                    isinstance(element, dict)
                    and any(element.get(key) for key in contentKeys)
                    for element in elements
                )
                if not hasContent:
                    missingSections.append(section.get("id", "unknown"))

        return len(missingSections) == 0, missingSections

    except Exception as e:
        logger.error(f"Error validating completeness: {str(e)}")
        return False, [f"Validation error: {str(e)}"]
async def analyzeDocumentPurposes(
    self,
    userPrompt: str,
    chatDocuments: List[ChatDocument],
    actionContext: str = "generateDocument"
) -> Dict[str, Any]:
    """
    Use AI to analyze user prompt and determine purpose for each document.

    The AI answer is parsed as JSON and its shape is validated explicitly:
    it must be a dict whose "document_purposes" is a list; entries that are
    not dicts are dropped. Every input document missing from the answer
    receives a default purpose. Whenever the AI answer is empty, not JSON,
    or malformed, or any other error occurs, the result of
    self._createDefaultPurposes is returned instead.

    Args:
        userPrompt: User's original prompt
        chatDocuments: List of ChatDocument objects to analyze
        actionContext: Action name (e.g., "generateDocument", "extractData")

    Returns:
        {
            "document_purposes": [
                {
                    "document_id": "...",
                    "purpose": "extract_text_content" | "include_image" | ...,
                    "reasoning": "...",
                    "extractionPrompt": "..." (if purpose requires extraction),
                    "processingNotes": "..."
                }
            ],
            "overall_intent": "..."
        }
    """
    try:
        if not chatDocuments:
            return {
                "document_purposes": [],
                "overall_intent": "No documents provided"
            }

        # Only metadata is sent to the AI, not document content.
        documentMetadata = [
            {
                "document_id": doc.id,
                "fileName": doc.fileName,
                "mimeType": doc.mimeType,
                "fileSize": doc.fileSize
            }
            for doc in chatDocuments
        ]

        analysisPrompt = self._createAnalysisPrompt(
            userPrompt=userPrompt,
            actionContext=actionContext,
            documentMetadata=documentMetadata
        )

        # Debug: Log purpose analysis prompt
        if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
            try:
                self.services.utils.writeDebugFile(
                    analysisPrompt,
                    "document_purpose_analysis_prompt"
                )
            except Exception as e:
                logger.debug(f"Could not write debug file for purpose analysis prompt: {e}")

        # Call AI for analysis
        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            resultFormat="json"
        )

        aiResponse = await self.services.ai.callAiContent(
            prompt=analysisPrompt,
            options=options,
            outputFormat="json"
        )

        # Debug: Log purpose analysis response
        if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
            try:
                responseContent = aiResponse.content if aiResponse and aiResponse.content else ''
                responseMetadata = {
                    "status": aiResponse.status if aiResponse else "N/A",
                    "error": aiResponse.error if aiResponse else "N/A",
                    "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0
                }
                self.services.utils.writeDebugFile(
                    f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}",
                    "document_purpose_analysis_response"
                )
            except Exception as e:
                logger.debug(f"Could not write debug file for purpose analysis response: {e}")

        if not aiResponse or not aiResponse.content:
            logger.warning("AI purpose analysis returned empty response, using defaults")
            return self._createDefaultPurposes(chatDocuments, actionContext)

        # Extract and parse JSON
        extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
        if not extractedJson:
            logger.warning("No JSON found in purpose analysis response, using defaults")
            return self._createDefaultPurposes(chatDocuments, actionContext)

        try:
            analysisResult = json.loads(extractedJson)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse purpose analysis JSON: {str(e)}")
            logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}")
            return self._createDefaultPurposes(chatDocuments, actionContext)

        # json.loads may return any JSON type; validate the shape explicitly
        # instead of relying on the blanket handler further down.
        purposes = analysisResult.get("document_purposes") if isinstance(analysisResult, dict) else None
        if not isinstance(purposes, list):
            logger.warning("Invalid analysis result structure, using defaults")
            return self._createDefaultPurposes(chatDocuments, actionContext)

        # Drop entries that are not objects; their documents get a default below.
        purposes = [dp for dp in purposes if isinstance(dp, dict)]
        analysisResult["document_purposes"] = purposes

        # Ensure all documents have purposes
        analyzedIds = {dp.get("document_id") for dp in purposes}
        for doc in chatDocuments:
            if doc.id not in analyzedIds:
                logger.warning(f"Document {doc.id} not in analysis result, adding default purpose")
                purposes.append({
                    "document_id": doc.id,
                    "purpose": self._determineDefaultPurpose(doc, actionContext),
                    "reasoning": "Default purpose based on document type and action context",
                    "extractionPrompt": None,
                    "processingNotes": None
                })

        # The documented return shape always carries this key.
        analysisResult.setdefault("overall_intent", "")

        return analysisResult

    except Exception as e:
        logger.error(f"Error analyzing document purposes: {str(e)}")
        return self._createDefaultPurposes(chatDocuments, actionContext)
docListText += f"\n{i}. Document ID: {docInfo['document_id']}\n" + docListText += f" File Name: {docInfo['fileName']}\n" + docListText += f" MIME Type: {docInfo['mimeType']}\n" + docListText += f" File Size: {docInfo['fileSize']} bytes\n" + + # Get user language + userLanguage = self._getUserLanguage() + + prompt = f"""{'='*80} +DOCUMENT PURPOSE ANALYSIS +{'='*80} + +USER PROMPT: +{userPrompt} + +ACTION CONTEXT: {actionContext} + +DOCUMENTS PROVIDED: +{docListText} +{'='*80} + +TASK: For each document, determine its purpose based on: +1. User prompt intent (what the user wants to do) +2. Action context (what action is being performed) +3. Document type (mimeType - is it text, image, etc.) +4. Document metadata (fileName, size) + +AVAILABLE PURPOSES: +- "extract_text_content": Extract text content for use in document generation +- "include_image": Include the image directly in the generated document (for images) +- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts) +- "use_as_template": Use document structure/layout as template for generation +- "use_as_reference": Use as background context/reference without detailed extraction +- "extract_data": Extract structured data (key-value pairs, entities, fields) +- "attach": Document is an attachment - don't process, just attach to output +- "convert_format": Convert document format (for convert actions) +- "translate": Translate document content (for translate actions) +- "summarize": Create summary of document (for summarize actions) +- "compare": Compare documents (for comparison actions) +- "merge": Merge documents (for merge actions) +- "extract_tables_charts": Extract tables and charts specifically +- "use_for_styling": Use document for styling/formatting reference only +- "extract_metadata": Extract only document metadata + +CRITICAL RULES: +1. 
For images (mimeType starts with "image/"): + - If user wants to "include" or "show" images → "include_image" + - If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision" + - Default for images in generateDocument → "include_image" + +2. For text documents in generateDocument: + - If user mentions "template" or "structure" → "use_as_template" + - If user mentions "reference" or "context" → "use_as_reference" + - Default → "extract_text_content" + +3. Consider action context: + - generateDocument: Usually "extract_text_content" or "include_image" + - extractData: Usually "extract_data" + - translateDocument: Usually "translate" + - summarizeDocument: Usually "summarize" + +4. Return ONLY valid JSON following this structure: +{{ + "document_purposes": [ + {{ + "document_id": "document_id_here", + "purpose": "extract_text_content", + "reasoning": "Brief explanation in language '{userLanguage}'", + "extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null", + "processingNotes": "Any special processing requirements or null" + }} + ], + "overall_intent": "Summary of how documents should be used together in language '{userLanguage}'" +}} + +5. All content must be in the language '{userLanguage}' +6. Return ONLY the JSON structure. No explanations before or after. + +Return ONLY the JSON structure. 
+""" + return prompt + + def _createDefaultPurposes( + self, + chatDocuments: List[ChatDocument], + actionContext: str + ) -> Dict[str, Any]: + """Create default purposes when AI analysis fails""" + purposes = [] + + for doc in chatDocuments: + purpose = self._determineDefaultPurpose(doc, actionContext) + purposes.append({ + "document_id": doc.id, + "purpose": purpose, + "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})", + "extractionPrompt": None, + "processingNotes": None + }) + + return { + "document_purposes": purposes, + "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action" + } + + def _determineDefaultPurpose( + self, + doc: ChatDocument, + actionContext: str + ) -> str: + """Determine default purpose based on document type and action context""" + mimeType = doc.mimeType or "" + + # Image documents + if mimeType.startswith("image/"): + if actionContext == "generateDocument": + return "include_image" + elif actionContext in ["extractData", "process"]: + return "analyze_image_vision" + else: + return "include_image" # Default for images + + # Action-specific defaults + if actionContext == "extractData": + return "extract_data" + elif actionContext == "translateDocument": + return "translate" + elif actionContext == "summarizeDocument": + return "summarize" + elif actionContext == "convertDocument" or actionContext == "convert": + return "convert_format" + elif actionContext == "generateDocument": + return "extract_text_content" + else: + # Default for other actions + return "extract_text_content" + + def _getUserLanguage(self) -> str: + """Get user language for document generation""" + try: + if self.services: + if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage: + return self.services.currentUserLanguage + elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'): + return 
async def generateStructure(
    self,
    userPrompt: str,
    documentList: Optional[Any] = None,
    cachedContent: Optional[Dict[str, Any]] = None,
    maxSectionLength: int = 500,
    existingImages: Optional[List[Dict[str, Any]]] = None
) -> Dict[str, Any]:
    """
    Ask the AI for a document skeleton and return it after validation.

    Builds the structure prompt, sends it through
    self.services.ai.callAiContent, extracts the JSON from the answer and
    passes the parsed result to self._validateAndEnhanceStructure.

    Args:
        userPrompt: User's original prompt
        documentList: Optional document references (not read by this method)
        cachedContent: Optional extracted content cache
        maxSectionLength: Maximum words for simple sections
        existingImages: Optional list of existing images to include

    Returns:
        The structure returned by self._validateAndEnhanceStructure

    Raises:
        ValueError: the AI answer is empty or contains no JSON.
        Exception: every failure is logged and re-raised.
    """
    def _dumpDebug(producePayload, fileLabel, failureNote):
        # Best-effort debug dump. The payload is built inside the try so a
        # failure while producing it is only logged at debug level.
        if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
            try:
                self.services.utils.writeDebugFile(producePayload(), fileLabel)
            except Exception as e:
                logger.debug(f"{failureNote}: {e}")

    try:
        structurePrompt = self._createStructurePrompt(
            userPrompt=userPrompt,
            cachedContent=cachedContent,
            maxSectionLength=maxSectionLength,
            existingImages=existingImages or []
        )
        _dumpDebug(
            lambda: structurePrompt,
            "document_generation_structure_prompt",
            "Could not write debug file for structure prompt"
        )

        # Imported lazily, right before the AI call that needs it.
        from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum

        aiResponse = await self.services.ai.callAiContent(
            prompt=structurePrompt,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            ),
            outputFormat="json"
        )
        _dumpDebug(
            lambda: aiResponse.content if aiResponse and aiResponse.content else '',
            "document_generation_structure_response",
            "Could not write debug file for structure response"
        )

        if not (aiResponse and aiResponse.content):
            raise ValueError("AI structure generation returned empty response")

        extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
        if not extractedJson:
            raise ValueError("No JSON found in AI structure response")

        parsedStructure = json.loads(extractedJson)
        return self._validateAndEnhanceStructure(parsedStructure, maxSectionLength)

    except Exception as e:
        logger.error(f"Error generating structure: {str(e)}")
        raise
For each section, specify: + - id: Unique identifier (e.g., "section_title_1", "section_image_1") + - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" + - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) + - generation_hint: Brief description of what content should be generated + - image_prompt: (only for image sections) Detailed prompt for image generation + - order: Section order number (starting from 1) + - elements: [] (empty array - will be populated later) + +4. Identify image sections: + - If user requests illustrations/images, create image sections + - If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them + - Add image_prompt field with detailed description for image generation (only for new images) + - Set complexity to "complex" + - For existing images: Set image_source to "existing" and image_reference_id to the image document ID + - Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}} + - Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": "doc_id_here", "order": 2, "elements": []}} + +{'='*80} +EXISTING IMAGES (to include in document): +{'='*80} +{self._formatExistingImages(existingImages) if existingImages else "No existing images provided."} +{'='*80} + +6. Identify complex text sections: + - Long chapters (>{maxSectionLength} words expected) should be marked as "complex" + - Short paragraphs/headings should be "simple" + +7. Return ONLY valid JSON following this structure: +{structureTemplate} + +5. 
CRITICAL RULES: + - Return ONLY valid JSON (no comments, no trailing commas, double quotes only) + - Follow the exact JSON schema structure provided + - IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays) + - ALL sections MUST include "generation_hint" field with a brief description of what content should be generated + - ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images + - Image sections MUST include "image_prompt" field with detailed description for image generation + - Order numbers MUST start from 1 (not 0) + - All content must be in the language '{userLanguage}' + - Do NOT generate actual content - only structure (skeleton) + - Use only supported content_type values: "heading", "paragraph", "image", "table", "bullet_list", "code_block" + +Return ONLY the JSON structure. No explanations. +""" + return prompt + + def _validateAndEnhanceStructure( + self, + structure: Dict[str, Any], + maxSectionLength: int + ) -> Dict[str, Any]: + """ + Validate structure and enhance with complexity identification. 
+ """ + try: + # Ensure structure has required fields + if "documents" not in structure: + if "sections" in structure: + # Convert single-document format to multi-document format + structure = { + "metadata": structure.get("metadata", {}), + "documents": [{ + "id": "doc_1", + "title": structure.get("metadata", {}).get("title", "Document"), + "filename": "document.json", + "sections": structure.get("sections", []) + }] + } + else: + raise ValueError("Structure missing 'documents' or 'sections' field") + + # Process each document + for doc in structure.get("documents", []): + sections = doc.get("sections", []) + + # Process and validate sections according to standardized schema + for idx, section in enumerate(sections): + # Ensure required fields + if "id" not in section: + section["id"] = f"section_{idx + 1}" + + sectionId = section.get("id", "") + section["order"] = idx + 1 + + if "elements" not in section: + section["elements"] = [] + + # Identify complexity if not set + if "complexity" not in section: + section["complexity"] = self._identifySectionComplexity( + section, + maxSectionLength + ) + + # Ensure generation_hint exists (required for content generation) + if "generation_hint" not in section or not section.get("generation_hint"): + # Create meaningful generation hint from section id or content type + contentType = section.get("content_type", "") + + # Extract meaningful hint from section ID + meaningfulHint = self._extractMeaningfulHint(sectionId, contentType, section.get("elements", [])) + section["generation_hint"] = meaningfulHint + + # Ensure image sections have proper configuration + if section.get("content_type") == "image": + imageSource = section.get("image_source", "generate") + + if imageSource == "existing": + # Existing image - ensure image_reference_id is set + if "image_reference_id" not in section: + logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id") + # Existing images are simple (no 
generation needed) + section["complexity"] = "simple" + else: + # New image generation - ensure image_prompt + if "image_prompt" not in section or not section.get("image_prompt"): + # Try to extract from generation_hint + generationHint = section.get("generation_hint", "") + if generationHint: + # Enhance generation_hint to be a proper image prompt + section["image_prompt"] = self._enhanceImagePrompt(generationHint) + else: + # Create default based on document context + docTitle = doc.get("title", "Document") + section["image_prompt"] = f"Generate an illustration for: {docTitle}" + + # Ensure complexity is set to complex for new image generation + section["complexity"] = "complex" + + return structure + + except Exception as e: + logger.error(f"Error validating structure: {str(e)}") + raise + + def _identifySectionComplexity( + self, + section: Dict[str, Any], + maxSectionLength: int + ) -> str: + """ + Identify if section is simple or complex. + + Rules: + - Images: always complex + - Long chapters (>maxSectionLength words): complex + - Others: simple + """ + contentType = section.get("content_type", "") + + # Images are always complex + if contentType == "image": + return "complex" + + # Check generation_hint for length indicators + generationHint = section.get("generation_hint", "").lower() + + # Keywords indicating long content + longContentKeywords = [ + "chapter", "long", "detailed", "comprehensive", + "extensive", "full", "complete story" + ] + + if any(keyword in generationHint for keyword in longContentKeywords): + return "complex" + + # Default to simple + return "simple" + + def _extractMeaningfulHint( + self, + sectionId: str, + contentType: str, + elements: List[Any] + ) -> str: + """ + Extract meaningful generation hint from section ID, content type, or elements. 
+ + Args: + sectionId: Section identifier (e.g., "section_heading_current_state") + contentType: Content type (e.g., "heading", "paragraph") + elements: Existing elements if any + + Returns: + Meaningful generation hint string + """ + sectionIdLower = sectionId.lower() + + # Try to extract text from existing elements first (most accurate) + if elements and isinstance(elements, list) and len(elements) > 0: + firstElement = elements[0] + if isinstance(firstElement, dict): + if "text" in firstElement and firstElement["text"]: + if contentType == "heading": + return firstElement["text"] + elif contentType == "paragraph": + return f"Content paragraph: {firstElement['text'][:50]}..." + + # Extract meaningful text from section ID + # Remove common prefixes: "section_", "section_heading_", "section_paragraph_", etc. + meaningfulPart = sectionId + for prefix in ["section_heading_", "section_paragraph_", "section_bullet_list_", + "section_code_block_", "section_image_", "section_"]: + if meaningfulPart.lower().startswith(prefix): + meaningfulPart = meaningfulPart[len(prefix):] + break + + # Convert snake_case to Title Case + # e.g., "current_state" -> "Current State" + words = meaningfulPart.replace("_", " ").split() + titleCase = " ".join(word.capitalize() for word in words if word) + + # Handle special cases + if "introduction" in sectionIdLower or "intro" in sectionIdLower: + return "Introduction paragraph" + elif "conclusion" in sectionIdLower: + return "Conclusion paragraph" + elif "footer" in sectionIdLower or "copyright" in sectionIdLower: + return "Footer content" + elif "title" in sectionIdLower and "main" in sectionIdLower: + # Main title - try to get from document title or use generic + return "Main document title" + + # Create hint based on content type and extracted text + if contentType == "heading": + if titleCase: + return titleCase + else: + return "Section heading" + elif contentType == "paragraph": + if titleCase: + return f"Content paragraph about 
{titleCase.lower()}" + else: + return f"Content paragraph" + elif contentType == "bullet_list": + if titleCase: + return f"Bullet list: {titleCase.lower()}" + else: + return "Bullet list items" + elif contentType == "code_block": + return "Code content" + else: + if titleCase: + return f"Content for {titleCase.lower()}" + else: + return f"Content for {contentType} section" + + def _extractImagePrompts( + self, + structure: Dict[str, Any] + ) -> Dict[str, str]: + """ + Extract image generation prompts from structure. + Maps section_id -> image_prompt + """ + imagePrompts = {} + + for doc in structure.get("documents", []): + for section in doc.get("sections", []): + if section.get("content_type") == "image": + sectionId = section.get("id") + imagePrompt = section.get("image_prompt") + if sectionId and imagePrompt: + imagePrompts[sectionId] = imagePrompt + + return imagePrompts + + def _formatCachedContent( + self, + cachedContent: Dict[str, Any] + ) -> str: + """ + Format cached content for prompt inclusion. + """ + try: + extractedContent = cachedContent.get("extractedContent", []) + if not extractedContent: + return "No content extracted." + + # Format ContentPart objects + formattedParts = [] + for extracted in extractedContent: + if hasattr(extracted, 'parts'): + for part in extracted.parts: + if hasattr(part, 'content'): + formattedParts.append(part.content) + elif isinstance(extracted, dict): + formattedParts.append(str(extracted)) + else: + formattedParts.append(str(extracted)) + + return "\n\n".join(formattedParts) if formattedParts else "No content extracted." + + except Exception as e: + logger.warning(f"Error formatting cached content: {str(e)}") + return "Error formatting cached content." + + def _enhanceImagePrompt(self, generationHint: str) -> str: + """ + Enhance generation hint to be a proper image generation prompt. + Adds visual details and style guidance if missing. 
+ """ + # If hint already contains visual details, use as-is + visualKeywords = ["illustration", "image", "picture", "visual", "depict", "show", "drawing"] + if any(keyword.lower() in generationHint.lower() for keyword in visualKeywords): + return generationHint + + # Enhance with visual description + enhanced = f"Create a professional illustration: {generationHint}" + return enhanced + + def _formatExistingImages(self, imageDocuments: List[Dict[str, Any]]) -> str: + """Format existing images list for prompt inclusion""" + if not imageDocuments: + return "No existing images provided." + + formatted = [] + for i, imgDoc in enumerate(imageDocuments, 1): + formatted.append(f"{i}. Image ID: {imgDoc.get('id')}") + formatted.append(f" File Name: {imgDoc.get('fileName', 'Unknown')}") + formatted.append(f" MIME Type: {imgDoc.get('mimeType', 'Unknown')}") + formatted.append(f" Alt Text: {imgDoc.get('altText', 'Image')}") + formatted.append("") + + return "\n".join(formatted) + + def _getUserLanguage(self) -> str: + """Get user language for document generation""" + try: + if self.services: + if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage: + return self.services.currentUserLanguage + elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'): + return self.services.user.language + except Exception: + pass + return 'en' # Default fallback + diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index df48b141..907e84a6 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -199,8 +199,7 @@ def closeJsonStructures(text: str) -> str: # Handle unterminated strings: find the last unclosed string # Look for patterns like: "value" or "value\n (unterminated) - # Simple heuristic: if we end with an unterminated string (odd number of quotes at end) - # Try to close it by finding the last opening quote and closing it + # Check if we're in the middle of a string 
value when text ends if result.strip(): # Count quotes - if odd number, we have an unterminated string quoteCount = result.count('"') @@ -219,6 +218,32 @@ def closeJsonStructures(text: str) -> str: # Find where the string should end (before next comma, bracket, or brace) # For now, just close it at the end result += '"' + else: + # Even number of quotes, but might still be in middle of string if cut off + # Check if text ends with a colon followed by a quote (start of string value) + # or ends with text that looks like it's inside a string (no closing quote after last quote) + import re + # Pattern: ends with "text" where text doesn't end with quote + # Look for pattern like: "text": "incomplete + if re.search(r':\s*"[^"]*$', result): + # We're in the middle of a string value, close it + result += '"' + # Also check if we end with text after a quote (like "key": "value but cut off) + elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result): + # Check if last quote is followed by non-quote, non-structural chars + lastQuotePos = result.rfind('"') + if lastQuotePos >= 0: + afterQuote = result[lastQuotePos + 1:] + # If after quote we have text but no closing quote, comma, or brace, we're in a string + if afterQuote and not re.match(r'^\s*[,}\]\]]', afterQuote): + # Check if it's escaped + escapeCount = 0 + i = lastQuotePos - 1 + while i >= 0 and result[i] == '\\': + escapeCount += 1 + i -= 1 + if escapeCount % 2 == 0: + result += '"' # Count open/close brackets and braces openBraces = result.count('{') diff --git a/modules/workflows/methods/methodAi/actions/convert.py b/modules/workflows/methods/methodAi/actions/convert.py index 1c34fa9b..788fadea 100644 --- a/modules/workflows/methods/methodAi/actions/convert.py +++ b/modules/workflows/methods/methodAi/actions/convert.py @@ -98,7 +98,7 @@ async def convert(self, parameters: Dict[str, Any]) -> ActionResult: renderOptions["columnsPerRow"] = parameters.get("columnsPerRow") renderOptions["includeHeader"] = 
parameters.get("includeHeader", True) - rendered_content, mime_type = await generationService.renderReport( + rendered_content, mime_type, _images = await generationService.renderReport( jsonData, normalizedOutputFormat, title, None, None ) diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py index 5badc321..5b5db12f 100644 --- a/modules/workflows/methods/methodAi/actions/generateDocument.py +++ b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -3,13 +3,18 @@ """ Generate Document action for AI operations. -Generates documents from scratch or based on templates/inputs. +Generates documents from scratch or based on templates/inputs using hierarchical approach. """ import logging -from typing import Dict, Any +import time +from typing import Dict, Any, Optional from modules.workflows.methods.methodBase import action -from modules.datamodels.datamodelChat import ActionResult +from modules.datamodels.datamodelChat import ActionResult, ActionDocument +from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy +from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator +from modules.services.serviceGeneration.subContentGenerator import ContentGenerator +from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer logger = logging.getLogger(__name__) @@ -17,15 +22,18 @@ logger = logging.getLogger(__name__) async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: """ GENERAL: - - Purpose: Generate documents from scratch or based on templates/inputs. + - Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach. - Input requirements: prompt or description (required); optional documentList (for templates/references). - - Output format: Document in specified format (default: docx). 
+ - Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt). Parameters: - prompt (str, required): Description of the document to generate. - documentList (list, optional): Template documents or reference documents to use as a guide. - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc. - - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx. + - resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt. + - maxSectionLength (int, optional): Maximum words for simple sections. Default: 500. + - parallelGeneration (bool, optional): Enable parallel section generation. Default: True. + - progressLogging (bool, optional): Send ChatLog progress updates. Default: True. """ prompt = parameters.get("prompt") if not prompt: @@ -33,21 +41,361 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: documentList = parameters.get("documentList", []) documentType = parameters.get("documentType") - resultType = parameters.get("resultType", "docx") + resultType = parameters.get("resultType", "txt") - aiPrompt = f"Generate a document based on the following requirements: {prompt}" - if documentType: - aiPrompt += f" Document type: {documentType}." - if documentList: - aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style." - aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization." 
+ # Auto-detect format from prompt if not explicitly provided + if resultType == "txt" and prompt: + promptLower = prompt.lower() + if "html" in promptLower or "html5" in promptLower: + resultType = "html" + logger.info(f"Auto-detected HTML format from prompt") + elif "pdf" in promptLower: + resultType = "pdf" + logger.info(f"Auto-detected PDF format from prompt") + elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"): + resultType = "md" + logger.info(f"Auto-detected Markdown format from prompt") + elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower: + resultType = "txt" + logger.info(f"Auto-detected Text format from prompt") - processParams = { - "aiPrompt": aiPrompt, - "resultType": resultType - } - if documentList: - processParams["documentList"] = documentList + maxSectionLength = parameters.get("maxSectionLength", 500) + parallelGeneration = parameters.get("parallelGeneration", True) + progressLogging = parameters.get("progressLogging", True) - return await self.process(processParams) + # Create operation ID for progress tracking + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"doc_gen_{workflowId}_{int(time.time())}" + parentOperationId = parameters.get('parentOperationId') + + try: + # Phase 1: Structure Generation + if progressLogging: + self.services.chat.progressLogStart( + operationId, + "Document", + "Structure Generation", + "Generating document structure...", + parentOperationId=parentOperationId + ) + + structureGenerator = StructureGenerator(self.services) + + # Analyze document purposes and process documents accordingly + cachedContent = None + imageDocuments = [] + documentPurposes = {} + + if documentList: + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...") + + # Convert documentList to DocumentReferenceList + from 
modules.datamodels.datamodelDocref import DocumentReferenceList + + if isinstance(documentList, DocumentReferenceList): + docRefList = documentList + elif isinstance(documentList, str): + docRefList = DocumentReferenceList.from_string_list([documentList]) + elif isinstance(documentList, list): + docRefList = DocumentReferenceList.from_string_list(documentList) + else: + docRefList = DocumentReferenceList(references=[]) + + # Get ChatDocuments + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) + if chatDocuments: + logger.info(f"Analyzing purposes for {len(chatDocuments)} documents") + + # Analyze document purposes using AI + purposeAnalyzer = DocumentPurposeAnalyzer(self.services) + purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes( + userPrompt=prompt, + chatDocuments=chatDocuments, + actionContext="generateDocument" + ) + + documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])} + logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}") + + # Separate documents by purpose + textDocs = [] + imageDocsToInclude = [] + imageDocsToAnalyze = [] + + for doc in chatDocuments: + docPurpose = documentPurposes.get(doc.id, {}) + purpose = docPurpose.get("purpose", "extract_text_content") + + if purpose == "include_image": + imageDocsToInclude.append(doc) + elif purpose == "analyze_image_vision": + imageDocsToAnalyze.append(doc) + elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]: + textDocs.append(doc) + # Skip "attach" purpose - don't process + + # Process text documents (extract content) + extractedResults = [] + if textDocs: + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...") + + # Prepare extraction options with purpose-specific prompts + extractionOptionsList = [] + for doc in textDocs: + docPurpose = 
documentPurposes.get(doc.id, {}) + extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document" + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ), + processDocumentsIndividually=True + ) + extractionOptionsList.append((doc, extractionOptions)) + + # Extract content from text documents + for doc, extractionOptions in extractionOptionsList: + try: + docResults = self.services.extraction.extractContent( + [doc], + extractionOptions, + parentOperationId=operationId + ) + extractedResults.extend(docResults) + except Exception as e: + logger.error(f"Error extracting content from {doc.fileName}: {str(e)}") + + logger.info(f"Extracted content from {len(extractedResults)} text document(s)") + + # Process images to analyze (vision call) + if imageDocsToAnalyze: + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...") + + # Extract content from images using vision analysis + for doc in imageDocsToAnalyze: + try: + docPurpose = documentPurposes.get(doc.id, {}) + extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image" + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ), + processDocumentsIndividually=True + ) + + docResults = self.services.extraction.extractContent( + [doc], + extractionOptions, + parentOperationId=operationId + ) + extractedResults.extend(docResults) + except Exception as e: + logger.error(f"Error analyzing image {doc.fileName}: {str(e)}") + + logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI") + + # Process images to include (store image data) + if imageDocsToInclude: + if progressLogging: + 
self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...") + + # Get image data for inclusion + from modules.interfaces.interfaceDbComponentObjects import getInterface + dbInterface = getInterface() + + for doc in imageDocsToInclude: + try: + # Get image bytes + imageBytes = dbInterface.getFileData(doc.fileId) + if imageBytes: + # Encode to base64 + import base64 + base64Data = base64.b64encode(imageBytes).decode('utf-8') + + # Create image document entry + imageDoc = { + "id": doc.id, + "fileName": doc.fileName, + "mimeType": doc.mimeType, + "base64Data": base64Data, + "altText": doc.fileName or "Image", + "fileSize": doc.fileSize + } + imageDocuments.append(imageDoc) + logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)") + else: + logger.warning(f"Could not retrieve image data for {doc.fileName}") + except Exception as e: + logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}") + + logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion") + + # Build cachedContent with all information + cachedContent = { + "extractedContent": extractedResults, + "imageDocuments": imageDocuments, + "documentPurposes": documentPurposes, + "extractionTimestamp": time.time(), + "sourceDocuments": [doc.id for doc in chatDocuments] + } + + logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include") + + # Generate structure + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...") + + structure = await structureGenerator.generateStructure( + userPrompt=prompt, + documentList=documentList if documentList else None, + cachedContent=cachedContent, + maxSectionLength=maxSectionLength, + existingImages=imageDocuments # Pass existing images for structure generation + ) + + if progressLogging: + 
self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") + + # Phase 2: Content Generation + if progressLogging: + self.services.chat.progressLogUpdate( + operationId, + 0.34, + "Starting content generation..." + ) + + contentGenerator = ContentGenerator(self.services) + + # Create enhanced progress callback + def progressCallback(sectionIndex: int, totalSections: int, message: str): + if progressLogging: + # Calculate progress: 34% to 90% for content generation phase + if totalSections > 0: + progress = 0.34 + (0.56 * (sectionIndex / totalSections)) + else: + progress = 0.34 + + # Format message + if sectionIndex > 0 and totalSections > 0: + progressMessage = f"Section {sectionIndex}/{totalSections}: {message}" + else: + progressMessage = message + + self.services.chat.progressLogUpdate( + operationId, + progress, + progressMessage + ) + + completeStructure = await contentGenerator.generateContent( + structure=structure, + cachedContent=cachedContent, + userPrompt=prompt, + progressCallback=progressCallback, + parallelGeneration=parallelGeneration + ) + + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") + + # Phase 3: Integration & Rendering + if progressLogging: + self.services.chat.progressLogUpdate( + operationId, + 0.91, + "Rendering final document..." 
+ ) + + # Use existing renderReport method + title = structure.get("metadata", {}).get("title", "Generated Document") + if documentType: + title = f"{title} ({documentType})" + + renderedContent, mimeType, images = await self.services.generation.renderReport( + extractedContent=completeStructure, + outputFormat=resultType, + title=title, + userPrompt=prompt, + aiService=self.services.ai + ) + + # Build list of documents to return + documents = [ + ActionDocument( + documentName=f"document.{resultType}", + documentData=renderedContent, + mimeType=mimeType + ) + ] + + # Add images as separate documents + if images: + logger.info(f"Processing {len(images)} image(s) from renderer") + import base64 + for idx, imageData in enumerate(images): + try: + base64Data = imageData.get("base64Data", "") + altText = imageData.get("altText", f"image_{idx + 1}") + caption = imageData.get("caption", "") + sectionId = imageData.get("sectionId", f"section_{idx + 1}") + + if base64Data: + # Decode base64 to bytes + imageBytes = base64.b64decode(base64Data) + + # Determine filename and mime type + filename = imageData.get("filename", f"image_{idx + 1}.png") + if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')): + filename = f"image_{idx + 1}.png" + + # Determine mime type from filename + if filename.lower().endswith('.png'): + imageMimeType = "image/png" + elif filename.lower().endswith(('.jpg', '.jpeg')): + imageMimeType = "image/jpeg" + elif filename.lower().endswith('.gif'): + imageMimeType = "image/gif" + elif filename.lower().endswith('.webp'): + imageMimeType = "image/webp" + else: + imageMimeType = "image/png" # Default + + # Add image document + documents.append(ActionDocument( + documentName=filename, + documentData=imageBytes, + mimeType=imageMimeType + )) + logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})") + else: + logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, 
skipping") + except Exception as e: + logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True) + continue + else: + logger.debug("No images returned from renderer") + + # Note: Document creation is handled by the workflow system + # We just return the rendered content and images in ActionResult + + if progressLogging: + self.services.chat.progressLogFinish(operationId, True) + + return ActionResult.isSuccess(documents=documents) + + except Exception as e: + logger.error(f"Error in hierarchical document generation: {str(e)}") + if progressLogging: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=str(e)) diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py index 101c8586..7595c2eb 100644 --- a/modules/workflows/methods/methodAi/methodAi.py +++ b/modules/workflows/methods/methodAi/methodAi.py @@ -353,11 +353,10 @@ class MethodAi(MethodBase): "resultType": WorkflowActionParameter( name="resultType", type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["docx", "pdf", "txt", "md"], + frontendType=FrontendType.TEXT, required=False, - default="docx", - description="Output format" + default="txt", + description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Any format supported by renderers is acceptable. Default: txt" ) }, execute=generateDocument.__get__(self, self.__class__) diff --git a/modules/workflows/methods/methodBase.py b/modules/workflows/methods/methodBase.py index 72f35c19..a20f5ec1 100644 --- a/modules/workflows/methods/methodBase.py +++ b/modules/workflows/methods/methodBase.py @@ -98,9 +98,13 @@ class MethodBase: self.logger.error(f"Method {self.name} has no _actions dictionary defined. 
Actions will not be available.") return result + totalActions = len(self._actions) + deniedActions = [] + for actionName, actionDef in self._actions.items(): # RBAC-Check: Prüfe ob Action für aktuellen User verfügbar ist if not self._checkActionPermission(actionDef.actionId): + deniedActions.append(f"{actionName} ({actionDef.actionId})") continue # Skip if user doesn't have permission # Konvertiere WorkflowActionDefinition zu System-Format @@ -110,6 +114,11 @@ class MethodBase: 'method': self._createActionWrapper(actionDef) } + if deniedActions: + self.logger.warning(f"Method {self.name}: {len(deniedActions)}/{totalActions} actions denied by RBAC: {deniedActions[:5]}{'...' if len(deniedActions) > 5 else ''}") + if not result and totalActions > 0: + self.logger.error(f"Method {self.name}: ALL {totalActions} actions denied by RBAC! This will result in empty action list.") + return result def _checkActionPermission(self, actionId: str) -> bool: @@ -120,22 +129,36 @@ class MethodBase: REQUIREMENT: RBAC-Service muss verfügbar sein. """ if not hasattr(self.services, 'rbac') or not self.services.rbac: - self.logger.error(f"RBAC service not available. Action {actionId} will be denied.") + self.logger.error(f"RBAC service not available (services.rbac is None). Action {actionId} will be denied.") return False - currentUser = self.services.chat.getCurrentUser() + # Get current user from services.user (not from chat service) + currentUser = getattr(self.services, 'user', None) if not currentUser: - self.logger.warning(f"No current user found. Action {actionId} will be denied.") + self.logger.warning(f"No current user found (services.user is None). 
Action {actionId} will be denied.") return False # RBAC-Check: RESOURCE context, item = actionId - permissions = self.services.rbac.getUserPermissions( - user=currentUser, - context=AccessRuleContext.RESOURCE, - item=actionId - ) - - return permissions.view + try: + permissions = self.services.rbac.getUserPermissions( + user=currentUser, + context=AccessRuleContext.RESOURCE, + item=actionId + ) + hasPermission = permissions.view + if not hasPermission: + # Log detailed RBAC denial info + userRoles = getattr(currentUser, 'roleLabels', []) or [] + self.logger.warning( + f"RBAC denied action {actionId} for user {currentUser.id}. " + f"User roles: {userRoles}, " + f"Permissions: view={permissions.view}, edit={permissions.edit}, delete={permissions.delete}. " + f"No matching RBAC rule found for context=RESOURCE, item={actionId}" + ) + return hasPermission + except Exception as e: + self.logger.error(f"RBAC check failed for action {actionId}: {str(e)}. Action will be denied.") + return False def _convertParametersToSystemFormat(self, parameters: Dict[str, WorkflowActionParameter]) -> Dict[str, Dict[str, Any]]: """Convert WorkflowActionParameter dict to system format for API/UI consumption""" diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 119a4692..b1de9f98 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -37,52 +37,6 @@ class ContentValidator: """ return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory) - def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]: - """Generic document analysis - create simple summaries with metadata.""" - summaries = [] - for doc in documents: - try: - data = getattr(doc, 'documentData', None) - name = getattr(doc, 'documentName', 'Unknown') - mimeType = getattr(doc, 'mimeType', 'unknown') - 
formatExt = self._detectFormat(doc) - sizeInfo = self._calculateSize(doc) - - # Simple preview: if it's dict/list, dump JSON; otherwise use string - preview = None - if data is not None: - if isinstance(data, (dict, list)): - preview = json.dumps(data, indent=2, ensure_ascii=False) - # Truncate if too large - if len(preview) > MAX_CONTENT_SIZE_FOR_FULL_PREVIEW: - preview = preview[:PREVIEW_SAMPLE_SIZE] + f"\n\n[Truncated - {self._formatBytes(sizeInfo['bytes'])} total]" - else: - text = str(data) - if len(text) > MAX_CONTENT_SIZE_FOR_FULL_PREVIEW: - preview = text[:PREVIEW_SAMPLE_SIZE] + f"\n\n[Truncated - {self._formatBytes(sizeInfo['bytes'])} total]" - else: - preview = text - - summary = { - "name": name, - "mimeType": mimeType, - "format": formatExt, - "size": sizeInfo["readable"], - "preview": preview - } - summaries.append(summary) - except Exception as e: - logger.warning(f"Error analyzing document {getattr(doc, 'documentName', 'Unknown')}: {str(e)}") - summaries.append({ - "name": getattr(doc, 'documentName', 'Unknown'), - "mimeType": getattr(doc, 'mimeType', 'unknown'), - "format": "unknown", - "size": "0 B", - "preview": None, - "error": str(e) - }) - return summaries - def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]: """Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs.""" try: @@ -120,9 +74,11 @@ class ContentValidator: "order": section.get("order") } + # Get elements for processing + elements = section.get("elements", []) + # For tables: extract caption and statistics if section.get("content_type") == "table": - elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: tableElement = elements[0] sectionSummary["caption"] = tableElement.get("caption") @@ -134,7 +90,6 @@ class ContentValidator: # For lists: extract item count elif section.get("content_type") == "list": - elements = section.get("elements", []) if elements and 
isinstance(elements, list) and len(elements) > 0: listElement = elements[0] items = listElement.get("items", []) @@ -142,7 +97,6 @@ class ContentValidator: # For paragraphs/headings: extract text preview elif section.get("content_type") in ["paragraph", "heading"]: - elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: textElement = elements[0] text = textElement.get("text", "") @@ -174,8 +128,10 @@ class ContentValidator: "order": section.get("order") } + # Get elements for processing + elements = section.get("elements", []) + if section.get("content_type") == "table": - elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: tableElement = elements[0] sectionSummary["caption"] = tableElement.get("caption") @@ -475,6 +431,12 @@ VALIDATION RULES: 5. Format understanding: Different formats can represent equivalent data structures. Focus on content, not format name. 6. Multi-step workflow awareness: If ACTION HISTORY is present, consider the workflow as a whole. Document metadata (e.g., extraction_method) describes how data was EXTRACTED in the last step, not necessarily how it was OBTAINED in the workflow. 7. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help. +8. CRITICAL - Data vs Data Description: When criteria require specific data types (e.g., images, tables, charts, files), distinguish between: + - ACTUAL DATA: The actual data itself (binary data, structured data, embedded content) + - DATA DESCRIPTIONS: Text fields that describe or specify what data should be created (e.g., "image_description", "table_description", "chart_specification") - these are TEXT METADATA, NOT the actual data + - If only descriptions/specifications exist but no actual data, the criterion is NOT met. Descriptions are instructions for creating data, not the data itself. 
+ - Check content types in sections/elements: if content_type matches the required data type (e.g., "image" for images, "table" for tables), actual data exists. If only text fields describing the data exist, the data is missing. + - Check document statistics: if counts for the required data type are 0, the data is missing even if descriptions exist. VALIDATION STEPS: - Check ACTION HISTORY first (if present) for PROCESS-ORIENTED criteria (e.g., "search performed", "sources used", "verification done") diff --git a/modules/workflows/processing/core/actionExecutor.py b/modules/workflows/processing/core/actionExecutor.py index 71b5572f..0e4d6ee4 100644 --- a/modules/workflows/processing/core/actionExecutor.py +++ b/modules/workflows/processing/core/actionExecutor.py @@ -84,43 +84,85 @@ class ActionExecutor: enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats logger.info(f"Expected formats: {action.expectedDocumentFormats}") - # Get current task execution operationId to pass as parent to action methods - # This MUST be the "Service Workflow Execution" operation ID (taskExec_*) - parentOperationId = None + # Get current task execution operationId (taskExec_*) - this is the parent of the action + taskOperationId = None try: progressLogger = self.services.chat.createProgressLogger() activeOperations = progressLogger.getActiveOperations() - logger.debug(f"Looking for parent operation ID. Active operations: {list(activeOperations.keys())}") + logger.debug(f"Looking for task operation ID. 
Active operations: {list(activeOperations.keys())}") # Look for task execution operation (starts with "taskExec_") - # This is the "Service Workflow Execution" level that should be parent of ALL actions + # This is the Task level that should be parent of this action for opId in activeOperations.keys(): if opId.startswith("taskExec_"): - parentOperationId = opId - logger.info(f"Found parent operation ID: {parentOperationId} for action {action.execMethod}.{action.execAction}") + taskOperationId = opId + logger.info(f"Found task operation ID: {taskOperationId} for action {action.execMethod}.{action.execAction}") break - if not parentOperationId: - logger.warning(f"No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}") + if not taskOperationId: + logger.error(f"CRITICAL: No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}. Action logs will appear at root level!") except Exception as e: - logger.error(f"Error getting parent operation ID: {str(e)}") + logger.error(f"Error getting task operation ID: {str(e)}") - # Add parentOperationId to parameters so action methods can use it - # This is critical for UI dashboard hierarchical display - if parentOperationId: - enhancedParameters['parentOperationId'] = parentOperationId - logger.info(f"Passing parentOperationId '{parentOperationId}' to action {action.execMethod}.{action.execAction}") + # Create action operationId entry - Action is child of Task + import time + actionOperationId = f"action_{action.execMethod}_{action.execAction}_{workflow.id}_{taskNum}_{actionNum}_{int(time.time())}" + + try: + # Start action progress tracking - Action is child of Task + # CRITICAL: If taskOperationId is None, the action will appear at root level + self.services.chat.progressLogStart( + actionOperationId, + action.execMethod.capitalize(), + action.execAction, + f"Task {taskNum} Action {actionNum}", + parentOperationId=taskOperationId # Will 
be None if taskExec_ not found + ) + except Exception as e: + logger.error(f"Error starting action progress log: {str(e)}") + + # Add action operationId to parameters so action methods can use it for their steps + # Action steps should be children of the action, not the task + # CRITICAL: This must always be set, even if taskOperationId is None + enhancedParameters['parentOperationId'] = actionOperationId + if taskOperationId: + logger.info(f"Created action operationId '{actionOperationId}' (parent: {taskOperationId}) for action {action.execMethod}.{action.execAction}") else: - logger.warning(f"WARNING: No parentOperationId found for action {action.execMethod}.{action.execAction}. Action logs will appear at root level!") + logger.warning(f"Created action operationId '{actionOperationId}' WITHOUT parent (taskExec_ not found) for action {action.execMethod}.{action.execAction}. Action will appear at root level!") # Check workflow status before executing the action checkWorkflowStopped(self.services) - result = await self.executeAction( - methodName=action.execMethod, - actionName=action.execAction, - parameters=enhancedParameters - ) + # Execute action and track success for progress log + result = None + actionSuccess = False + try: + result = await self.executeAction( + methodName=action.execMethod, + actionName=action.execAction, + parameters=enhancedParameters + ) + actionSuccess = result.success if result else False + except Exception as e: + logger.error(f"Error executing action: {str(e)}") + actionSuccess = False + finally: + # Finish action progress tracking + try: + self.services.chat.progressLogFinish(actionOperationId, actionSuccess) + except Exception as e: + logger.error(f"Error finishing action progress log: {str(e)}") + + # If action execution failed, return error result + if result is None: + action.setError("Action execution failed") + return ActionResult( + success=False, + documents=[], + resultLabel=action.execResultLabel, + error="Action execution 
failed" + ) + resultLabel = action.execResultLabel # Trace action result with full document metadata diff --git a/modules/workflows/processing/modes/modeDynamic.py b/modules/workflows/processing/modes/modeDynamic.py index c4654460..50889b22 100644 --- a/modules/workflows/processing/modes/modeDynamic.py +++ b/modules/workflows/processing/modes/modeDynamic.py @@ -565,10 +565,9 @@ class DynamicMode(BaseMode): methodInstance = _methods[methodName]['instance'] if actionName in methodInstance.actions: action_info = methodInstance.actions[actionName] - docstring = action_info.get('description', '') - # Extract parameter names from docstring to check if documentList exists - paramDescriptions, _ = methodInstance._extractParameterDetails(docstring) - if 'documentList' in paramDescriptions: + # Use structured WorkflowActionParameter objects from new system + parameters_def = action_info.get('parameters', {}) + if 'documentList' in parameters_def: # Convert DocumentReferenceList to string list for database serialization # Action methods will convert it back to DocumentReferenceList when needed parameters['documentList'] = docList.to_string_list() @@ -596,10 +595,9 @@ class DynamicMode(BaseMode): methodInstance = _methods[methodName]['instance'] if actionName in methodInstance.actions: action_info = methodInstance.actions[actionName] - docstring = action_info.get('description', '') - # Extract parameter names from docstring to check if connectionReference exists - paramDescriptions, _ = methodInstance._extractParameterDetails(docstring) - if 'connectionReference' in paramDescriptions: + # Use structured WorkflowActionParameter objects from new system + parameters_def = action_info.get('parameters', {}) + if 'connectionReference' in parameters_def: parameters['connectionReference'] = connectionRef logger.info(f"Added connectionReference to parameters: {connectionRef}") except Exception as e: diff --git 
a/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md b/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md new file mode 100644 index 00000000..39c649ce --- /dev/null +++ b/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md @@ -0,0 +1,354 @@ +# Architecture & Implementation Analysis +## Deep Review of Hierarchical Document Generation + +**Date**: 2025-12-22 +**Status**: Critical Issues Found + +--- + +## Executive Summary + +The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While core components exist, several fundamental issues need to be addressed. + +--- + +## ✅ What's Correctly Implemented + +### Phase 1: Core Infrastructure ✅ +- ✅ `StructureGenerator` class exists with `generateStructure()` method +- ✅ `ContentGenerator` class exists with `generateContent()` method +- ✅ `ContentIntegrator` class exists with `integrateContent()` method +- ✅ `generateDocument` action uses hierarchical approach +- ✅ Basic progress logging implemented +- ✅ Error handling with `createErrorSection()` implemented + +### Phase 2: Image Generation ✅ +- ✅ `_generateImageSection()` method implemented +- ✅ Image prompt extraction from structure +- ✅ Base64 image data storage +- ✅ Error handling for image failures + +### Phase 3: Parallel Processing ✅ +- ✅ `_generateSectionsParallel()` method implemented +- ✅ `_generateSectionsSequential()` method implemented +- ✅ Batch processing for large documents +- ✅ Progress callback system +- ✅ Exception handling in parallel execution + +--- + +## ❌ Critical Issues Found + +### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED** + +**Problem**: +- In parallel mode, sections within the same batch cannot see each other (correct) +- BUT: Sections in later batches should see sections from earlier batches +- 
**Current Status**: Code was fixed to accumulate previous sections, but needs verification + +**Location**: `subContentGenerator.py` lines 240-319 + +**Fix Applied**: +- Added `accumulatedPreviousSections` to track sections across batches +- Pass accumulated sections to each batch +- **VERIFICATION NEEDED**: Test that prompts actually show previous sections + +**Risk**: Medium - May cause continuity issues in generated content + +--- + +### Issue 2: Variable Shadowing Bug ✅ **FIXED** + +**Problem**: +- `contentType` variable was shadowed in loop, causing wrong section type in prompts + +**Location**: `subContentGenerator.py` line 676 + +**Fix Applied**: +- Renamed loop variable to `prevContentType` + +**Status**: ✅ Fixed + +--- + +### Issue 3: Missing `generation_hint` in Structure Response ✅ **FIXED** + +**Problem**: +- Structure generator creates generic hints like "Section heading" instead of meaningful hints +- AI generates same content for all headings because hints are identical + +**Location**: `subStructureGenerator.py` lines 242-269 + +**Fix Applied**: +- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs +- Example: `section_heading_current_state` → "Current State" + +**Status**: ✅ Fixed + +--- + +### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED** + +**Problem**: +- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays +- Template missing `generation_hint` and `complexity` fields +- Template showed `order: 0` but should start from 1 + +**Location**: `datamodelJson.py` + +**Fix Applied**: +- Updated template to show empty `elements: []` +- Added `generation_hint` to all sections +- Added `complexity` to all sections +- Changed `order` to start from 1 +- Added `title` to metadata + +**Status**: ✅ Fixed + +--- + +### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED** + +**Problem**: +- Prompt said "All sections must have empty elements arrays" but template 
showed filled arrays +- Prompt didn't explicitly require `generation_hint` and `complexity` fields + +**Location**: `subStructureGenerator.py` lines 181-190 + +**Fix Applied**: +- Enhanced prompt to explicitly require `generation_hint` and `complexity` +- Clarified that template examples show structure, but elements must be empty + +**Status**: ✅ Fixed + +--- + +## ⚠️ Remaining Issues & Gaps + +### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED** + +**Problem**: +- No validation that structure has required fields before content generation +- No check that all sections have `generation_hint` before generating content + +**Expected** (from Phase 6): +```python +# Validate structure before content generation +if not validateStructure(structure): + raise ValueError("Invalid structure") +``` + +**Current**: Validation happens in `_validateAndEnhanceStructure()` but only adds missing fields, doesn't validate + +**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better + +**Recommendation**: Add explicit validation method + +--- + +### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED** + +**Problem**: +- Previous sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing +- Should show `generation_hint` as fallback when elements not available + +**Location**: `subContentGenerator.py` lines 671-709 + +**Current Behavior**: +- Shows content preview if elements exist +- Shows nothing if elements don't exist + +**Expected Behavior**: +- Show content preview if elements exist +- Show `generation_hint` as fallback if elements don't exist + +**Impact**: Medium - Reduces context quality in parallel generation + +**Recommendation**: Add fallback to show `generation_hint` when elements not available + +--- + +### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED** + +**Problem**: +- Debug 
file writes `aiResponse.content` (raw AI response) before validation +- Can't verify if `generation_hint` was added by validation + +**Location**: `subStructureGenerator.py` lines 77-84 + +**Impact**: Low - Makes debugging harder but doesn't affect functionality + +**Recommendation**: Write validated structure to separate debug file + +--- + +### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED** + +**Problem**: +- No unit tests for any components (Phase 7 requirement) +- No tests for structure generation +- No tests for content generation +- No tests for integration + +**Impact**: High - No way to verify correctness or catch regressions + +**Recommendation**: Add comprehensive unit tests + +--- + +### Issue 10: Missing Integration Tests ⚠️ **NOT IMPLEMENTED** + +**Problem**: +- No end-to-end tests +- No tests with images +- No tests with long documents +- No error scenario tests + +**Impact**: High - No verification of complete flow + +**Recommendation**: Add integration tests + +--- + +### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED** + +**Problem**: +- Content is extracted and cached, but: + - No cache validation (check if documents changed) + - No cache reuse verification + - Content is passed to prompts but may not be formatted efficiently + +**Expected** (from Phase 5): +- Cache validation +- Efficient formatting +- Performance testing + +**Current**: Basic caching exists but not optimized + +**Impact**: Medium - Works but could be more efficient + +**Recommendation**: Add cache validation and optimization + +--- + +### Issue 12: Renderer Updates Not Verified ⚠️ **UNKNOWN** + +**Problem**: +- Implementation plan requires renderer updates for images +- HTML renderer should create separate image files +- PDF/XLSX/PPTX renderers should embed images +- **Status unknown** - need to verify renderers handle images correctly + +**Impact**: High - Images may not render correctly + +**Recommendation**: Verify all renderers handle images correctly 
+ +--- + +## 📋 Architecture Compliance Check + +### Data Structure Compliance ✅ + +| Field | Required | Implemented | Status | +|-------|----------|-------------|--------| +| `metadata.title` | Yes | ✅ | ✅ | +| `metadata.split_strategy` | Yes | ✅ | ✅ | +| `sections[].id` | Yes | ✅ | ✅ | +| `sections[].content_type` | Yes | ✅ | ✅ | +| `sections[].complexity` | Yes | ✅ | ✅ | +| `sections[].generation_hint` | Yes | ✅ | ✅ | +| `sections[].order` | Yes | ✅ | ✅ | +| `sections[].elements` | Yes | ✅ | ✅ | +| `sections[].image_prompt` | Image only | ✅ | ✅ | + +### Component Method Compliance ✅ + +| Component | Method | Required | Implemented | Status | +|-----------|--------|----------|-------------|--------| +| StructureGenerator | `generateStructure()` | Yes | ✅ | ✅ | +| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ | +| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ | +| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ | +| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ | +| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ | +| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ | +| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ | +| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ | +| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ | +| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ | + +--- + +## 🎯 Priority Fixes Needed + +### Critical (Must Fix) +1. ✅ **Issue 2**: Variable shadowing bug - **FIXED** +2. 
✅ **Issue 3**: Missing generation_hint - **FIXED** +3. ✅ **Issue 4**: JSON template mismatch - **FIXED** +4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED** +5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION** + +### High Priority (Should Fix) +6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION** +7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED** +8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED** + +### Medium Priority (Nice to Have) +9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED** +10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED** +11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED** +12. ⚠️ **Issue 8**: Debug file improvements - **NOT IMPLEMENTED** + +--- + +## ✅ Summary + +### What Works +- Core infrastructure is implemented +- Image generation is integrated +- Parallel processing is implemented +- Error handling is in place +- Progress logging works + +### What's Fixed (This Session) +- Variable shadowing bug +- Missing generation_hint extraction +- JSON template architecture mismatch +- Prompt instructions clarity +- Previous sections tracking (needs verification) + +### What Needs Work +- Unit and integration tests +- Renderer verification +- Previous sections formatting fallback +- Cache optimization +- Structure validation + +### Overall Status +**Architecture**: ✅ **85% Compliant** +**Implementation**: ✅ **80% Complete** +**Testing**: ❌ **0% Complete** +**Production Ready**: ⚠️ **Not Yet** (needs testing and verification) + +--- + +## Next Steps + +1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode +2. **Verify Issue 12**: Test that all renderers handle images correctly +3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator) +4. **Add Integration Tests**: Test end-to-end flow with various scenarios +5. 
**Improve Previous Sections Formatting**: Add fallback to show generation_hint when elements not available +6. **Add Structure Validation**: Explicit validation before content generation +7. **Optimize Content Caching**: Add cache validation and efficient formatting + +--- + +**Analysis Complete**: 2025-12-22 + diff --git a/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md new file mode 100644 index 00000000..d0a59e80 --- /dev/null +++ b/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md @@ -0,0 +1,459 @@ +# Concept: Hierarchical Document Generation with Image Integration + +## Executive Summary + +This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently. + +**Key Decisions**: +- ✅ **Performance**: Parallel processing with ChatLog progress messages +- ✅ **Error Handling**: Skip failed sections, show error messages +- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access) +- ✅ **Backward Compatibility**: Not needed - implement as new default + +**Renderer Status**: +- ✅ **Ready**: Text, Markdown, DOCX renderers +- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images) +- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support) + +## Problem Statement + +Currently, the document generation system has the following limitations: + +1. **No Image Integration**: Images are generated separately but cannot be embedded into document structures +2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters) +3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily +4. 
**No Structured Approach**: No mechanism to first define document structure, then populate sections + +## Current Architecture Analysis + +### Current Flow: +``` +User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document +``` + +### Issues: +- AI generates complete JSON structure in one pass +- Images are generated separately via `ai.generate` action +- No mechanism to integrate generated images into document structure +- JSON schema supports `image` content_type, but AI rarely generates it +- Content extraction happens per action, not cached/reused + +### Current Image Handling: +- Images can be rendered IF they exist in JSON structure (`content_type: "image"`) +- Image data expected as `base64Data` in elements +- Renderers support image rendering (Docx, PDF, HTML, etc.) +- But images are never generated WITHIN document generation + +## Proposed Solution: Hierarchical Document Generation + +### Core Concept + +**Three-Phase Approach:** +1. **Structure Generation Phase**: Generate document skeleton with section placeholders +2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts +3. 
**Integration Phase**: Merge all generated content into final document structure + +### Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Phase 1: Structure Generation │ +│ - Generate document skeleton │ +│ - Identify sections (text, image, complex) │ +│ - Create section placeholders with metadata │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 2: Content Generation (Tree-like) │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Section 1: Heading (simple) │ │ +│ │ → Generate directly │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Section 2: Paragraph (simple) │ │ +│ │ → Generate directly │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Section 3: Image (complex) │ │ +│ │ → Sub-prompt: Generate image │ │ +│ │ → Store image data │ │ +│ │ → Create image section with base64Data │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Section 4: Long Chapter (complex) │ │ +│ │ → Sub-prompt: Generate chapter content │ │ +│ │ → Split into subsections if needed │ │ +│ └──────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 3: Integration │ +│ - Merge all generated content │ +│ - Replace placeholders with actual data │ +│ - Validate structure completeness │ +│ - Render to final format │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Detailed Design + +### Phase 1: Structure Generation + +**Purpose**: Create document skeleton with section metadata + +**Process**: +1. AI generates document structure with sections +2. 
Each section includes: + - `id`: Unique identifier + - `content_type`: Type (heading, paragraph, image, table, etc.) + - `complexity`: "simple" or "complex" + - `generation_hint`: Instructions for content generation + - `order`: Section order + - `elements`: Empty or placeholder + +**Example Structure**: +```json +{ + "metadata": { + "title": "Children's Bedtime Story", + "split_strategy": "single_document" + }, + "documents": [{ + "id": "doc_1", + "sections": [ + { + "id": "section_title", + "content_type": "heading", + "complexity": "simple", + "generation_hint": "Story title", + "order": 1, + "elements": [] + }, + { + "id": "section_intro", + "content_type": "paragraph", + "complexity": "simple", + "generation_hint": "Introduction paragraph", + "order": 2, + "elements": [] + }, + { + "id": "section_image_1", + "content_type": "image", + "complexity": "complex", + "generation_hint": "Illustration: Rabbit meeting owl in moonlit forest", + "image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch", + "order": 3, + "elements": [] + }, + { + "id": "section_chapter_1", + "content_type": "paragraph", + "complexity": "complex", + "generation_hint": "First chapter: Rabbit's adventure begins", + "order": 4, + "elements": [] + } + ] + }] +} +``` + +### Phase 2: Content Generation + +**Purpose**: Generate actual content for each section + +**Process**: +1. Iterate through sections in order +2. 
For each section: + - **Simple sections** (heading, short paragraph): + - Generate content directly via AI + - Populate `elements` array + - **Complex sections** (image, long chapter): + - Create sub-prompt based on `generation_hint` and `image_prompt` + - Generate content via specialized action: + - Images: `ai.generate` with image generation + - Long text: `ai.process` with focused prompt + - Store generated content + - Populate `elements` array + +**Content Caching**: +- Extract content from source documents ONCE at the start +- Cache extracted content for reuse across all sections +- Pass cached content to sub-prompts to avoid re-extraction + +**Image Generation**: +- For `content_type: "image"` sections: + - Use `image_prompt` from structure + - Call `ai.generate` action with image generation + - Receive base64 image data + - Create image element: + ```json + { + "url": "data:image/png;base64,", + "base64Data": "", + "altText": "", + "caption": "" + } + ``` + +### Phase 3: Integration + +**Purpose**: Merge all content into final document structure + +**Process**: +1. Validate all sections have content +2. Merge generated content into structure +3. Replace placeholders with actual data +4. Finalize JSON structure +5. Render to target format (docx, pdf, html, etc.) + +## Implementation Strategy + +### New Components Needed + +1. **Structure Generator** (`structureGenerator.py`) + - Generates document skeleton + - Identifies section complexity + - Creates generation hints + +2. **Content Generator** (`contentGenerator.py`) + - Generates content for each section + - Handles simple vs complex sections + - Manages sub-prompts and image generation + - Caches extracted content + +3. **Content Integrator** (`contentIntegrator.py`) + - Merges generated content + - Validates completeness + - Finalizes document structure + +### Modified Components + +1. 
**`generateDocument` action** + - Implement hierarchical generation as default + - Orchestrate three phases + - Add progress logging for each phase + +2. **`process` action** + - Support content caching (extract once, reuse) + - Support sub-prompt generation for sections + +3. **Prompt Builder** (`subPromptBuilderGeneration.py`) + - Add structure generation prompt + - Add section-specific content prompts + - Add image generation prompt templates + +4. **Renderers** (Update required): + - **HTML Renderer**: Create separate image files and link them + - **PDF Renderer**: Embed images using reportlab + - **XLSX Renderer**: Add image embedding support + - **PPTX Renderer**: Add image embedding support + +### New Action Parameters + +**For `generateDocument`**: +- `enableImageIntegration`: boolean (default: true) +- `maxSectionLength`: int (threshold for "complex" sections, default: 500 words) +- `parallelGeneration`: boolean (default: true) - enable parallel section generation +- `progressLogging`: boolean (default: true) - send ChatLog progress updates + +**For sub-prompts**: +- `sectionContext`: Previous sections for context +- `cachedContent`: Extracted content cache (to avoid re-extraction) +- `targetSection`: Section metadata +- `previousSections`: Array of already-generated sections for continuity + +## Benefits + +1. **Image Integration**: Images can be generated and embedded into documents +2. **Structured Approach**: Clear separation of structure and content +3. **Efficiency**: Content extracted once, reused across sections +4. **Scalability**: Can handle very long documents by splitting into sections +5. **Quality**: Better control over complex sections (images, long chapters) +6. **Flexibility**: Can generate different content types per section + +## Migration Strategy + +**Note**: No backwards compatibility needed - can implement directly as new default. + +1. **Phase 1**: Implement hierarchical generation as new default +2. 
**Phase 2**: Update renderers (HTML, PDF, XLSX, PPTX) for image support +3. **Phase 3**: Testing and refinement +4. **Phase 4**: Remove old single-pass mode (or keep as internal fallback only) + +## Example Workflow + +**User Request**: "Create a children's bedtime story with 5 illustrations" + +**Phase 1 Output**: +```json +{ + "metadata": {"title": "Flöckchen's Adventure"}, + "documents": [{ + "sections": [ + {"id": "title", "content_type": "heading", "complexity": "simple", ...}, + {"id": "intro", "content_type": "paragraph", "complexity": "simple", ...}, + {"id": "img1", "content_type": "image", "complexity": "complex", + "image_prompt": "Rabbit meeting owl", ...}, + {"id": "chapter1", "content_type": "paragraph", "complexity": "complex", ...}, + {"id": "img2", "content_type": "image", "complexity": "complex", ...}, + ... + ] + }] +} +``` + +**Phase 2 Process**: +- Generate title → populate elements +- Generate intro → populate elements +- Generate image 1 → call `ai.generate`, store base64 → populate elements +- Generate chapter 1 → sub-prompt → populate elements +- Generate image 2 → call `ai.generate`, store base64 → populate elements +- ... + +**Phase 3 Output**: Complete document with all sections populated, ready for rendering + +## Renderer Readiness Assessment + +### Current Renderer Status for Image Handling: + +1. **Text Renderer** (`rendererText.py`): ✅ **READY** + - Skips images, shows placeholder: `[Image: altText]` + - No changes needed + +2. **Markdown Renderer** (`rendererMarkdown.py`): ✅ **READY** + - Shows placeholder with truncated base64: `![altText](data:image/png;base64,...)` + - No changes needed (markdown limitation) + +3. 
**HTML Renderer** (`rendererHtml.py`): ⚠️ **NEEDS UPDATE** + - Currently: Embeds base64 directly in `<img>` tag as data URI + - **Required Change**: Create separate image files and link to them + - Implementation: Generate image files (e.g., `image_1.png`, `image_2.png`) alongside HTML + - Update `<img>` tags to use relative paths: `<img src="image_1.png" alt="...">` + - Return multiple files: HTML file + image files + +4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE** + - Currently: Shows placeholder `[Image: altText]` + - **Required Change**: Embed images directly in PDF using reportlab + - Implementation: Use `reportlab.platypus.Image()` with base64 decoded bytes + +5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY** + - Embeds images directly using `doc.add_picture()` + - Adds captions below images + - No changes needed + +6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION** + - Currently: No image handling found + - **Required Change**: Add image support using openpyxl + - Implementation: Use `openpyxl.drawing.image.Image()` to embed images in cells + - Store images in worksheet cells or as floating images + +7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION** + - Currently: No image handling found + - **Required Change**: Add image support using python-pptx + - Implementation: Use `slide.shapes.add_picture()` to add images to slides + +### Renderer Update Requirements: + +**Priority 1 (Critical for HTML output)**: +- HTML Renderer: Create separate image files and link them + +**Priority 2 (Important for document formats)**: +- PDF Renderer: Embed images using reportlab +- XLSX Renderer: Add image embedding support +- PPTX Renderer: Add image embedding support + +## Answers to Open Questions + +### 1. Performance: How to handle very large documents (100+ sections)? + +**Answer**: Use parallel processing where possible, with progress ChatLog messages. 
+ +**Implementation Strategy**: +- **Parallel Section Generation**: Generate independent sections in parallel using asyncio +- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time) +- **Progress Tracking**: Send ChatLog progress updates: + - "Generating structure..." (Phase 1) + - "Generating content for section X/Y..." (Phase 2) + - "Generating image for section X..." (Phase 2 - images) + - "Merging content..." (Phase 3) + - "Rendering final document..." (Phase 3) +- **Streaming**: For very large documents, consider streaming partial results + +**Example Progress Messages**: +``` +Phase 1: Structure Generation (0% → 33%) +Phase 2: Content Generation (33% → 90%) + - Section 1/10: Heading (34%) + - Section 2/10: Paragraph (40%) + - Section 3/10: Image generation (50%) + - Section 4/10: Chapter (60%) + ... +Phase 3: Integration & Rendering (90% → 100%) +``` + +### 2. Error Handling: What if one section fails? + +**Answer**: Skip failed sections, keep section title and type, show error message in the section. + +**Implementation Strategy**: +- **Graceful Degradation**: Continue processing remaining sections +- **Error Section**: Create error placeholder section: + ```json + { + "id": "section_failed_3", + "content_type": "paragraph", + "elements": [{ + "text": "[ERROR: Failed to generate content for this section. Error: <error_message>]" + }], + "order": 3, + "error": true, + "errorMessage": "<error_message>" + } + ``` +- **Logging**: Log errors for debugging but don't fail the entire document +- **User Notification**: Include error count in final progress message + +### 3. Image Storage: Where to store generated images? + +**Answer**: Store images in JSON as base64, as renderers need them afterwards. 
+ +**Implementation Strategy**: +- **In-Memory Storage**: Keep base64 strings in JSON structure during generation +- **JSON Structure**: Store in section elements: + ```json + { + "url": "data:image/png;base64,", + "base64Data": "", + "altText": "Image description", + "caption": "Optional caption" + } + ``` +- **Memory Management**: For very large images, consider compression or chunking +- **Renderer Access**: All renderers can access `base64Data` directly from JSON +- **HTML Special Case**: HTML renderer will extract base64, decode, and save as separate files during rendering + +### 4. Backward Compatibility: How to ensure existing workflows still work? + +**Answer**: No backwards compatibility needed. + +**Implementation Strategy**: +- **New Default**: Hierarchical generation becomes the default mode +- **Clean Migration**: All document generation uses hierarchical approach +- **No Fallback**: Remove single-pass mode (or keep as internal fallback only) +- **Breaking Change**: Acceptable since this is a new feature/enhancement + +## Next Steps + +1. **Review and Approval**: Get feedback on concept +2. **Detailed Design**: Design API and data structures +3. **Prototype**: Implement Phase 1 (structure generation) +4. **Testing**: Test with real use cases +5. **Full Implementation**: Implement all phases +6. **Migration**: Migrate existing workflows + diff --git a/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md new file mode 100644 index 00000000..55a0c35c --- /dev/null +++ b/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md @@ -0,0 +1,1067 @@ +# Detailed Design: Hierarchical Document Generation with Image Integration + +## Table of Contents + +1. [Architecture Overview](#architecture-overview) +2. [Data Structures](#data-structures) +3. [Component Design](#component-design) +4. [API Design](#api-design) +5. 
[Image Handling](#image-handling) +6. [Progress Logging](#progress-logging) +7. [Error Handling](#error-handling) +8. [Performance Considerations](#performance-considerations) + +## Architecture Overview + +### System Flow + +``` +┌─────────────────────────────────────────────────────────────┐ +│ User Request: generateDocument │ +│ Parameters: prompt, documentList, resultType, etc. │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 1: Structure Generation │ +│ - Extract content from documentList (if provided) │ +│ - Cache extracted content │ +│ - Generate document skeleton with sections │ +│ - Identify section complexity │ +│ - Create generation hints │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 2: Content Generation (Parallel) │ +│ │ +│ Simple Sections (heading, short paragraph): │ +│ ┌────────────────────────────────────────┐ │ +│ │ Generate content directly via AI │ │ +│ │ Populate elements array │ │ +│ └────────────────────────────────────────┘ │ +│ │ +│ Complex Sections (image, long chapter): │ +│ ┌────────────────────────────────────────┐ │ +│ │ Create sub-prompt │ │ +│ │ Generate content (text or image) │ │ +│ │ Store in elements array │ │ +│ └────────────────────────────────────────┘ │ +│ │ +│ Progress Updates: │ +│ - "Generating section X/Y..." │ +│ - "Generating image for section X..." │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 3: Integration & Rendering │ +│ - Validate all sections have content │ +│ - Merge generated content into structure │ +│ - Replace placeholders with actual data │ +│ - Render to target format (docx, pdf, html, etc.) 
│ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Final Document(s) │ +│ - Single document (docx, pdf, html, etc.) │ +│ - Or multiple files (html + image files) │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Data Structures + +### Document Structure (Phase 1 Output) + +```python +{ + "metadata": { + "title": str, + "split_strategy": str, # "single_document" | "multi_document" + "source_documents": List[str], + "extraction_method": str + }, + "documents": [ + { + "id": str, + "title": str, + "filename": str, + "sections": [ + { + "id": str, + "content_type": str, # "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" + "complexity": str, # "simple" | "complex" + "generation_hint": str, + "image_prompt": Optional[str], # Only for image sections + "order": int, + "elements": [], # Empty initially, populated in Phase 2 + "metadata": Optional[Dict[str, Any]] + } + ] + } + ] +} +``` + +### Section Content (Phase 2 Output) + +**Simple Section (heading)**: +```python +{ + "id": "section_title", + "content_type": "heading", + "elements": [ + { + "level": int, + "text": str + } + ], + "order": 1 +} +``` + +**Simple Section (paragraph)**: +```python +{ + "id": "section_intro", + "content_type": "paragraph", + "elements": [ + { + "text": str + } + ], + "order": 2 +} +``` + +**Complex Section (image)**: +```python +{ + "id": "section_image_1", + "content_type": "image", + "elements": [ + { + "url": "data:image/png;base64,", + "base64Data": str, # Full base64 encoded image + "altText": str, + "caption": Optional[str] + } + ], + "order": 3 +} +``` + +**Error Section**: +```python +{ + "id": "section_failed_4", + "content_type": "paragraph", + "elements": [ + { + "text": f"[ERROR: Failed to generate content for this section. 
Error: {error_message}]" + } + ], + "order": 4, + "error": True, + "errorMessage": str, + "originalContentType": str # Original content_type that failed +} +``` + +### Content Cache + +```python +{ + "extractedContent": List[ContentPart], # From extraction service + "extractionTimestamp": float, + "sourceDocuments": List[str] # Document IDs +} +``` + +### Generation Context + +```python +{ + "userPrompt": str, + "cachedContent": ContentCache, + "previousSections": List[Dict[str, Any]], # Already generated sections + "targetSection": Dict[str, Any], # Section to generate + "documentMetadata": Dict[str, Any] +} +``` + +## Component Design + +### 1. StructureGenerator + +**Purpose**: Generate document skeleton with section placeholders + +**Location**: `poweron/gateway/modules/services/serviceGeneration/subStructureGenerator.py` + +**Methods**: +```python +class StructureGenerator: + async def generateStructure( + self, + userPrompt: str, + documentList: Optional[DocumentReferenceList], + cachedContent: Optional[ContentCache], + services: Any + ) -> Dict[str, Any]: + """ + Generate document structure with sections. + + Returns: + Document structure with empty elements arrays + """ + + def _createStructurePrompt( + self, + userPrompt: str, + cachedContent: Optional[ContentCache], + services: Any + ) -> str: + """ + Create prompt for structure generation. + """ + + def _identifySectionComplexity( + self, + section: Dict[str, Any], + userPrompt: str + ) -> str: + """ + Identify if section is simple or complex. + + Rules: + - Images: always complex + - Long chapters (>maxSectionLength words): complex + - Others: simple + """ + + def _extractImagePrompts( + self, + structure: Dict[str, Any], + userPrompt: str + ) -> Dict[str, str]: + """ + Extract image generation prompts from structure and user prompt. + Maps section_id -> image_prompt + """ +``` + +### 2. 
ContentGenerator + +**Purpose**: Generate content for each section + +**Location**: `poweron/gateway/modules/services/serviceGeneration/subContentGenerator.py` + +**Methods**: +```python +class ContentGenerator: + async def generateContent( + self, + structure: Dict[str, Any], + cachedContent: Optional[ContentCache], + userPrompt: str, + services: Any, + progressCallback: Optional[Callable] = None + ) -> Dict[str, Any]: + """ + Generate content for all sections in structure. + + Args: + structure: Document structure from Phase 1 + cachedContent: Extracted content cache + userPrompt: Original user prompt + services: Services instance + progressCallback: Function to call for progress updates + + Returns: + Complete document structure with populated elements + """ + + async def _generateSectionContent( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any + ) -> Dict[str, Any]: + """ + Generate content for a single section. + + Returns: + Section with populated elements array + """ + + async def _generateSimpleSection( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any + ) -> Dict[str, Any]: + """ + Generate content for simple section (heading, paragraph). + """ + + async def _generateImageSection( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any + ) -> Dict[str, Any]: + """ + Generate image for image section. + Calls ai.generate action with image generation. + """ + + async def _generateComplexTextSection( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any + ) -> Dict[str, Any]: + """ + Generate content for complex text section (long chapter). + Uses focused sub-prompt. + """ + + async def _generateSectionsParallel( + self, + sections: List[Dict[str, Any]], + context: GenerationContext, + services: Any, + progressCallback: Optional[Callable] = None + ) -> List[Dict[str, Any]]: + """ + Generate content for multiple sections in parallel. 
+ Uses asyncio.gather for parallel execution. + """ + + def _createSectionPrompt( + self, + section: Dict[str, Any], + context: GenerationContext + ) -> str: + """ + Create sub-prompt for section content generation. + """ +``` + +### 3. ContentIntegrator + +**Purpose**: Merge generated content and render final document + +**Location**: `poweron/gateway/modules/services/serviceGeneration/subContentIntegrator.py` + +**Methods**: +```python +class ContentIntegrator: + def integrateContent( + self, + structure: Dict[str, Any], + generatedSections: List[Dict[str, Any]] + ) -> Dict[str, Any]: + """ + Merge generated sections into document structure. + + Returns: + Complete document structure ready for rendering + """ + + def validateCompleteness( + self, + document: Dict[str, Any] + ) -> Tuple[bool, List[str]]: + """ + Validate that all sections have content. + + Returns: + (is_complete, list_of_missing_sections) + """ + + def createErrorSection( + self, + originalSection: Dict[str, Any], + errorMessage: str + ) -> Dict[str, Any]: + """ + Create error placeholder section. + """ +``` + +### 4. Modified generateDocument Action + +**Location**: `poweron/gateway/modules/workflows/methods/methodAi/actions/generateDocument.py` + +**Changes**: +```python +@action +async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Generate documents using hierarchical approach. 
+ """ + # Extract parameters + prompt = parameters.get("prompt") + documentList = parameters.get("documentList", []) + resultType = parameters.get("resultType", "docx") + maxSectionLength = parameters.get("maxSectionLength", 500) + parallelGeneration = parameters.get("parallelGeneration", True) + progressLogging = parameters.get("progressLogging", True) + + # Create operation ID for progress tracking + operationId = f"doc_gen_{self.services.workflow.id}_{int(time.time())}" + parentOperationId = parameters.get('parentOperationId') + + try: + # Phase 1: Structure Generation + if progressLogging: + self.services.chat.progressLogStart( + operationId, + "Document", + "Structure Generation", + "Generating document structure...", + parentOperationId=parentOperationId + ) + + structureGenerator = StructureGenerator(self.services) + + # Extract and cache content if documentList provided + cachedContent = None + if documentList: + # Extract content once + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) + if chatDocuments: + extractionOptions = ExtractionOptions( + prompt="Extract all content from documents", + mergeStrategy=MergeStrategy(mergeType="concatenate") + ) + extractedResults = self.services.extraction.extractContent( + chatDocuments, + extractionOptions + ) + cachedContent = { + "extractedContent": extractedResults, + "extractionTimestamp": time.time(), + "sourceDocuments": [doc.id for doc in chatDocuments] + } + + # Generate structure + structure = await structureGenerator.generateStructure( + userPrompt=prompt, + documentList=documentList, + cachedContent=cachedContent, + services=self.services + ) + + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") + + # Phase 2: Content Generation + if progressLogging: + self.services.chat.progressLogUpdate( + operationId, + 0.34, + "Starting content generation..." 
+ ) + + contentGenerator = ContentGenerator(self.services) + + def progressCallback(sectionIndex: int, totalSections: int, message: str): + if progressLogging: + progress = 0.34 + (0.56 * (sectionIndex / totalSections)) + self.services.chat.progressLogUpdate( + operationId, + progress, + f"Section {sectionIndex}/{totalSections}: {message}" + ) + + completeStructure = await contentGenerator.generateContent( + structure=structure, + cachedContent=cachedContent, + userPrompt=prompt, + services=self.services, + progressCallback=progressCallback + ) + + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") + + # Phase 3: Integration & Rendering + if progressLogging: + self.services.chat.progressLogUpdate( + operationId, + 0.91, + "Rendering final document..." + ) + + # Use existing renderReport method + title = structure.get("metadata", {}).get("title", "Generated Document") + renderedContent, mimeType = await self.services.generation.renderReport( + extractedContent=completeStructure, + outputFormat=resultType, + title=title, + userPrompt=prompt, + aiService=self.services.ai + ) + + # Create document + document = self.services.generation._createDocument( + fileName=f"document.{resultType}", + mimeType=mimeType, + content=renderedContent, + base64encoded=(mimeType not in ["text/plain", "text/html", "text/markdown"]), + messageId=None + ) + + if progressLogging: + self.services.chat.progressLogFinish(operationId, True) + + return ActionResult.isSuccess( + documents=[ActionDocument( + documentName=f"document.{resultType}", + documentData=renderedContent, + mimeType=mimeType + )] + ) + + except Exception as e: + logger.error(f"Error in hierarchical document generation: {str(e)}") + if progressLogging: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=str(e)) +``` + +## API Design + +### Structure Generation Prompt + +```python +def _createStructurePrompt( + userPrompt: str, + 
cachedContent: Optional[ContentCache], + services: Any +) -> str: + """ + Create prompt for structure generation. + """ + prompt = f""" +{'='*80} +USER REQUEST: +{'='*80} +{userPrompt} +{'='*80} + +TASK: Generate a document STRUCTURE (skeleton) with sections. +Do NOT generate actual content yet - only the structure. + +{'='*80} +EXTRACTED CONTENT (if available): +{'='*80} +{_formatCachedContent(cachedContent) if cachedContent else "No source documents provided."} +{'='*80} + +INSTRUCTIONS: +1. Analyze the user request and extracted content +2. Create a document structure with sections +3. For each section, specify: + - id: Unique identifier + - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" + - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) + - generation_hint: Brief description of what content should be generated + - image_prompt: (only for image sections) Detailed prompt for image generation + - order: Section order number + - elements: [] (empty array - will be populated later) + +4. Identify image sections: + - If user requests illustrations/images, create image sections + - Add image_prompt field with detailed description + - Set complexity to "complex" + +5. Identify complex text sections: + - Long chapters (>500 words expected) should be marked as "complex" + - Short paragraphs/headings should be "simple" + +6. 
Return ONLY valid JSON following this structure: +{{ + "metadata": {{ + "title": "Document Title", + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }}, + "documents": [ + {{ + "id": "doc_1", + "title": "Document Title", + "filename": "document.json", + "sections": [ + {{ + "id": "section_1", + "content_type": "heading", + "complexity": "simple", + "generation_hint": "Main title", + "order": 1, + "elements": [] + }}, + {{ + "id": "section_2", + "content_type": "image", + "complexity": "complex", + "generation_hint": "Illustration for chapter 1", + "image_prompt": "Detailed description for image generation", + "order": 2, + "elements": [] + }} + ] + }} + ] +}} + +Return ONLY the JSON structure. No explanations. +""" + return prompt +``` + +### Section Content Generation Prompt + +```python +def _createSectionPrompt( + section: Dict[str, Any], + context: GenerationContext +) -> str: + """ + Create sub-prompt for section content generation. + """ + sectionType = section.get("content_type") + generationHint = section.get("generation_hint", "") + + prompt = f""" +{'='*80} +SECTION TO GENERATE: +{'='*80} +Type: {sectionType} +Hint: {generationHint} +{'='*80} + +CONTEXT: +- User Request: {context.userPrompt} +- Previous Sections: {len(context.previousSections)} sections already generated +- Document Title: {context.documentMetadata.get('title', 'Unknown')} + +{'='*80} +EXTRACTED CONTENT (if available): +{'='*80} +{_formatCachedContent(context.cachedContent) if context.cachedContent else "None"} +{'='*80} + +TASK: Generate content for this section ONLY. + +INSTRUCTIONS: +1. Generate content appropriate for section type: {sectionType} +2. Use the generation hint: {generationHint} +3. Consider previous sections for continuity +4. Use extracted content if relevant + +5. 
Return ONLY the elements array for this section: + +For heading: +{{ + "elements": [ + {{"level": 1, "text": "Heading Text"}} + ] +}} + +For paragraph: +{{ + "elements": [ + {{"text": "Paragraph text content"}} + ] +}} + +For image: +{{ + "elements": [ + {{ + "url": "data:image/png;base64,", + "base64Data": "", + "altText": "Image description", + "caption": "Optional caption" + }} + ] +}} + +Return ONLY the elements array as JSON. No other text. +""" + return prompt +``` + +## Image Handling + +### Image Generation Flow + +```python +async def _generateImageSection( + section: Dict[str, Any], + context: GenerationContext, + services: Any +) -> Dict[str, Any]: + """ + Generate image for image section. + """ + imagePrompt = section.get("image_prompt") + if not imagePrompt: + raise ValueError(f"Image section {section.get('id')} missing image_prompt") + + # Call ai.generate action with image generation + from modules.workflows.methods.methodAi.actions.generate import generate + + generateParams = { + "prompt": imagePrompt, + "resultType": "png", + "parentOperationId": context.operationId + } + + result = await generate(self=services.ai, parameters=generateParams) + + if not result.success or not result.documents: + raise ValueError(f"Image generation failed: {result.error}") + + # Extract base64 image data + imageDoc = result.documents[0] + base64Data = imageDoc.documentData + + # Create image element + section["elements"] = [{ + "url": f"data:image/png;base64,{base64Data}", + "base64Data": base64Data, + "altText": section.get("generation_hint", "Image"), + "caption": section.get("metadata", {}).get("caption") + }] + + return section +``` + +### HTML Renderer Image Handling + +**Location**: `poweron/gateway/modules/services/serviceGeneration/renderers/rendererHtml.py` + +**Changes**: +```python +async def render( + self, + extractedContent: Dict[str, Any], + title: str, + userPrompt: str = None, + aiService=None +) -> Tuple[str, str]: + """ + Render HTML with separate 
image files. + + Returns: + (html_content, mime_type) + """ + # Generate HTML + htmlContent = await self._generateHtmlFromJson(...) + + # Extract images and create separate files + images = self._extractImages(extractedContent) + + if images: + # Create image files + imageFiles = [] + for idx, imageData in enumerate(images): + base64Data = imageData.get("base64Data") + if base64Data: + # Decode base64 + imageBytes = base64.b64decode(base64Data) + + # Create filename + filename = f"image_{idx + 1}.png" + + # Update HTML to use relative path + htmlContent = htmlContent.replace( + f'data:image/png;base64,{base64Data}', + filename + ) + + imageFiles.append({ + "filename": filename, + "content": imageBytes, + "mimeType": "image/png" + }) + + # Return HTML + image files info + # Note: This requires modification to return multiple files + # For now, embed base64 (will be updated in implementation) + return htmlContent, "text/html" + + return htmlContent, "text/html" + +def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Extract all images from JSON structure. 
+ """ + images = [] + + documents = jsonContent.get("documents", []) + if not documents: + sections = jsonContent.get("sections", []) + documents = [{"sections": sections}] + + for doc in documents: + sections = doc.get("sections", []) + for section in sections: + if section.get("content_type") == "image": + elements = section.get("elements", []) + for element in elements: + if element.get("base64Data"): + images.append(element) + + return images +``` + +## Progress Logging + +### Progress Stages + +```python +PROGRESS_STAGES = { + "structure_generation": { + "start": 0.0, + "end": 0.33, + "messages": [ + "Extracting content from documents...", + "Generating document structure...", + "Structure generated" + ] + }, + "content_generation": { + "start": 0.34, + "end": 0.90, + "messages": [ + "Starting content generation...", + "Generating section {current}/{total}...", + "Generating image for section {section_id}...", + "Content generated" + ] + }, + "integration_rendering": { + "start": 0.91, + "end": 1.0, + "messages": [ + "Rendering final document...", + "Document complete" + ] + } +} +``` + +### Progress Callback Implementation + +```python +def createProgressCallback( + operationId: str, + totalSections: int, + services: Any +) -> Callable: + """ + Create progress callback function. 
+ """ + def progressCallback( + sectionIndex: int, + totalSections: int, + message: str + ): + # Calculate progress + baseProgress = 0.34 # Start of content generation phase + phaseProgress = 0.56 # Length of content generation phase + sectionProgress = (sectionIndex / totalSections) * phaseProgress + currentProgress = baseProgress + sectionProgress + + # Update progress log + services.chat.progressLogUpdate( + operationId, + currentProgress, + f"Section {sectionIndex}/{totalSections}: {message}" + ) + + return progressCallback +``` + +## Error Handling + +### Error Section Creation + +```python +def createErrorSection( + originalSection: Dict[str, Any], + errorMessage: str +) -> Dict[str, Any]: + """ + Create error placeholder section. + """ + return { + "id": originalSection.get("id", "unknown"), + "content_type": "paragraph", # Change to paragraph for error display + "elements": [{ + "text": f"[ERROR: Failed to generate {originalSection.get('content_type', 'content')} for section '{originalSection.get('id', 'unknown')}'. Error: {errorMessage}]" + }], + "order": originalSection.get("order", 0), + "error": True, + "errorMessage": errorMessage, + "originalContentType": originalSection.get("content_type") + } +``` + +### Error Handling in Content Generation + +```python +async def _generateSectionContent( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any +) -> Dict[str, Any]: + """ + Generate content for a single section with error handling. 
+ """ + try: + complexity = section.get("complexity", "simple") + contentType = section.get("content_type") + + if contentType == "image": + return await self._generateImageSection(section, context, services) + elif complexity == "complex": + return await self._generateComplexTextSection(section, context, services) + else: + return await self._generateSimpleSection(section, context, services) + + except Exception as e: + logger.error(f"Error generating section {section.get('id')}: {str(e)}") + return createErrorSection(section, str(e)) +``` + +## Performance Considerations + +### Parallel Generation + +```python +async def _generateSectionsParallel( + self, + sections: List[Dict[str, Any]], + context: GenerationContext, + services: Any, + progressCallback: Optional[Callable] = None +) -> List[Dict[str, Any]]: + """ + Generate content for multiple sections in parallel. + """ + async def generateWithProgress(section: Dict[str, Any], index: int): + if progressCallback: + progressCallback(index + 1, len(sections), f"Generating {section.get('content_type')}...") + + return await self._generateSectionContent(section, context, services) + + # Generate all sections in parallel + results = await asyncio.gather( + *[generateWithProgress(section, idx) for idx, section in enumerate(sections)], + return_exceptions=True + ) + + # Handle exceptions + generatedSections = [] + for idx, result in enumerate(results): + if isinstance(result, Exception): + logger.error(f"Error generating section {idx}: {str(result)}") + generatedSections.append( + createErrorSection(sections[idx], str(result)) + ) + else: + generatedSections.append(result) + + return generatedSections +``` + +### Batch Processing for Large Documents + +```python +async def generateContent( + self, + structure: Dict[str, Any], + cachedContent: Optional[ContentCache], + userPrompt: str, + services: Any, + progressCallback: Optional[Callable] = None, + batchSize: int = 10 +) -> Dict[str, Any]: + """ + Generate content 
with batching for large documents. + """ + documents = structure.get("documents", []) + + for doc in documents: + sections = doc.get("sections", []) + + # Process in batches + for batchStart in range(0, len(sections), batchSize): + batch = sections[batchStart:batchStart + batchSize] + + # Generate batch in parallel + generatedBatch = await self._generateSectionsParallel( + batch, + context, + services, + progressCallback + ) + + # Update sections + for idx, generated in enumerate(generatedBatch): + sections[batchStart + idx] = generated + + return structure +``` + +## Testing Strategy + +### Unit Tests + +1. **StructureGenerator Tests**: + - Test structure generation with/without source documents + - Test complexity identification + - Test image prompt extraction + +2. **ContentGenerator Tests**: + - Test simple section generation + - Test image section generation + - Test complex text section generation + - Test parallel generation + - Test error handling + +3. **ContentIntegrator Tests**: + - Test content merging + - Test validation + - Test error section creation + +### Integration Tests + +1. **End-to-End Tests**: + - Test complete document generation flow + - Test with images + - Test with long documents + - Test error scenarios + +2. **Renderer Tests**: + - Test HTML renderer with separate image files + - Test PDF renderer with embedded images + - Test XLSX/PPTX renderers with images + +### Performance Tests + +1. **Large Document Tests**: + - Test with 100+ sections + - Test parallel generation performance + - Test memory usage + +2. 
**Image Generation Tests**: + - Test multiple images + - Test large images + - Test image generation failures + diff --git a/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md new file mode 100644 index 00000000..4476c2b9 --- /dev/null +++ b/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md @@ -0,0 +1,398 @@ +# Implementation Plan: Hierarchical Document Generation + +## Overview + +This document outlines the step-by-step implementation plan for the hierarchical document generation system with image integration. + +## Implementation Phases + +### Phase 1: Core Infrastructure (Week 1) + +**Goal**: Set up core components and data structures + +#### Tasks: + +1. **Create StructureGenerator Component** + - [ ] Create `subStructureGenerator.py` + - [ ] Implement `generateStructure()` method + - [ ] Implement `_createStructurePrompt()` method + - [ ] Implement `_identifySectionComplexity()` method + - [ ] Implement `_extractImagePrompts()` method + - [ ] Add unit tests + +2. **Create ContentGenerator Component** + - [ ] Create `subContentGenerator.py` + - [ ] Implement `generateContent()` method + - [ ] Implement `_generateSectionContent()` method + - [ ] Implement `_generateSimpleSection()` method + - [ ] Implement `_generateComplexTextSection()` method + - [ ] Implement `_createSectionPrompt()` method + - [ ] Add unit tests + +3. **Create ContentIntegrator Component** + - [ ] Create `subContentIntegrator.py` + - [ ] Implement `integrateContent()` method + - [ ] Implement `validateCompleteness()` method + - [ ] Implement `createErrorSection()` method + - [ ] Add unit tests + +4. 
**Update generateDocument Action** + - [ ] Modify `generateDocument.py` to use hierarchical approach + - [ ] Add Phase 1: Structure generation + - [ ] Add Phase 2: Content generation (sequential first) + - [ ] Add Phase 3: Integration & rendering + - [ ] Add basic progress logging + - [ ] Add error handling + +**Deliverables**: +- Core components created +- Basic hierarchical generation working (sequential) +- Unit tests passing + +**Estimated Time**: 3-4 days + +--- + +### Phase 2: Image Generation Integration (Week 1-2) + +**Goal**: Integrate image generation into content generation + +#### Tasks: + +1. **Implement Image Section Generation** + - [ ] Add `_generateImageSection()` method to ContentGenerator + - [ ] Integrate with `ai.generate` action + - [ ] Handle base64 image data storage + - [ ] Add image prompt extraction from structure + - [ ] Add error handling for image generation failures + +2. **Update Structure Generation Prompt** + - [ ] Add image section detection in structure prompt + - [ ] Add image_prompt field extraction + - [ ] Test with user prompts requesting images + +3. **Test Image Integration** + - [ ] Test image generation in document structure + - [ ] Test multiple images in one document + - [ ] Test image generation failures + +**Deliverables**: +- Image generation integrated +- Images stored as base64 in JSON +- Error handling for image failures + +**Estimated Time**: 2-3 days + +--- + +### Phase 3: Parallel Processing & Progress Logging (Week 2) + +**Goal**: Implement parallel section generation and detailed progress logging + +#### Tasks: + +1. **Implement Parallel Generation** + - [ ] Add `_generateSectionsParallel()` method + - [ ] Use `asyncio.gather()` for parallel execution + - [ ] Add batch processing for large documents + - [ ] Handle exceptions in parallel execution + - [ ] Test parallel vs sequential performance + +2. 
**Enhance Progress Logging** + - [ ] Create progress callback system + - [ ] Add detailed progress messages: + - Structure generation progress + - Section-by-section progress + - Image generation progress + - Rendering progress + - [ ] Calculate accurate progress percentages + - [ ] Test progress updates + +3. **Update generateDocument Action** + - [ ] Integrate parallel generation + - [ ] Add progress callback to content generation + - [ ] Update progress logging throughout phases + +**Deliverables**: +- Parallel section generation working +- Detailed progress logging +- Performance improvements + +**Estimated Time**: 2-3 days + +--- + +### Phase 4: Renderer Updates (Week 2-3) + +**Goal**: Update renderers to properly handle images + +#### Tasks: + +1. **Update HTML Renderer** + - [ ] Modify `rendererHtml.py` + - [ ] Add `_extractImages()` method + - [ ] Implement separate image file creation + - [ ] Update HTML to use relative image paths + - [ ] Handle multiple image files + - [ ] Test HTML + image files output + +2. **Update PDF Renderer** + - [ ] Modify `rendererPdf.py` + - [ ] Update `_renderJsonImage()` to embed images + - [ ] Use `reportlab.platypus.Image()` with base64 + - [ ] Handle image sizing and positioning + - [ ] Test PDF with embedded images + +3. **Update XLSX Renderer** + - [ ] Modify `rendererXlsx.py` + - [ ] Add `_renderJsonImage()` method + - [ ] Use `openpyxl.drawing.image.Image()` to embed images + - [ ] Handle image placement in cells + - [ ] Test XLSX with images + +4. 
**Update PPTX Renderer** + - [ ] Modify `rendererPptx.py` + - [ ] Add `_renderJsonImage()` method + - [ ] Use `slide.shapes.add_picture()` to add images + - [ ] Handle image sizing on slides + - [ ] Test PPTX with images + +**Deliverables**: +- All renderers support images +- HTML creates separate image files +- PDF/XLSX/PPTX embed images directly + +**Estimated Time**: 4-5 days + +--- + +### Phase 5: Content Caching & Optimization (Week 3) + +**Goal**: Implement content caching to avoid re-extraction + +#### Tasks: + +1. **Implement Content Cache** + - [ ] Create ContentCache data structure + - [ ] Extract content once at start of generation + - [ ] Pass cached content to all sub-prompts + - [ ] Add cache validation (check if documents changed) + - [ ] Test cache reuse + +2. **Optimize Prompt Building** + - [ ] Update structure prompt to use cached content + - [ ] Update section prompts to use cached content + - [ ] Format cached content efficiently + - [ ] Test prompt sizes + +3. **Performance Testing** + - [ ] Test with large documents + - [ ] Test with multiple source documents + - [ ] Measure performance improvements + - [ ] Optimize bottlenecks + +**Deliverables**: +- Content caching implemented +- No redundant content extraction +- Performance optimized + +**Estimated Time**: 2-3 days + +--- + +### Phase 6: Error Handling & Edge Cases (Week 3-4) + +**Goal**: Robust error handling and edge case coverage + +#### Tasks: + +1. **Enhance Error Handling** + - [ ] Improve error section creation + - [ ] Add error recovery strategies + - [ ] Handle partial failures gracefully + - [ ] Add error logging and reporting + +2. **Handle Edge Cases** + - [ ] Empty document list + - [ ] No sections generated + - [ ] All sections fail + - [ ] Very large images + - [ ] Very long documents (100+ sections) + - [ ] Missing image prompts + - [ ] Invalid section types + +3. 
**Add Validation** + - [ ] Validate structure before content generation + - [ ] Validate content before integration + - [ ] Validate final document before rendering + - [ ] Add comprehensive error messages + +**Deliverables**: +- Robust error handling +- Edge cases covered +- Clear error messages + +**Estimated Time**: 2-3 days + +--- + +### Phase 7: Testing & Refinement (Week 4) + +**Goal**: Comprehensive testing and refinement + +#### Tasks: + +1. **Unit Testing** + - [ ] Complete unit tests for all components + - [ ] Test all methods + - [ ] Test error scenarios + - [ ] Achieve >80% code coverage + +2. **Integration Testing** + - [ ] Test end-to-end document generation + - [ ] Test with various document types + - [ ] Test with images + - [ ] Test with long documents + - [ ] Test error scenarios + +3. **Performance Testing** + - [ ] Test with 10, 50, 100+ sections + - [ ] Measure generation time + - [ ] Measure memory usage + - [ ] Compare parallel vs sequential + - [ ] Optimize if needed + +4. **User Acceptance Testing** + - [ ] Test with real user scenarios + - [ ] Test bedtime story with images (original use case) + - [ ] Test business documents + - [ ] Test technical documents + - [ ] Gather feedback + +5. 
**Documentation** + - [ ] Update API documentation + - [ ] Add code comments + - [ ] Update user guides + - [ ] Create examples + +**Deliverables**: +- Comprehensive test suite +- Performance benchmarks +- Documentation complete +- Ready for production + +**Estimated Time**: 3-4 days + +--- + +## Dependencies + +### External Dependencies +- `asyncio` - For parallel processing +- `base64` - For image encoding/decoding +- `reportlab` - For PDF image embedding +- `openpyxl` - For XLSX image embedding +- `python-pptx` - For PPTX image embedding + +### Internal Dependencies +- `serviceGeneration` - Main generation service +- `serviceAi` - AI service for generation +- `serviceExtraction` - Content extraction service +- `methodAi.actions.generate` - Image generation action +- `methodAi.actions.process` - Text generation action + +## Risk Mitigation + +### Risks and Mitigation Strategies + +1. **Risk**: Image generation failures break entire document + - **Mitigation**: Error handling creates error sections, continues processing + +2. **Risk**: Parallel generation causes memory issues + - **Mitigation**: Batch processing, limit concurrent operations + +3. **Risk**: Large base64 images cause JSON size issues + - **Mitigation**: Consider compression or chunking for very large images + +4. **Risk**: HTML renderer needs to return multiple files + - **Mitigation**: Modify return type or create file bundle system + +5. 
**Risk**: Performance not meeting expectations + - **Mitigation**: Profile and optimize bottlenecks, consider caching + +## Success Criteria + +### Functional Requirements +- ✅ Documents can be generated with embedded images +- ✅ HTML renderer creates separate image files +- ✅ PDF/XLSX/PPTX renderers embed images +- ✅ Progress logging shows detailed progress +- ✅ Error handling prevents complete failures +- ✅ Content extraction happens only once + +### Performance Requirements +- ✅ Parallel generation improves performance by 2x+ for multi-section documents +- ✅ Progress updates appear within 1 second of action +- ✅ Documents with 50+ sections complete in <5 minutes + +### Quality Requirements +- ✅ >80% code coverage +- ✅ All edge cases handled +- ✅ Clear error messages +- ✅ Comprehensive documentation + +## Rollout Plan + +### Step 1: Internal Testing (Week 4) +- Deploy to development environment +- Internal team testing +- Fix critical issues + +### Step 2: Beta Testing (Week 5) +- Deploy to staging environment +- Select beta users +- Gather feedback +- Fix issues + +### Step 3: Production Deployment (Week 6) +- Deploy to production +- Monitor performance +- Monitor errors +- Gather user feedback + +### Step 4: Optimization (Ongoing) +- Monitor usage patterns +- Optimize based on real-world usage +- Add enhancements based on feedback + +## Timeline Summary + +| Phase | Duration | Start | End | +|-------|----------|-------|-----| +| Phase 1: Core Infrastructure | 3-4 days | Day 1 | Day 4 | +| Phase 2: Image Integration | 2-3 days | Day 4 | Day 7 | +| Phase 3: Parallel Processing | 2-3 days | Day 7 | Day 10 | +| Phase 4: Renderer Updates | 4-5 days | Day 10 | Day 15 | +| Phase 5: Content Caching | 2-3 days | Day 15 | Day 18 | +| Phase 6: Error Handling | 2-3 days | Day 18 | Day 21 | +| Phase 7: Testing & Refinement | 3-4 days | Day 21 | Day 25 | + +**Total Estimated Time**: 4-5 weeks + +## Next Steps + +1. 
**Review and Approve Plan** + - Review implementation plan + - Approve timeline + - Assign resources + +2. **Set Up Development Environment** + - Create feature branch + - Set up test infrastructure + - Prepare development tools + +3. **Begin Phase 1** + - Start with StructureGenerator + - Set up project structure + - Begin implementation + diff --git a/modules/workflows/processing/shared/RENDERING_ISSUE_ANALYSIS.md b/modules/workflows/processing/shared/RENDERING_ISSUE_ANALYSIS.md new file mode 100644 index 00000000..ee790a69 --- /dev/null +++ b/modules/workflows/processing/shared/RENDERING_ISSUE_ANALYSIS.md @@ -0,0 +1,238 @@ +# Rendering Issue Analysis +## Why HTML Documents Are Being Rendered as Text + +**Date**: 2025-12-22 +**Issue**: Documents requested as HTML are being output as text/plain + +--- + +## Root Cause Analysis + +### Issue 1: `resultType` Not Extracted from Task Objective ❌ **CRITICAL** + +**Problem**: +- Task objective clearly states: "Generate a complete, well-structured **HTML document**" +- Validation shows: `EXPECTED FORMATS: ['html']` +- But action was called with: `ai.generateDocument {}` (empty parameters) +- So `resultType` defaults to `"docx"` instead of `"html"` + +**Location**: +- `generateDocument.py` line 44: `resultType = parameters.get("resultType", "docx")` +- No parameter extraction from task objective/prompt + +**Impact**: **CRITICAL** - Wrong format is used even though task clearly requests HTML + +**Fix Needed**: +- Extract `resultType` from task objective/prompt before calling action +- Or enhance `generateDocument` to detect format from prompt if not provided + +--- + +### Issue 2: HTML Not in Action Definition Options ❌ **CRITICAL** + +**Problem**: +- Action definition in `methodAi.py` line 357 only lists: `["docx", "pdf", "txt", "md"]` +- `"html"` is **NOT** in the allowed options +- But docstring says HTML is supported: `"resultType (str, optional): Output format (docx, pdf, txt, md, html, etc.)"` + +**Location**: +- 
`methodAi.py` line 357: `frontendOptions=["docx", "pdf", "txt", "md"]` + +**Impact**: **CRITICAL** - Even if HTML is requested, it might be rejected or not recognized + +**Fix Needed**: +- Add `"html"` to `frontendOptions` list + +--- + +### Issue 3: Renderer Fallback to Text ❌ **CRITICAL** + +**Problem**: +- When `resultType="docx"` is used (default) +- If docx renderer fails or is not found +- System falls back to text renderer (line 403-404 of `mainServiceGeneration.py`) +- This explains why output is `text/plain` instead of HTML + +**Location**: +- `mainServiceGeneration.py` lines 393-409: `_getFormatRenderer()` method +- Line 403: `logger.warning(f"No renderer found for format {output_format}, falling back to text")` + +**Impact**: **CRITICAL** - Wrong format is rendered + +**Fix Needed**: +- Fix docx renderer if it's failing +- Or better: Extract correct format from prompt + +--- + +### Issue 4: Missing Parameter Extraction ❌ **HIGH PRIORITY** + +**Problem**: +- Task objective contains format information ("HTML document") +- But no parameter extraction step extracts `resultType` from prompt +- Action is called with empty parameters `{}` + +**Location**: +- Workflow execution - parameter extraction phase +- Should extract `resultType: "html"` from task objective + +**Impact**: **HIGH** - System can't infer format from user intent + +**Fix Needed**: +- Add parameter extraction that detects format from prompt +- Or enhance `generateDocument` to auto-detect format from prompt + +--- + +## Flow Analysis + +### Expected Flow: +``` +1. Task Objective: "Generate HTML document..." +2. Parameter Extraction: Extract resultType="html" from objective +3. Action Call: ai.generateDocument({resultType: "html", prompt: "..."}) +4. Content Generation: Generate sections with content +5. Integration: Merge sections into complete structure +6. Rendering: Call renderReport(outputFormat="html") +7. HTML Renderer: Render to HTML +8. 
Output: document.html (text/html) +``` + +### Actual Flow (Broken): +``` +1. Task Objective: "Generate HTML document..." +2. Parameter Extraction: ❌ MISSING - no extraction +3. Action Call: ai.generateDocument({}) ❌ Empty parameters +4. Content Generation: ✅ Generate sections with content +5. Integration: ✅ Merge sections into complete structure +6. Rendering: Call renderReport(outputFormat="docx") ❌ Wrong format +7. Docx Renderer: ❌ Fails or not found +8. Fallback: Text renderer ❌ Wrong renderer +9. Output: document.text (text/plain) ❌ Wrong format +``` + +--- + +## Fixes Required + +### Fix 1: Add HTML to Action Definition Options ✅ **EASY** + +**File**: `gateway/modules/workflows/methods/methodAi/methodAi.py` +**Line**: 357 + +**Change**: +```python +frontendOptions=["docx", "pdf", "txt", "md", "html"], # Added "html" +``` + +--- + +### Fix 2: Extract resultType from Prompt ✅ **MEDIUM** + +**Option A**: Enhance `generateDocument` to detect format from prompt + +**File**: `gateway/modules/workflows/methods/methodAi/actions/generateDocument.py` +**After line 44**: + +```python +resultType = parameters.get("resultType", "docx") + +# Auto-detect format from prompt if not provided +if resultType == "docx" and prompt: + promptLower = prompt.lower() + if "html" in promptLower or "html5" in promptLower: + resultType = "html" + elif "pdf" in promptLower: + resultType = "pdf" + elif "markdown" in promptLower or "md" in promptLower: + resultType = "md" + elif "text" in promptLower or "txt" in promptLower: + resultType = "txt" +``` + +**Option B**: Extract in parameter planning phase (better, but requires workflow changes) + +--- + +### Fix 3: Improve Renderer Error Handling ✅ **MEDIUM** + +**File**: `gateway/modules/services/serviceGeneration/mainServiceGeneration.py` +**Lines**: 393-409 + +**Enhance**: Better error messages and logging when renderer not found + +```python +def _getFormatRenderer(self, output_format: str): + """Get the appropriate renderer for the 
specified format using auto-discovery.""" + try: + from .renderers.registry import getRenderer + renderer = getRenderer(output_format, services=self.services) + + if renderer: + return renderer + + # Log available formats for debugging + from .renderers.registry import getSupportedFormats + availableFormats = getSupportedFormats() + logger.error( + f"No renderer found for format '{output_format}'. " + f"Available formats: {availableFormats}" + ) + + # Fallback to text renderer if no specific renderer found + logger.warning(f"Falling back to text renderer for format {output_format}") + fallbackRenderer = getRenderer('text', services=self.services) + if fallbackRenderer: + return fallbackRenderer + + logger.error("Even text renderer fallback failed") + return None + + except Exception as e: + logger.error(f"Error getting renderer for {output_format}: {str(e)}") + return None +``` + +--- + +## Verification Steps + +After fixes: + +1. **Test HTML Generation**: + - Task: "Generate HTML document about AI" + - Expected: `resultType="html"` extracted or detected + - Expected: HTML renderer used + - Expected: Output is `document.html` with `text/html` MIME type + +2. **Test Format Detection**: + - Task: "Generate PDF report" + - Expected: `resultType="pdf"` detected + - Expected: PDF renderer used + +3. **Test Explicit Parameter**: + - Action: `ai.generateDocument({resultType: "html", prompt: "..."})` + - Expected: HTML renderer used (no fallback) + +--- + +## Summary + +**Root Causes**: +1. ❌ `resultType` not extracted from task objective +2. ❌ HTML not in action definition options +3. ❌ Renderer fallback to text when docx fails +4. 
❌ No format auto-detection from prompt + +**Priority**: **CRITICAL** - System cannot produce HTML documents as requested + +**Estimated Fix Time**: +- Fix 1: 5 minutes +- Fix 2: 30 minutes +- Fix 3: 15 minutes +- **Total**: ~1 hour + +--- + +**Analysis Complete**: 2025-12-22 + diff --git a/modules/workflows/processing/shared/methodDiscovery.py b/modules/workflows/processing/shared/methodDiscovery.py index 30708010..e3bfa769 100644 --- a/modules/workflows/processing/shared/methodDiscovery.py +++ b/modules/workflows/processing/shared/methodDiscovery.py @@ -68,7 +68,7 @@ def discoverMethods(serviceCenter): # Method not discovered yet - create new instance methodInstance = item(serviceCenter) - # Use the actions property from MethodBase which handles @action decorator + # Use the actions property from MethodBase which handles WorkflowActionDefinition actions = methodInstance.actions # Create method info @@ -131,7 +131,7 @@ def getMethodsList(serviceCenter): return "\n\n".join(methodsList) def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, Any]) -> str: - """Get action parameter list from method docstring for AI parameter generation (list only).""" + """Get action parameter list from WorkflowActionParameter structure for AI parameter generation (list only).""" try: if not methods or methodName not in methods: return "" @@ -141,17 +141,21 @@ def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, return "" action_info = methodInstance.actions[actionName] - # Extract parameter descriptions from docstring - docstring = action_info.get('description', '') - paramDescriptions, paramTypes = methodInstance._extractParameterDetails(docstring) + # Use structured WorkflowActionParameter objects from new system + parameters = action_info.get('parameters', {}) param_list = [] - for paramName, paramDesc in paramDescriptions.items(): - paramType = paramTypes.get(paramName, 'Any') + for paramName, paramInfo in parameters.items(): + 
paramType = paramInfo.get('type', 'Any') + paramDesc = paramInfo.get('description', '') + paramRequired = paramInfo.get('required', False) + + # Format: paramName (type, required/optional): description + reqText = "required" if paramRequired else "optional" if paramDesc: - param_list.append(f"- {paramName} ({paramType}): {paramDesc}") + param_list.append(f"- {paramName} ({paramType}, {reqText}): {paramDesc}") else: - param_list.append(f"- {paramName} ({paramType})") + param_list.append(f"- {paramName} ({paramType}, {reqText})") # Return list only, without leading headings or trailing text return "\n".join(param_list) diff --git a/modules/workflows/processing/shared/placeholderFactory.py b/modules/workflows/processing/shared/placeholderFactory.py index 99d1523f..797352ab 100644 --- a/modules/workflows/processing/shared/placeholderFactory.py +++ b/modules/workflows/processing/shared/placeholderFactory.py @@ -88,10 +88,23 @@ def extractAvailableMethods(service: Any) -> str: # Create a flat JSON format with compound action names for better AI parsing available_actions_json = {} + processed_methods = set() # Track processed methods to avoid duplicates + for methodName, methodInfo in methods.items(): + # Skip short name aliases - only process full class names (MethodXxx) + # Short names are stored as aliases but we want to avoid processing them twice + if not methodName.startswith('Method'): + continue + # Convert MethodAi -> ai, MethodDocument -> document, etc. 
shortName = methodName.replace('Method', '').lower() + # Skip if we've already processed this method (via its short name alias) + if shortName in processed_methods: + continue + + processed_methods.add(shortName) + for actionName, actionInfo in methodInfo['actions'].items(): # Create compound action name: method.action compoundActionName = f"{shortName}.{actionName}" diff --git a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py index 0ad5ecfe..31878033 100644 --- a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py +++ b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py @@ -343,6 +343,12 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to - Check "structureComparison.gap" to see what's missing. If quantitative gaps are available, use them. - Next action should ONLY generate the MISSING part, NOT repeat what's already delivered +CRITICAL - Missing Data Generation Strategy: +- When gap analysis shows missing data (found count = 0 but required count > 0): + * Generate the missing data FIRST as separate outputs before attempting integration + * Do NOT try to generate AND integrate missing data in one step - data must exist before integration + * Only AFTER missing data exists can you integrate it with existing data in a subsequent action + === OUTPUT FORMAT === Return ONLY JSON (no markdown, no explanations). The decision MUST: - Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...) 
diff --git a/modules/workflows/processing/workflowProcessor.py b/modules/workflows/processing/workflowProcessor.py index d97541e5..9c9d6c84 100644 --- a/modules/workflows/processing/workflowProcessor.py +++ b/modules/workflows/processing/workflowProcessor.py @@ -28,6 +28,7 @@ class WorkflowProcessor: self.services = services self.mode = self._createMode(services.workflow.workflowMode) self.workflow = services.workflow + self.workflowExecOperationId = None # Will be set by workflowManager for task hierarchy def _createMode(self, workflowMode: WorkflowModeEnum) -> BaseMode: """Create the appropriate mode implementation based on workflow mode""" @@ -111,16 +112,20 @@ class WorkflowProcessor: # Init progress logger operationId = f"taskExec_{workflow.id}_{taskIndex}_{int(time.time())}" + # Get parent operationId (Service Workflow Execution) if available + parentOperationId = getattr(self, 'workflowExecOperationId', None) + try: # Check workflow status before executing task checkWorkflowStopped(self.services) - # Start progress tracking + # Start progress tracking - Task is child of Service Workflow Execution self.services.chat.progressLogStart( operationId, "Workflow Execution", "Task Execution", - f"Task {taskIndex}" + f"Task {taskIndex}", + parentOperationId=parentOperationId ) logger.info(f"=== STARTING TASK EXECUTION ===") diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py index 987f46bf..a5971904 100644 --- a/modules/workflows/workflowManager.py +++ b/modules/workflows/workflowManager.py @@ -566,72 +566,89 @@ class WorkflowManager: allTaskResults: List = [] previousResults: List[str] = [] - for idx, taskStep in enumerate(taskPlan.tasks): - currentTaskIndex = idx + 1 - logger.info(f"Task {currentTaskIndex}/{totalTasks}: {taskStep.objective}") + # Create "Service Workflow Execution" root entry - parent of all tasks + workflowExecOperationId = f"workflowExec_{workflow.id}" + self.services.chat.progressLogStart( + 
workflowExecOperationId, + "Service", + "Workflow Execution", + f"Executing {totalTasks} task(s)" + ) + + # Store workflow execution operationId in workflowProcessor for task hierarchy + handling.workflowExecOperationId = workflowExecOperationId + + try: + for idx, taskStep in enumerate(taskPlan.tasks): + currentTaskIndex = idx + 1 + logger.info(f"Task {currentTaskIndex}/{totalTasks}: {taskStep.objective}") - # Update workflow state before executing task (fixes "Task 0" issue) - handling.updateWorkflowBeforeExecutingTask(currentTaskIndex) + # Update workflow state before executing task (fixes "Task 0" issue) + handling.updateWorkflowBeforeExecutingTask(currentTaskIndex) - # Build TaskContext (mode-specific behavior is inside WorkflowProcessor) - taskContext = TaskContext( - taskStep=taskStep, - workflow=workflow, - workflowId=workflow.id, - availableDocuments=None, - availableConnections=None, - previousResults=previousResults, - previousHandover=None, - improvements=[], - retryCount=0, - previousActionResults=[], - previousReviewResult=None, - isRegeneration=False, - failurePatterns=[], - failedActions=[], - successfulActions=[], - criteriaProgress={ - 'met_criteria': set(), - 'unmet_criteria': set(), - 'attempt_history': [] - } - ) - - taskResult = await handling.executeTask(taskStep, workflow, taskContext) - - # Persist task result for cross-task/round document references - # Convert ChatTaskResult to WorkflowTaskResult for persistence - from modules.datamodels.datamodelWorkflow import TaskResult as WorkflowTaskResult - from modules.datamodels.datamodelChat import ActionResult - - # Get final ActionResult from task execution (last action result) - finalActionResult = None - if hasattr(taskResult, 'actionResult'): - finalActionResult = taskResult.actionResult - elif taskContext.previousActionResults and len(taskContext.previousActionResults) > 0: - # Use last action result from context - finalActionResult = taskContext.previousActionResults[-1] - - # Create 
WorkflowTaskResult for persistence - if finalActionResult: - workflowTaskResult = WorkflowTaskResult( - taskId=taskStep.id, - actionResult=finalActionResult + # Build TaskContext (mode-specific behavior is inside WorkflowProcessor) + taskContext = TaskContext( + taskStep=taskStep, + workflow=workflow, + workflowId=workflow.id, + availableDocuments=None, + availableConnections=None, + previousResults=previousResults, + previousHandover=None, + improvements=[], + retryCount=0, + previousActionResults=[], + previousReviewResult=None, + isRegeneration=False, + failurePatterns=[], + failedActions=[], + successfulActions=[], + criteriaProgress={ + 'met_criteria': set(), + 'unmet_criteria': set(), + 'attempt_history': [] + } ) - # Persist task result (creates ChatMessage + ChatDocuments) - await handling.persistTaskResult(workflowTaskResult, workflow, taskContext) - - handoverData = await handling.prepareTaskHandover(taskStep, [], taskResult, workflow) - allTaskResults.append({ - 'taskStep': taskStep, - 'taskResult': taskResult, - 'handoverData': handoverData - }) - if taskResult.success and taskResult.feedback: - previousResults.append(taskResult.feedback) + + taskResult = await handling.executeTask(taskStep, workflow, taskContext) - # Mark workflow as completed; error/stop cases update status elsewhere - workflow.status = "completed" + # Persist task result for cross-task/round document references + # Convert ChatTaskResult to WorkflowTaskResult for persistence + from modules.datamodels.datamodelWorkflow import TaskResult as WorkflowTaskResult + from modules.datamodels.datamodelChat import ActionResult + + # Get final ActionResult from task execution (last action result) + finalActionResult = None + if hasattr(taskResult, 'actionResult'): + finalActionResult = taskResult.actionResult + elif taskContext.previousActionResults and len(taskContext.previousActionResults) > 0: + # Use last action result from context + finalActionResult = taskContext.previousActionResults[-1] 
+ + # Create WorkflowTaskResult for persistence + if finalActionResult: + workflowTaskResult = WorkflowTaskResult( + taskId=taskStep.id, + actionResult=finalActionResult + ) + # Persist task result (creates ChatMessage + ChatDocuments) + await handling.persistTaskResult(workflowTaskResult, workflow, taskContext) + + handoverData = await handling.prepareTaskHandover(taskStep, [], taskResult, workflow) + allTaskResults.append({ + 'taskStep': taskStep, + 'taskResult': taskResult, + 'handoverData': handoverData + }) + if taskResult.success and taskResult.feedback: + previousResults.append(taskResult.feedback) + + # Mark workflow as completed; error/stop cases update status elsewhere + workflow.status = "completed" + finally: + # Finish "Service Workflow Execution" entry + self.services.chat.progressLogFinish(workflowExecOperationId, True) + return None async def _processWorkflowResults(self) -> None: diff --git a/requirements.txt b/requirements.txt index 5191019b..6377611d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -71,6 +71,9 @@ google-cloud-texttospeech==2.16.3 ## MSFT Integration msal==1.24.1 +## Azure Integration +azure-communication-email>=1.0.0 # Azure Communication Services Email + ## Testing Dependencies pytest>=8.0.0 pytest-asyncio>=0.21.0 diff --git a/tests/functional/test09_document_generation_formats.py b/tests/functional/test09_document_generation_formats.py new file mode 100644 index 00000000..0834f440 --- /dev/null +++ b/tests/functional/test09_document_generation_formats.py @@ -0,0 +1,410 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Document Generation Formats Test - Tests document generation in all supported formats +Tests HTML, PDF, DOCX, XLSX, and PPTX generation with images and various content types. 
+""" + +import asyncio +import json +import sys +import os +import time +import base64 +from typing import Dict, Any, List, Optional + +# Add the gateway to path (go up 2 levels from tests/functional/) +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +# Import the service initialization +from modules.services import getInterface as getServices +from modules.datamodels.datamodelChat import UserInputRequest, WorkflowModeEnum +from modules.datamodels.datamodelUam import User +from modules.features.workflow import chatStart +import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects + + +class DocumentGenerationFormatsTester: + def __init__(self): + # Use root user for testing (has full access to everything) + from modules.interfaces.interfaceDbAppObjects import getRootInterface + rootInterface = getRootInterface() + self.testUser = rootInterface.currentUser + + # Initialize services using the existing system + self.services = getServices(self.testUser, None) # Test user, no workflow + self.workflow = None + self.testResults = {} + self.generatedDocuments = {} + + async def initialize(self): + """Initialize the test environment.""" + # Enable debug file logging for tests + from modules.shared.configuration import APP_CONFIG + APP_CONFIG.set("APP_DEBUG_CHAT_WORKFLOW_ENABLED", True) + + # Set logging level to INFO to see workflow progress + import logging + logging.getLogger().setLevel(logging.INFO) + + print(f"Initialized test with user: {self.testUser.id}") + print(f"Mandate ID: {self.testUser.mandateId}") + print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}") + + def createTestPrompt(self, format: str) -> str: + """Create a test prompt for document generation in the specified format.""" + prompts = { + "html": "Create a professional HTML document about 'The Future of Artificial Intelligence' with: 1) A 
main title, 2) An introduction paragraph, 3) Three key sections with headings, 4) A bullet list of benefits, 5) An image showing AI technology (generate it), 6) A conclusion paragraph. Format as HTML.", + "pdf": "Create a professional PDF report about 'Climate Change Impact Analysis' with: 1) A title page, 2) An executive summary, 3) Three main sections with data tables, 4) Charts/graphs described, 5) An image showing environmental impact (generate it), 6) Conclusions and recommendations. Format as PDF.", + "docx": "Create a comprehensive Word document about 'Project Management Best Practices' with: 1) A cover page with title, 2) Table of contents, 3) Five chapters with headings and paragraphs, 4) A table comparing methodologies, 5) An image illustrating project workflow (generate it), 6) Appendices. Format as DOCX.", + "xlsx": "Create an Excel workbook about 'Sales Performance Analysis' with: 1) A summary sheet with key metrics, 2) A detailed data sheet with sales data in a table format (columns: Month, Product, Sales, Units, Revenue), 3) A chart sheet with visualizations described, 4) An analysis sheet with calculations. Format as XLSX.", + "pptx": "Create a PowerPoint presentation about 'Digital Transformation Strategy' with: 1) A title slide, 2) An agenda slide, 3) Five content slides with bullet points, 4) A slide with an image showing transformation roadmap (generate it), 5) A conclusion slide. Format as PPTX." 
+ } + return prompts.get(format.lower(), prompts["docx"]) + + async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]: + """Generate a document in the specified format using workflow.""" + print("\n" + "="*80) + print(f"GENERATING DOCUMENT IN {format.upper()} FORMAT") + print("="*80) + + prompt = self.createTestPrompt(format) + print(f"Prompt: {prompt[:200]}...") + + # Create user input request + userInput = UserInputRequest( + prompt=prompt, + userLanguage="en" + ) + + # Start workflow + print(f"\nStarting workflow for {format.upper()} generation...") + workflow = await chatStart( + currentUser=self.testUser, + userInput=userInput, + workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC, + workflowId=None + ) + + if not workflow: + return { + "success": False, + "error": "Failed to start workflow" + } + + self.workflow = workflow + print(f"Workflow started: {workflow.id}") + + # Wait for workflow completion + print(f"Waiting for workflow completion...") + completed = await self.waitForWorkflowCompletion(timeout=300) # 5 minute timeout + + if not completed: + return { + "success": False, + "error": "Workflow did not complete within timeout", + "workflowId": workflow.id, + "status": workflow.status if workflow else "unknown" + } + + # Analyze results + results = self.analyzeWorkflowResults() + + # Extract documents for this format + documents = results.get("documents", []) + formatDocuments = [d for d in documents if d.get("fileName", "").endswith(f".{format.lower()}")] + + return { + "success": True, + "format": format, + "workflowId": workflow.id, + "status": results.get("status"), + "documentCount": len(formatDocuments), + "documents": formatDocuments, + "results": results + } + + async def waitForWorkflowCompletion(self, timeout: int = 300, checkInterval: int = 2) -> bool: + """Wait for workflow to complete.""" + if not self.workflow: + return False + + startTime = time.time() + lastStatus = None + + interfaceDbChat = 
interfaceDbChatObjects.getInterface(self.testUser) + + while True: + # Check timeout + if time.time() - startTime > timeout: + print(f"\n⏱️ Timeout after {timeout} seconds") + return False + + # Get current workflow status + try: + currentWorkflow = interfaceDbChat.getWorkflow(self.workflow.id) + if not currentWorkflow: + print("\n❌ Workflow not found") + return False + + currentStatus = currentWorkflow.status + elapsed = int(time.time() - startTime) + + # Print status if it changed + if currentStatus != lastStatus: + print(f"Workflow status: {currentStatus} (elapsed: {elapsed}s)") + lastStatus = currentStatus + + # Check if workflow is complete + if currentStatus in ["completed", "stopped", "failed"]: + self.workflow = currentWorkflow + statusIcon = "✅" if currentStatus == "completed" else "❌" + print(f"\n{statusIcon} Workflow finished with status: {currentStatus} (elapsed: {elapsed}s)") + return currentStatus == "completed" + + # Wait before next check + await asyncio.sleep(checkInterval) + + except Exception as e: + print(f"\n⚠️ Error checking workflow status: {str(e)}") + await asyncio.sleep(checkInterval) + + def analyzeWorkflowResults(self) -> Dict[str, Any]: + """Analyze workflow results and extract information.""" + if not self.workflow: + return {"error": "No workflow to analyze"} + + interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser) + workflow = interfaceDbChat.getWorkflow(self.workflow.id) + + if not workflow: + return {"error": "Workflow not found"} + + # Get unified chat data + chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None) + + # Count messages + messages = chatData.get("messages", []) + userMessages = [m for m in messages if m.get("role") == "user"] + assistantMessages = [m for m in messages if m.get("role") == "assistant"] + + # Count documents + documents = chatData.get("documents", []) + + # Get logs + logs = chatData.get("logs", []) + + results = { + "workflowId": workflow.id, + "status": workflow.status, + 
"workflowMode": str(workflow.workflowMode) if hasattr(workflow, 'workflowMode') else None, + "currentRound": workflow.currentRound, + "totalTasks": workflow.totalTasks, + "totalActions": workflow.totalActions, + "messageCount": len(messages), + "userMessageCount": len(userMessages), + "assistantMessageCount": len(assistantMessages), + "documentCount": len(documents), + "logCount": len(logs), + "documents": documents, + "logs": logs + } + + print(f"\nWorkflow Results:") + print(f" Status: {results['status']}") + print(f" Tasks: {results['totalTasks']}") + print(f" Actions: {results['totalActions']}") + print(f" Messages: {results['messageCount']}") + print(f" Documents: {results['documentCount']}") + + # Print document details + if documents: + print(f"\nGenerated Documents:") + for doc in documents: + fileName = doc.get("fileName", "unknown") + fileSize = doc.get("fileSize", 0) + mimeType = doc.get("mimeType", "unknown") + print(f" - {fileName} ({fileSize} bytes, {mimeType})") + + return results + + def verifyDocumentFormat(self, document: Dict[str, Any], expectedFormat: str) -> Dict[str, Any]: + """Verify that a document matches the expected format.""" + fileName = document.get("fileName", "") + mimeType = document.get("mimeType", "") + fileSize = document.get("fileSize", 0) + + # Expected MIME types + expectedMimeTypes = { + "html": ["text/html", "application/xhtml+xml"], + "pdf": ["application/pdf"], + "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"], + "xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"], + "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] + } + + # Expected file extensions + expectedExtensions = { + "html": [".html", ".htm"], + "pdf": [".pdf"], + "docx": [".docx"], + "xlsx": [".xlsx"], + "pptx": [".pptx"] + } + + formatLower = expectedFormat.lower() + expectedMimes = expectedMimeTypes.get(formatLower, []) + expectedExts = 
expectedExtensions.get(formatLower, []) + + # Check file extension + hasCorrectExtension = any(fileName.lower().endswith(ext) for ext in expectedExts) + + # Check MIME type + hasCorrectMimeType = any(mimeType.lower() == mime.lower() for mime in expectedMimes) + + # Check file size (should be > 0) + hasValidSize = fileSize > 0 + + verification = { + "format": expectedFormat, + "fileName": fileName, + "mimeType": mimeType, + "fileSize": fileSize, + "hasCorrectExtension": hasCorrectExtension, + "hasCorrectMimeType": hasCorrectMimeType, + "hasValidSize": hasValidSize, + "isValid": hasCorrectExtension and hasValidSize + } + + return verification + + async def testAllFormats(self) -> Dict[str, Any]: + """Test document generation in all formats.""" + print("\n" + "="*80) + print("TESTING DOCUMENT GENERATION IN ALL FORMATS") + print("="*80) + + formats = ["html", "pdf", "docx", "xlsx", "pptx"] + results = {} + + for format in formats: + try: + print(f"\n{'='*80}") + print(f"Testing {format.upper()} format...") + print(f"{'='*80}") + + result = await self.generateDocumentInFormat(format) + results[format] = result + + if result.get("success"): + documents = result.get("documents", []) + if documents: + # Verify first document + verification = self.verifyDocumentFormat(documents[0], format) + result["verification"] = verification + + print(f"\n✅ {format.upper()} generation successful!") + print(f" Documents: {len(documents)}") + print(f" Verification: {'✅ PASS' if verification['isValid'] else '❌ FAIL'}") + if verification.get("fileName"): + print(f" File: {verification['fileName']}") + print(f" Size: {verification['fileSize']} bytes") + print(f" MIME: {verification['mimeType']}") + else: + print(f"\n⚠️ {format.upper()} generation completed but no documents found") + else: + error = result.get("error", "Unknown error") + print(f"\n❌ {format.upper()} generation failed: {error}") + + # Small delay between tests + await asyncio.sleep(2) + + except Exception as e: + import 
traceback + print(f"\n❌ Error testing {format.upper()}: {str(e)}") + print(traceback.format_exc()) + results[format] = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + + return results + + async def runTest(self): + """Run the complete test.""" + print("\n" + "="*80) + print("DOCUMENT GENERATION FORMATS TEST") + print("="*80) + + try: + # Initialize + await self.initialize() + + # Test all formats + results = await self.testAllFormats() + + # Summary + print("\n" + "="*80) + print("TEST SUMMARY") + print("="*80) + + successCount = 0 + failCount = 0 + + for format, result in results.items(): + if result.get("success"): + successCount += 1 + status = "✅ PASS" + docCount = result.get("documentCount", 0) + verification = result.get("verification", {}) + isValid = verification.get("isValid", False) + statusIcon = "✅" if isValid else "⚠️" + print(f"{statusIcon} {format.upper():6s}: {status} - {docCount} document(s)") + else: + failCount += 1 + error = result.get("error", "Unknown error") + print(f"❌ {format.upper():6s}: FAIL - {error}") + + print(f"\nTotal: {successCount} passed, {failCount} failed out of {len(results)} formats") + + self.testResults = { + "success": failCount == 0, + "successCount": successCount, + "failCount": failCount, + "totalFormats": len(results), + "results": results + } + + return self.testResults + + except Exception as e: + import traceback + print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}") + print(f"Traceback:\n{traceback.format_exc()}") + self.testResults = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + return self.testResults + + +async def main(): + """Run document generation formats test.""" + tester = DocumentGenerationFormatsTester() + results = await tester.runTest() + + # Print final results as JSON for easy parsing + print("\n" + "="*80) + print("FINAL RESULTS (JSON)") + print("="*80) + print(json.dumps(results, indent=2, default=str)) + + +if 
__name__ == "__main__": + asyncio.run(main()) + From 4d4db7bb85b6de868856298e5b34c0324cfd8c74 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 23 Dec 2025 00:35:04 +0100 Subject: [PATCH 05/21] fixes --- .../serviceGeneration/renderers/rendererImage.py | 12 ++++++------ .../serviceGeneration/renderers/rendererXlsx.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py index 7ea450b2..ad83673b 100644 --- a/modules/services/serviceGeneration/renderers/rendererImage.py +++ b/modules/services/serviceGeneration/renderers/rendererImage.py @@ -47,15 +47,15 @@ class RendererImage(BaseRenderer): if not aiService: raise ValueError("AI service is required for image generation") - # Validate JSON structure - if not isinstance(extractedContent, dict): - raise ValueError("Extracted content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(extractedContent): + raise ValueError("Extracted content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in extractedContent: - raise ValueError("Extracted content must contain 'sections' field") + # Extract metadata from standardized schema + metadata = self._extractMetadata(extractedContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = extractedContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Create AI prompt for image generation imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService) diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index 2ebe11c2..a8cffd56 100644 --- 
a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -210,15 +210,15 @@ class RendererXlsx(BaseRenderer): # Get style set: default styles, enhanced with AI if userPrompt provided styles = await self._getStyleSet(userPrompt, aiService) - # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract metadata from standardized schema + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - document_title = jsonContent.get("metadata", {}).get("title", title) + document_title = metadata.get("title", title) # Create workbook wb = Workbook() From 262f3296bf83408f08e5fbdc539a8ca37de77991 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 23 Dec 2025 00:50:00 +0100 Subject: [PATCH 06/21] fixing renderers --- .../renderers/rendererPptx.py | 4 +- .../serviceGeneration/subContentGenerator.py | 33 +++++++-- modules/shared/jsonUtils.py | 69 ++++++++++++++----- 3 files changed, 81 insertions(+), 25 deletions(-) diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index f7b65eb1..6b1b9e18 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -12,8 +12,8 @@ logger = logging.getLogger(__name__) class RendererPptx(BaseRenderer): """Renderer for PowerPoint (.pptx) files using python-pptx library.""" - def __init__(self): - 
super().__init__() + def __init__(self, services=None): + super().__init__(services=services) self.supportedFormats = ["pptx", "ppt"] self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation" diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py index 1b1f64a9..0f75f595 100644 --- a/modules/services/serviceGeneration/subContentGenerator.py +++ b/modules/services/serviceGeneration/subContentGenerator.py @@ -475,11 +475,34 @@ class ContentGenerator: except (json.JSONDecodeError, ValueError) as recoveryError: logger.error(f"JSON recovery failed: {str(recoveryError)}") logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if 'recoveredJson' in locals() else 'N/A'}") - # Check if raw response might be truncated - if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted - logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)") - logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits") - raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}") + logger.error(f"Recovered JSON (last 200 chars): {recoveredJson[-200:] if 'recoveredJson' in locals() else 'N/A'}") + + # Last resort: try to extract partial content and create minimal valid JSON + try: + # Try to extract text content before the truncation point + import re + # Look for text field that might be partially complete + textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson) + if textMatch: + partialText = textMatch.group(1) + # Create minimal valid JSON with truncated text marked + elementsData = { + "elements": [{ + "text": partialText + "... 
[Content truncated due to token limit]" + }] + } + logger.warning(f"Created minimal JSON structure with truncated text for section {section.get('id')}") + else: + # If no text found, create empty structure + elementsData = {"elements": []} + logger.warning(f"Created empty JSON structure for section {section.get('id')} due to recovery failure") + except Exception as fallbackError: + logger.error(f"Fallback recovery also failed: {str(fallbackError)}") + # Check if raw response might be truncated + if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted + logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)") + logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits") + raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}") else: raise ValueError(f"Invalid JSON in AI response: {str(e)}") else: diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index 907e84a6..f2678b63 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -201,6 +201,7 @@ def closeJsonStructures(text: str) -> str: # Look for patterns like: "value" or "value\n (unterminated) # Check if we're in the middle of a string value when text ends if result.strip(): + import re # Count quotes - if odd number, we have an unterminated string quoteCount = result.count('"') if quoteCount % 2 == 1: @@ -220,30 +221,62 @@ def closeJsonStructures(text: str) -> str: result += '"' else: # Even number of quotes, but might still be in middle of string if cut off - # Check if text ends with a colon followed by a quote (start of string value) - # or ends with text that looks like it's inside a string (no closing quote after last quote) - import re - # Pattern: ends with "text" where text doesn't end with quote - # Look for pattern like: "text": "incomplete + # More robust detection: check if text ends with alphanumeric/text chars after a quote + # This handles cases 
like: "text": "value cut off mid-word + + # Pattern 1: ends with colon + quote + text (no closing quote) if re.search(r':\s*"[^"]*$', result): # We're in the middle of a string value, close it result += '"' - # Also check if we end with text after a quote (like "key": "value but cut off) - elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result): - # Check if last quote is followed by non-quote, non-structural chars + else: + # Pattern 2: find last quote and check what comes after lastQuotePos = result.rfind('"') if lastQuotePos >= 0: afterQuote = result[lastQuotePos + 1:] - # If after quote we have text but no closing quote, comma, or brace, we're in a string - if afterQuote and not re.match(r'^\s*[,}\]\]]', afterQuote): - # Check if it's escaped - escapeCount = 0 - i = lastQuotePos - 1 - while i >= 0 and result[i] == '\\': - escapeCount += 1 - i -= 1 - if escapeCount % 2 == 0: - result += '"' + # If after quote we have text (alphanumeric/whitespace) but no closing quote/comma/brace + # and the text doesn't end with structural characters, we're likely in a string + if afterQuote: + # Check if it looks like we're in a string value (has text, no closing quote) + # Pattern: ends with letters/numbers/spaces, not ending with quote, comma, }, or ] + if re.search(r'[a-zA-Z0-9\s]$', result) and not re.match(r'^\s*[,}\]\]]', afterQuote): + # Check if it's escaped + escapeCount = 0 + i = lastQuotePos - 1 + while i >= 0 and result[i] == '\\': + escapeCount += 1 + i -= 1 + if escapeCount % 2 == 0: + # Verify we're actually in a string context (not in a key name) + # Look backwards to see if we have ": " before the quote (value context) + beforeQuote = result[:lastQuotePos] + # Check if we're in a value context (has ": " before quote) or in an array (has "[ before quote) + if re.search(r':\s*"', beforeQuote[-50:]) or re.search(r'\[\s*"', beforeQuote[-50:]): + result += '"' + # Also check if text ends with alphanumeric (likely cut off mid-word) + elif re.search(r'[a-zA-Z]$', result): 
+ # If we end with a letter and have a quote before it, likely in a string + result += '"' + + # Final fallback: if text ends with alphanumeric and we have quotes, try to close the last string + # This handles edge cases where patterns above didn't match + if result.strip() and re.search(r'[a-zA-Z0-9]$', result): + # Count quotes - if we have quotes and end with text, might be in a string + if quoteCount > 0: + lastQuotePos = result.rfind('"') + if lastQuotePos >= 0: + afterQuote = result[lastQuotePos + 1:] + # If after quote is text (not empty, not structural), close it + if afterQuote and re.search(r'^[a-zA-Z0-9\s]+$', afterQuote[:50]): # Check first 50 chars after quote + # Make sure we're not already closed (check if next char would be quote/comma/brace) + if not result.endswith('"') and not result.endswith(',') and not result.endswith('}') and not result.endswith(']'): + # Check if escaped + escapeCount = 0 + i = lastQuotePos - 1 + while i >= 0 and result[i] == '\\': + escapeCount += 1 + i -= 1 + if escapeCount % 2 == 0: + result += '"' # Count open/close brackets and braces openBraces = result.count('{') From e1b3cd36f01fc6bca7284d47e72ef99835b5139e Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Thu, 25 Dec 2025 00:09:27 +0100 Subject: [PATCH 07/21] enhanced core ai call document handling with document intent --- modules/datamodels/datamodelExtraction.py | 8 + modules/services/serviceAi/mainServiceAi.py | 1626 +++++++++++++---- .../chunking/chunkerStructure.py | 36 +- .../mainServiceExtraction.py | 243 ++- .../mainServiceGeneration.py | 148 +- .../renderers/rendererBaseTemplate.py | 16 +- .../renderers/rendererDocx.py | 28 +- .../renderers/rendererHtml.py | 30 +- .../renderers/rendererMarkdown.py | 30 +- .../renderers/rendererPdf.py | 31 +- .../renderers/rendererPptx.py | 64 +- .../renderers/rendererText.py | 30 +- .../serviceGeneration/subContentGenerator.py | 179 +- .../serviceGeneration/subContentIntegrator.py | 20 +- .../subDocumentPurposeAnalyzer.py | 
316 ---- .../subPromptBuilderGeneration.py | 7 +- .../subStructureGenerator.py | 104 +- modules/shared/jsonUtils.py | 35 +- .../methods/methodAi/actions/__init__.py | 4 - .../methods/methodAi/actions/convert.py | 157 -- .../methods/methodAi/actions/extractData.py | 59 - .../methodAi/actions/generateDocument.py | 379 +--- .../methods/methodAi/actions/process.py | 77 +- .../workflows/methods/methodAi/methodAi.py | 106 -- .../methodContext/actions/extractContent.py | 36 +- .../methods/methodContext/methodContext.py | 4 +- .../ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md | 354 ---- ...ONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md | 459 ----- ...DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md | 1067 ----------- ...N_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md | 398 ---- modules/workflows/workflowManager.py | 266 ++- .../test09_document_generation_formats.py | 353 +++- 32 files changed, 2799 insertions(+), 3871 deletions(-) delete mode 100644 modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py delete mode 100644 modules/workflows/methods/methodAi/actions/convert.py delete mode 100644 modules/workflows/methods/methodAi/actions/extractData.py delete mode 100644 modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md delete mode 100644 modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md delete mode 100644 modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md delete mode 100644 modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md diff --git a/modules/datamodels/datamodelExtraction.py b/modules/datamodels/datamodelExtraction.py index 886df3b9..65f84de0 100644 --- a/modules/datamodels/datamodelExtraction.py +++ b/modules/datamodels/datamodelExtraction.py @@ -61,6 +61,14 @@ class MergeStrategy(BaseModel): capabilities: Optional[Dict[str, Any]] = Field(default=None, description="Model capabilities for intelligent merging") +class DocumentIntent(BaseModel): + 
"""Intent-Analyse für ein einzelnes Dokument""" + documentId: str = Field(description="ID des Dokuments") + intents: List[str] = Field(description="Liste von Intents: ['extract', 'render', 'reference'] - mehrere möglich") + extractionPrompt: Optional[str] = Field(default=None, description="Spezifischer Prompt für Extraktion (z.B. 'Extract text from images for legends')") + reasoning: str = Field(description="Erklärung für Debugging/Transparenz: Warum wurde dieser Intent gewählt?") + + class ExtractionOptions(BaseModel): """Options for document extraction and processing with clear data structures.""" diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 648e922c..30e7cc88 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -4,11 +4,12 @@ import json import logging import re import time +import base64 from typing import Dict, Any, List, Optional, Tuple -from modules.datamodels.datamodelChat import PromptPlaceholder +from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum -from modules.datamodels.datamodelExtraction import ContentPart +from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData from modules.interfaces.interfaceAiObjects import AiObjects from modules.shared.jsonUtils import ( @@ -183,7 +184,8 @@ Respond with ONLY a JSON object in this exact format: promptBuilder: Optional[callable] = None, promptArgs: Optional[Dict[str, Any]] = None, operationId: Optional[str] = None, - userPrompt: Optional[str] = None + userPrompt: Optional[str] = None, + contentParts: Optional[List[ContentPart]] = None # 
ARCHITECTURE: Support ContentParts for large content ) -> str: """ Shared core function for AI calls with repair-based looping system. @@ -254,10 +256,14 @@ Respond with ONLY a JSON object in this exact format: try: if iterationOperationId: self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model") + # ARCHITECTURE: Pass ContentParts directly to AiCallRequest + # This allows model-aware chunking to handle large content properly + # ContentParts are only passed in first iteration (continuations don't need them) request = AiCallRequest( prompt=iterationPrompt, context="", - options=options + options=options, + contentParts=contentParts if iteration == 1 else None # Only pass ContentParts in first iteration ) # Write the ACTUAL prompt sent to AI @@ -971,22 +977,1164 @@ If no trackable items can be identified, return: {{"kpis": []}} self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") return result + # Helper methods for callAiContent refactoring + + async def _handleImageGeneration( + self, + prompt: str, + options: AiCallOptions, + title: Optional[str], + aiOperationId: str + ) -> AiResponse: + """Handle IMAGE_GENERATE operation type.""" + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") + + request = AiCallRequest( + prompt=prompt, + context="", + options=options + ) + + response = await self.callAi(request) + + if not response.content: + errorMsg = f"No image data returned: {response.content}" + logger.error(f"Error in AI image generation: {errorMsg}") + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(errorMsg) + + imageDoc = DocumentData( + documentName="generated_image.png", + documentData=response.content, + mimeType="image/png" + ) + + metadata = AiResponseMetadata( + title=title or "Generated Image", + operationType=options.operationType.value + ) + + self.services.chat.storeWorkflowStat( + self.services.workflow, + response, + "ai.generate.image" 
+ ) + + self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated") + self.services.chat.progressLogFinish(aiOperationId, True) + + return AiResponse( + content=response.content, + metadata=metadata, + documents=[imageDoc] + ) + + async def _handleWebOperation( + self, + prompt: str, + options: AiCallOptions, + opType: OperationTypeEnum, + aiOperationId: str + ) -> AiResponse: + """Handle WEB_SEARCH and WEB_CRAWL operation types.""" + self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}") + + request = AiCallRequest( + prompt=prompt, # Raw JSON prompt - connector will parse it + context="", + options=options + ) + + response = await self.callAi(request) + + if not response.content: + errorMsg = f"No content returned from {opType.name}: {response.content}" + logger.error(f"Error in {opType.name}: {errorMsg}") + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(errorMsg) + + metadata = AiResponseMetadata( + operationType=opType.value + ) + + self.services.chat.storeWorkflowStat( + self.services.workflow, + response, + f"ai.{opType.name.lower()}" + ) + + self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed") + self.services.chat.progressLogFinish(aiOperationId, True) + + return AiResponse( + content=response.content, + metadata=metadata + ) + + def _getIntentForDocument( + self, + docId: str, + intents: Optional[List[DocumentIntent]] + ) -> Optional[DocumentIntent]: + """Find DocumentIntent for given documentId.""" + if not intents: + return None + for intent in intents: + if intent.documentId == docId: + return intent + return None + + async def _clarifyDocumentIntents( + self, + documents: List[ChatDocument], + userPrompt: str, + actionParameters: Dict[str, Any], + parentOperationId: str + ) -> List[DocumentIntent]: + """ + Phase 5A: Analysiert, welche Dokumente Extraktion vs Referenz benötigen. + Gibt DocumentIntent für jedes Dokument zurück. 
+ + Args: + documents: Liste der zu verarbeitenden Dokumente + userPrompt: User-Anfrage + actionParameters: Action-spezifische Parameter (z.B. resultType, outputFormat) + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Liste von DocumentIntent-Objekten + """ + from modules.datamodels.datamodelChat import ChatDocument + + # Erstelle Operation-ID für Intent-Analyse + intentOperationId = f"{parentOperationId}_intent_analysis" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + intentOperationId, + "Document Intent Analysis", + "Intent Analysis", + f"Analyzing {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + # Mappe pre-extracted JSONs zu ursprünglichen Dokument-IDs für Intent-Analyse + documentMapping = {} # Maps original doc ID -> JSON doc ID + resolvedDocuments = [] + + for doc in documents: + preExtracted = self._resolvePreExtractedDocument(doc) + if preExtracted: + originalDocId = preExtracted["originalDocument"]["id"] + documentMapping[originalDocId] = doc.id + # Erstelle temporäres ChatDocument für ursprüngliches Dokument + from modules.datamodels.datamodelChat import ChatDocument + originalDoc = ChatDocument( + id=originalDocId, + fileName=preExtracted["originalDocument"]["fileName"], + mimeType=preExtracted["originalDocument"]["mimeType"], + fileSize=preExtracted["originalDocument"].get("fileSize", doc.fileSize), + fileId=doc.fileId # Behalte fileId vom JSON + ) + resolvedDocuments.append(originalDoc) + else: + resolvedDocuments.append(doc) + + # Baue Intent-Analyse-Prompt mit ursprünglichen Dokumenten + intentPrompt = self._buildIntentAnalysisPrompt(userPrompt, resolvedDocuments, actionParameters) + + # AI-Call (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.callAiPlanning( + prompt=intentPrompt, + debugType="document_intent_analysis" + ) + + # Parse Result und mappe zurück zu 
JSON-Dokument-IDs falls nötig + intentsData = json.loads(self.services.utils.jsonExtractString(aiResponse)) + documentIntents = [] + for intent in intentsData.get("intents", []): + docId = intent.get("documentId") + # Wenn Intent für ursprüngliches Dokument, mappe zurück zu JSON-Dokument-ID + if docId in documentMapping: + intent["documentId"] = documentMapping[docId] + documentIntents.append(DocumentIntent(**intent)) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps([intent.dict() for intent in documentIntents], indent=2), + "document_intent_analysis_result" + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(intentOperationId, True) + + return documentIntents + + except Exception as e: + self.services.chat.progressLogFinish(intentOperationId, False) + logger.error(f"Error in _clarifyDocumentIntents: {str(e)}") + raise + + def _resolvePreExtractedDocument(self, document: ChatDocument) -> Optional[Dict[str, Any]]: + """ + Prüft ob ein JSON-Dokument bereits extrahierte ContentParts enthält. + Gibt Dict zurück mit: + - originalDocument: ChatDocument-Info des ursprünglichen Dokuments + - contentExtracted: ContentExtracted-Objekt mit Parts + - parts: Liste der ContentParts + + Returns None wenn kein pre-extracted Format erkannt wird. 
+ """ + if document.mimeType != "application/json": + return None + + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if not docBytes: + return None + + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if not isinstance(jsonData, dict): + return None + + # Check for ContentExtracted format + documentData = None + if "parts" in jsonData and isinstance(jsonData.get("parts"), list): + # Direct ContentExtracted format: {"id": "...", "parts": [...], ...} + documentData = jsonData + else: + validationMetadata = jsonData.get("validationMetadata", {}) + actionType = validationMetadata.get("actionType") + if actionType == "context.extractContent": + # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} + documentData = jsonData.get("documentData") + + if documentData: + from modules.datamodels.datamodelExtraction import ContentExtracted + + try: + contentExtracted = ContentExtracted(**documentData) + + if contentExtracted.parts: + # Extrahiere ursprüngliche Dokument-Info aus den Parts + originalDocId = None + originalFileName = None + originalMimeType = None + + for part in contentExtracted.parts: + if part.metadata: + # Versuche ursprüngliche Dokument-Info zu finden + if not originalDocId and part.metadata.get("documentId"): + originalDocId = part.metadata.get("documentId") + if not originalFileName and part.metadata.get("originalFileName"): + originalFileName = part.metadata.get("originalFileName") + if not originalMimeType and part.metadata.get("documentMimeType"): + originalMimeType = part.metadata.get("documentMimeType") + + # Falls nicht gefunden, verwende documentName aus ContentExtracted + if not originalFileName and hasattr(contentExtracted, 'id'): + # Versuche aus documentName zu extrahieren (z.B. 
"B2025-02c_28_extracted_...json" -> "B2025-02c_28.pdf") + if document.fileName and "_extracted_" in document.fileName: + originalFileName = document.fileName.split("_extracted_")[0] + ".pdf" + + return { + "originalDocument": { + "id": originalDocId or document.id, + "fileName": originalFileName or document.fileName, + "mimeType": originalMimeType or "application/pdf", + "fileSize": document.fileSize + }, + "contentExtracted": contentExtracted, + "parts": contentExtracted.parts + } + except Exception as parseError: + logger.debug(f"Could not parse ContentExtracted format: {str(parseError)}") + return None + + return None + except Exception as e: + logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}") + return None + + def _buildIntentAnalysisPrompt( + self, + userPrompt: str, + documents: List[ChatDocument], + actionParameters: Dict[str, Any] + ) -> str: + """Baue Prompt für Intent-Analyse.""" + # Baue Dokument-Liste - zeige ursprüngliche Dokumente für pre-extracted JSONs + docListText = "" + for i, doc in enumerate(documents, 1): + # Prüfe ob es ein pre-extracted JSON ist + preExtracted = self._resolvePreExtractedDocument(doc) + + if preExtracted: + # Zeige ursprüngliches Dokument statt JSON + originalDoc = preExtracted["originalDocument"] + partsInfo = f" (contains {len(preExtracted['parts'])} pre-extracted parts: {', '.join([p.typeGroup for p in preExtracted['parts'] if p.data and len(str(p.data)) > 0])})" + docListText += f"\n{i}. Document ID: {originalDoc['id']}\n" + docListText += f" File Name: {originalDoc['fileName']}{partsInfo}\n" + docListText += f" MIME Type: {originalDoc['mimeType']}\n" + docListText += f" File Size: {originalDoc.get('fileSize', doc.fileSize)} bytes\n" + else: + # Normales Dokument + docListText += f"\n{i}. 
Document ID: {doc.id}\n" + docListText += f" File Name: {doc.fileName}\n" + docListText += f" MIME Type: {doc.mimeType}\n" + docListText += f" File Size: {doc.fileSize} bytes\n" + + outputFormat = actionParameters.get("outputFormat", "txt") + + prompt = f"""USER REQUEST: +{userPrompt} + +DOCUMENTS TO ANALYZE: +{docListText} + +TASK: For each document, determine its intents (can be multiple): +- "extract": Content extraction needed (text, structure, OCR, etc.) +- "render": Image/binary should be rendered as-is (visual element) +- "reference": Document reference/attachment (no extraction, just reference) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "intents": [ + {{ + "documentId": "doc_1", + "intents": ["extract"], # Array - can contain multiple! + "extractionPrompt": "Extract all text content, preserving structure", + "reasoning": "User needs text content for document generation" + }}, + {{ + "documentId": "doc_2", + "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering + "extractionPrompt": "Extract text content from image using vision AI", + "reasoning": "Image contains text that needs extraction, but also should be rendered visually" + }}, + {{ + "documentId": "doc_3", + "intents": ["reference"], + "extractionPrompt": null, + "reasoning": "Document is only used as reference, no extraction needed" + }} + ] +}} + +CRITICAL RULES: +1. For images (mimeType starts with "image/"): + - If user wants to "include" or "show" images → add "render" + - If user wants to "analyze", "read text", or "extract text" from images → add "extract" + - Can have BOTH "extract" and "render" if image needs both text extraction and visual rendering + +2. For text documents: + - If user mentions "template" or "structure" → "reference" or "extract" based on context + - If user mentions "reference" or "context" → "reference" + - Default → "extract" + +3. 
Consider output format: + - For formats like PDF, DOCX, PPTX: images usually need "render" + - For formats like CSV, JSON: usually "extract" only + - For HTML: can have both "extract" and "render" + +Return ONLY valid JSON following the structure above. +""" + return prompt + + async def _extractAndPrepareContent( + self, + documents: List[ChatDocument], + documentIntents: List[DocumentIntent], + parentOperationId: str + ) -> List[ContentPart]: + """ + Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor. + Gibt Liste von ContentParts im passenden Format zurück. + + WICHTIG: Ein Dokument kann mehrere ContentParts erzeugen, wenn mehrere Intents vorhanden sind. + Beispiel: Bild mit intents=["extract", "render"] erzeugt: + - ContentPart(contentFormat="object", ...) für Rendering + - ContentPart(contentFormat="extracted", ...) für Text-Analyse + + Args: + documents: Liste der zu verarbeitenden Dokumente + documentIntents: Liste von DocumentIntent-Objekten + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Liste von ContentParts mit vollständigen Metadaten + """ + # Erstelle Operation-ID für Extraktion + extractionOperationId = f"{parentOperationId}_content_extraction" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + extractionOperationId, + "Content Extraction", + "Extraction", + f"Extracting from {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + allContentParts = [] + + for document in documents: + # Check if document is already a ContentExtracted document (pre-extracted JSON) + preExtracted = self._resolvePreExtractedDocument(document) + + if preExtracted: + # Verwende bereits extrahierte ContentParts direkt + contentExtracted = preExtracted["contentExtracted"] + intent = self._getIntentForDocument(document.id, documentIntents) + + if contentExtracted.parts: + for part in contentExtracted.parts: + # Überspringe leere Parts (Container 
ohne Daten) + if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + if part.typeGroup == "container": + continue # Überspringe leere Container + + if not part.metadata: + part.metadata = {} + + # Ensure metadata is complete + if "documentId" not in part.metadata: + part.metadata["documentId"] = document.id + if "contentFormat" not in part.metadata: + part.metadata["contentFormat"] = "extracted" + + # WICHTIG: Prüfe Intent für dieses Part + partIntent = intent.intents if intent else ["extract"] + + # Wenn Intent "render" für Images hat, erstelle auch object Part + if "render" in partIntent and part.typeGroup == "image" and part.data: + # Image-Part mit render Intent: Erstelle sowohl extracted als auch object Part + # 1. Extracted Part (bereits vorhanden) + part.metadata["intent"] = "extract" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = True + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) + + # 2. 
Object Part für Rendering (base64 data ist bereits im extracted Part) + objectPart = ContentPart( + id=f"obj_{document.id}_{part.id}", + label=f"Object: {part.label or 'Image'}", + typeGroup="image", + mimeType=part.mimeType or "image/jpeg", + data=part.data, # Base64 data ist bereits vorhanden + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedExtractedPartId": part.id + } + ) + allContentParts.append(objectPart) + else: + # Normales extracted Part + part.metadata["intent"] = partIntent[0] if partIntent else "extract" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = True + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) + + logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}") + logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}") + continue # Skip normal extraction for this document + + # Check if it's standardized JSON format (has "documents" or "sections") + if document.mimeType == "application/json": + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if docBytes: + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): + logger.info(f"Document is already in standardized JSON format, using as reference") + # Create reference ContentPart for structured JSON + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="structure", + mimeType="application/json", + data=docData, + metadata={ + "contentFormat": "reference", + 
"documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "skipExtraction": True, + "intent": "reference" + } + ) + allContentParts.append(contentPart) + logger.info(f"✅ Using JSON document directly without extraction") + continue # Skip normal extraction for this document + except Exception as e: + logger.warning(f"Could not parse JSON document {document.fileName}, will extract normally: {str(e)}") + # Continue with normal extraction + + # Normal extraction path + intent = self._getIntentForDocument(document.id, documentIntents) + + if not intent: + # Default: extract für alle Dokumente ohne Intent + logger.warning(f"No intent found for document {document.id}, using default 'extract'") + intent = DocumentIntent( + documentId=document.id, + intents=["extract"], + extractionPrompt="Extract all content from the document", + reasoning="Default intent: no specific intent found" + ) + + # WICHTIG: Prüfe alle Intents - ein Dokument kann mehrere ContentParts erzeugen + + if "reference" in intent.intents: + # Erstelle Reference ContentPart + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="reference", + mimeType=document.mimeType, + data="", + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "intent": "reference", + "usageHint": f"Reference document: {document.fileName}" + } + ) + allContentParts.append(contentPart) + + # WICHTIG: "render" und "extract" können beide vorhanden sein! 
+ # In diesem Fall erzeugen wir BEIDE ContentParts + + if "render" in intent.intents: + # Für Images/Binary: extrahiere als Object + if document.mimeType.startswith("image/") or self._isBinary(document.mimeType): + try: + # Lade Binary-Daten (getFileData ist nicht async - keine await nötig) + binaryData = self.services.interfaceDbComponent.getFileData(document.fileId) + if not binaryData: + logger.warning(f"No binary data found for document {document.id}") + continue + base64Data = base64.b64encode(binaryData).decode('utf-8') + + contentPart = ContentPart( + id=f"obj_{document.id}", + label=f"Object: {document.fileName}", + typeGroup="image" if document.mimeType.startswith("image/") else "binary", + mimeType=document.mimeType, + data=base64Data, + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {document.fileName}", + "originalFileName": document.fileName, + # Verknüpfung zu extracted Part (falls vorhanden) + "relatedExtractedPartId": f"ext_{document.id}" if "extract" in intent.intents else None + } + ) + allContentParts.append(contentPart) + except Exception as e: + logger.error(f"Failed to load binary data for document {document.id}: {str(e)}") + + if "extract" in intent.intents: + # Extrahiere Content mit Extraction Service + extractionPrompt = intent.extractionPrompt or "Extract all content from the document" + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + extractionPrompt, + f"content_extraction_prompt_{document.id}" + ) + + # Führe Extraktion aus + from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy() + ) + + # extractContent ist nicht async - keine await nötig + extractedResults = self.services.extraction.extractContent( + [document], + extractionOptions, + operationId=extractionOperationId, + 
parentOperationId=extractionOperationId + ) + + # Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten + for extracted in extractedResults: + for part in extracted.parts: + # Markiere als extracted Format + part.metadata.update({ + "contentFormat": "extracted", + "documentId": document.id, + "extractionPrompt": extractionPrompt, + "intent": "extract", + "usageHint": f"Use extracted content from {document.fileName}", + # Verknüpfung zu object Part (falls vorhanden) + "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None + }) + # Stelle sicher, dass ID eindeutig ist (falls object Part existiert) + if "render" in intent.intents: + part.id = f"ext_{document.id}_{part.id}" + allContentParts.append(part) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps([part.dict() for part in allContentParts], indent=2, default=str), + "content_extraction_result" + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(extractionOperationId, True) + + return allContentParts + + except Exception as e: + self.services.chat.progressLogFinish(extractionOperationId, False) + logger.error(f"Error in _extractAndPrepareContent: {str(e)}") + raise + + def _isBinary(self, mimeType: str) -> bool: + """Prüfe ob MIME-Type binary ist.""" + binaryTypes = [ + "application/octet-stream", + "application/pdf", + "application/zip", + "application/x-zip-compressed" + ] + return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/") + + async def _generateStructure( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5C: Generiert Dokument-Struktur mit Sections. 
+ Jede Section spezifiziert: + - Welcher Content sollte in dieser Section sein + - Welche ContentParts zu verwenden sind + - Format für jeden ContentPart + + Args: + userPrompt: User-Anfrage + contentParts: Alle vorbereiteten ContentParts mit Metadaten + outputFormat: Ziel-Format (html, docx, pdf, etc.) + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Struktur-Dict mit documents und sections + """ + # Erstelle Operation-ID für Struktur-Generierung + structureOperationId = f"{parentOperationId}_structure_generation" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + structureOperationId, + "Structure Generation", + "Structure", + f"Generating structure for {outputFormat}", + parentOperationId=parentOperationId + ) + + try: + # Baue Struktur-Prompt mit Content-Index + structurePrompt = self._buildStructurePrompt( + userPrompt=userPrompt, + contentParts=contentParts, + outputFormat=outputFormat + ) + + # AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.callAiPlanning( + prompt=structurePrompt, + debugType="document_generation_structure" + ) + + # Parse Struktur + structure = json.loads(self.services.utils.jsonExtractString(aiResponse)) + + # ChatLog abschließen + self.services.chat.progressLogFinish(structureOperationId, True) + + return structure + + except Exception as e: + self.services.chat.progressLogFinish(structureOperationId, False) + logger.error(f"Error in _generateStructure: {str(e)}") + raise + + def _buildStructurePrompt( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str + ) -> str: + """Baue Prompt für Struktur-Generierung.""" + # Baue ContentParts-Index - filtere leere Parts heraus + contentPartsIndex = "" + validParts = [] + for part in contentParts: + # Überspringe leere Parts (keine Daten oder nur Container ohne Inhalt) + if not 
part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + # Überspringe Container-Parts ohne Daten + if part.typeGroup == "container" and not part.data: + continue + # Überspringe andere leere Parts + if not part.data: + continue + + validParts.append(part) + + # Baue Index nur für gültige Parts + for i, part in enumerate(validParts, 1): + contentFormat = part.metadata.get("contentFormat", "unknown") + dataPreview = "" + + if contentFormat == "extracted": + # Für Image-Parts: Zeige dass es ein Image ist + if part.typeGroup == "image": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "image" + dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content" + elif part.typeGroup == "container": + # Container ohne Daten überspringen wir bereits oben + dataPreview = "Container structure (no text content)" + else: + # Zeige Preview von extrahiertem Text + if part.data: + preview = part.data[:200] + "..." if len(part.data) > 200 else part.data + dataPreview = preview + else: + dataPreview = "(empty)" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "binary" + if part.typeGroup == "image": + dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)" + else: + dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)" + elif contentFormat == "reference": + dataPreview = part.metadata.get("documentReference", "reference") + + contentPartsIndex += f"\n{i}. 
ContentPart ID: {part.id}\n" + contentPartsIndex += f" Format: {contentFormat}\n" + contentPartsIndex += f" Type: {part.typeGroup}\n" + contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n" + contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n" + contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n" + contentPartsIndex += f" Data preview: {dataPreview}\n" + + if not contentPartsIndex: + contentPartsIndex = "\n(No content parts available)" + + prompt = f"""USER REQUEST: +{userPrompt} + +AVAILABLE CONTENT PARTS: +{contentPartsIndex} + +TASK: Generiere Dokument-Struktur mit Sections. +Für jede Section, spezifiziere: +- section id +- content_type (heading, paragraph, image, table, etc.) +- contentPartIds: [Liste von ContentPart-IDs zu verwenden] +- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist +- generation_hint: Was AI für diese Section generieren soll +- elements: [] (leer, wird in nächster Phase gefüllt) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "metadata": {{ + "title": "Document Title", + "language": "de" + }}, + "documents": [{{ + "id": "doc_1", + "title": "Document Title", + "filename": "document.{outputFormat}", + "sections": [ + {{ + "id": "section_1", + "content_type": "heading", + "generation_hint": "Main title", + "contentPartIds": [], + "contentFormats": {{}}, + "elements": [] + }}, + {{ + "id": "section_2", + "content_type": "paragraph", + "generation_hint": "Introduction paragraph", + "contentPartIds": ["part_ext_1"], + "contentFormats": {{ + "part_ext_1": "extracted" + }}, + "elements": [] + }} + ] + }}] +}} + +Return ONLY valid JSON following the structure above. +""" + return prompt + + async def _fillStructure( + self, + structure: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5D: Füllt Struktur mit tatsächlichem Content. 
+ Für jede Section: + - Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format + - Wenn generation_hint spezifiziert: Generiere AI-Content + + **Implementierungsdetails:** + - Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung) + - Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses) + + Args: + structure: Struktur-Dict mit documents und sections + contentParts: Alle vorbereiteten ContentParts + userPrompt: User-Anfrage + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Gefüllte Struktur mit elements in jeder Section + """ + import copy + + # Erstelle Operation-ID für Struktur-Abfüllen + fillOperationId = f"{parentOperationId}_structure_filling" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + fillOperationId, + "Structure Filling", + "Filling", + f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", + parentOperationId=parentOperationId + ) + + try: + filledStructure = copy.deepcopy(structure) + + # Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) + sections_to_process = [] + for doc in filledStructure.get("documents", []): + for section in doc.get("sections", []): + sections_to_process.append((doc, section)) + + # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) + for doc, section in sections_to_process: + sectionId = section.get("id") + contentPartIds = section.get("contentPartIds", []) + contentFormats = section.get("contentFormats", {}) + generationHint = section.get("generation_hint") + + elements = [] + + # Verarbeite ContentParts + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if not part: + continue + + contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + # Füge Dokument-Referenz hinzu + 
elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "object": + # Füge base64 Object hinzu + elements.append({ + "type": part.typeGroup, # "image", "binary", etc. + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "extracted": + # Füge extrahierten Text hinzu (kann in AI-Generierungs-Prompt verwendet werden) + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) + + # Generiere AI-Content wenn nötig + if generationHint: + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[self._findContentPartById(pid, contentParts) for pid in contentPartIds], + userPrompt=userPrompt, + generationHint=generationHint + ) + + # Erstelle Operation-ID für Section-Generierung + # Debug-Logs werden bereits von callAiPlanning geschrieben + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation", + "Section", + f"Generating section {sectionId}", + parentOperationId=fillOperationId + ) + + try: + # Generiere Content (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.callAiPlanning( + prompt=generationPrompt, + debugType=f"section_generation_{sectionId}" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + 
elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) + self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + + section["elements"] = elements + + # ChatLog abschließen + self.services.chat.progressLogFinish(fillOperationId, True) + + return filledStructure + + except Exception as e: + self.services.chat.progressLogFinish(fillOperationId, False) + logger.error(f"Error in _fillStructure: {str(e)}") + raise + + def _buildSectionGenerationPrompt( + self, + section: Dict[str, Any], + contentParts: List[Optional[ContentPart]], + userPrompt: str, + generationHint: str + ) -> str: + """Baue Prompt für Section-Generierung.""" + # Filtere None-Werte + validParts = [p for p in contentParts if p is not None] + + contentPartsText = "" + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f"\n- ContentPart {part.id}:\n" + contentPartsText += f" Format: {contentFormat}\n" + if contentFormat == "extracted": + contentPartsText += f" Content: {part.data[:500]}...\n" if len(part.data) > 500 else f" Content: {part.data}\n" + elif contentFormat == "reference": + contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" + elif contentFormat == "object": + contentPartsText += f" Object: {part.typeGroup} ({part.mimeType})\n" + + prompt = f"""USER REQUEST: +{userPrompt} + +SECTION TO GENERATE: +{generationHint} + +AVAILABLE CONTENT FOR THIS SECTION: +{contentPartsText} + +CRITICAL: Return ONLY a JSON object with an "elements" array. 
+Jedes Element sollte dem content_type der Section entsprechen. +""" + return prompt + + def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]: + """Finde ContentPart nach ID.""" + for part in contentParts: + if part.id == partId: + return part + return None + + async def _renderResult( + self, + filledStructure: Dict[str, Any], + outputFormat: str, + title: str, + userPrompt: str, + parentOperationId: str + ) -> Tuple[bytes, str]: + """ + Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. + Unterstützt Multi-Dokument-Rendering: Alle Dokumente werden gerendert. + + Args: + filledStructure: Gefüllte Struktur mit elements + outputFormat: Ziel-Format (pdf, docx, html, etc.) + title: Dokument-Titel + userPrompt: User-Anfrage + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Tuple von (renderedContent, mimeType) + """ + # Erstelle Operation-ID für Rendering + renderOperationId = f"{parentOperationId}_rendering" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + renderOperationId, + "Content Rendering", + "Rendering", + f"Rendering to {outputFormat} format", + parentOperationId=parentOperationId + ) + + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + + generationService = GenerationService(self.services) + + # Multi-Dokument-Rendering + documents = filledStructure.get("documents", []) + + if len(documents) == 1: + # Einzelnes Dokument - wie bisher + renderedContent, mimeType, images = await generationService.renderReport( + filledStructure, + outputFormat, + title, + userPrompt, + self, + parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie + ) + else: + # Mehrere Dokumente - rendere alle + # Option: Alle Sections zusammenführen und als ein Dokument rendern + all_sections = [] + for doc in documents: + if "sections" in doc: + all_sections.extend(doc.get("sections", [])) + + # Erstelle 
temporäres Dokument mit allen Sections + merged_document = { + "metadata": filledStructure["metadata"], + "documents": [{ + "id": "merged", + "title": title, + "filename": f"{title}.{outputFormat}", + "sections": all_sections + }] + } + + renderedContent, mimeType, images = await generationService.renderReport( + merged_document, + outputFormat, + title, + userPrompt, + self, + parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(renderOperationId, True) + + return renderedContent, mimeType + + except Exception as e: + self.services.chat.progressLogFinish(renderOperationId, False) + logger.error(f"Error in _renderResult: {str(e)}") + raise + + def _shouldSkipContentPart( + self, + part: ContentPart + ) -> bool: + """Check if ContentPart should be skipped (already structured JSON).""" + if part.typeGroup == "structure" and part.mimeType == "application/json": + if part.metadata.get("skipExtraction", False): + logger.debug(f"Skipping already-structured JSON ContentPart {part.id} (skipExtraction=True)") + return True + try: + if isinstance(part.data, str): + jsonData = json.loads(part.data) + if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): + logger.debug(f"Skipping already-structured JSON ContentPart {part.id} (contains documents/sections)") + return True + except Exception: + pass # Not JSON, continue processing + return False + async def callAiContent( self, prompt: str, options: AiCallOptions, contentParts: Optional[List[ContentPart]] = None, + documentList: Optional[Any] = None, # DocumentReferenceList + documentIntents: Optional[List[DocumentIntent]] = None, outputFormat: Optional[str] = None, title: Optional[str] = None, - parentOperationId: Optional[str] = None # Parent operation ID for hierarchical logging + parentOperationId: Optional[str] = None ) -> AiResponse: """ - Unified AI content processing method (replaces 
callAiDocuments and callAiText). + Einheitliche AI-Content-Verarbeitung - Single Entry Point für alle AI-Actions. + + Alle AI-Actions (ai.process, ai.generateDocument, etc.) routen hier durch. + Sie unterscheiden sich nur in Parametern, nicht in Logik. Args: prompt: The main prompt for the AI call - contentParts: Optional list of already-extracted content parts (preferred) options: AI call configuration options (REQUIRED - operationType must be set) + contentParts: Optional list of already-extracted content parts (preferred) + documentList: Optional DocumentReferenceList (wird zu ChatDocuments konvertiert) + documentIntents: Optional list of DocumentIntent objects (wird erstellt wenn nicht vorhanden) outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx') title: Optional title for generated documents parentOperationId: Optional parent operation ID for hierarchical logging @@ -996,14 +2144,11 @@ If no trackable items can be identified, return: {{"kpis": []}} """ await self.ensureAiObjectsInitialized() - # Create separate operationId for detailed progress tracking + # Erstelle Operation-ID workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" aiOperationId = f"ai_content_{workflowId}_{int(time.time())}" - # Use parent operation ID directly (parentId should be operationId, not log entry ID) - # parentOperationId is already the operationId of the parent - - # Start progress tracking with parent reference + # Starte Progress-Tracking mit Parent-Referenz self.services.chat.progressLogStart( aiOperationId, "AI content processing", @@ -1013,376 +2158,141 @@ If no trackable items can be identified, return: {{"kpis": []}} ) try: - # Default outputFormat to "txt" if not specified (unified path - all formats handled the same way) + # Initialisiere Defaults if not outputFormat: outputFormat = "txt" - # Extraction is now separate - contentParts must be extracted before calling - # Require 
operationType to be set before calling opType = getattr(options, "operationType", None) if not opType: - # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE options.operationType = OperationTypeEnum.DATA_GENERATE opType = OperationTypeEnum.DATA_GENERATE - # Handle IMAGE_GENERATE operations + # Route zu Operation-spezifischen Handlern if opType == OperationTypeEnum.IMAGE_GENERATE: - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") - - request = AiCallRequest( - prompt=prompt, - context="", - options=options - ) - - response = await self.callAi(request) - - if response.content: - # Build document data for image - imageDoc = DocumentData( - documentName="generated_image.png", - documentData=response.content, - mimeType="image/png" - ) - - metadata = AiResponseMetadata( - title=title or "Generated Image", - operationType=opType.value - ) - - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - "ai.generate.image" - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=response.content, - metadata=metadata, - documents=[imageDoc] - ) - else: - errorMsg = f"No image data returned: {response.content}" - logger.error(f"Error in AI image generation: {errorMsg}") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(errorMsg) + return await self._handleImageGeneration(prompt, options, title, aiOperationId) - # Handle WEB_SEARCH and WEB_CRAWL operations if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL: - self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}") - - request = AiCallRequest( - prompt=prompt, # Raw JSON prompt - connector will parse it - context="", - options=options - ) - - response = await self.callAi(request) - - if response.content: - metadata = 
AiResponseMetadata( - operationType=opType.value - ) - - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - f"ai.{opType.name.lower()}" - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=response.content, - metadata=metadata - ) - else: - errorMsg = f"No content returned from {opType.name}: {response.content}" - logger.error(f"Error in {opType.name}: {errorMsg}") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(errorMsg) + return await self._handleWebOperation(prompt, options, opType, aiOperationId) - # Handle document generation (outputFormat always set, defaults to "txt") - # Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way - # outputFormat is always set now (defaults to "txt" if not specified) - - # CRITICAL: For document generation with JSON templates, NEVER compress the prompt + # Dokument-Generierungs-Pfad options.compressPrompt = False options.compressContext = False - # Process contentParts for generation prompt (if provided) - # Use generic callWithContentParts() which handles all content types (images, text, etc.) 
- # This automatically processes images with vision models and merges all results - if contentParts: - # Filter out binary/other parts that shouldn't be processed - processableParts = [] - skippedParts = [] - for p in contentParts: - if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))): - processableParts.append(p) - else: - skippedParts.append(p) - - if skippedParts: - logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation") - - if processableParts: - # Count images for progress update - imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))]) - if imageCount > 0: - self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models") - - # Build proper extraction prompt using buildExtractionPrompt - # This creates a focused extraction prompt, not the user's generation prompt - from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt - - # Determine renderer for format-specific guidelines - renderer = None - if outputFormat: - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - renderer = generationService.getRendererForFormat(outputFormat) - except Exception as e: - logger.debug(f"Could not get renderer for format {outputFormat}: {e}") - - extractionPrompt = await buildExtractionPrompt( - outputFormat=outputFormat or "txt", - userPrompt=prompt, # User's prompt as context for what to extract - title=title or "Document", - aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None, - services=self.services, - renderer=renderer - ) - - logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt") - - # Use DATA_EXTRACT 
operation type for extraction - extractionOptions = AiCallOptions( - operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction - compressPrompt=options.compressPrompt, - compressContext=options.compressContext - ) - - extractionRequest = AiCallRequest( - prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt - context="", - options=extractionOptions, - contentParts=processableParts - ) - - # Write debug file for extraction prompt (all parts) - self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt") - - # Call generic content parts processor - handles images, text, chunking, merging - extractionResponse = await self.callAi(extractionRequest) - - # Write debug file for extraction response - if extractionResponse.content: - self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response") - else: - self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response") - logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})") - - # Use extracted content directly for generation prompt - if extractionResponse.errorCount == 0 and extractionResponse.content: - # The extracted content is already merged and ready to use - content_for_generation = extractionResponse.content - logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation") - else: - # Extraction failed - use placeholders - logger.warning(f"Content extraction failed, using placeholders") - placeholderParts = [] - for p in processableParts: - placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]") - content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None - else: - content_for_generation = None - logger.debug("No processable parts found in contentParts") - else: 
- content_for_generation = None + # Schritt 5A: Kläre Dokument-Intents + documents = [] + if documentList: + documents = self.services.chat.getChatDocumentsFromDocumentList(documentList) - # Detect if this is a section generation prompt (not full document generation) - # Section prompts contain "SECTION TO GENERATE" marker - isSectionGeneration = "SECTION TO GENERATE" in prompt or "CRITICAL: Return ONLY a JSON object with an \"elements\" array" in prompt - - if isSectionGeneration: - # For section generation, use the prompt directly without wrapping - # Section prompts are already complete and should not be wrapped in document generation template - logger.debug("Detected section generation prompt - skipping document generation wrapper") - generation_prompt = prompt - - # Call AI directly without looping (sections are simple, single-call) - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for section generation") - request = AiCallRequest( - prompt=generation_prompt, - context="", - options=options - ) - response = await self.callAi(request) - generated_json = response.content if response and response.content else "" - - # For section generation, return the raw JSON content directly - # No rendering needed - sections are just JSON elements - self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Section content generated") - self.services.chat.progressLogFinish(aiOperationId, True) - - metadata = AiResponseMetadata( - title=title or "Section Content", - operationType=opType.value if opType else None - ) - - return AiResponse( - content=generated_json, - metadata=metadata, - documents=[] - ) - else: - # Full document generation - use the wrapper - self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - - generation_prompt = await buildGenerationPrompt( - outputFormat, prompt, title, content_for_generation, None, 
self.services - ) - - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": content_for_generation, - "services": self.services - } - - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - # Extract user prompt from promptArgs for task completion analysis - userPrompt = None - if promptArgs: - userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") - - # Track generation progress - the looping function will update with byte progress - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId, - userPrompt=userPrompt + if not documentIntents and documents: + documentIntents = await self._clarifyDocumentIntents( + documents, + prompt, + {"outputFormat": outputFormat}, + aiOperationId ) - # Calculate final size for completion message - finalSize = len(generated_json.encode('utf-8')) if generated_json else 0 - if finalSize < 1024: - finalSizeDisplay = f"{finalSize}B" - elif finalSize < 1024 * 1024: - finalSizeDisplay = f"{finalSize / 1024:.1f}kB" - else: - finalSizeDisplay = f"{finalSize / (1024 * 1024):.1f}MB" + # Schritt 5B: Extrahiere und bereite Content vor + if documents: + preparedContentParts = await self._extractAndPrepareContent( + documents, + documentIntents or [], + aiOperationId + ) + + # Merge mit bereitgestellten contentParts (falls vorhanden) + if contentParts: + # Prüfe auf pre-extracted Content + for part in contentParts: + if part.metadata.get("skipExtraction", False): + # Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig + part.metadata.setdefault("contentFormat", "extracted") + part.metadata.setdefault("isPreExtracted", True) + preparedContentParts.extend(contentParts) + + contentParts = preparedContentParts - self.services.chat.progressLogUpdate(aiOperationId, 0.7, f"Parsing generated JSON 
({finalSizeDisplay})") - try: - extracted_json = self.services.utils.jsonExtractString(generated_json) - generated_data = json.loads(extracted_json) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse generated JSON: {str(e)}") - self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(f"Generated content is not valid JSON: {str(e)}") - - # Extract title and filename from generated document structure - extractedTitle = title - extractedFilename = None - if isinstance(generated_data, dict) and "documents" in generated_data: - docs = generated_data["documents"] - if isinstance(docs, list) and len(docs) > 0: - firstDoc = docs[0] - if isinstance(firstDoc, dict): - if firstDoc.get("title"): - extractedTitle = firstDoc["title"] - if firstDoc.get("filename"): - extractedFilename = firstDoc["filename"] - - # Ensure metadata contains the extracted title - if "metadata" not in generated_data: - generated_data["metadata"] = {} - if extractedTitle: - generated_data["metadata"]["title"] = extractedTitle - - # Create separate operation for content rendering - renderOperationId = f"{aiOperationId}_render" - # Use aiOperationId directly as parentOperationId (operationId, not log entry ID) - self.services.chat.progressLogStart( - renderOperationId, - "Content Rendering", - "Rendering", - f"Format: {outputFormat}", - parentOperationId=aiOperationId + # Schritt 5C: Generiere Struktur + structure = await self._generateStructure( + prompt, + contentParts or [], + outputFormat, + aiOperationId ) - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format") - rendered_content, mime_type, _images = await generationService.renderReport( - generated_data, outputFormat, extractedTitle or 
"Generated Document", prompt, self - ) - self.services.chat.progressLogFinish(renderOperationId, True) - - # Determine document name - if extractedFilename: - documentName = extractedFilename - elif extractedTitle and extractedTitle != "Generated Document": - sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle) - sanitized = re.sub(r"_+", "_", sanitized).strip("_") - if sanitized: - if not sanitized.lower().endswith(f".{outputFormat}"): - documentName = f"{sanitized}.{outputFormat}" - else: - documentName = sanitized - else: - documentName = f"generated.{outputFormat}" - else: - documentName = f"generated.{outputFormat}" - - # Build document data - docData = DocumentData( - documentName=documentName, - documentData=rendered_content, - mimeType=mime_type, - sourceJson=generated_data # Preserve source JSON for structure validation - ) - - metadata = AiResponseMetadata( - title=extractedTitle or title or "Generated Document", - filename=extractedFilename, - operationType=opType.value if opType else None - ) - - # Write JSON with proper formatting (not str() which can truncate) - jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False) - self.services.utils.writeDebugFile(jsonStr, "document_generation_response") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=json.dumps(generated_data), - metadata=metadata, - documents=[docData] - ) - - except Exception as e: - logger.error(f"Error rendering document: {str(e)}") - if renderOperationId: - self.services.chat.progressLogFinish(renderOperationId, False) - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(f"Rendering failed: {str(e)}") + # Schritt 5D: Fülle Struktur + filledStructure = await self._fillStructure( + structure, + contentParts or [], + prompt, + aiOperationId + ) + + # Schritt 5E: Rendere Resultat + renderedContent, mimeType = await self._renderResult( + filledStructure, + outputFormat, + title or "Generated Document", + 
prompt, + aiOperationId + ) + + # Baue Response + documentName = self._determineDocumentName(filledStructure, outputFormat, title) + + docData = DocumentData( + documentName=documentName, + documentData=renderedContent, + mimeType=mimeType, + sourceJson=filledStructure + ) + + metadata = AiResponseMetadata( + title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"), + operationType=opType.value + ) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str), + "document_generation_response" + ) + + self.services.chat.progressLogFinish(aiOperationId, True) + + return AiResponse( + content=json.dumps(filledStructure), + metadata=metadata, + documents=[docData] + ) except Exception as e: logger.error(f"Error in callAiContent: {str(e)}") self.services.chat.progressLogFinish(aiOperationId, False) raise + + def _determineDocumentName( + self, + filledStructure: Dict[str, Any], + outputFormat: str, + title: Optional[str] + ) -> str: + """Bestimme Dokument-Namen aus Struktur oder Titel.""" + # Versuche aus Struktur zu extrahieren + if isinstance(filledStructure, dict) and "documents" in filledStructure: + docs = filledStructure["documents"] + if isinstance(docs, list) and len(docs) > 0: + firstDoc = docs[0] + if isinstance(firstDoc, dict) and firstDoc.get("filename"): + return firstDoc["filename"] + + # Fallback zu Titel + if title: + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if sanitized: + if not sanitized.lower().endswith(f".{outputFormat}"): + return f"{sanitized}.{outputFormat}" + return sanitized + + return f"generated.{outputFormat}" diff --git a/modules/services/serviceExtraction/chunking/chunkerStructure.py b/modules/services/serviceExtraction/chunking/chunkerStructure.py index bdf1bcdb..f4d23a72 100644 --- a/modules/services/serviceExtraction/chunking/chunkerStructure.py +++ 
b/modules/services/serviceExtraction/chunking/chunkerStructure.py @@ -34,12 +34,42 @@ class StructureChunker(Chunker): if bucket: emit(bucket) else: + # JSON object (dict) - check if it fits text = json.dumps(obj, ensure_ascii=False) - if len(text.encode('utf-8')) <= maxBytes: + textSize = len(text.encode('utf-8')) + if textSize <= maxBytes: emit(obj) else: - # fallback to line chunking - raise ValueError("too large") + # Object too large - try to split by keys if possible + # For large objects, we need to chunk by character boundaries + # since we can't split JSON objects arbitrarily + if isinstance(obj, dict) and len(obj) > 1: + # Try to split object into multiple chunks by keys + # This preserves JSON structure better than line-based chunking + currentChunk: Dict[str, Any] = {} + currentSize = 2 # Start with "{}" overhead + for key, value in obj.items(): + itemText = json.dumps({key: value}, ensure_ascii=False) + itemSize = len(itemText.encode('utf-8')) + # Account for comma and spacing between items + if currentChunk: + itemSize += 2 # ", " separator + + if currentSize + itemSize > maxBytes and currentChunk: + # Current chunk is full, emit it + emit(currentChunk) + currentChunk = {key: value} + currentSize = len(itemText.encode('utf-8')) + else: + currentChunk[key] = value + currentSize += itemSize + + # Emit remaining chunk + if currentChunk: + emit(currentChunk) + else: + # Single large value or can't split - fallback to line chunking + raise ValueError("too large") except Exception: current: List[str] = [] size = 0 diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index 663753cd..a2972453 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -6,10 +6,11 @@ import logging import time import asyncio import base64 +import json from .subRegistry import ExtractorRegistry, ChunkerRegistry from 
.subPipeline import runExtraction -from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult +from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult, DocumentIntent from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall from modules.aicore.aicoreModelRegistry import modelRegistry @@ -73,12 +74,14 @@ class ExtractionService: if operationId: workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" docOperationId = f"{operationId}_doc_{i}" + # Use parentOperationId if provided, otherwise use operationId as parent + parentId = parentOperationId if parentOperationId else operationId self.services.chat.progressLogStart( docOperationId, "Extracting Document", f"Document {i + 1}/{totalDocs}", doc.fileName[:50] + "..." 
if len(doc.fileName) > 50 else doc.fileName, - parentOperationId=operationId # Use operationId as parent (not parentOperationId) + parentOperationId=parentId # Correct parent reference for ChatLog hierarchy ) # Start timing for this document @@ -125,12 +128,41 @@ class ExtractionService: if part.metadata: logger.debug(f" Metadata: {part.metadata}") - # Attach document id and MIME type to parts if missing + # Attach complete metadata to parts according to ContentPart Metadaten-Schema for p in ec.parts: + # Ensure metadata dict exists + if not p.metadata: + p.metadata = {} + + # Required metadata fields (from concept) if "documentId" not in p.metadata: p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4()) if "documentMimeType" not in p.metadata: p.metadata["documentMimeType"] = documentData["mimeType"] + if "originalFileName" not in p.metadata: + p.metadata["originalFileName"] = documentData["fileName"] + + # ContentFormat: Set based on typeGroup and mimeType + # Default to "extracted" for text content, but can be overridden by caller + if "contentFormat" not in p.metadata: + # Default: extracted text content + p.metadata["contentFormat"] = "extracted" + + # Intent: Default to "extract" for extracted content + if "intent" not in p.metadata: + p.metadata["intent"] = "extract" + + # ExtractionPrompt: Use from options if available + if "extractionPrompt" not in p.metadata and options and options.prompt: + p.metadata["extractionPrompt"] = options.prompt + + # UsageHint: Provide default hint + if "usageHint" not in p.metadata: + p.metadata["usageHint"] = f"Use extracted content from {documentData['fileName']}" + + # SourceAction: Mark as from extraction service + if "sourceAction" not in p.metadata: + p.metadata["sourceAction"] = "extraction.extractContent" # Log chunking information chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)] @@ -185,7 +217,7 @@ class ExtractionService: # Write extraction results to debug file try: from 
modules.shared.debugLogger import writeDebugFile - import json + # json is already imported at module level # Create summary of extraction results for debug extractionSummary = { "documentName": doc.fileName, @@ -487,7 +519,8 @@ class ExtractionService: prompt: str, aiObjects: Any, options: Optional[AiCallOptions] = None, - operationId: Optional[str] = None + operationId: Optional[str] = None, + parentOperationId: Optional[str] = None ) -> str: """ Process documents with model-aware chunking and merge results. @@ -499,6 +532,7 @@ class ExtractionService: aiObjects: AiObjects instance for making AI calls options: AI call options operationId: Optional operation ID for progress tracking + parentOperationId: Optional parent operation ID for hierarchical logging Returns: Merged AI results as string with preserved document structure @@ -514,7 +548,8 @@ class ExtractionService: operationId, "AI Text Extract", "Document Processing", - f"Processing {len(documents)} documents" + f"Processing {len(documents)} documents", + parentOperationId=parentOperationId # Use parentOperationId if provided ) try: @@ -539,7 +574,8 @@ class ExtractionService: if operationId: self.services.chat.progressLogUpdate(operationId, 0.1, f"Extracting content from {len(documents)} documents") # Pass operationId as parentOperationId for hierarchical logging - extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=parentOperationId) + # Correct hierarchy: parentOperationId -> operationId -> docOperationId + extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=operationId) if not isinstance(extractionResult, list): if operationId: @@ -549,9 +585,10 @@ class ExtractionService: # Process parts (not chunks) with model-aware AI calls if operationId: self.services.chat.progressLogUpdate(operationId, 0.3, f"Processing {len(extractionResult)} extracted content parts") - # Use parent operation ID 
directly (parentId should be operationId, not log entry ID) - parentOperationId = operationId # Use the parent's operationId directly - partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentOperationId) + # Use operationId as parentOperationId for child operations + # Correct hierarchy: parentOperationId -> operationId -> partOperationId + processParentOperationId = operationId + partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, processParentOperationId) # Merge results using existing merging system if operationId: @@ -733,7 +770,8 @@ class ExtractionService: # Detect input type and convert accordingly if isinstance(partResults[0], PartResult): # Existing logic for PartResult (from processDocumentsPerChunk) - for part_result in partResults: + # Phase 7: Add originalIndex for explicit ordering + for i, part_result in enumerate(partResults): content_part = ContentPart( id=part_result.originalPart.id, parentId=part_result.originalPart.parentId, @@ -744,7 +782,9 @@ class ExtractionService: metadata={ **part_result.originalPart.metadata, "aiResult": True, + "originalIndex": i, # Phase 7: Explicit order index "partIndex": part_result.partIndex, + "processingOrder": i, # Phase 7: Processing order "documentId": part_result.documentId, "processingTime": part_result.processingTime, "success": part_result.metadata.get("success", False) @@ -753,6 +793,7 @@ class ExtractionService: content_parts.append(content_part) elif isinstance(partResults[0], AiCallResponse): # Logic from interfaceAiObjects (from content parts processing) + # Phase 7: Add originalIndex for explicit ordering for i, result in enumerate(partResults): if result.content: content_part = ContentPart( @@ -764,6 +805,8 @@ class ExtractionService: data=result.content, metadata={ "aiResult": True, + "originalIndex": i, # Phase 7: Explicit order index + "processingOrder": i, # Phase 7: Processing 
order "modelName": result.modelName, "priceUsd": result.priceUsd, "processingTime": result.processingTime, @@ -792,11 +835,12 @@ class ExtractionService: # Determine merge strategy based on input type if isinstance(partResults[0], PartResult): - # Use strategy for extraction workflow (group by document, order by part index) + # Phase 7: Use originalIndex for explicit ordering + # Use strategy for extraction workflow (group by document, order by originalIndex) merge_strategy = MergeStrategy( useIntelligentMerging=True, groupBy="documentId", # Group by document - orderBy="partIndex", # Order by part index + orderBy="originalIndex", # Phase 7: Order by originalIndex instead of partIndex mergeType="concatenate" ) else: @@ -811,10 +855,52 @@ class ExtractionService: # Apply merging merged_parts = applyMerging(content_parts, merge_strategy) - # Convert back to string - final_content = "\n\n".join([part.data for part in merged_parts]) + # Phase 6: Enhanced format with metadata preservation + # CRITICAL: For generation responses (JSON), don't add SOURCE markers - they interfere with JSON parsing + # Check if this is a generation response by looking at operationType or content structure + isGenerationResponse = False + if options and hasattr(options, 'operationType'): + # Generation responses use DATA_GENERATE operation type + from modules.datamodels.datamodelAi import OperationTypeEnum + isGenerationResponse = options.operationType == OperationTypeEnum.DATA_GENERATE - logger.info(f"Merged {len(partResults)} parts using unified merging system") + # Also check if content looks like JSON (starts with { or [) + if not isGenerationResponse and merged_parts: + firstPartData = merged_parts[0].data if merged_parts[0].data else "" + if isinstance(firstPartData, str) and firstPartData.strip().startswith(('{', '[')): + # Check if it's a complete JSON structure (not extracted content) + # Generation responses are complete JSON, extraction responses are text content + try: + # json is 
already imported at module level + json.loads(firstPartData.strip()) + # If it parses as JSON and has "documents" key, it's likely a generation response + parsed = json.loads(firstPartData.strip()) + if isinstance(parsed, dict) and "documents" in parsed: + isGenerationResponse = True + except: + pass + + content_sections = [] + for part in merged_parts: + if isGenerationResponse: + # For generation responses, return JSON directly without SOURCE markers + content_sections.append(part.data) + else: + # For extraction responses, include metadata in section header for traceability + doc_id = part.metadata.get("documentId", "unknown") + doc_mime = part.metadata.get("documentMimeType", "unknown") + label = part.label or "content" + + section = f""" +[SOURCE: documentId={doc_id}, mimeType={doc_mime}, label={label}] +{part.data} +[END SOURCE] +""" + content_sections.append(section) + + final_content = "\n\n".join(content_sections) + + logger.info(f"Merged {len(partResults)} parts using unified merging system with metadata preservation (generationResponse={isGenerationResponse})") return final_content.strip() async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]: @@ -827,9 +913,14 @@ class ExtractionService: modelContextTokens = model.contextLength # Total context in tokens modelMaxOutputTokens = model.maxTokens # Maximum output tokens + # CRITICAL: Use same conservative token factor as in processContentPartWithFallback + # Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!) + TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead + # Reserve tokens for: - # 1. Prompt (user message) - promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0 + # 1. Prompt (user message) - use conservative factor + promptSize = len(prompt.encode('utf-8')) if prompt else 0 + promptTokens = promptSize / TOKEN_SAFETY_FACTOR # 2. 
System message wrapper ("Context from documents:\n") systemMessageTokens = 10 # ~40 bytes = 10 tokens @@ -844,31 +935,38 @@ class ExtractionService: totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens # Available tokens for content = context length - reserved tokens - # Use 80% of available for safety margin - availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8) + # Use 60% of available (same conservative margin as in processContentPartWithFallback) + availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.60) # Ensure we have at least some space if availableContentTokens < 100: logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens") availableContentTokens = max(100, int(modelContextTokens * 0.1)) # Fallback to 10% of context - # Convert tokens to bytes (1 token ≈ 4 bytes) - availableContentBytes = availableContentTokens * 4 + # Convert tokens to bytes using conservative factor (reverse: bytes = tokens * factor) + availableContentBytes = int(availableContentTokens * TOKEN_SAFETY_FACTOR) - logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)") + logger.info(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens est., reserved={totalReservedTokens:.0f} tokens est., available={availableContentTokens} tokens est. 
({availableContentBytes} bytes), factor={TOKEN_SAFETY_FACTOR}") - # Use 70% of available content bytes for text chunks (conservative) - textChunkSize = int(availableContentBytes * 0.7) - imageChunkSize = int(availableContentBytes * 0.8) # 80% for image chunks + # Use 50% of available content bytes for text chunks (very conservative to ensure chunks fit) + # This ensures that even with token counting inaccuracies, chunks will fit + textChunkSize = int(availableContentBytes * 0.5) + structureChunkSize = int(availableContentBytes * 0.5) # CRITICAL: Also set for StructureChunker (JSON content) + tableChunkSize = int(availableContentBytes * 0.5) # Also set for TableChunker + imageChunkSize = int(availableContentBytes * 0.6) # 60% for image chunks - # Build chunking options + # Build chunking options - include ALL chunk size options for different chunkers chunkingOptions = { "textChunkSize": textChunkSize, + "structureChunkSize": structureChunkSize, # CRITICAL: Required for StructureChunker (JSON) + "tableChunkSize": tableChunkSize, # Required for TableChunker "imageChunkSize": imageChunkSize, "maxSize": availableContentBytes, "chunkAllowed": True } + logger.info(f"Chunking options: textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes, tableChunkSize={tableChunkSize} bytes, imageChunkSize={imageChunkSize} bytes, contentPartSize={len(contentPart.data.encode('utf-8')) if contentPart.data else 0} bytes") + # Get appropriate chunker (uses existing ChunkerRegistry ✅) chunker = self._chunkerRegistry.resolve(contentPart.typeGroup) @@ -878,8 +976,14 @@ class ExtractionService: # Chunk the content part try: + contentSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0 + logger.info(f"Chunking {contentPart.typeGroup} part: contentSize={contentSize} bytes, textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes") chunks = chunker.chunk(contentPart, chunkingOptions) - logger.debug(f"Created {len(chunks)} 
chunks for {contentPart.typeGroup} part") + logger.info(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part (contentSize={contentSize} bytes)") + if chunks: + for i, chunk in enumerate(chunks): + chunkSize = len(chunk.get('data', '').encode('utf-8')) if chunk.get('data') else 0 + logger.info(f" Chunk {i+1}/{len(chunks)}: {chunkSize} bytes") return chunks except Exception as e: logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}") @@ -999,15 +1103,86 @@ class ExtractionService: availableContentBytes = availableContentTokens * 4 - logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes") + # Also check prompt size - prompt + content together must fit + promptSize = len(prompt.encode('utf-8')) if prompt else 0 - if partSize <= availableContentBytes: + # CRITICAL: Token counting approximation is VERY inaccurate for JSON/content + # Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!) + # This happens because: + # 1. JSON/structured content tokenizes differently (more tokens per byte) + # 2. API has message structure overhead (system prompts, message wrappers) + # 3. Tokenizer differences between our approximation and actual API tokenizer + # Use conservative factor: 1 token ≈ 2.2 bytes (instead of 4) to account for these differences + TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead + promptTokens = promptSize / TOKEN_SAFETY_FACTOR + contentTokens = partSize / TOKEN_SAFETY_FACTOR + totalTokens = promptTokens + contentTokens + + # CRITICAL: Use very conservative margin (60%) because: + # 1. Token counting approximation is inaccurate - real tokens can be 2-3x more + # 2. API has additional overhead (message structure, system prompts, etc.) + # 3. Anthropic API is strict about the 200k limit + # 4. 
We've seen cases where our calculation says "fits" but API says "too long" + maxTotalTokens = int(modelContextTokens * 0.60) + + logger.info(f"Size check for {model.name}: partSize={partSize} bytes ({contentTokens:.0f} tokens est.), promptSize={promptSize} bytes ({promptTokens:.0f} tokens est.), total={totalTokens:.0f} tokens est., modelContext={modelContextTokens} tokens, maxTotal={maxTotalTokens} tokens (60% margin, conservative factor={TOKEN_SAFETY_FACTOR})") + + # CRITICAL: Always check totalTokens first - if prompt + content exceeds limit, MUST chunk + # Token counting approximation may differ significantly from API, so use very conservative margin + if totalTokens > maxTotalTokens: + logger.warning(f"⚠️ Total tokens ({totalTokens:.0f} est.) exceed model limit ({maxTotalTokens}), chunking required. Prompt: {promptTokens:.0f} tokens est., Content: {contentTokens:.0f} tokens est.") + elif partSize > availableContentBytes: + logger.warning(f"⚠️ Content part ({contentTokens:.0f} tokens est.) 
exceeds available space ({availableContentBytes/TOKEN_SAFETY_FACTOR:.0f} tokens est.), chunking required") + + # If either condition fails, chunk the content + if totalTokens > maxTotalTokens or partSize > availableContentBytes: + # Part too large or total exceeds limit - chunk it + chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt) + if not chunks: + raise ValueError(f"Failed to chunk content part for model {model.name}") + + logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}") + + if progressCallback: + progressCallback(0.0, f"Starting to process {len(chunks)} chunks") + + chunkResults = [] + for idx, chunk in enumerate(chunks): + chunkNum = idx + 1 + chunkData = chunk.get('data', '') + logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}") + + if progressCallback: + progressCallback(chunkNum / len(chunks), f"Processing chunk {chunkNum}/{len(chunks)}") + + try: + chunkResponse = await aiObjects._callWithModel(model, prompt, chunkData, options) + chunkResults.append(chunkResponse) + except Exception as chunkError: + logger.error(f"Error processing chunk {chunkNum}/{len(chunks)}: {str(chunkError)}") + # Continue with other chunks even if one fails + continue + + # Merge chunk results + if not chunkResults: + raise ValueError(f"All chunks failed for content part") + + mergedContent = self.mergePartResults(chunkResults, options) + return AiCallResponse( + content=mergedContent, + modelName=model.name, + priceUsd=sum(r.priceUsd for r in chunkResults), + processingTime=sum(r.processingTime for r in chunkResults), + bytesSent=sum(r.bytesSent for r in chunkResults), + bytesReceived=sum(r.bytesReceived for r in chunkResults), + errorCount=sum(r.errorCount for r in chunkResults) + ) + else: # Part fits - call AI directly via aiObjects interface + logger.info(f"✅ Content part fits within model limits, processing directly") response = await aiObjects._callWithModel(model, prompt, 
contentPart.data, options) logger.info(f"✅ Content part processed successfully with model: {model.name}") return response - else: - # Part too large - chunk it chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt) if not chunks: raise ValueError(f"Failed to chunk content part for model {model.name}") @@ -1037,8 +1212,8 @@ class ExtractionService: logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}") raise - # Merge chunk results - mergedContent = self.mergeChunkResults(chunkResults) + # Merge chunk results using unified mergePartResults + mergedContent = self.mergePartResults(chunkResults, options) logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)") return AiCallResponse( diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 5b518afa..cababbeb 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -2,7 +2,9 @@ # All rights reserved. 
import logging import uuid -from typing import Any, Dict, List, Optional +import base64 +import traceback +from typing import Any, Dict, List, Optional, Callable from modules.datamodels.datamodelChat import ChatDocument from modules.services.serviceGeneration.subDocumentUtility import ( getFileExtension, @@ -100,12 +102,12 @@ class GenerationService: # For binary data, handle bytes vs base64 string vs regular string if isinstance(documentData, bytes): # Already bytes - encode to base64 string for storage - import base64 + # base64 is already imported at module level content = base64.b64encode(documentData).decode('utf-8') base64encoded = True elif isinstance(documentData, str): # Check if it's already valid base64 - import base64 + # base64 is already imported at module level try: # Try to decode to verify it's base64 base64.b64decode(documentData, validate=True) @@ -122,7 +124,7 @@ class GenerationService: continue else: # Other types - convert to string then base64 - import base64 + # base64 is already imported at module level try: content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8') base64encoded = True @@ -231,7 +233,7 @@ class GenerationService: return None # Convert content to bytes if base64encoded: - import base64 + # base64 is already imported at module level content_bytes = base64.b64decode(content) else: content_bytes = content.encode('utf-8') @@ -319,10 +321,12 @@ class GenerationService: 'workflowId': 'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]: """ Render extracted JSON content to the specified output format. 
+ Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering). Always uses unified "documents" array format. + Supports three content formats: reference, object (base64), extracted_text. Args: extractedContent: Structured JSON document from AI extraction @@ -330,6 +334,7 @@ class GenerationService: title: Report title userPrompt: User's original prompt for report generation aiService: AI service instance for generation prompt creation + parentOperationId: Optional parent operation ID for hierarchical logging Returns: tuple: (rendered_content, mime_type, images_list) @@ -348,15 +353,40 @@ class GenerationService: if len(documents) == 0: raise ValueError("No documents found in 'documents' array") - # Use first document for rendering - single_doc = documents[0] - if "sections" not in single_doc: - raise ValueError("Document must contain 'sections' field") - - # Pass standardized schema to renderer (maintains architecture) - # Renderer should extract sections from documents array according to standardized schema - # Standardized schema: {metadata: {...}, documents: [{sections: [...]}]} - contentToRender = extractedContent # Pass full standardized schema + # Phase 5: Multi-Dokument-Rendering + if len(documents) == 1: + # Single document - use existing logic + single_doc = documents[0] + if "sections" not in single_doc: + raise ValueError("Document must contain 'sections' field") + + # Pass standardized schema to renderer (maintains architecture) + contentToRender = extractedContent # Pass full standardized schema + else: + # Multiple documents - merge all sections into one document for rendering + # Option: Merge all sections from all documents into a single document + all_sections = [] + for doc in documents: + if isinstance(doc, dict) and "sections" in doc: + sections = doc.get("sections", []) + if isinstance(sections, list): + all_sections.extend(sections) + + if not all_sections: + raise ValueError("No sections found in any document") + + # 
Create merged document with all sections + merged_document = { + "metadata": extractedContent.get("metadata", {}), + "documents": [{ + "id": "merged", + "title": title, + "filename": f"{title}.{outputFormat}", + "sections": all_sections + }] + } + contentToRender = merged_document + logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections") # Get the appropriate renderer for the format renderer = self._getFormatRenderer(outputFormat) @@ -378,6 +408,92 @@ class GenerationService: logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}") raise + async def generateDocumentWithTwoPhases( + self, + userPrompt: str, + cachedContent: Optional[Dict[str, Any]] = None, + contentParts: Optional[List[Any]] = None, + maxSectionLength: int = 500, + parallelGeneration: bool = True, + progressCallback: Optional[Callable] = None + ) -> Dict[str, Any]: + """ + Generate document using two-phase approach: + 1. Generate structure skeleton with empty sections + 2. Generate content for each section iteratively + + This is the core logic for document generation in AI calls. 
+ + Args: + userPrompt: User's original prompt + cachedContent: Optional extracted content cache (from extraction phase) + contentParts: Optional list of ContentParts to use for structure generation + maxSectionLength: Maximum words for simple sections + parallelGeneration: Enable parallel section generation + progressCallback: Optional callback function(progress, total, message) for progress updates + + Returns: + Complete document structure with populated elements ready for rendering + """ + try: + from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator + from modules.services.serviceGeneration.subContentGenerator import ContentGenerator + + # Phase 1: Generate structure skeleton + if progressCallback: + progressCallback(0, 100, "Generating document structure...") + + structureGenerator = StructureGenerator(self.services) + + # Extract imageDocuments from cachedContent if available + existingImages = None + if cachedContent and cachedContent.get("imageDocuments"): + existingImages = cachedContent.get("imageDocuments") + + structure = await structureGenerator.generateStructure( + userPrompt=userPrompt, + documentList=None, # Not used in current implementation + cachedContent=cachedContent, + contentParts=contentParts, # Pass ContentParts for structure generation + maxSectionLength=maxSectionLength, + existingImages=existingImages + ) + + if progressCallback: + progressCallback(30, 100, "Structure generated, starting content generation...") + + # Phase 2: Generate content for each section + contentGenerator = ContentGenerator(self.services) + + # Create progress callback wrapper for content generation phase (30-90%) + def contentProgressCallback(sectionIndex: int, totalSections: int, message: str): + if progressCallback: + # Map section progress to overall progress (30% to 90%) + if totalSections > 0: + overallProgress = 30 + int(60 * (sectionIndex / totalSections)) + else: + overallProgress = 30 + progressCallback(overallProgress, 
100, f"Section {sectionIndex}/{totalSections}: {message}") + + completeStructure = await contentGenerator.generateContent( + structure=structure, + cachedContent=cachedContent, + userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for content generation + progressCallback=contentProgressCallback, + parallelGeneration=parallelGeneration + ) + + if progressCallback: + progressCallback(100, 100, "Document generation complete") + + return completeStructure + + except Exception as e: + logger.error(f"Error in two-phase document generation: {str(e)}") + logger.debug(traceback.format_exc()) + raise + async def getAdaptiveExtractionPrompt( self, outputFormat: str, @@ -423,6 +539,6 @@ class GenerationService: except Exception as e: logger.error(f"Error getting renderer for {output_format}: {str(e)}") - import traceback + # traceback is already imported at module level logger.debug(traceback.format_exc()) return None \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 491c1d06..e9693680 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -68,6 +68,7 @@ class BaseRenderer(ABC): def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: """ Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + Phase 5: Supports multiple documents - extracts all sections from all documents. 
""" if "documents" not in reportData: raise ValueError("Report data must follow standardized schema with 'documents' array") @@ -76,11 +77,18 @@ class BaseRenderer(ABC): if not isinstance(documents, list) or len(documents) == 0: raise ValueError("Standardized schema must contain at least one document in 'documents' array") - firstDoc = documents[0] - if not isinstance(firstDoc, dict) or "sections" not in firstDoc: - raise ValueError("Document in standardized schema must contain 'sections' field") + # Phase 5: Extract sections from ALL documents + all_sections = [] + for doc in documents: + if isinstance(doc, dict) and "sections" in doc: + sections = doc.get("sections", []) + if isinstance(sections, list): + all_sections.extend(sections) - return firstDoc.get("sections", []) + if not all_sections: + raise ValueError("No sections found in any document") + + return all_sections def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]: """ diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index 48fb94f1..f62935d8 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -9,6 +9,7 @@ from typing import Dict, Any, Tuple, List import io import base64 import re +import csv try: from docx import Document @@ -225,13 +226,36 @@ class RendererDocx(BaseRenderer): self.logger.warning(f"Could not clear template content: {str(e)}") def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: - """Render a single JSON section to DOCX using AI-generated styles.""" + """Render a single JSON section to DOCX using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. 
+ """ try: section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) # Process each element in the section for element in elements: + element_type = element.get("type", "") + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + para = doc.add_paragraph(f"[Reference: {label}]") + para.runs[0].italic = True + continue + elif element_type == "extracted_text": + # Extracted text format - render as paragraph + content = element.get("content", "") + source = element.get("source", "") + if content: + para = doc.add_paragraph(content) + if source: + para.add_run(f" (Source: {source})").italic = True + continue + + # Standard section types if section_type == "table": self._renderJsonTable(doc, element, styles) elif section_type == "bullet_list": @@ -848,7 +872,7 @@ class RendererDocx(BaseRenderer): Process tables in the content (both CSV and pipe-separated) and convert them to Word tables. Returns the content with tables replaced by placeholders. """ - import csv + # csv is already imported at module level lines = content.split('\n') processed_lines = [] diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 163690d3..54c7e64b 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -297,11 +297,39 @@ class RendererHtml(BaseRenderer): return '\n'.join(css_parts) def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a single JSON section to HTML using AI-generated styles.""" + """Render a single JSON section to HTML using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. 
+ """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + htmlParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + htmlParts.append(f'

[Reference: {label}]

') + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f' (Source: {source})' if source else '' + htmlParts.append(f'

{content}{source_text}

') + continue + + # If we processed reference/extracted_text elements, return them + if htmlParts: + return '\n'.join(htmlParts) + if sectionType == "table": # Process the section data to extract table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index 3c9569e9..dfe2bda2 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -77,11 +77,39 @@ class RendererMarkdown(BaseRenderer): raise Exception(f"Markdown generation failed: {str(e)}") def _renderJsonSection(self, section: Dict[str, Any]) -> str: - """Render a single JSON section to markdown.""" + """Render a single JSON section to markdown. + Supports three content formats: reference, object (base64), extracted_text. + """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + markdownParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + markdownParts.append(f"*[Reference: {label}]*") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" *(Source: {source})*" if source else "" + markdownParts.append(f"{content}{source_text}") + continue + + # If we processed reference/extracted_text elements, return them + if markdownParts: + return '\n\n'.join(markdownParts) + if sectionType == "table": # Process the section data to extract 
table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index 1cfcfad7..128e84d3 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -477,7 +477,9 @@ class RendererPdf(BaseRenderer): return colors.black def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: - """Render a single JSON section to PDF elements using AI-generated styles.""" + """Render a single JSON section to PDF elements using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. + """ try: section_type = self._getSectionType(section) elements = self._getSectionData(section) @@ -485,6 +487,33 @@ class RendererPdf(BaseRenderer): # Process each element in the section all_elements = [] for element in elements: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + ref_style = ParagraphStyle( + 'Reference', + parent=self._createNormalStyle(styles), + fontStyle='italic', + textColor=colors.grey + ) + all_elements.append(Paragraph(f"[Reference: {label}]", ref_style)) + all_elements.append(Spacer(1, 6)) + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + all_elements.append(Paragraph(f"{content}{source_text}", self._createNormalStyle(styles))) + all_elements.append(Spacer(1, 6)) + continue + + # Standard section types if section_type == "table": all_elements.extend(self._renderJsonTable(element, 
styles)) elif section_type == "bullet_list": diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 6b1b9e18..e9ad334c 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -3,6 +3,9 @@ import logging import base64 import io +import json +import re +from datetime import datetime, UTC from typing import Dict, Any, Optional, Tuple, List from .rendererBaseTemplate import BaseRenderer @@ -261,7 +264,7 @@ class RendererPptx(BaseRenderer): Returns: List of slide content strings """ - import re + # re is already imported at module level # First, try to split by major headers (# or ##) # This is the most common case for AI-generated content @@ -399,7 +402,7 @@ class RendererPptx(BaseRenderer): def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str: """Create a professional PowerPoint-specific AI style template for corporate-quality slides.""" - import json + # json is already imported at module level schema_json = json.dumps(style_schema, indent=4) return f"""Customize the JSON below for professional PowerPoint slides. @@ -443,8 +446,7 @@ JSON ONLY. NO OTHER TEXT.""" self.logger.warning("AI service returned no response, using defaults") return default_styles - import json - import re + # json and re are already imported at module level # Clean and parse JSON result = response.content.strip() if response and response.content else "" @@ -634,6 +636,27 @@ JSON ONLY. 
NO OTHER TEXT.""" content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Check for three content formats from Phase 5D in elements + content_parts = [] + for element in elements: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + content_parts.append(f"[Reference: {label}]") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + content_parts.append(f"{content}{source_text}") + continue + # Handle image sections specially if content_type == "image": # Extract image data @@ -647,26 +670,25 @@ JSON ONLY. NO OTHER TEXT.""" }) return { - "title": section_title or element.get("altText", "Image"), - "content": "", # No text content for image slides + "title": section_title or (elements[0].get("altText", "Image") if elements else "Image"), + "content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present "images": images } # Build slide content based on section type - content_parts = [] - - if content_type == "table": - content_parts.append(self._formatTableForSlide(elements)) - elif content_type == "list": - content_parts.append(self._formatListForSlide(elements)) - elif content_type == "heading": - content_parts.append(self._formatHeadingForSlide(elements)) - elif content_type == "paragraph": - content_parts.append(self._formatParagraphForSlide(elements)) - elif content_type == "code": - content_parts.append(self._formatCodeForSlide(elements)) - else: - content_parts.append(self._format_paragraph_for_slide(elements)) + if not content_parts: # Only if we didn't process 
reference/extracted_text above + if content_type == "table": + content_parts.append(self._formatTableForSlide(elements)) + elif content_type == "list": + content_parts.append(self._formatListForSlide(elements)) + elif content_type == "heading": + content_parts.append(self._formatHeadingForSlide(elements)) + elif content_type == "paragraph": + content_parts.append(self._formatParagraphForSlide(elements)) + elif content_type == "code": + content_parts.append(self._formatCodeForSlide(elements)) + else: + content_parts.append(self._format_paragraph_for_slide(elements)) # Combine content parts slide_content = "\n\n".join(filter(None, content_parts)) @@ -1057,5 +1079,5 @@ JSON ONLY. NO OTHER TEXT.""" def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" - from datetime import datetime, UTC + # datetime and UTC are already imported at module level return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index 56d4af61..acbeaaf9 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -100,11 +100,39 @@ class RendererText(BaseRenderer): raise Exception(f"Text generation failed: {str(e)}") def _renderJsonSection(self, section: Dict[str, Any]) -> str: - """Render a single JSON section to text.""" + """Render a single JSON section to text. + Supports three content formats: reference, object (base64), extracted_text. 
+ """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + textParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + textParts.append(f"[Reference: {label}]") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + textParts.append(f"{content}{source_text}") + continue + + # If we processed reference/extracted_text elements, return them + if textParts: + return '\n\n'.join(textParts) + if sectionType == "table": # Process the section data to extract table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py index 0f75f595..681a5923 100644 --- a/modules/services/serviceGeneration/subContentGenerator.py +++ b/modules/services/serviceGeneration/subContentGenerator.py @@ -7,6 +7,10 @@ Generates content for each section in the document structure. 
import logging import asyncio +import json +import base64 +import re +import traceback from typing import Dict, Any, Optional, List, Callable from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator @@ -25,6 +29,7 @@ class ContentGenerator: structure: Dict[str, Any], cachedContent: Optional[Dict[str, Any]] = None, userPrompt: str = "", + contentParts: Optional[List[Any]] = None, progressCallback: Optional[Callable] = None, parallelGeneration: bool = True, batchSize: int = 10 @@ -33,9 +38,10 @@ class ContentGenerator: Generate content for all sections in structure. Args: - structure: Document structure from Phase 1 + structure: Document structure from Phase 1 (with contentPartIds per section) cachedContent: Extracted content cache userPrompt: Original user prompt + contentParts: List of all available ContentParts (for mapping by contentPartIds) progressCallback: Function to call for progress updates parallelGeneration: Enable parallel section generation batchSize: Number of sections to process in parallel @@ -89,6 +95,7 @@ class ContentGenerator: sections=sections, cachedContent=cachedContent, userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for section generation documentMetadata=structure.get("metadata", {}), progressCallback=lambda idx, total, msg: progressCallback( currentSectionIndex + idx, @@ -103,6 +110,7 @@ class ContentGenerator: sections=sections, cachedContent=cachedContent, userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for section generation documentMetadata=structure.get("metadata", {}), progressCallback=lambda idx, total, msg: progressCallback( currentSectionIndex + idx, @@ -138,7 +146,8 @@ class ContentGenerator: sections: List[Dict[str, Any]], cachedContent: Optional[Dict[str, Any]], userPrompt: str, - documentMetadata: Dict[str, Any], + contentParts: Optional[List[Any]] = None, + documentMetadata: Dict[str, Any] = {}, progressCallback: Optional[Callable] = None ) -> 
List[Dict[str, Any]]: """ @@ -149,6 +158,14 @@ class ContentGenerator: previousSections = [] totalSections = len(sections) + # Create ContentParts lookup map by ID + contentPartsMap = {} + if contentParts: + for part in contentParts: + partId = part.id if hasattr(part, 'id') else part.get('id', '') + if partId: + contentPartsMap[partId] = part + for idx, section in enumerate(sections): try: contentType = section.get("content_type", "content") @@ -171,11 +188,20 @@ class ContentGenerator: message ) + # Get ContentParts for this section + sectionContentPartIds = section.get("contentPartIds", []) + sectionContentParts = [] + if sectionContentPartIds and contentPartsMap: + for partId in sectionContentPartIds: + if partId in contentPartsMap: + sectionContentParts.append(contentPartsMap[partId]) + context = { "userPrompt": userPrompt, "cachedContent": cachedContent, "previousSections": previousSections.copy(), "targetSection": section, + "sectionContentParts": sectionContentParts, # ContentParts for this section "documentMetadata": documentMetadata, "operationId": None } @@ -272,11 +298,20 @@ class ContentGenerator: message ) + # Get ContentParts for this section + sectionContentPartIds = section.get("contentPartIds", []) + sectionContentParts = [] + if sectionContentPartIds and contentPartsMap: + for partId in sectionContentPartIds: + if partId in contentPartsMap: + sectionContentParts.append(contentPartsMap[partId]) + context = { "userPrompt": userPrompt, "cachedContent": cachedContent, "previousSections": batchPreviousSections.copy(), # Include sections from previous batches "targetSection": section, + "sectionContentParts": sectionContentParts, # ContentParts for this section "documentMetadata": documentMetadata, "operationId": None # Can be set if needed for nested progress } @@ -371,17 +406,13 @@ class ContentGenerator: # Create section-specific prompt sectionPrompt = self._createSectionPrompt(section, context) - # Debug: Log section generation prompt - if 
self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - sectionId = section.get('id', 'unknown') - contentType = section.get('content_type', 'unknown') - try: - self.services.utils.writeDebugFile( - sectionPrompt, - f"document_generation_section_{sectionId}_{contentType}_prompt" - ) - except Exception as e: - logger.debug(f"Could not write debug file for section prompt: {e}") + # Debug: Log section generation prompt (harmonisiert - keine Checks nötig) + sectionId = section.get('id', 'unknown') + contentType = section.get('content_type', 'unknown') + self.services.utils.writeDebugFile( + sectionPrompt, + f"document_generation_section_{sectionId}_{contentType}_prompt" + ) # Call AI to generate content from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum @@ -397,32 +428,27 @@ class ContentGenerator: outputFormat="json" ) - # Debug: Log section generation response (always log, even if empty) + # Debug: Log section generation response (harmonisiert - keine Checks nötig) sectionId = section.get('id', 'unknown') contentType = section.get('content_type', 'unknown') - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - responseContent = '' - if aiResponse: - if hasattr(aiResponse, 'content') and aiResponse.content: - responseContent = aiResponse.content - elif hasattr(aiResponse, 'documents') and aiResponse.documents: - responseContent = f"[Response has {len(aiResponse.documents)} documents]" - else: - responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]" - else: - responseContent = '[No response object]' - - self.services.utils.writeDebugFile( - responseContent, - f"document_generation_section_{sectionId}_{contentType}_response" - ) - logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)") - except Exception as e: - logger.warning(f"Could not write debug file for 
section response: {e}") - import traceback - logger.debug(traceback.format_exc()) + responseContent = '' + if aiResponse: + if hasattr(aiResponse, 'content') and aiResponse.content: + responseContent = aiResponse.content + elif hasattr(aiResponse, 'documents') and aiResponse.documents: + responseContent = f"[Response has {len(aiResponse.documents)} documents]" + else: + responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]" + else: + responseContent = '[No response object]' + + # Debug: Log section generation response (harmonisiert - keine Checks nötig) + self.services.utils.writeDebugFile( + responseContent, + f"document_generation_section_{sectionId}_{contentType}_response" + ) + logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)") if not aiResponse or not aiResponse.content: logger.error(f"AI section generation returned empty response for section {sectionId}") @@ -443,7 +469,7 @@ class ContentGenerator: logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}") raise ValueError("No JSON found in AI section response") - import json + # json is already imported at module level try: elementsData = json.loads(extractedJson) logger.debug(f"Parsed JSON for section {section.get('id')}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}") @@ -480,7 +506,7 @@ class ContentGenerator: # Last resort: try to extract partial content and create minimal valid JSON try: # Try to extract text content before the truncation point - import re + # re is already imported at module level # Look for text field that might be partially complete textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson) if textMatch: @@ -577,14 +603,14 @@ class ContentGenerator: ) -> Dict[str, Any]: """Generate image for image section or include existing image""" try: - # Check if this is an existing image to include + # Check if 
this is an existing image to include or render imageSource = section.get("image_source", "generate") - if imageSource == "existing": - # Include existing image from cachedContent + if imageSource == "existing" or imageSource == "render": + # Phase 4: Include existing image or render image from cachedContent imageRefId = section.get("image_reference_id") if not imageRefId: - raise ValueError(f"Image section {section.get('id')} has image_source='existing' but no image_reference_id") + raise ValueError(f"Image section {section.get('id')} has image_source='{imageSource}' but no image_reference_id") cachedContent = context.get("cachedContent", {}) imageDocuments = cachedContent.get("imageDocuments", []) @@ -594,7 +620,7 @@ class ContentGenerator: if not imageDoc: raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments") - # Create image element from existing image + # Create image element from existing/render image altText = imageDoc.get("altText", section.get("generation_hint", "Image")) mimeType = imageDoc.get("mimeType", "image/png") @@ -605,7 +631,7 @@ class ContentGenerator: "caption": section.get("metadata", {}).get("caption") }] - logger.info(f"Successfully included existing image {imageRefId} for section {section.get('id')}") + logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})") return section # Generate new image (existing logic) @@ -620,7 +646,7 @@ class ContentGenerator: # Call AI service for image generation from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage - import json + # json is already imported at module level # Create image generation prompt promptModel = AiCallPromptImage( @@ -664,7 +690,7 @@ class ContentGenerator: # Validate base64 data try: - import base64 + # base64 is already imported at module level base64.b64decode(base64Data[:100], validate=True) # Validate first 100 chars except Exception as e: 
logger.warning(f"Image data may not be valid base64: {str(e)}") @@ -710,9 +736,11 @@ class ContentGenerator: """Create sub-prompt for section content generation""" contentType = section.get("content_type", "") generationHint = section.get("generation_hint", "") + extractionPrompt = section.get("extractionPrompt") # Optional extraction prompt for ContentParts userPrompt = context.get("userPrompt", "") cachedContent = context.get("cachedContent") previousSections = context.get("previousSections", []) + sectionContentParts = context.get("sectionContentParts", []) # ContentParts for this section documentMetadata = context.get("documentMetadata", {}) # Get user language @@ -723,6 +751,51 @@ class ContentGenerator: if cachedContent and cachedContent.get("extractedContent"): cachedContentText = self._formatCachedContent(cachedContent) + # Format ContentParts for this section + contentPartsText = "" + imagePartReferences = [] # Track image parts for text reference + + if sectionContentParts: + try: + partsList = [] + imageIndex = 1 + for part in sectionContentParts: + partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '') + partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '') + partId = part.id if hasattr(part, 'id') else part.get('id', '') + partData = part.data if hasattr(part, 'data') else part.get('data', '') + + # Check if this is an image part + isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/")) + + if contentType == "image" and isImage: + # For image sections: include image data for integration + partsList.append(f"- ContentPart {partId} (image): [Image data available for integration]") + elif isImage: + # For non-image sections: track for text reference + imagePartReferences.append({ + "id": partId, + "index": imageIndex + }) + imageIndex += 1 + # Don't include image data in prompt for non-image sections + else: + # For text/table/etc parts: include data 
preview + dataPreview = str(partData)[:200] if partData else "[No data]" + partsList.append(f"- ContentPart {partId} ({partTypeGroup}): {dataPreview}{'...' if partData and len(str(partData)) > 200 else ''}") + + if partsList: + contentPartsText = "\n".join(partsList) + + # Add image reference instructions for non-image sections + if imagePartReferences and contentType != "image": + refText = ", ".join([f"Bild {ref['index']}" if userLanguage == "de" else f"Image {ref['index']}" for ref in imagePartReferences]) + contentPartsText += f"\n\nNOTE: Reference images as text in the document language: {refText}" + + except Exception as e: + logger.warning(f"Could not format ContentParts for section prompt: {str(e)}") + contentPartsText = "" + # Format previous sections for context previousSectionsText = "" if previousSections: @@ -787,14 +860,22 @@ EXTRACTED CONTENT (if available): {cachedContentText if cachedContentText else "None"} {'='*80} +{'='*80} +CONTENT PARTS FOR THIS SECTION: +{'='*80} +{contentPartsText if contentPartsText else "No ContentParts assigned to this section."} +{'='*80} + TASK: Generate content for this section ONLY. INSTRUCTIONS: 1. Generate content appropriate for section type: {contentType} 2. Use the generation hint: {generationHint} -3. Consider previous sections for continuity -4. Use extracted content if relevant -5. All content must be in the language '{userLanguage}' +{f"3. Use extractionPrompt for ContentParts: {extractionPrompt}" if extractionPrompt else "3. Use ContentParts data if provided"} +4. Consider previous sections for continuity +5. Use extracted content if relevant +6. All content must be in the language '{userLanguage}' +7. {'For image sections: Integrate image ContentParts as visual elements' if contentType == "image" else 'For non-image sections: Reference image ContentParts as text (e.g., "siehe Bild 1" in German, "see Image 1" in English)'} 6. CRITICAL: Return ONLY a JSON object with an "elements" array. 
DO NOT return a full document structure. diff --git a/modules/services/serviceGeneration/subContentIntegrator.py b/modules/services/serviceGeneration/subContentIntegrator.py index 7bee437e..1a83eb6e 100644 --- a/modules/services/serviceGeneration/subContentIntegrator.py +++ b/modules/services/serviceGeneration/subContentIntegrator.py @@ -65,18 +65,14 @@ class ContentIntegrator: ) sections[idx] = section - # Debug: Write final merged structure to debug file - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - import json - structureJson = json.dumps(structure, indent=2, ensure_ascii=False) - self.services.utils.writeDebugFile( - structureJson, - "document_generation_final_merged_json" - ) - logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)") - except Exception as e: - logger.debug(f"Could not write debug file for final merged JSON: {e}") + # Debug: Write final merged structure to debug file (harmonisiert - keine Checks nötig) + import json + structureJson = json.dumps(structure, indent=2, ensure_ascii=False) + self.services.utils.writeDebugFile( + structureJson, + "document_generation_final_merged_json" + ) + logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)") return structure diff --git a/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py b/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py deleted file mode 100644 index d6620d3d..00000000 --- a/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py +++ /dev/null @@ -1,316 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Document Purpose Analyzer for hierarchical document generation. -Uses AI to analyze user prompt and determine purpose for each document. 
-""" - -import logging -import json -from typing import Dict, Any, List, Optional -from modules.datamodels.datamodelChat import ChatDocument -from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum - -logger = logging.getLogger(__name__) - - -class DocumentPurposeAnalyzer: - """Analyzes user prompt and documents to determine document purposes""" - - def __init__(self, services: Any): - self.services = services - - async def analyzeDocumentPurposes( - self, - userPrompt: str, - chatDocuments: List[ChatDocument], - actionContext: str = "generateDocument" - ) -> Dict[str, Any]: - """ - Use AI to analyze user prompt and determine purpose for each document. - - Args: - userPrompt: User's original prompt - chatDocuments: List of ChatDocument objects to analyze - actionContext: Action name (e.g., "generateDocument", "extractData") - - Returns: - { - "document_purposes": [ - { - "document_id": "...", - "purpose": "extract_text_content" | "include_image" | ..., - "reasoning": "...", - "extractionPrompt": "..." (if purpose requires extraction), - "processingNotes": "..." - } - ], - "overall_intent": "..." 
- } - """ - try: - if not chatDocuments: - return { - "document_purposes": [], - "overall_intent": "No documents provided" - } - - # Create document metadata list for AI analysis - documentMetadata = [] - for doc in chatDocuments: - docInfo = { - "document_id": doc.id, - "fileName": doc.fileName, - "mimeType": doc.mimeType, - "fileSize": doc.fileSize - } - documentMetadata.append(docInfo) - - # Create analysis prompt - analysisPrompt = self._createAnalysisPrompt( - userPrompt=userPrompt, - actionContext=actionContext, - documentMetadata=documentMetadata - ) - - # Debug: Log purpose analysis prompt - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - self.services.utils.writeDebugFile( - analysisPrompt, - "document_purpose_analysis_prompt" - ) - except Exception as e: - logger.debug(f"Could not write debug file for purpose analysis prompt: {e}") - - # Call AI for analysis - options = AiCallOptions( - operationType=OperationTypeEnum.DATA_GENERATE, - resultFormat="json" - ) - - aiResponse = await self.services.ai.callAiContent( - prompt=analysisPrompt, - options=options, - outputFormat="json" - ) - - # Debug: Log purpose analysis response - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - responseContent = aiResponse.content if aiResponse and aiResponse.content else '' - responseMetadata = { - "status": aiResponse.status if aiResponse else "N/A", - "error": aiResponse.error if aiResponse else "N/A", - "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0 - } - self.services.utils.writeDebugFile( - f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}", - "document_purpose_analysis_response" - ) - except Exception as e: - logger.debug(f"Could not write debug file for purpose analysis response: {e}") - - if not aiResponse or not aiResponse.content: - 
logger.warning("AI purpose analysis returned empty response, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - # Extract and parse JSON - extractedJson = self.services.utils.jsonExtractString(aiResponse.content) - if not extractedJson: - logger.warning("No JSON found in purpose analysis response, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - try: - analysisResult = json.loads(extractedJson) - - # Validate structure - if "document_purposes" not in analysisResult: - logger.warning("Invalid analysis result structure, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - # Ensure all documents have purposes - analyzedIds = {dp.get("document_id") for dp in analysisResult.get("document_purposes", [])} - for doc in chatDocuments: - if doc.id not in analyzedIds: - logger.warning(f"Document {doc.id} not in analysis result, adding default purpose") - defaultPurpose = self._determineDefaultPurpose(doc, actionContext) - analysisResult["document_purposes"].append({ - "document_id": doc.id, - "purpose": defaultPurpose, - "reasoning": f"Default purpose based on document type and action context", - "extractionPrompt": None, - "processingNotes": None - }) - - return analysisResult - - except json.JSONDecodeError as e: - logger.error(f"Failed to parse purpose analysis JSON: {str(e)}") - logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}") - return self._createDefaultPurposes(chatDocuments, actionContext) - - except Exception as e: - logger.error(f"Error analyzing document purposes: {str(e)}") - return self._createDefaultPurposes(chatDocuments, actionContext) - - def _createAnalysisPrompt( - self, - userPrompt: str, - actionContext: str, - documentMetadata: List[Dict[str, Any]] - ) -> str: - """Create AI prompt for document purpose analysis""" - - # Format document list - docListText = "" - for i, docInfo in enumerate(documentMetadata, 1): - 
docListText += f"\n{i}. Document ID: {docInfo['document_id']}\n" - docListText += f" File Name: {docInfo['fileName']}\n" - docListText += f" MIME Type: {docInfo['mimeType']}\n" - docListText += f" File Size: {docInfo['fileSize']} bytes\n" - - # Get user language - userLanguage = self._getUserLanguage() - - prompt = f"""{'='*80} -DOCUMENT PURPOSE ANALYSIS -{'='*80} - -USER PROMPT: -{userPrompt} - -ACTION CONTEXT: {actionContext} - -DOCUMENTS PROVIDED: -{docListText} -{'='*80} - -TASK: For each document, determine its purpose based on: -1. User prompt intent (what the user wants to do) -2. Action context (what action is being performed) -3. Document type (mimeType - is it text, image, etc.) -4. Document metadata (fileName, size) - -AVAILABLE PURPOSES: -- "extract_text_content": Extract text content for use in document generation -- "include_image": Include the image directly in the generated document (for images) -- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts) -- "use_as_template": Use document structure/layout as template for generation -- "use_as_reference": Use as background context/reference without detailed extraction -- "extract_data": Extract structured data (key-value pairs, entities, fields) -- "attach": Document is an attachment - don't process, just attach to output -- "convert_format": Convert document format (for convert actions) -- "translate": Translate document content (for translate actions) -- "summarize": Create summary of document (for summarize actions) -- "compare": Compare documents (for comparison actions) -- "merge": Merge documents (for merge actions) -- "extract_tables_charts": Extract tables and charts specifically -- "use_for_styling": Use document for styling/formatting reference only -- "extract_metadata": Extract only document metadata - -CRITICAL RULES: -1. 
For images (mimeType starts with "image/"): - - If user wants to "include" or "show" images → "include_image" - - If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision" - - Default for images in generateDocument → "include_image" - -2. For text documents in generateDocument: - - If user mentions "template" or "structure" → "use_as_template" - - If user mentions "reference" or "context" → "use_as_reference" - - Default → "extract_text_content" - -3. Consider action context: - - generateDocument: Usually "extract_text_content" or "include_image" - - extractData: Usually "extract_data" - - translateDocument: Usually "translate" - - summarizeDocument: Usually "summarize" - -4. Return ONLY valid JSON following this structure: -{{ - "document_purposes": [ - {{ - "document_id": "document_id_here", - "purpose": "extract_text_content", - "reasoning": "Brief explanation in language '{userLanguage}'", - "extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null", - "processingNotes": "Any special processing requirements or null" - }} - ], - "overall_intent": "Summary of how documents should be used together in language '{userLanguage}'" -}} - -5. All content must be in the language '{userLanguage}' -6. Return ONLY the JSON structure. No explanations before or after. - -Return ONLY the JSON structure. 
-""" - return prompt - - def _createDefaultPurposes( - self, - chatDocuments: List[ChatDocument], - actionContext: str - ) -> Dict[str, Any]: - """Create default purposes when AI analysis fails""" - purposes = [] - - for doc in chatDocuments: - purpose = self._determineDefaultPurpose(doc, actionContext) - purposes.append({ - "document_id": doc.id, - "purpose": purpose, - "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})", - "extractionPrompt": None, - "processingNotes": None - }) - - return { - "document_purposes": purposes, - "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action" - } - - def _determineDefaultPurpose( - self, - doc: ChatDocument, - actionContext: str - ) -> str: - """Determine default purpose based on document type and action context""" - mimeType = doc.mimeType or "" - - # Image documents - if mimeType.startswith("image/"): - if actionContext == "generateDocument": - return "include_image" - elif actionContext in ["extractData", "process"]: - return "analyze_image_vision" - else: - return "include_image" # Default for images - - # Action-specific defaults - if actionContext == "extractData": - return "extract_data" - elif actionContext == "translateDocument": - return "translate" - elif actionContext == "summarizeDocument": - return "summarize" - elif actionContext == "convertDocument" or actionContext == "convert": - return "convert_format" - elif actionContext == "generateDocument": - return "extract_text_content" - else: - # Default for other actions - return "extract_text_content" - - def _getUserLanguage(self) -> str: - """Get user language for document generation""" - try: - if self.services: - if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage: - return self.services.currentUserLanguage - elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'): - return 
self.services.user.language - except Exception: - pass - return 'en' # Default fallback - diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py index 9a78b9f4..0ee6fa5e 100644 --- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py +++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py @@ -19,7 +19,8 @@ async def buildGenerationPrompt( title: str, extracted_content: str = None, continuationContext: Dict[str, Any] = None, - services: Any = None + services: Any = None, + useContentParts: bool = False # ARCHITECTURE: If True, don't include full content in prompt (ContentParts will be used directly) ) -> str: """ Build the unified generation prompt using a single JSON template. @@ -120,7 +121,9 @@ Continue generating the remaining content now. # PROMPT FOR FIRST CALL # Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions - if extracted_content: + # ARCHITECTURE: If useContentParts=True, don't include full content in prompt + # ContentParts will be passed directly to callAi for model-aware chunking + if extracted_content and not useContentParts: # If we have extracted content, put it FIRST and make it very clear it's the source data generationPrompt = f"""{'='*80} USER REQUEST / USER PROMPT: diff --git a/modules/services/serviceGeneration/subStructureGenerator.py b/modules/services/serviceGeneration/subStructureGenerator.py index d2ef1aeb..62e72c69 100644 --- a/modules/services/serviceGeneration/subStructureGenerator.py +++ b/modules/services/serviceGeneration/subStructureGenerator.py @@ -24,6 +24,7 @@ class StructureGenerator: userPrompt: str, documentList: Optional[Any] = None, cachedContent: Optional[Dict[str, Any]] = None, + contentParts: Optional[List[Any]] = None, maxSectionLength: int = 500, existingImages: Optional[List[Dict[str, Any]]] = None ) -> Dict[str, Any]: @@ -34,30 +35,28 @@ class 
StructureGenerator: userPrompt: User's original prompt documentList: Optional document references cachedContent: Optional extracted content cache + contentParts: Optional list of ContentParts to analyze for structure generation maxSectionLength: Maximum words for simple sections existingImages: Optional list of existing images to include Returns: - Document structure with empty elements arrays + Document structure with empty elements arrays and contentPartIds per section """ try: # Create structure generation prompt structurePrompt = self._createStructurePrompt( userPrompt=userPrompt, cachedContent=cachedContent, + contentParts=contentParts, maxSectionLength=maxSectionLength, existingImages=existingImages or [] ) - # Debug: Log structure generation prompt - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - self.services.utils.writeDebugFile( - structurePrompt, - "document_generation_structure_prompt" - ) - except Exception as e: - logger.debug(f"Could not write debug file for structure prompt: {e}") + # Debug: Log structure generation prompt (harmonisiert - keine Checks nötig) + self.services.utils.writeDebugFile( + structurePrompt, + "document_generation_structure_prompt" + ) # Call AI to generate structure from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum @@ -73,15 +72,11 @@ class StructureGenerator: outputFormat="json" ) - # Debug: Log structure generation response - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - self.services.utils.writeDebugFile( - aiResponse.content if aiResponse and aiResponse.content else '', - "document_generation_structure_response" - ) - except Exception as e: - logger.debug(f"Could not write debug file for structure response: {e}") + # Debug: Log structure generation response (harmonisiert - keine Checks nötig) + self.services.utils.writeDebugFile( + aiResponse.content if aiResponse 
and aiResponse.content else '', + "document_generation_structure_response" + ) if not aiResponse or not aiResponse.content: raise ValueError("AI structure generation returned empty response") @@ -106,6 +101,7 @@ class StructureGenerator: self, userPrompt: str, cachedContent: Optional[Dict[str, Any]] = None, + contentParts: Optional[List[Any]] = None, maxSectionLength: int = 500, existingImages: Optional[List[Dict[str, Any]]] = None ) -> str: @@ -126,6 +122,41 @@ class StructureGenerator: if cachedContent and cachedContent.get("imageDocuments"): existingImages = cachedContent.get("imageDocuments", []) + # Format ContentParts as JSON for structure generation + contentPartsJson = "" + if contentParts: + try: + import json + # Convert ContentParts to dict format for JSON serialization + contentPartsList = [] + for part in contentParts: + if hasattr(part, 'dict'): + partDict = part.dict() + elif isinstance(part, dict): + partDict = part + else: + # Try to convert to dict + partDict = { + "id": getattr(part, 'id', ''), + "typeGroup": getattr(part, 'typeGroup', ''), + "mimeType": getattr(part, 'mimeType', ''), + "label": getattr(part, 'label', ''), + "metadata": getattr(part, 'metadata', {}) + } + # Only include essential fields for structure generation (not full data) + contentPartsList.append({ + "id": partDict.get("id", ""), + "typeGroup": partDict.get("typeGroup", ""), + "mimeType": partDict.get("mimeType", ""), + "label": partDict.get("label", ""), + "metadata": partDict.get("metadata", {}) + }) + + contentPartsJson = json.dumps(contentPartsList, indent=2, ensure_ascii=False) + except Exception as e: + logger.warning(f"Could not format ContentParts as JSON: {str(e)}") + contentPartsJson = "" + # Create structure template structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title") @@ -145,13 +176,15 @@ EXTRACTED CONTENT (if available): {'='*80} INSTRUCTIONS: -1. Analyze the user request and extracted content +1. 
Analyze the user request, extracted content, and available ContentParts 2. Create a document structure with CONTENT sections only 3. For each section, specify: - id: Unique identifier (e.g., "section_title_1", "section_image_1") - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) - generation_hint: Brief description of what content should be generated + - contentPartIds: Array of ContentPart IDs that should be used for this section (e.g., ["part_1", "part_2"]) - can be empty [] + - extractionPrompt: (optional) Specific prompt for extracting/processing ContentParts for this section - image_prompt: (only for image sections) Detailed prompt for image generation - order: Section order number (starting from 1) - elements: [] (empty array - will be populated later) @@ -160,10 +193,12 @@ INSTRUCTIONS: - If user requests illustrations/images, create image sections - If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them - Add image_prompt field with detailed description for image generation (only for new images) - - Set complexity to "complex" + - Set complexity to "complex" for new images, "simple" for existing/render images - For existing images: Set image_source to "existing" and image_reference_id to the image document ID + - For images to render (from input documents): Set image_source to "render" and image_reference_id to the image document ID - Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}} - Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": 
"doc_id_here", "order": 2, "elements": []}} + - Example for render image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Render input image", "image_source": "render", "image_reference_id": "doc_id_here", "order": 2, "elements": []}} {'='*80} EXISTING IMAGES (to include in document): @@ -178,12 +213,21 @@ EXISTING IMAGES (to include in document): 7. Return ONLY valid JSON following this structure: {structureTemplate} -5. CRITICAL RULES: +5. CRITICAL RULES FOR CONTENT PARTS: + - Analyze available ContentParts and determine which ones are needed for each section + - For image sections (content_type == "image"): Include image ContentParts in contentPartIds - images will be integrated as visual elements + - For other sections (heading, paragraph, etc.): If image ContentParts are referenced, they will be referenced as text in the document language (not integrated as images) + - Each section can reference multiple ContentParts via contentPartIds array + - If specific extraction/processing is needed for ContentParts, provide extractionPrompt + - Image references in non-image sections should be automatically derived in the document language (e.g., "siehe Bild 1" in German, "see Image 1" in English) + +6. 
CRITICAL RULES: - Return ONLY valid JSON (no comments, no trailing commas, double quotes only) - Follow the exact JSON schema structure provided - IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays) - ALL sections MUST include "generation_hint" field with a brief description of what content should be generated - ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images + - ALL sections MUST include "contentPartIds" field (can be empty array [] if no ContentParts needed) - Image sections MUST include "image_prompt" field with detailed description for image generation - Order numbers MUST start from 1 (not 0) - All content must be in the language '{userLanguage}' @@ -235,6 +279,14 @@ Return ONLY the JSON structure. No explanations. if "elements" not in section: section["elements"] = [] + # Ensure contentPartIds field exists (can be empty array) + if "contentPartIds" not in section: + section["contentPartIds"] = [] + + # Ensure extractionPrompt field exists (optional) + if "extractionPrompt" not in section: + section["extractionPrompt"] = None + # Identify complexity if not set if "complexity" not in section: section["complexity"] = self._identifySectionComplexity( @@ -255,11 +307,11 @@ Return ONLY the JSON structure. No explanations. 
if section.get("content_type") == "image": imageSource = section.get("image_source", "generate") - if imageSource == "existing": - # Existing image - ensure image_reference_id is set + if imageSource == "existing" or imageSource == "render": + # Existing or render image - ensure image_reference_id is set if "image_reference_id" not in section: - logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id") - # Existing images are simple (no generation needed) + logger.warning(f"Image section {sectionId} has image_source='{imageSource}' but no image_reference_id") + # Existing/render images are simple (no generation needed, code integration) section["complexity"] = "simple" else: # New image generation - ensure image_prompt diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index f2678b63..9a7cffab 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -2,6 +2,7 @@ # All rights reserved. import json import logging +import re from typing import Any, Dict, List, Optional, Tuple, Union, Type, TypeVar from pydantic import BaseModel, ValidationError @@ -11,10 +12,32 @@ T = TypeVar('T', bound=BaseModel) def stripCodeFences(text: str) -> str: - """Remove ```json / ``` fences and surrounding whitespace if present.""" + """Remove ```json / ``` fences and surrounding whitespace if present. + Also removes [SOURCE: ...] and [END SOURCE] tags that may wrap the JSON.""" if not text: return text s = text.strip() + + # Remove [SOURCE: ...] 
tags at the beginning + if s.startswith("[SOURCE:"): + # Find the end of the SOURCE tag (newline or end of string) + end_pos = s.find("\n") + if end_pos != -1: + s = s[end_pos+1:] + else: + # No newline, entire string is SOURCE tag + return "" + + # Remove [END SOURCE] tags at the end + if s.endswith("[END SOURCE]"): + # Find the start of END SOURCE tag (newline before it) + start_pos = s.rfind("\n[END SOURCE]") + if start_pos != -1: + s = s[:start_pos] + else: + # No newline, entire string is END SOURCE tag + return "" + # Handle opening fence (may or may not have closing fence) if s.startswith("```"): # Remove first triple backticks @@ -201,7 +224,7 @@ def closeJsonStructures(text: str) -> str: # Look for patterns like: "value" or "value\n (unterminated) # Check if we're in the middle of a string value when text ends if result.strip(): - import re + # re is already imported at module level # Count quotes - if odd number, we have an unterminated string quoteCount = result.count('"') if quoteCount % 2 == 1: @@ -367,7 +390,7 @@ def _removeLastIncompleteItem(items: List[str], original_text: str) -> List[str] Remove the last item if it appears to be incomplete/corrupted. This prevents corrupted data from being included in the final result. """ - import re + # re is already imported at module level if not items: return items @@ -418,7 +441,7 @@ def _extractGenericContent(text: str) -> List[Dict[str, Any]]: CRITICAL: Must preserve original content_type and id from the JSON structure! 
""" - import re + # re is already imported at module level sections = [] @@ -1025,7 +1048,7 @@ def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) -> if not cut_off_element: # Extract the last incomplete part from raw JSON # Find the last incomplete string/number/array - import re + # re is already imported at module level # Look for incomplete string at the end incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL) if incomplete_match: @@ -1045,7 +1068,7 @@ def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optiona This helps identify where exactly to continue within nested structures. """ - import re + # re is already imported at module level # Check for code_block with nested JSON if "code" in element: diff --git a/modules/workflows/methods/methodAi/actions/__init__.py b/modules/workflows/methods/methodAi/actions/__init__.py index f0ba9d4d..8ebe6679 100644 --- a/modules/workflows/methods/methodAi/actions/__init__.py +++ b/modules/workflows/methods/methodAi/actions/__init__.py @@ -8,9 +8,7 @@ from .process import process from .webResearch import webResearch from .summarizeDocument import summarizeDocument from .translateDocument import translateDocument -from .convert import convert from .convertDocument import convertDocument -from .extractData import extractData from .generateDocument import generateDocument __all__ = [ @@ -18,9 +16,7 @@ __all__ = [ 'webResearch', 'summarizeDocument', 'translateDocument', - 'convert', 'convertDocument', - 'extractData', 'generateDocument', ] diff --git a/modules/workflows/methods/methodAi/actions/convert.py b/modules/workflows/methods/methodAi/actions/convert.py deleted file mode 100644 index 788fadea..00000000 --- a/modules/workflows/methods/methodAi/actions/convert.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. - -""" -Convert action for AI operations. 
-Converts documents/data between different formats with specific formatting options. -""" - -import logging -import json -from typing import Dict, Any -from modules.workflows.methods.methodBase import action -from modules.datamodels.datamodelChat import ActionResult, ActionDocument -from modules.datamodels.datamodelDocref import DocumentReferenceList - -logger = logging.getLogger(__name__) - -@action -async def convert(self, parameters: Dict[str, Any]) -> ActionResult: - """ - GENERAL: - - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters). - - Input requirements: documentList (required); inputFormat and outputFormat (required). - - Output format: Document in target format with specified formatting options. - - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed). - - Parameters: - - documentList (list, required): Document reference(s) to convert. - - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.). - - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.). - - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect. - - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,). - - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True. - - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'. 
- """ - documentList = parameters.get("documentList", []) - if not documentList: - return ActionResult.isFailure(error="documentList is required") - - inputFormat = parameters.get("inputFormat") - outputFormat = parameters.get("outputFormat") - if not inputFormat or not outputFormat: - return ActionResult.isFailure(error="inputFormat and outputFormat are required") - - # Normalize formats (remove leading dot if present) - normalizedInputFormat = inputFormat.strip().lstrip('.').lower() - normalizedOutputFormat = outputFormat.strip().lstrip('.').lower() - - # Get documents - if isinstance(documentList, DocumentReferenceList): - docRefList = documentList - elif isinstance(documentList, list): - docRefList = DocumentReferenceList.from_string_list(documentList) - else: - docRefList = DocumentReferenceList.from_string_list([documentList]) - - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) - if not chatDocuments: - return ActionResult.isFailure(error="No documents found in documentList") - - # Check if input is standardized JSON format - if so, use direct rendering - if normalizedInputFormat == "json" and len(chatDocuments) == 1: - try: - doc = chatDocuments[0] - # ChatDocument doesn't have documentData - need to load file content using fileId - docBytes = self.services.chat.getFileData(doc.fileId) - if not docBytes: - raise ValueError(f"No file data found for fileId={doc.fileId}") - - # Decode bytes to string - docData = docBytes.decode('utf-8') - - # Try to parse as JSON - if isinstance(docData, str): - jsonData = json.loads(docData) - elif isinstance(docData, dict): - jsonData = docData - else: - jsonData = None - - # Check if it's standardized JSON format (has "documents" or "sections") - if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)): - # Use direct rendering - no AI call needed! 
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - - # Ensure format is "documents" array - if "documents" not in jsonData: - jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]} - - # Get title - title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document") - - # Render with options - renderOptions = {} - if normalizedOutputFormat == "csv": - renderOptions["delimiter"] = parameters.get("delimiter", ",") - renderOptions["columnsPerRow"] = parameters.get("columnsPerRow") - renderOptions["includeHeader"] = parameters.get("includeHeader", True) - - rendered_content, mime_type, _images = await generationService.renderReport( - jsonData, normalizedOutputFormat, title, None, None - ) - - # Apply CSV options if needed (renderer will handle them) - if normalizedOutputFormat == "csv" and renderOptions: - rendered_content = self.csvProcessing.applyCsvOptions(rendered_content, renderOptions) - - validationMetadata = { - "actionType": "ai.convert", - "inputFormat": normalizedInputFormat, - "outputFormat": normalizedOutputFormat, - "hasSourceJson": True, - "conversionType": "direct_rendering" - } - actionDoc = ActionDocument( - documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' 
in doc.documentName else doc.documentName}.{normalizedOutputFormat}", - documentData=rendered_content, - mimeType=mime_type, - sourceJson=jsonData, # Preserve source JSON for structure validation - validationMetadata=validationMetadata - ) - - return ActionResult.isSuccess(documents=[actionDoc]) - - except Exception as e: - logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}") - # Fall through to AI-based conversion - - # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions) - columnsPerRow = parameters.get("columnsPerRow") - delimiter = parameters.get("delimiter", ",") - includeHeader = parameters.get("includeHeader", True) - language = parameters.get("language", "en") - - aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format." - - if normalizedOutputFormat == "csv": - aiPrompt += f" Use '{delimiter}' as the delimiter character." - if columnsPerRow: - aiPrompt += f" Format the output with {columnsPerRow} columns per row." - if not includeHeader: - aiPrompt += " Do not include a header row." - else: - aiPrompt += " Include a header row with column names." - - if language and language != "en": - aiPrompt += f" Use language: {language}." - - aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure." - - return await self.process({ - "aiPrompt": aiPrompt, - "documentList": documentList, - "resultType": normalizedOutputFormat - }) - diff --git a/modules/workflows/methods/methodAi/actions/extractData.py b/modules/workflows/methods/methodAi/actions/extractData.py deleted file mode 100644 index 723914bd..00000000 --- a/modules/workflows/methods/methodAi/actions/extractData.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. - -""" -Extract Data action for AI operations. -Extracts structured data from documents (key-value pairs, entities, facts, etc.). 
-""" - -import logging -from typing import Dict, Any -from modules.workflows.methods.methodBase import action -from modules.datamodels.datamodelChat import ActionResult - -logger = logging.getLogger(__name__) - -@action -async def extractData(self, parameters: Dict[str, Any]) -> ActionResult: - """ - GENERAL: - - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.). - - Input requirements: documentList (required); optional dataStructure, fields. - - Output format: JSON by default, or specified resultType. - - Parameters: - - documentList (list, required): Document reference(s) to extract data from. - - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested. - - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]). - - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json. - """ - documentList = parameters.get("documentList", []) - if not documentList: - return ActionResult.isFailure(error="documentList is required") - - dataStructure = parameters.get("dataStructure", "nested") - fields = parameters.get("fields", []) - resultType = parameters.get("resultType", "json") - - aiPrompt = "Extract structured data from the provided document(s)." - if fields: - fieldsStr = ", ".join(fields) - aiPrompt += f" Extract the following specific fields: {fieldsStr}." - else: - aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information." - - structureInstructions = { - "flat": "Use a flat key-value structure with simple properties.", - "nested": "Use a nested JSON structure with logical grouping of related data.", - "list": "Structure the data as a list/array of objects, one per entity or record." - } - aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}" - - aiPrompt += " Ensure all extracted data is accurate and complete." 
- - return await self.process({ - "aiPrompt": aiPrompt, - "documentList": documentList, - "resultType": resultType - }) - diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py index 5b5db12f..6569ddab 100644 --- a/modules/workflows/methods/methodAi/actions/generateDocument.py +++ b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -3,18 +3,17 @@ """ Generate Document action for AI operations. -Generates documents from scratch or based on templates/inputs using hierarchical approach. +Wrapper around AI service callAiContent method. """ import logging import time -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List from modules.workflows.methods.methodBase import action from modules.datamodels.datamodelChat import ActionResult, ActionDocument -from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy -from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator -from modules.services.serviceGeneration.subContentGenerator import ContentGenerator -from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer +from modules.datamodels.datamodelExtraction import ContentPart +from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum +from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData logger = logging.getLogger(__name__) @@ -59,38 +58,15 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: resultType = "txt" logger.info(f"Auto-detected Text format from prompt") - maxSectionLength = parameters.get("maxSectionLength", 500) - parallelGeneration = parameters.get("parallelGeneration", True) - progressLogging = parameters.get("progressLogging", True) - # Create operation ID for progress tracking workflowId = self.services.workflow.id if 
self.services.workflow else f"no-workflow-{int(time.time())}" operationId = f"doc_gen_{workflowId}_{int(time.time())}" parentOperationId = parameters.get('parentOperationId') try: - # Phase 1: Structure Generation - if progressLogging: - self.services.chat.progressLogStart( - operationId, - "Document", - "Structure Generation", - "Generating document structure...", - parentOperationId=parentOperationId - ) - - structureGenerator = StructureGenerator(self.services) - - # Analyze document purposes and process documents accordingly - cachedContent = None - imageDocuments = [] - documentPurposes = {} - + # Convert documentList to DocumentReferenceList if needed + docRefList = None if documentList: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...") - - # Convert documentList to DocumentReferenceList from modules.datamodels.datamodelDocref import DocumentReferenceList if isinstance(documentList, DocumentReferenceList): @@ -101,301 +77,78 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: docRefList = DocumentReferenceList.from_string_list(documentList) else: docRefList = DocumentReferenceList(references=[]) - - # Get ChatDocuments - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) - if chatDocuments: - logger.info(f"Analyzing purposes for {len(chatDocuments)} documents") - - # Analyze document purposes using AI - purposeAnalyzer = DocumentPurposeAnalyzer(self.services) - purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes( - userPrompt=prompt, - chatDocuments=chatDocuments, - actionContext="generateDocument" - ) - - documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])} - logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}") - - # Separate documents by purpose - textDocs = [] - imageDocsToInclude = [] - imageDocsToAnalyze = [] - - for doc in chatDocuments: - 
docPurpose = documentPurposes.get(doc.id, {}) - purpose = docPurpose.get("purpose", "extract_text_content") - - if purpose == "include_image": - imageDocsToInclude.append(doc) - elif purpose == "analyze_image_vision": - imageDocsToAnalyze.append(doc) - elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]: - textDocs.append(doc) - # Skip "attach" purpose - don't process - - # Process text documents (extract content) - extractedResults = [] - if textDocs: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...") - - # Prepare extraction options with purpose-specific prompts - extractionOptionsList = [] - for doc in textDocs: - docPurpose = documentPurposes.get(doc.id, {}) - extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document" - - extractionOptions = ExtractionOptions( - prompt=extractionPrompt, - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - extractionOptionsList.append((doc, extractionOptions)) - - # Extract content from text documents - for doc, extractionOptions in extractionOptionsList: - try: - docResults = self.services.extraction.extractContent( - [doc], - extractionOptions, - parentOperationId=operationId - ) - extractedResults.extend(docResults) - except Exception as e: - logger.error(f"Error extracting content from {doc.fileName}: {str(e)}") - - logger.info(f"Extracted content from {len(extractedResults)} text document(s)") - - # Process images to analyze (vision call) - if imageDocsToAnalyze: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...") - - # Extract content from images using vision analysis - for doc in imageDocsToAnalyze: - try: - docPurpose = documentPurposes.get(doc.id, {}) - 
extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image" - - extractionOptions = ExtractionOptions( - prompt=extractionPrompt, - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - - docResults = self.services.extraction.extractContent( - [doc], - extractionOptions, - parentOperationId=operationId - ) - extractedResults.extend(docResults) - except Exception as e: - logger.error(f"Error analyzing image {doc.fileName}: {str(e)}") - - logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI") - - # Process images to include (store image data) - if imageDocsToInclude: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...") - - # Get image data for inclusion - from modules.interfaces.interfaceDbComponentObjects import getInterface - dbInterface = getInterface() - - for doc in imageDocsToInclude: - try: - # Get image bytes - imageBytes = dbInterface.getFileData(doc.fileId) - if imageBytes: - # Encode to base64 - import base64 - base64Data = base64.b64encode(imageBytes).decode('utf-8') - - # Create image document entry - imageDoc = { - "id": doc.id, - "fileName": doc.fileName, - "mimeType": doc.mimeType, - "base64Data": base64Data, - "altText": doc.fileName or "Image", - "fileSize": doc.fileSize - } - imageDocuments.append(imageDoc) - logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)") - else: - logger.warning(f"Could not retrieve image data for {doc.fileName}") - except Exception as e: - logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}") - - logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion") - - # Build cachedContent with all information - cachedContent = { - "extractedContent": extractedResults, - "imageDocuments": imageDocuments, - 
"documentPurposes": documentPurposes, - "extractionTimestamp": time.time(), - "sourceDocuments": [doc.id for doc in chatDocuments] - } - - logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include") - # Generate structure - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...") + # Prepare title + title = parameters.get("documentType") or "Generated Document" - structure = await structureGenerator.generateStructure( - userPrompt=prompt, - documentList=documentList if documentList else None, - cachedContent=cachedContent, - maxSectionLength=maxSectionLength, - existingImages=imageDocuments # Pass existing images for structure generation + # Call AI service for document generation + # callAiContent handles documentList internally via Phases 5A-5E + options = AiCallOptions( + operationType=OperationTypeEnum.DATA_GENERATE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED, + compressPrompt=False, + compressContext=False ) - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") - - # Phase 2: Content Generation - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.34, - "Starting content generation..." 
- ) - - contentGenerator = ContentGenerator(self.services) - - # Create enhanced progress callback - def progressCallback(sectionIndex: int, totalSections: int, message: str): - if progressLogging: - # Calculate progress: 34% to 90% for content generation phase - if totalSections > 0: - progress = 0.34 + (0.56 * (sectionIndex / totalSections)) - else: - progress = 0.34 - - # Format message - if sectionIndex > 0 and totalSections > 0: - progressMessage = f"Section {sectionIndex}/{totalSections}: {message}" - else: - progressMessage = message - - self.services.chat.progressLogUpdate( - operationId, - progress, - progressMessage - ) - - completeStructure = await contentGenerator.generateContent( - structure=structure, - cachedContent=cachedContent, - userPrompt=prompt, - progressCallback=progressCallback, - parallelGeneration=parallelGeneration - ) - - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") - - # Phase 3: Integration & Rendering - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.91, - "Rendering final document..." 
- ) - - # Use existing renderReport method - title = structure.get("metadata", {}).get("title", "Generated Document") - if documentType: - title = f"{title} ({documentType})" - - renderedContent, mimeType, images = await self.services.generation.renderReport( - extractedContent=completeStructure, + aiResponse: AiResponse = await self.services.ai.callAiContent( + prompt=prompt, + options=options, + documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E outputFormat=resultType, title=title, - userPrompt=prompt, - aiService=self.services.ai + parentOperationId=parentOperationId ) - # Build list of documents to return - documents = [ - ActionDocument( - documentName=f"document.{resultType}", - documentData=renderedContent, - mimeType=mimeType - ) - ] + # Convert AiResponse to ActionResult + documents = [] - # Add images as separate documents - if images: - logger.info(f"Processing {len(images)} image(s) from renderer") - import base64 - for idx, imageData in enumerate(images): - try: - base64Data = imageData.get("base64Data", "") - altText = imageData.get("altText", f"image_{idx + 1}") - caption = imageData.get("caption", "") - sectionId = imageData.get("sectionId", f"section_{idx + 1}") - - if base64Data: - # Decode base64 to bytes - imageBytes = base64.b64decode(base64Data) - - # Determine filename and mime type - filename = imageData.get("filename", f"image_{idx + 1}.png") - if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')): - filename = f"image_{idx + 1}.png" - - # Determine mime type from filename - if filename.lower().endswith('.png'): - imageMimeType = "image/png" - elif filename.lower().endswith(('.jpg', '.jpeg')): - imageMimeType = "image/jpeg" - elif filename.lower().endswith('.gif'): - imageMimeType = "image/gif" - elif filename.lower().endswith('.webp'): - imageMimeType = "image/webp" - else: - imageMimeType = "image/png" # Default - - # Add image document - documents.append(ActionDocument( - 
documentName=filename, - documentData=imageBytes, - mimeType=imageMimeType - )) - logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})") + # Convert DocumentData to ActionDocument + if aiResponse.documents: + for docData in aiResponse.documents: + documents.append(ActionDocument( + documentName=docData.documentName, + documentData=docData.documentData, + mimeType=docData.mimeType, + sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None + )) + + # If no documents but content exists, create a document from content + if not documents and aiResponse.content: + # Determine document name from metadata + docName = f"document.{resultType}" + if aiResponse.metadata and aiResponse.metadata.filename: + docName = aiResponse.metadata.filename + elif aiResponse.metadata and aiResponse.metadata.title: + import re + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if sanitized: + if not sanitized.lower().endswith(f".{resultType}"): + docName = f"{sanitized}.{resultType}" else: - logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, skipping") - except Exception as e: - logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True) - continue - else: - logger.debug("No images returned from renderer") - - # Note: Document creation is handled by the workflow system - # We just return the rendered content and images in ActionResult - - if progressLogging: - self.services.chat.progressLogFinish(operationId, True) + docName = sanitized + + # Determine mime type + mimeType = "text/plain" + if resultType == "html": + mimeType = "text/html" + elif resultType == "json": + mimeType = "application/json" + elif resultType == "pdf": + mimeType = "application/pdf" + elif resultType == "md": + mimeType = "text/markdown" + + documents.append(ActionDocument( + documentName=docName, + 
documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content, + mimeType=mimeType + )) return ActionResult.isSuccess(documents=documents) except Exception as e: - logger.error(f"Error in hierarchical document generation: {str(e)}") - if progressLogging: - self.services.chat.progressLogFinish(operationId, False) + logger.error(f"Error in document generation: {str(e)}") return ActionResult.isFailure(error=str(e)) diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index 2468d949..5abc57cd 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -8,11 +8,12 @@ Universal AI document processing action. import logging import time +import json from typing import Dict, Any, List, Optional from modules.workflows.methods.methodBase import action from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelAi import AiCallOptions -from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart +from modules.datamodels.datamodelExtraction import ContentPart logger = logging.getLogger(__name__) @@ -82,8 +83,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available logger.info(f"Using result type: {resultType} -> {output_extension}") - # Phase 7.3: Extract content first if documents provided, then use contentParts - # Check if contentParts are already provided (preferred path) + # Check if contentParts are already provided (from context.extractContent or other sources) contentParts: Optional[List[ContentPart]] = None if "contentParts" in parameters: contentParts = parameters.get("contentParts") @@ -95,63 +95,42 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: 
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty") contentParts = None - # If contentParts not provided but documentList is, extract content first - if not contentParts and documentList.references: - self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents") - - # Get ChatDocuments - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - if not chatDocuments: - logger.warning("No documents found in documentList") - else: - logger.info(f"Extracting content from {len(chatDocuments)} documents") - - # Prepare extraction options (use defaults if not provided) - extractionOptions = parameters.get("extractionOptions") - if not extractionOptions: - extractionOptions = ExtractionOptions( - prompt="Extract all content from the document", - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - - # Extract content using extraction service with hierarchical progress logging - # Pass operationId for per-document progress tracking - extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId) - - # Combine all ContentParts from all extracted results - contentParts = [] - for extracted in extractedResults: - if extracted.parts: - contentParts.extend(extracted.parts) - - logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents") - # Update progress - preparing AI call self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call") - # Build options with only resultFormat - let service layer handle all other parameters + # Build options output_format = output_extension.replace('.', '') or 'txt' options = AiCallOptions( resultFormat=output_format - # Removed all model parameters - service layer will analyze prompt and determine optimal parameters ) # Update progress - calling AI 
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI") - # Use unified callAiContent method with contentParts (extraction is now separate) - aiResponse = await self.services.ai.callAiContent( - prompt=aiPrompt, - options=options, - contentParts=contentParts, # Already extracted (or None if no documents) - outputFormat=output_format, - parentOperationId=operationId - ) + # Use unified callAiContent method + # If contentParts provided (pre-extracted), use them directly + # Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally + # Note: ContentExtracted documents (from context.extractContent) are now handled + # automatically in _extractAndPrepareContent() (Phase 5B) + if contentParts: + # Pre-extracted ContentParts - use them directly + aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + contentParts=contentParts, # Pre-extracted ContentParts + outputFormat=output_format, + parentOperationId=operationId + ) + else: + # Pass documentList - callAiContent handles Phases 5A-5E internally + # This includes automatic detection of ContentExtracted documents + aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + documentList=documentList, # callAiContent macht Phasen 5A-5E + outputFormat=output_format, + parentOperationId=operationId + ) # Update progress - processing result self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result") diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py index 7595c2eb..881b007d 100644 --- a/modules/workflows/methods/methodAi/methodAi.py +++ b/modules/workflows/methods/methodAi/methodAi.py @@ -15,9 +15,7 @@ from .actions.process import process from .actions.webResearch import webResearch from .actions.summarizeDocument import summarizeDocument from .actions.translateDocument import translateDocument -from .actions.convert import convert from 
.actions.convertDocument import convertDocument -from .actions.extractData import extractData from .actions.generateDocument import generateDocument logger = logging.getLogger(__name__) @@ -192,69 +190,6 @@ class MethodAi(MethodBase): }, execute=translateDocument.__get__(self, self.__class__) ), - "convert": WorkflowActionDefinition( - actionId="ai.convert", - description="Convert documents/data between different formats with specific formatting options", - parameters={ - "documentList": WorkflowActionParameter( - name="documentList", - type="List[str]", - frontendType=FrontendType.DOCUMENT_REFERENCE, - required=True, - description="Document reference(s) to convert" - ), - "inputFormat": WorkflowActionParameter( - name="inputFormat", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["json", "csv", "xlsx", "txt"], - required=True, - description="Source format" - ), - "outputFormat": WorkflowActionParameter( - name="outputFormat", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["csv", "json", "xlsx", "txt"], - required=True, - description="Target format" - ), - "columnsPerRow": WorkflowActionParameter( - name="columnsPerRow", - type="int", - frontendType=FrontendType.NUMBER, - required=False, - description="For CSV output, number of columns per row. 
Default: auto-detect", - validation={"min": 1, "max": 100} - ), - "delimiter": WorkflowActionParameter( - name="delimiter", - type="str", - frontendType=FrontendType.TEXT, - required=False, - default=",", - description="For CSV output, delimiter character" - ), - "includeHeader": WorkflowActionParameter( - name="includeHeader", - type="bool", - frontendType=FrontendType.CHECKBOX, - required=False, - default=True, - description="For CSV output, whether to include header row" - ), - "language": WorkflowActionParameter( - name="language", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["de", "en", "fr"], - required=False, - default="en", - description="Language for output" - ) - }, - execute=convert.__get__(self, self.__class__) - ), "convertDocument": WorkflowActionDefinition( actionId="ai.convertDocument", description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)", @@ -285,45 +220,6 @@ class MethodAi(MethodBase): }, execute=convertDocument.__get__(self, self.__class__) ), - "extractData": WorkflowActionDefinition( - actionId="ai.extractData", - description="Extract structured data from documents (key-value pairs, entities, facts, etc.)", - parameters={ - "documentList": WorkflowActionParameter( - name="documentList", - type="List[str]", - frontendType=FrontendType.DOCUMENT_REFERENCE, - required=True, - description="Document reference(s) to extract data from" - ), - "dataStructure": WorkflowActionParameter( - name="dataStructure", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["flat", "nested", "list"], - required=False, - default="nested", - description="Desired data structure" - ), - "fields": WorkflowActionParameter( - name="fields", - type="List[str]", - frontendType=FrontendType.MULTISELECT, - required=False, - description="Specific fields/properties to extract (e.g., [name, date, amount])" - ), - "resultType": WorkflowActionParameter( - name="resultType", - type="str", - 
frontendType=FrontendType.SELECT, - frontendOptions=["json", "csv", "xlsx"], - required=False, - default="json", - description="Output format" - ) - }, - execute=extractData.__get__(self, self.__class__) - ), "generateDocument": WorkflowActionDefinition( actionId="ai.generateDocument", description="Generate documents from scratch or based on templates/inputs", @@ -371,9 +267,7 @@ class MethodAi(MethodBase): self.webResearch = webResearch.__get__(self, self.__class__) self.summarizeDocument = summarizeDocument.__get__(self, self.__class__) self.translateDocument = translateDocument.__get__(self, self.__class__) - self.convert = convert.__get__(self, self.__class__) self.convertDocument = convertDocument.__get__(self, self.__class__) - self.extractData = extractData.__get__(self, self.__class__) self.generateDocument = generateDocument.__get__(self, self.__class__) def _format_timestamp_for_filename(self) -> str: diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py index 8c5fd5fb..949ac63d 100644 --- a/modules/workflows/methods/methodContext/actions/extractContent.py +++ b/modules/workflows/methods/methodContext/actions/extractContent.py @@ -19,10 +19,21 @@ logger = logging.getLogger(__name__) @action async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: """ - Extract content from documents (separate from AI calls). + Extract raw content parts from documents without AI processing. - This action performs pure content extraction without AI processing. - The extracted ContentParts can then be used by subsequent AI processing actions. + This action performs pure content extraction WITHOUT AI/OCR processing. + It returns ContentParts with different typeGroups: + - "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.) 
+ - "image": Images as base64-encoded data (NOT converted to text, no OCR) + - "table": Tables as structured data + - "structure": Structured content (JSON, etc.) + - "container": Container elements (PDF pages, etc.) + + IMPORTANT: + - Images are returned as base64 data, NOT as extracted text + - No OCR is performed - images are preserved as visual elements + - Text extraction only works for text-based formats (not images) + - The extracted ContentParts can then be used by subsequent AI processing actions Parameters: - documentList (list, required): Document reference(s) to extract content from. @@ -30,7 +41,8 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: Returns: - ActionResult with ActionDocument containing ContentExtracted objects - - ContentExtracted.parts contains List[ContentPart] (already chunked if needed) + - ContentExtracted.parts contains List[ContentPart] with various typeGroups + - Each ContentPart has a typeGroup indicating its type (text, image, table, etc.) 
""" try: # Init progress logger @@ -79,12 +91,26 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: # Convert dict to ExtractionOptions object if needed, or create defaults if extractionOptionsParam: if isinstance(extractionOptionsParam, dict): + # Ensure required fields are present + if "prompt" not in extractionOptionsParam: + extractionOptionsParam["prompt"] = "Extract all content from the document" + if "mergeStrategy" not in extractionOptionsParam: + extractionOptionsParam["mergeStrategy"] = MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ) # Convert dict to ExtractionOptions object - extractionOptions = ExtractionOptions(**extractionOptionsParam) + try: + extractionOptions = ExtractionOptions(**extractionOptionsParam) + except Exception as e: + logger.warning(f"Failed to create ExtractionOptions from dict: {str(e)}, using defaults") + extractionOptions = None elif isinstance(extractionOptionsParam, ExtractionOptions): extractionOptions = extractionOptionsParam else: # Invalid type, use defaults + logger.warning(f"Invalid extractionOptions type: {type(extractionOptionsParam)}, using defaults") extractionOptions = None else: extractionOptions = None diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py index a635764f..942f3f85 100644 --- a/modules/workflows/methods/methodContext/methodContext.py +++ b/modules/workflows/methods/methodContext/methodContext.py @@ -50,7 +50,7 @@ class MethodContext(MethodBase): ), "extractContent": WorkflowActionDefinition( actionId="context.extractContent", - description="Extract content from documents (separate from AI calls)", + description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. 
Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.", parameters={ "documentList": WorkflowActionParameter( name="documentList", @@ -64,7 +64,7 @@ class MethodContext(MethodBase): type="dict", frontendType=FrontendType.JSON, required=False, - description="Extraction options (if not provided, defaults are used)" + description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text." ) }, execute=extractContent.__get__(self, self.__class__) diff --git a/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md b/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md deleted file mode 100644 index 39c649ce..00000000 --- a/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md +++ /dev/null @@ -1,354 +0,0 @@ -# Architecture & Implementation Analysis -## Deep Review of Hierarchical Document Generation - -**Date**: 2025-12-22 -**Status**: Critical Issues Found - ---- - -## Executive Summary - -The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While core components exist, several fundamental issues need to be addressed. 
- ---- - -## ✅ What's Correctly Implemented - -### Phase 1: Core Infrastructure ✅ -- ✅ `StructureGenerator` class exists with `generateStructure()` method -- ✅ `ContentGenerator` class exists with `generateContent()` method -- ✅ `ContentIntegrator` class exists with `integrateContent()` method -- ✅ `generateDocument` action uses hierarchical approach -- ✅ Basic progress logging implemented -- ✅ Error handling with `createErrorSection()` implemented - -### Phase 2: Image Generation ✅ -- ✅ `_generateImageSection()` method implemented -- ✅ Image prompt extraction from structure -- ✅ Base64 image data storage -- ✅ Error handling for image failures - -### Phase 3: Parallel Processing ✅ -- ✅ `_generateSectionsParallel()` method implemented -- ✅ `_generateSectionsSequential()` method implemented -- ✅ Batch processing for large documents -- ✅ Progress callback system -- ✅ Exception handling in parallel execution - ---- - -## ❌ Critical Issues Found - -### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED** - -**Problem**: -- In parallel mode, sections within the same batch cannot see each other (correct) -- BUT: Sections in later batches should see sections from earlier batches -- **Current Status**: Code was fixed to accumulate previous sections, but needs verification - -**Location**: `subContentGenerator.py` lines 240-319 - -**Fix Applied**: -- Added `accumulatedPreviousSections` to track sections across batches -- Pass accumulated sections to each batch -- **VERIFICATION NEEDED**: Test that prompts actually show previous sections - -**Risk**: Medium - May cause continuity issues in generated content - ---- - -### Issue 2: Variable Shadowing Bug ✅ **FIXED** - -**Problem**: -- `contentType` variable was shadowed in loop, causing wrong section type in prompts - -**Location**: `subContentGenerator.py` line 676 - -**Fix Applied**: -- Renamed loop variable to `prevContentType` - -**Status**: ✅ Fixed - ---- - -### Issue 3: Missing 
`generation_hint` in Structure Response ✅ **FIXED** - -**Problem**: -- Structure generator creates generic hints like "Section heading" instead of meaningful hints -- AI generates same content for all headings because hints are identical - -**Location**: `subStructureGenerator.py` lines 242-269 - -**Fix Applied**: -- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs -- Example: `section_heading_current_state` → "Current State" - -**Status**: ✅ Fixed - ---- - -### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED** - -**Problem**: -- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays -- Template missing `generation_hint` and `complexity` fields -- Template showed `order: 0` but should start from 1 - -**Location**: `datamodelJson.py` - -**Fix Applied**: -- Updated template to show empty `elements: []` -- Added `generation_hint` to all sections -- Added `complexity` to all sections -- Changed `order` to start from 1 -- Added `title` to metadata - -**Status**: ✅ Fixed - ---- - -### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED** - -**Problem**: -- Prompt said "All sections must have empty elements arrays" but template showed filled arrays -- Prompt didn't explicitly require `generation_hint` and `complexity` fields - -**Location**: `subStructureGenerator.py` lines 181-190 - -**Fix Applied**: -- Enhanced prompt to explicitly require `generation_hint` and `complexity` -- Clarified that template examples show structure, but elements must be empty - -**Status**: ✅ Fixed - ---- - -## ⚠️ Remaining Issues & Gaps - -### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED** - -**Problem**: -- No validation that structure has required fields before content generation -- No check that all sections have `generation_hint` before generating content - -**Expected** (from Phase 6): -```python -# Validate structure before content generation -if not 
validateStructure(structure): - raise ValueError("Invalid structure") -``` - -**Current**: Validation happens in `_validateAndEnhanceStructure()` but only adds missing fields, doesn't validate - -**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better - -**Recommendation**: Add explicit validation method - ---- - -### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED** - -**Problem**: -- Previous sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing -- Should show `generation_hint` as fallback when elements not available - -**Location**: `subContentGenerator.py` lines 671-709 - -**Current Behavior**: -- Shows content preview if elements exist -- Shows nothing if elements don't exist - -**Expected Behavior**: -- Show content preview if elements exist -- Show `generation_hint` as fallback if elements don't exist - -**Impact**: Medium - Reduces context quality in parallel generation - -**Recommendation**: Add fallback to show `generation_hint` when elements not available - ---- - -### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED** - -**Problem**: -- Debug file writes `aiResponse.content` (raw AI response) before validation -- Can't verify if `generation_hint` was added by validation - -**Location**: `subStructureGenerator.py` lines 77-84 - -**Impact**: Low - Makes debugging harder but doesn't affect functionality - -**Recommendation**: Write validated structure to separate debug file - ---- - -### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED** - -**Problem**: -- No unit tests for any components (Phase 7 requirement) -- No tests for structure generation -- No tests for content generation -- No tests for integration - -**Impact**: High - No way to verify correctness or catch regressions - -**Recommendation**: Add comprehensive unit tests - ---- - -### Issue 10: Missing Integration Tests ⚠️ 
**NOT IMPLEMENTED** - -**Problem**: -- No end-to-end tests -- No tests with images -- No tests with long documents -- No error scenario tests - -**Impact**: High - No verification of complete flow - -**Recommendation**: Add integration tests - ---- - -### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED** - -**Problem**: -- Content is extracted and cached, but: - - No cache validation (check if documents changed) - - No cache reuse verification - - Content is passed to prompts but may not be formatted efficiently - -**Expected** (from Phase 5): -- Cache validation -- Efficient formatting -- Performance testing - -**Current**: Basic caching exists but not optimized - -**Impact**: Medium - Works but could be more efficient - -**Recommendation**: Add cache validation and optimization - ---- - -### Issue 12: Renderer Updates Not Verified ⚠️ **UNKNOWN** - -**Problem**: -- Implementation plan requires renderer updates for images -- HTML renderer should create separate image files -- PDF/XLSX/PPTX renderers should embed images -- **Status unknown** - need to verify renderers handle images correctly - -**Impact**: High - Images may not render correctly - -**Recommendation**: Verify all renderers handle images correctly - ---- - -## 📋 Architecture Compliance Check - -### Data Structure Compliance ✅ - -| Field | Required | Implemented | Status | -|-------|----------|-------------|--------| -| `metadata.title` | Yes | ✅ | ✅ | -| `metadata.split_strategy` | Yes | ✅ | ✅ | -| `sections[].id` | Yes | ✅ | ✅ | -| `sections[].content_type` | Yes | ✅ | ✅ | -| `sections[].complexity` | Yes | ✅ | ✅ | -| `sections[].generation_hint` | Yes | ✅ | ✅ | -| `sections[].order` | Yes | ✅ | ✅ | -| `sections[].elements` | Yes | ✅ | ✅ | -| `sections[].image_prompt` | Image only | ✅ | ✅ | - -### Component Method Compliance ✅ - -| Component | Method | Required | Implemented | Status | -|-----------|--------|----------|-------------|--------| -| StructureGenerator | 
`generateStructure()` | Yes | ✅ | ✅ | -| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ | -| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ | -| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ | -| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ | -| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ | -| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ | -| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ | -| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ | -| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ | -| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ | - ---- - -## 🎯 Priority Fixes Needed - -### Critical (Must Fix) -1. ✅ **Issue 2**: Variable shadowing bug - **FIXED** -2. ✅ **Issue 3**: Missing generation_hint - **FIXED** -3. ✅ **Issue 4**: JSON template mismatch - **FIXED** -4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED** -5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION** - -### High Priority (Should Fix) -6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION** -7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED** -8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED** - -### Medium Priority (Nice to Have) -9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED** -10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED** -11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED** -12. 
⚠️ **Issue 8**: Debug file improvements - **NOT IMPLEMENTED** - ---- - -## ✅ Summary - -### What Works -- Core infrastructure is implemented -- Image generation is integrated -- Parallel processing is implemented -- Error handling is in place -- Progress logging works - -### What's Fixed (This Session) -- Variable shadowing bug -- Missing generation_hint extraction -- JSON template architecture mismatch -- Prompt instructions clarity -- Previous sections tracking (needs verification) - -### What Needs Work -- Unit and integration tests -- Renderer verification -- Previous sections formatting fallback -- Cache optimization -- Structure validation - -### Overall Status -**Architecture**: ✅ **85% Compliant** -**Implementation**: ✅ **80% Complete** -**Testing**: ❌ **0% Complete** -**Production Ready**: ⚠️ **Not Yet** (needs testing and verification) - ---- - -## Next Steps - -1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode -2. **Verify Issue 12**: Test that all renderers handle images correctly -3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator) -4. **Add Integration Tests**: Test end-to-end flow with various scenarios -5. **Improve Previous Sections Formatting**: Add fallback to show generation_hint when elements not available -6. **Add Structure Validation**: Explicit validation before content generation -7. 
**Optimize Content Caching**: Add cache validation and efficient formatting - ---- - -**Analysis Complete**: 2025-12-22 - diff --git a/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md deleted file mode 100644 index d0a59e80..00000000 --- a/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md +++ /dev/null @@ -1,459 +0,0 @@ -# Concept: Hierarchical Document Generation with Image Integration - -## Executive Summary - -This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently. - -**Key Decisions**: -- ✅ **Performance**: Parallel processing with ChatLog progress messages -- ✅ **Error Handling**: Skip failed sections, show error messages -- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access) -- ✅ **Backward Compatibility**: Not needed - implement as new default - -**Renderer Status**: -- ✅ **Ready**: Text, Markdown, DOCX renderers -- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images) -- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support) - -## Problem Statement - -Currently, the document generation system has the following limitations: - -1. **No Image Integration**: Images are generated separately but cannot be embedded into document structures -2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters) -3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily -4. 
**No Structured Approach**: No mechanism to first define document structure, then populate sections - -## Current Architecture Analysis - -### Current Flow: -``` -User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document -``` - -### Issues: -- AI generates complete JSON structure in one pass -- Images are generated separately via `ai.generate` action -- No mechanism to integrate generated images into document structure -- JSON schema supports `image` content_type, but AI rarely generates it -- Content extraction happens per action, not cached/reused - -### Current Image Handling: -- Images can be rendered IF they exist in JSON structure (`content_type: "image"`) -- Image data expected as `base64Data` in elements -- Renderers support image rendering (Docx, PDF, HTML, etc.) -- But images are never generated WITHIN document generation - -## Proposed Solution: Hierarchical Document Generation - -### Core Concept - -**Three-Phase Approach:** -1. **Structure Generation Phase**: Generate document skeleton with section placeholders -2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts -3. 
**Integration Phase**: Merge all generated content into final document structure - -### Architecture Overview - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Phase 1: Structure Generation │ -│ - Generate document skeleton │ -│ - Identify sections (text, image, complex) │ -│ - Create section placeholders with metadata │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 2: Content Generation (Tree-like) │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 1: Heading (simple) │ │ -│ │ → Generate directly │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 2: Paragraph (simple) │ │ -│ │ → Generate directly │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 3: Image (complex) │ │ -│ │ → Sub-prompt: Generate image │ │ -│ │ → Store image data │ │ -│ │ → Create image section with base64Data │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 4: Long Chapter (complex) │ │ -│ │ → Sub-prompt: Generate chapter content │ │ -│ │ → Split into subsections if needed │ │ -│ └──────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 3: Integration │ -│ - Merge all generated content │ -│ - Replace placeholders with actual data │ -│ - Validate structure completeness │ -│ - Render to final format │ -└─────────────────────────────────────────────────────────────┘ -``` - -## Detailed Design - -### Phase 1: Structure Generation - -**Purpose**: Create document skeleton with section metadata - -**Process**: -1. AI generates document structure with sections -2. 
Each section includes: - - `id`: Unique identifier - - `content_type`: Type (heading, paragraph, image, table, etc.) - - `complexity`: "simple" or "complex" - - `generation_hint`: Instructions for content generation - - `order`: Section order - - `elements`: Empty or placeholder - -**Example Structure**: -```json -{ - "metadata": { - "title": "Children's Bedtime Story", - "split_strategy": "single_document" - }, - "documents": [{ - "id": "doc_1", - "sections": [ - { - "id": "section_title", - "content_type": "heading", - "complexity": "simple", - "generation_hint": "Story title", - "order": 1, - "elements": [] - }, - { - "id": "section_intro", - "content_type": "paragraph", - "complexity": "simple", - "generation_hint": "Introduction paragraph", - "order": 2, - "elements": [] - }, - { - "id": "section_image_1", - "content_type": "image", - "complexity": "complex", - "generation_hint": "Illustration: Rabbit meeting owl in moonlit forest", - "image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch", - "order": 3, - "elements": [] - }, - { - "id": "section_chapter_1", - "content_type": "paragraph", - "complexity": "complex", - "generation_hint": "First chapter: Rabbit's adventure begins", - "order": 4, - "elements": [] - } - ] - }] -} -``` - -### Phase 2: Content Generation - -**Purpose**: Generate actual content for each section - -**Process**: -1. Iterate through sections in order -2. 
For each section: - - **Simple sections** (heading, short paragraph): - - Generate content directly via AI - - Populate `elements` array - - **Complex sections** (image, long chapter): - - Create sub-prompt based on `generation_hint` and `image_prompt` - - Generate content via specialized action: - - Images: `ai.generate` with image generation - - Long text: `ai.process` with focused prompt - - Store generated content - - Populate `elements` array - -**Content Caching**: -- Extract content from source documents ONCE at the start -- Cache extracted content for reuse across all sections -- Pass cached content to sub-prompts to avoid re-extraction - -**Image Generation**: -- For `content_type: "image"` sections: - - Use `image_prompt` from structure - - Call `ai.generate` action with image generation - - Receive base64 image data - - Create image element: - ```json - { - "url": "data:image/png;base64,", - "base64Data": "", - "altText": "", - "caption": "" - } - ``` - -### Phase 3: Integration - -**Purpose**: Merge all content into final document structure - -**Process**: -1. Validate all sections have content -2. Merge generated content into structure -3. Replace placeholders with actual data -4. Finalize JSON structure -5. Render to target format (docx, pdf, html, etc.) - -## Implementation Strategy - -### New Components Needed - -1. **Structure Generator** (`structureGenerator.py`) - - Generates document skeleton - - Identifies section complexity - - Creates generation hints - -2. **Content Generator** (`contentGenerator.py`) - - Generates content for each section - - Handles simple vs complex sections - - Manages sub-prompts and image generation - - Caches extracted content - -3. **Content Integrator** (`contentIntegrator.py`) - - Merges generated content - - Validates completeness - - Finalizes document structure - -### Modified Components - -1. 
**`generateDocument` action** - - Implement hierarchical generation as default - - Orchestrate three phases - - Add progress logging for each phase - -2. **`process` action** - - Support content caching (extract once, reuse) - - Support sub-prompt generation for sections - -3. **Prompt Builder** (`subPromptBuilderGeneration.py`) - - Add structure generation prompt - - Add section-specific content prompts - - Add image generation prompt templates - -4. **Renderers** (Update required): - - **HTML Renderer**: Create separate image files and link them - - **PDF Renderer**: Embed images using reportlab - - **XLSX Renderer**: Add image embedding support - - **PPTX Renderer**: Add image embedding support - -### New Action Parameters - -**For `generateDocument`**: -- `enableImageIntegration`: boolean (default: true) -- `maxSectionLength`: int (threshold for "complex" sections, default: 500 words) -- `parallelGeneration`: boolean (default: true) - enable parallel section generation -- `progressLogging`: boolean (default: true) - send ChatLog progress updates - -**For sub-prompts**: -- `sectionContext`: Previous sections for context -- `cachedContent`: Extracted content cache (to avoid re-extraction) -- `targetSection`: Section metadata -- `previousSections`: Array of already-generated sections for continuity - -## Benefits - -1. **Image Integration**: Images can be generated and embedded into documents -2. **Structured Approach**: Clear separation of structure and content -3. **Efficiency**: Content extracted once, reused across sections -4. **Scalability**: Can handle very long documents by splitting into sections -5. **Quality**: Better control over complex sections (images, long chapters) -6. **Flexibility**: Can generate different content types per section - -## Migration Strategy - -**Note**: No backwards compatibility needed - can implement directly as new default. - -1. **Phase 1**: Implement hierarchical generation as new default -2. 
**Phase 2**: Update renderers (HTML, PDF, XLSX, PPTX) for image support -3. **Phase 3**: Testing and refinement -4. **Phase 4**: Remove old single-pass mode (or keep as internal fallback only) - -## Example Workflow - -**User Request**: "Create a children's bedtime story with 5 illustrations" - -**Phase 1 Output**: -```json -{ - "metadata": {"title": "Flöckchen's Adventure"}, - "documents": [{ - "sections": [ - {"id": "title", "content_type": "heading", "complexity": "simple", ...}, - {"id": "intro", "content_type": "paragraph", "complexity": "simple", ...}, - {"id": "img1", "content_type": "image", "complexity": "complex", - "image_prompt": "Rabbit meeting owl", ...}, - {"id": "chapter1", "content_type": "paragraph", "complexity": "complex", ...}, - {"id": "img2", "content_type": "image", "complexity": "complex", ...}, - ... - ] - }] -} -``` - -**Phase 2 Process**: -- Generate title → populate elements -- Generate intro → populate elements -- Generate image 1 → call `ai.generate`, store base64 → populate elements -- Generate chapter 1 → sub-prompt → populate elements -- Generate image 2 → call `ai.generate`, store base64 → populate elements -- ... - -**Phase 3 Output**: Complete document with all sections populated, ready for rendering - -## Renderer Readiness Assessment - -### Current Renderer Status for Image Handling: - -1. **Text Renderer** (`rendererText.py`): ✅ **READY** - - Skips images, shows placeholder: `[Image: altText]` - - No changes needed - -2. **Markdown Renderer** (`rendererMarkdown.py`): ✅ **READY** - - Shows placeholder with truncated base64: `![altText](data:image/png;base64,...)` - - No changes needed (markdown limitation) - -3. 
**HTML Renderer** (`rendererHtml.py`): ⚠️ **NEEDS UPDATE** - - Currently: Embeds base64 directly in `<img>` tag as data URI - - **Required Change**: Create separate image files and link to them - - Implementation: Generate image files (e.g., `image_1.png`, `image_2.png`) alongside HTML - - Update `<img>` tags to use relative paths: `<img src="image_1.png" alt="...">` - - Return multiple files: HTML file + image files - -4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE** - - Currently: Shows placeholder `[Image: altText]` - - **Required Change**: Embed images directly in PDF using reportlab - - Implementation: Use `reportlab.platypus.Image()` with base64 decoded bytes - -5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY** - - Embeds images directly using `doc.add_picture()` - - Adds captions below images - - No changes needed - -6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION** - - Currently: No image handling found - - **Required Change**: Add image support using openpyxl - - Implementation: Use `openpyxl.drawing.image.Image()` to embed images in cells - - Store images in worksheet cells or as floating images - -7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION** - - Currently: No image handling found - - **Required Change**: Add image support using python-pptx - - Implementation: Use `slide.shapes.add_picture()` to add images to slides - -### Renderer Update Requirements: - -**Priority 1 (Critical for HTML output)**: -- HTML Renderer: Create separate image files and link them - -**Priority 2 (Important for document formats)**: -- PDF Renderer: Embed images using reportlab -- XLSX Renderer: Add image embedding support -- PPTX Renderer: Add image embedding support - -## Answers to Open Questions - -### 1. Performance: How to handle very large documents (100+ sections)? - -**Answer**: Use parallel processing where possible, with progress ChatLog messages. 
- -**Implementation Strategy**: -- **Parallel Section Generation**: Generate independent sections in parallel using asyncio -- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time) -- **Progress Tracking**: Send ChatLog progress updates: - - "Generating structure..." (Phase 1) - - "Generating content for section X/Y..." (Phase 2) - - "Generating image for section X..." (Phase 2 - images) - - "Merging content..." (Phase 3) - - "Rendering final document..." (Phase 3) -- **Streaming**: For very large documents, consider streaming partial results - -**Example Progress Messages**: -``` -Phase 1: Structure Generation (0% → 33%) -Phase 2: Content Generation (33% → 90%) - - Section 1/10: Heading (34%) - - Section 2/10: Paragraph (40%) - - Section 3/10: Image generation (50%) - - Section 4/10: Chapter (60%) - ... -Phase 3: Integration & Rendering (90% → 100%) -``` - -### 2. Error Handling: What if one section fails? - -**Answer**: Skip failed sections, keep section title and type, show error message in the section. - -**Implementation Strategy**: -- **Graceful Degradation**: Continue processing remaining sections -- **Error Section**: Create error placeholder section: - ```json - { - "id": "section_failed_3", - "content_type": "paragraph", - "elements": [{ - "text": "[ERROR: Failed to generate content for this section. Error: <error_message>]" - }], - "order": 3, - "error": true, - "errorMessage": "<error_message>" - } - ``` -- **Logging**: Log errors for debugging but don't fail entire document -- **User Notification**: Include error count in final progress message - -### 3. Image Storage: Where to store generated images? - -**Answer**: Store images in JSON as base64, as renderers need them afterwards. 
- -**Implementation Strategy**: -- **In-Memory Storage**: Keep base64 strings in JSON structure during generation -- **JSON Structure**: Store in section elements: - ```json - { - "url": "data:image/png;base64,<base64_data>", - "base64Data": "<base64_data>", - "altText": "Image description", - "caption": "Optional caption" - } - ``` -- **Memory Management**: For very large images, consider compression or chunking -- **Renderer Access**: All renderers can access `base64Data` directly from JSON -- **HTML Special Case**: HTML renderer will extract base64, decode, and save as separate files during rendering - -### 4. Backward Compatibility: How to ensure existing workflows still work? - -**Answer**: No backwards compatibility needed. - -**Implementation Strategy**: -- **New Default**: Hierarchical generation becomes the default mode -- **Clean Migration**: All document generation uses hierarchical approach -- **No Fallback**: Remove single-pass mode (or keep as internal fallback only) -- **Breaking Change**: Acceptable since this is a new feature/enhancement - -## Next Steps - -1. **Review and Approval**: Get feedback on concept -2. **Detailed Design**: Design API and data structures -3. **Prototype**: Implement Phase 1 (structure generation) -4. **Testing**: Test with real use cases -5. **Full Implementation**: Implement all phases -6. **Migration**: Migrate existing workflows - diff --git a/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md deleted file mode 100644 index 55a0c35c..00000000 --- a/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md +++ /dev/null @@ -1,1067 +0,0 @@ -# Detailed Design: Hierarchical Document Generation with Image Integration - -## Table of Contents - -1. [Architecture Overview](#architecture-overview) -2. [Data Structures](#data-structures) -3. [Component Design](#component-design) -4. [API Design](#api-design) -5. 
[Image Handling](#image-handling) -6. [Progress Logging](#progress-logging) -7. [Error Handling](#error-handling) -8. [Performance Considerations](#performance-considerations) - -## Architecture Overview - -### System Flow - -``` -┌─────────────────────────────────────────────────────────────┐ -│ User Request: generateDocument │ -│ Parameters: prompt, documentList, resultType, etc. │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 1: Structure Generation │ -│ - Extract content from documentList (if provided) │ -│ - Cache extracted content │ -│ - Generate document skeleton with sections │ -│ - Identify section complexity │ -│ - Create generation hints │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 2: Content Generation (Parallel) │ -│ │ -│ Simple Sections (heading, short paragraph): │ -│ ┌────────────────────────────────────────┐ │ -│ │ Generate content directly via AI │ │ -│ │ Populate elements array │ │ -│ └────────────────────────────────────────┘ │ -│ │ -│ Complex Sections (image, long chapter): │ -│ ┌────────────────────────────────────────┐ │ -│ │ Create sub-prompt │ │ -│ │ Generate content (text or image) │ │ -│ │ Store in elements array │ │ -│ └────────────────────────────────────────┘ │ -│ │ -│ Progress Updates: │ -│ - "Generating section X/Y..." │ -│ - "Generating image for section X..." │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 3: Integration & Rendering │ -│ - Validate all sections have content │ -│ - Merge generated content into structure │ -│ - Replace placeholders with actual data │ -│ - Render to target format (docx, pdf, html, etc.) 
│ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Final Document(s) │ -│ - Single document (docx, pdf, html, etc.) │ -│ - Or multiple files (html + image files) │ -└─────────────────────────────────────────────────────────────┘ -``` - -## Data Structures - -### Document Structure (Phase 1 Output) - -```python -{ - "metadata": { - "title": str, - "split_strategy": str, # "single_document" | "multi_document" - "source_documents": List[str], - "extraction_method": str - }, - "documents": [ - { - "id": str, - "title": str, - "filename": str, - "sections": [ - { - "id": str, - "content_type": str, # "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" - "complexity": str, # "simple" | "complex" - "generation_hint": str, - "image_prompt": Optional[str], # Only for image sections - "order": int, - "elements": [], # Empty initially, populated in Phase 2 - "metadata": Optional[Dict[str, Any]] - } - ] - } - ] -} -``` - -### Section Content (Phase 2 Output) - -**Simple Section (heading)**: -```python -{ - "id": "section_title", - "content_type": "heading", - "elements": [ - { - "level": int, - "text": str - } - ], - "order": 1 -} -``` - -**Simple Section (paragraph)**: -```python -{ - "id": "section_intro", - "content_type": "paragraph", - "elements": [ - { - "text": str - } - ], - "order": 2 -} -``` - -**Complex Section (image)**: -```python -{ - "id": "section_image_1", - "content_type": "image", - "elements": [ - { - "url": "data:image/png;base64,", - "base64Data": str, # Full base64 encoded image - "altText": str, - "caption": Optional[str] - } - ], - "order": 3 -} -``` - -**Error Section**: -```python -{ - "id": "section_failed_4", - "content_type": "paragraph", - "elements": [ - { - "text": f"[ERROR: Failed to generate content for this section. 
Error: {error_message}]" - } - ], - "order": 4, - "error": True, - "errorMessage": str, - "originalContentType": str # Original content_type that failed -} -``` - -### Content Cache - -```python -{ - "extractedContent": List[ContentPart], # From extraction service - "extractionTimestamp": float, - "sourceDocuments": List[str] # Document IDs -} -``` - -### Generation Context - -```python -{ - "userPrompt": str, - "cachedContent": ContentCache, - "previousSections": List[Dict[str, Any]], # Already generated sections - "targetSection": Dict[str, Any], # Section to generate - "documentMetadata": Dict[str, Any] -} -``` - -## Component Design - -### 1. StructureGenerator - -**Purpose**: Generate document skeleton with section placeholders - -**Location**: `poweron/gateway/modules/services/serviceGeneration/subStructureGenerator.py` - -**Methods**: -```python -class StructureGenerator: - async def generateStructure( - self, - userPrompt: str, - documentList: Optional[DocumentReferenceList], - cachedContent: Optional[ContentCache], - services: Any - ) -> Dict[str, Any]: - """ - Generate document structure with sections. - - Returns: - Document structure with empty elements arrays - """ - - def _createStructurePrompt( - self, - userPrompt: str, - cachedContent: Optional[ContentCache], - services: Any - ) -> str: - """ - Create prompt for structure generation. - """ - - def _identifySectionComplexity( - self, - section: Dict[str, Any], - userPrompt: str - ) -> str: - """ - Identify if section is simple or complex. - - Rules: - - Images: always complex - - Long chapters (>maxSectionLength words): complex - - Others: simple - """ - - def _extractImagePrompts( - self, - structure: Dict[str, Any], - userPrompt: str - ) -> Dict[str, str]: - """ - Extract image generation prompts from structure and user prompt. - Maps section_id -> image_prompt - """ -``` - -### 2. 
ContentGenerator - -**Purpose**: Generate content for each section - -**Location**: `poweron/gateway/modules/services/serviceGeneration/subContentGenerator.py` - -**Methods**: -```python -class ContentGenerator: - async def generateContent( - self, - structure: Dict[str, Any], - cachedContent: Optional[ContentCache], - userPrompt: str, - services: Any, - progressCallback: Optional[Callable] = None - ) -> Dict[str, Any]: - """ - Generate content for all sections in structure. - - Args: - structure: Document structure from Phase 1 - cachedContent: Extracted content cache - userPrompt: Original user prompt - services: Services instance - progressCallback: Function to call for progress updates - - Returns: - Complete document structure with populated elements - """ - - async def _generateSectionContent( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any - ) -> Dict[str, Any]: - """ - Generate content for a single section. - - Returns: - Section with populated elements array - """ - - async def _generateSimpleSection( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any - ) -> Dict[str, Any]: - """ - Generate content for simple section (heading, paragraph). - """ - - async def _generateImageSection( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any - ) -> Dict[str, Any]: - """ - Generate image for image section. - Calls ai.generate action with image generation. - """ - - async def _generateComplexTextSection( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any - ) -> Dict[str, Any]: - """ - Generate content for complex text section (long chapter). - Uses focused sub-prompt. - """ - - async def _generateSectionsParallel( - self, - sections: List[Dict[str, Any]], - context: GenerationContext, - services: Any, - progressCallback: Optional[Callable] = None - ) -> List[Dict[str, Any]]: - """ - Generate content for multiple sections in parallel. 
- Uses asyncio.gather for parallel execution. - """ - - def _createSectionPrompt( - self, - section: Dict[str, Any], - context: GenerationContext - ) -> str: - """ - Create sub-prompt for section content generation. - """ -``` - -### 3. ContentIntegrator - -**Purpose**: Merge generated content and render final document - -**Location**: `poweron/gateway/modules/services/serviceGeneration/subContentIntegrator.py` - -**Methods**: -```python -class ContentIntegrator: - def integrateContent( - self, - structure: Dict[str, Any], - generatedSections: List[Dict[str, Any]] - ) -> Dict[str, Any]: - """ - Merge generated sections into document structure. - - Returns: - Complete document structure ready for rendering - """ - - def validateCompleteness( - self, - document: Dict[str, Any] - ) -> Tuple[bool, List[str]]: - """ - Validate that all sections have content. - - Returns: - (is_complete, list_of_missing_sections) - """ - - def createErrorSection( - self, - originalSection: Dict[str, Any], - errorMessage: str - ) -> Dict[str, Any]: - """ - Create error placeholder section. - """ -``` - -### 4. Modified generateDocument Action - -**Location**: `poweron/gateway/modules/workflows/methods/methodAi/actions/generateDocument.py` - -**Changes**: -```python -@action -async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Generate documents using hierarchical approach. 
- """ - # Extract parameters - prompt = parameters.get("prompt") - documentList = parameters.get("documentList", []) - resultType = parameters.get("resultType", "docx") - maxSectionLength = parameters.get("maxSectionLength", 500) - parallelGeneration = parameters.get("parallelGeneration", True) - progressLogging = parameters.get("progressLogging", True) - - # Create operation ID for progress tracking - operationId = f"doc_gen_{self.services.workflow.id}_{int(time.time())}" - parentOperationId = parameters.get('parentOperationId') - - try: - # Phase 1: Structure Generation - if progressLogging: - self.services.chat.progressLogStart( - operationId, - "Document", - "Structure Generation", - "Generating document structure...", - parentOperationId=parentOperationId - ) - - structureGenerator = StructureGenerator(self.services) - - # Extract and cache content if documentList provided - cachedContent = None - if documentList: - # Extract content once - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - if chatDocuments: - extractionOptions = ExtractionOptions( - prompt="Extract all content from documents", - mergeStrategy=MergeStrategy(mergeType="concatenate") - ) - extractedResults = self.services.extraction.extractContent( - chatDocuments, - extractionOptions - ) - cachedContent = { - "extractedContent": extractedResults, - "extractionTimestamp": time.time(), - "sourceDocuments": [doc.id for doc in chatDocuments] - } - - # Generate structure - structure = await structureGenerator.generateStructure( - userPrompt=prompt, - documentList=documentList, - cachedContent=cachedContent, - services=self.services - ) - - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") - - # Phase 2: Content Generation - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.34, - "Starting content generation..." 
- ) - - contentGenerator = ContentGenerator(self.services) - - def progressCallback(sectionIndex: int, totalSections: int, message: str): - if progressLogging: - progress = 0.34 + (0.56 * (sectionIndex / totalSections)) - self.services.chat.progressLogUpdate( - operationId, - progress, - f"Section {sectionIndex}/{totalSections}: {message}" - ) - - completeStructure = await contentGenerator.generateContent( - structure=structure, - cachedContent=cachedContent, - userPrompt=prompt, - services=self.services, - progressCallback=progressCallback - ) - - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") - - # Phase 3: Integration & Rendering - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.91, - "Rendering final document..." - ) - - # Use existing renderReport method - title = structure.get("metadata", {}).get("title", "Generated Document") - renderedContent, mimeType = await self.services.generation.renderReport( - extractedContent=completeStructure, - outputFormat=resultType, - title=title, - userPrompt=prompt, - aiService=self.services.ai - ) - - # Create document - document = self.services.generation._createDocument( - fileName=f"document.{resultType}", - mimeType=mimeType, - content=renderedContent, - base64encoded=(mimeType not in ["text/plain", "text/html", "text/markdown"]), - messageId=None - ) - - if progressLogging: - self.services.chat.progressLogFinish(operationId, True) - - return ActionResult.isSuccess( - documents=[ActionDocument( - documentName=f"document.{resultType}", - documentData=renderedContent, - mimeType=mimeType - )] - ) - - except Exception as e: - logger.error(f"Error in hierarchical document generation: {str(e)}") - if progressLogging: - self.services.chat.progressLogFinish(operationId, False) - return ActionResult.isFailure(error=str(e)) -``` - -## API Design - -### Structure Generation Prompt - -```python -def _createStructurePrompt( - userPrompt: str, - 
cachedContent: Optional[ContentCache], - services: Any -) -> str: - """ - Create prompt for structure generation. - """ - prompt = f""" -{'='*80} -USER REQUEST: -{'='*80} -{userPrompt} -{'='*80} - -TASK: Generate a document STRUCTURE (skeleton) with sections. -Do NOT generate actual content yet - only the structure. - -{'='*80} -EXTRACTED CONTENT (if available): -{'='*80} -{_formatCachedContent(cachedContent) if cachedContent else "No source documents provided."} -{'='*80} - -INSTRUCTIONS: -1. Analyze the user request and extracted content -2. Create a document structure with sections -3. For each section, specify: - - id: Unique identifier - - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" - - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) - - generation_hint: Brief description of what content should be generated - - image_prompt: (only for image sections) Detailed prompt for image generation - - order: Section order number - - elements: [] (empty array - will be populated later) - -4. Identify image sections: - - If user requests illustrations/images, create image sections - - Add image_prompt field with detailed description - - Set complexity to "complex" - -5. Identify complex text sections: - - Long chapters (>500 words expected) should be marked as "complex" - - Short paragraphs/headings should be "simple" - -6. 
Return ONLY valid JSON following this structure: -{{ - "metadata": {{ - "title": "Document Title", - "split_strategy": "single_document", - "source_documents": [], - "extraction_method": "ai_generation" - }}, - "documents": [ - {{ - "id": "doc_1", - "title": "Document Title", - "filename": "document.json", - "sections": [ - {{ - "id": "section_1", - "content_type": "heading", - "complexity": "simple", - "generation_hint": "Main title", - "order": 1, - "elements": [] - }}, - {{ - "id": "section_2", - "content_type": "image", - "complexity": "complex", - "generation_hint": "Illustration for chapter 1", - "image_prompt": "Detailed description for image generation", - "order": 2, - "elements": [] - }} - ] - }} - ] -}} - -Return ONLY the JSON structure. No explanations. -""" - return prompt -``` - -### Section Content Generation Prompt - -```python -def _createSectionPrompt( - section: Dict[str, Any], - context: GenerationContext -) -> str: - """ - Create sub-prompt for section content generation. - """ - sectionType = section.get("content_type") - generationHint = section.get("generation_hint", "") - - prompt = f""" -{'='*80} -SECTION TO GENERATE: -{'='*80} -Type: {sectionType} -Hint: {generationHint} -{'='*80} - -CONTEXT: -- User Request: {context.userPrompt} -- Previous Sections: {len(context.previousSections)} sections already generated -- Document Title: {context.documentMetadata.get('title', 'Unknown')} - -{'='*80} -EXTRACTED CONTENT (if available): -{'='*80} -{_formatCachedContent(context.cachedContent) if context.cachedContent else "None"} -{'='*80} - -TASK: Generate content for this section ONLY. - -INSTRUCTIONS: -1. Generate content appropriate for section type: {sectionType} -2. Use the generation hint: {generationHint} -3. Consider previous sections for continuity -4. Use extracted content if relevant - -5. 
Return ONLY the elements array for this section: - -For heading: -{{ - "elements": [ - {{"level": 1, "text": "Heading Text"}} - ] -}} - -For paragraph: -{{ - "elements": [ - {{"text": "Paragraph text content"}} - ] -}} - -For image: -{{ - "elements": [ - {{ - "url": "data:image/png;base64,", - "base64Data": "", - "altText": "Image description", - "caption": "Optional caption" - }} - ] -}} - -Return ONLY the elements array as JSON. No other text. -""" - return prompt -``` - -## Image Handling - -### Image Generation Flow - -```python -async def _generateImageSection( - section: Dict[str, Any], - context: GenerationContext, - services: Any -) -> Dict[str, Any]: - """ - Generate image for image section. - """ - imagePrompt = section.get("image_prompt") - if not imagePrompt: - raise ValueError(f"Image section {section.get('id')} missing image_prompt") - - # Call ai.generate action with image generation - from modules.workflows.methods.methodAi.actions.generate import generate - - generateParams = { - "prompt": imagePrompt, - "resultType": "png", - "parentOperationId": context.operationId - } - - result = await generate(self=services.ai, parameters=generateParams) - - if not result.success or not result.documents: - raise ValueError(f"Image generation failed: {result.error}") - - # Extract base64 image data - imageDoc = result.documents[0] - base64Data = imageDoc.documentData - - # Create image element - section["elements"] = [{ - "url": f"data:image/png;base64,{base64Data}", - "base64Data": base64Data, - "altText": section.get("generation_hint", "Image"), - "caption": section.get("metadata", {}).get("caption") - }] - - return section -``` - -### HTML Renderer Image Handling - -**Location**: `poweron/gateway/modules/services/serviceGeneration/renderers/rendererHtml.py` - -**Changes**: -```python -async def render( - self, - extractedContent: Dict[str, Any], - title: str, - userPrompt: str = None, - aiService=None -) -> Tuple[str, str]: - """ - Render HTML with separate 
image files. - - Returns: - (html_content, mime_type) - """ - # Generate HTML - htmlContent = await self._generateHtmlFromJson(...) - - # Extract images and create separate files - images = self._extractImages(extractedContent) - - if images: - # Create image files - imageFiles = [] - for idx, imageData in enumerate(images): - base64Data = imageData.get("base64Data") - if base64Data: - # Decode base64 - imageBytes = base64.b64decode(base64Data) - - # Create filename - filename = f"image_{idx + 1}.png" - - # Update HTML to use relative path - htmlContent = htmlContent.replace( - f'data:image/png;base64,{base64Data}', - filename - ) - - imageFiles.append({ - "filename": filename, - "content": imageBytes, - "mimeType": "image/png" - }) - - # Return HTML + image files info - # Note: This requires modification to return multiple files - # For now, embed base64 (will be updated in implementation) - return htmlContent, "text/html" - - return htmlContent, "text/html" - -def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]: - """ - Extract all images from JSON structure. 
- """ - images = [] - - documents = jsonContent.get("documents", []) - if not documents: - sections = jsonContent.get("sections", []) - documents = [{"sections": sections}] - - for doc in documents: - sections = doc.get("sections", []) - for section in sections: - if section.get("content_type") == "image": - elements = section.get("elements", []) - for element in elements: - if element.get("base64Data"): - images.append(element) - - return images -``` - -## Progress Logging - -### Progress Stages - -```python -PROGRESS_STAGES = { - "structure_generation": { - "start": 0.0, - "end": 0.33, - "messages": [ - "Extracting content from documents...", - "Generating document structure...", - "Structure generated" - ] - }, - "content_generation": { - "start": 0.34, - "end": 0.90, - "messages": [ - "Starting content generation...", - "Generating section {current}/{total}...", - "Generating image for section {section_id}...", - "Content generated" - ] - }, - "integration_rendering": { - "start": 0.91, - "end": 1.0, - "messages": [ - "Rendering final document...", - "Document complete" - ] - } -} -``` - -### Progress Callback Implementation - -```python -def createProgressCallback( - operationId: str, - totalSections: int, - services: Any -) -> Callable: - """ - Create progress callback function. 
- """ - def progressCallback( - sectionIndex: int, - totalSections: int, - message: str - ): - # Calculate progress - baseProgress = 0.34 # Start of content generation phase - phaseProgress = 0.56 # Length of content generation phase - sectionProgress = (sectionIndex / totalSections) * phaseProgress - currentProgress = baseProgress + sectionProgress - - # Update progress log - services.chat.progressLogUpdate( - operationId, - currentProgress, - f"Section {sectionIndex}/{totalSections}: {message}" - ) - - return progressCallback -``` - -## Error Handling - -### Error Section Creation - -```python -def createErrorSection( - originalSection: Dict[str, Any], - errorMessage: str -) -> Dict[str, Any]: - """ - Create error placeholder section. - """ - return { - "id": originalSection.get("id", "unknown"), - "content_type": "paragraph", # Change to paragraph for error display - "elements": [{ - "text": f"[ERROR: Failed to generate {originalSection.get('content_type', 'content')} for section '{originalSection.get('id', 'unknown')}'. Error: {errorMessage}]" - }], - "order": originalSection.get("order", 0), - "error": True, - "errorMessage": errorMessage, - "originalContentType": originalSection.get("content_type") - } -``` - -### Error Handling in Content Generation - -```python -async def _generateSectionContent( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any -) -> Dict[str, Any]: - """ - Generate content for a single section with error handling. 
- """ - try: - complexity = section.get("complexity", "simple") - contentType = section.get("content_type") - - if contentType == "image": - return await self._generateImageSection(section, context, services) - elif complexity == "complex": - return await self._generateComplexTextSection(section, context, services) - else: - return await self._generateSimpleSection(section, context, services) - - except Exception as e: - logger.error(f"Error generating section {section.get('id')}: {str(e)}") - return createErrorSection(section, str(e)) -``` - -## Performance Considerations - -### Parallel Generation - -```python -async def _generateSectionsParallel( - self, - sections: List[Dict[str, Any]], - context: GenerationContext, - services: Any, - progressCallback: Optional[Callable] = None -) -> List[Dict[str, Any]]: - """ - Generate content for multiple sections in parallel. - """ - async def generateWithProgress(section: Dict[str, Any], index: int): - if progressCallback: - progressCallback(index + 1, len(sections), f"Generating {section.get('content_type')}...") - - return await self._generateSectionContent(section, context, services) - - # Generate all sections in parallel - results = await asyncio.gather( - *[generateWithProgress(section, idx) for idx, section in enumerate(sections)], - return_exceptions=True - ) - - # Handle exceptions - generatedSections = [] - for idx, result in enumerate(results): - if isinstance(result, Exception): - logger.error(f"Error generating section {idx}: {str(result)}") - generatedSections.append( - createErrorSection(sections[idx], str(result)) - ) - else: - generatedSections.append(result) - - return generatedSections -``` - -### Batch Processing for Large Documents - -```python -async def generateContent( - self, - structure: Dict[str, Any], - cachedContent: Optional[ContentCache], - userPrompt: str, - services: Any, - progressCallback: Optional[Callable] = None, - batchSize: int = 10 -) -> Dict[str, Any]: - """ - Generate content 
with batching for large documents. - """ - documents = structure.get("documents", []) - - for doc in documents: - sections = doc.get("sections", []) - - # Process in batches - for batchStart in range(0, len(sections), batchSize): - batch = sections[batchStart:batchStart + batchSize] - - # Generate batch in parallel - generatedBatch = await self._generateSectionsParallel( - batch, - context, - services, - progressCallback - ) - - # Update sections - for idx, generated in enumerate(generatedBatch): - sections[batchStart + idx] = generated - - return structure -``` - -## Testing Strategy - -### Unit Tests - -1. **StructureGenerator Tests**: - - Test structure generation with/without source documents - - Test complexity identification - - Test image prompt extraction - -2. **ContentGenerator Tests**: - - Test simple section generation - - Test image section generation - - Test complex text section generation - - Test parallel generation - - Test error handling - -3. **ContentIntegrator Tests**: - - Test content merging - - Test validation - - Test error section creation - -### Integration Tests - -1. **End-to-End Tests**: - - Test complete document generation flow - - Test with images - - Test with long documents - - Test error scenarios - -2. **Renderer Tests**: - - Test HTML renderer with separate image files - - Test PDF renderer with embedded images - - Test XLSX/PPTX renderers with images - -### Performance Tests - -1. **Large Document Tests**: - - Test with 100+ sections - - Test parallel generation performance - - Test memory usage - -2. 
**Image Generation Tests**: - - Test multiple images - - Test large images - - Test image generation failures - diff --git a/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md deleted file mode 100644 index 4476c2b9..00000000 --- a/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md +++ /dev/null @@ -1,398 +0,0 @@ -# Implementation Plan: Hierarchical Document Generation - -## Overview - -This document outlines the step-by-step implementation plan for the hierarchical document generation system with image integration. - -## Implementation Phases - -### Phase 1: Core Infrastructure (Week 1) - -**Goal**: Set up core components and data structures - -#### Tasks: - -1. **Create StructureGenerator Component** - - [ ] Create `subStructureGenerator.py` - - [ ] Implement `generateStructure()` method - - [ ] Implement `_createStructurePrompt()` method - - [ ] Implement `_identifySectionComplexity()` method - - [ ] Implement `_extractImagePrompts()` method - - [ ] Add unit tests - -2. **Create ContentGenerator Component** - - [ ] Create `subContentGenerator.py` - - [ ] Implement `generateContent()` method - - [ ] Implement `_generateSectionContent()` method - - [ ] Implement `_generateSimpleSection()` method - - [ ] Implement `_generateComplexTextSection()` method - - [ ] Implement `_createSectionPrompt()` method - - [ ] Add unit tests - -3. **Create ContentIntegrator Component** - - [ ] Create `subContentIntegrator.py` - - [ ] Implement `integrateContent()` method - - [ ] Implement `validateCompleteness()` method - - [ ] Implement `createErrorSection()` method - - [ ] Add unit tests - -4. 
**Update generateDocument Action** - - [ ] Modify `generateDocument.py` to use hierarchical approach - - [ ] Add Phase 1: Structure generation - - [ ] Add Phase 2: Content generation (sequential first) - - [ ] Add Phase 3: Integration & rendering - - [ ] Add basic progress logging - - [ ] Add error handling - -**Deliverables**: -- Core components created -- Basic hierarchical generation working (sequential) -- Unit tests passing - -**Estimated Time**: 3-4 days - ---- - -### Phase 2: Image Generation Integration (Week 1-2) - -**Goal**: Integrate image generation into content generation - -#### Tasks: - -1. **Implement Image Section Generation** - - [ ] Add `_generateImageSection()` method to ContentGenerator - - [ ] Integrate with `ai.generate` action - - [ ] Handle base64 image data storage - - [ ] Add image prompt extraction from structure - - [ ] Add error handling for image generation failures - -2. **Update Structure Generation Prompt** - - [ ] Add image section detection in structure prompt - - [ ] Add image_prompt field extraction - - [ ] Test with user prompts requesting images - -3. **Test Image Integration** - - [ ] Test image generation in document structure - - [ ] Test multiple images in one document - - [ ] Test image generation failures - -**Deliverables**: -- Image generation integrated -- Images stored as base64 in JSON -- Error handling for image failures - -**Estimated Time**: 2-3 days - ---- - -### Phase 3: Parallel Processing & Progress Logging (Week 2) - -**Goal**: Implement parallel section generation and detailed progress logging - -#### Tasks: - -1. **Implement Parallel Generation** - - [ ] Add `_generateSectionsParallel()` method - - [ ] Use `asyncio.gather()` for parallel execution - - [ ] Add batch processing for large documents - - [ ] Handle exceptions in parallel execution - - [ ] Test parallel vs sequential performance - -2. 
**Enhance Progress Logging** - - [ ] Create progress callback system - - [ ] Add detailed progress messages: - - Structure generation progress - - Section-by-section progress - - Image generation progress - - Rendering progress - - [ ] Calculate accurate progress percentages - - [ ] Test progress updates - -3. **Update generateDocument Action** - - [ ] Integrate parallel generation - - [ ] Add progress callback to content generation - - [ ] Update progress logging throughout phases - -**Deliverables**: -- Parallel section generation working -- Detailed progress logging -- Performance improvements - -**Estimated Time**: 2-3 days - ---- - -### Phase 4: Renderer Updates (Week 2-3) - -**Goal**: Update renderers to properly handle images - -#### Tasks: - -1. **Update HTML Renderer** - - [ ] Modify `rendererHtml.py` - - [ ] Add `_extractImages()` method - - [ ] Implement separate image file creation - - [ ] Update HTML to use relative image paths - - [ ] Handle multiple image files - - [ ] Test HTML + image files output - -2. **Update PDF Renderer** - - [ ] Modify `rendererPdf.py` - - [ ] Update `_renderJsonImage()` to embed images - - [ ] Use `reportlab.platypus.Image()` with base64 - - [ ] Handle image sizing and positioning - - [ ] Test PDF with embedded images - -3. **Update XLSX Renderer** - - [ ] Modify `rendererXlsx.py` - - [ ] Add `_renderJsonImage()` method - - [ ] Use `openpyxl.drawing.image.Image()` to embed images - - [ ] Handle image placement in cells - - [ ] Test XLSX with images - -4. 
**Update PPTX Renderer** - - [ ] Modify `rendererPptx.py` - - [ ] Add `_renderJsonImage()` method - - [ ] Use `slide.shapes.add_picture()` to add images - - [ ] Handle image sizing on slides - - [ ] Test PPTX with images - -**Deliverables**: -- All renderers support images -- HTML creates separate image files -- PDF/XLSX/PPTX embed images directly - -**Estimated Time**: 4-5 days - ---- - -### Phase 5: Content Caching & Optimization (Week 3) - -**Goal**: Implement content caching to avoid re-extraction - -#### Tasks: - -1. **Implement Content Cache** - - [ ] Create ContentCache data structure - - [ ] Extract content once at start of generation - - [ ] Pass cached content to all sub-prompts - - [ ] Add cache validation (check if documents changed) - - [ ] Test cache reuse - -2. **Optimize Prompt Building** - - [ ] Update structure prompt to use cached content - - [ ] Update section prompts to use cached content - - [ ] Format cached content efficiently - - [ ] Test prompt sizes - -3. **Performance Testing** - - [ ] Test with large documents - - [ ] Test with multiple source documents - - [ ] Measure performance improvements - - [ ] Optimize bottlenecks - -**Deliverables**: -- Content caching implemented -- No redundant content extraction -- Performance optimized - -**Estimated Time**: 2-3 days - ---- - -### Phase 6: Error Handling & Edge Cases (Week 3-4) - -**Goal**: Robust error handling and edge case coverage - -#### Tasks: - -1. **Enhance Error Handling** - - [ ] Improve error section creation - - [ ] Add error recovery strategies - - [ ] Handle partial failures gracefully - - [ ] Add error logging and reporting - -2. **Handle Edge Cases** - - [ ] Empty document list - - [ ] No sections generated - - [ ] All sections fail - - [ ] Very large images - - [ ] Very long documents (100+ sections) - - [ ] Missing image prompts - - [ ] Invalid section types - -3. 
**Add Validation** - - [ ] Validate structure before content generation - - [ ] Validate content before integration - - [ ] Validate final document before rendering - - [ ] Add comprehensive error messages - -**Deliverables**: -- Robust error handling -- Edge cases covered -- Clear error messages - -**Estimated Time**: 2-3 days - ---- - -### Phase 7: Testing & Refinement (Week 4) - -**Goal**: Comprehensive testing and refinement - -#### Tasks: - -1. **Unit Testing** - - [ ] Complete unit tests for all components - - [ ] Test all methods - - [ ] Test error scenarios - - [ ] Achieve >80% code coverage - -2. **Integration Testing** - - [ ] Test end-to-end document generation - - [ ] Test with various document types - - [ ] Test with images - - [ ] Test with long documents - - [ ] Test error scenarios - -3. **Performance Testing** - - [ ] Test with 10, 50, 100+ sections - - [ ] Measure generation time - - [ ] Measure memory usage - - [ ] Compare parallel vs sequential - - [ ] Optimize if needed - -4. **User Acceptance Testing** - - [ ] Test with real user scenarios - - [ ] Test bedtime story with images (original use case) - - [ ] Test business documents - - [ ] Test technical documents - - [ ] Gather feedback - -5. 
**Documentation** - - [ ] Update API documentation - - [ ] Add code comments - - [ ] Update user guides - - [ ] Create examples - -**Deliverables**: -- Comprehensive test suite -- Performance benchmarks -- Documentation complete -- Ready for production - -**Estimated Time**: 3-4 days - ---- - -## Dependencies - -### External Dependencies -- `asyncio` - For parallel processing -- `base64` - For image encoding/decoding -- `reportlab` - For PDF image embedding -- `openpyxl` - For XLSX image embedding -- `python-pptx` - For PPTX image embedding - -### Internal Dependencies -- `serviceGeneration` - Main generation service -- `serviceAi` - AI service for generation -- `serviceExtraction` - Content extraction service -- `methodAi.actions.generate` - Image generation action -- `methodAi.actions.process` - Text generation action - -## Risk Mitigation - -### Risks and Mitigation Strategies - -1. **Risk**: Image generation failures break entire document - - **Mitigation**: Error handling creates error sections, continues processing - -2. **Risk**: Parallel generation causes memory issues - - **Mitigation**: Batch processing, limit concurrent operations - -3. **Risk**: Large base64 images cause JSON size issues - - **Mitigation**: Consider compression or chunking for very large images - -4. **Risk**: HTML renderer needs to return multiple files - - **Mitigation**: Modify return type or create file bundle system - -5. 
**Risk**: Performance not meeting expectations - - **Mitigation**: Profile and optimize bottlenecks, consider caching - -## Success Criteria - -### Functional Requirements -- ✅ Documents can be generated with embedded images -- ✅ HTML renderer creates separate image files -- ✅ PDF/XLSX/PPTX renderers embed images -- ✅ Progress logging shows detailed progress -- ✅ Error handling prevents complete failures -- ✅ Content extraction happens only once - -### Performance Requirements -- ✅ Parallel generation improves performance by 2x+ for multi-section documents -- ✅ Progress updates appear within 1 second of action -- ✅ Documents with 50+ sections complete in <5 minutes - -### Quality Requirements -- ✅ >80% code coverage -- ✅ All edge cases handled -- ✅ Clear error messages -- ✅ Comprehensive documentation - -## Rollout Plan - -### Step 1: Internal Testing (Week 4) -- Deploy to development environment -- Internal team testing -- Fix critical issues - -### Step 2: Beta Testing (Week 5) -- Deploy to staging environment -- Select beta users -- Gather feedback -- Fix issues - -### Step 3: Production Deployment (Week 6) -- Deploy to production -- Monitor performance -- Monitor errors -- Gather user feedback - -### Step 4: Optimization (Ongoing) -- Monitor usage patterns -- Optimize based on real-world usage -- Add enhancements based on feedback - -## Timeline Summary - -| Phase | Duration | Start | End | -|-------|----------|-------|-----| -| Phase 1: Core Infrastructure | 3-4 days | Day 1 | Day 4 | -| Phase 2: Image Integration | 2-3 days | Day 4 | Day 7 | -| Phase 3: Parallel Processing | 2-3 days | Day 7 | Day 10 | -| Phase 4: Renderer Updates | 4-5 days | Day 10 | Day 15 | -| Phase 5: Content Caching | 2-3 days | Day 15 | Day 18 | -| Phase 6: Error Handling | 2-3 days | Day 18 | Day 21 | -| Phase 7: Testing & Refinement | 3-4 days | Day 21 | Day 25 | - -**Total Estimated Time**: 4-5 weeks - -## Next Steps - -1. 
**Review and Approve Plan** - - Review implementation plan - - Approve timeline - - Assign resources - -2. **Set Up Development Environment** - - Create feature branch - - Set up test infrastructure - - Prepare development tools - -3. **Begin Phase 1** - - Start with StructureGenerator - - Set up project structure - - Begin implementation - diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py index a5971904..593ba555 100644 --- a/modules/workflows/workflowManager.py +++ b/modules/workflows/workflowManager.py @@ -167,50 +167,86 @@ class WorkflowManager: self.workflowProcessor = WorkflowProcessor(self.services) - # Get workflow mode to determine if complexity detection is needed + # Get workflow mode to determine if combined analysis is needed workflowMode = getattr(self.services.workflow, 'workflowMode', None) - skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION) + skipCombinedAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION) - if skipComplexityDetection: - logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan") + if skipCombinedAnalysis: + logger.info("Skipping combined analysis for AUTOMATION mode - using predefined plan") complexity = "moderate" # Default for automation workflows needsWorkflowHistory = False # Automation workflows don't need history detectedLanguage = None # No language detection in automation mode + normalizedRequest = userInput.prompt + intentText = userInput.prompt + contextItems = [] + workflowIntent = None else: - # Process user-uploaded documents from userInput for complexity detection - # This is the correct way: use the input data directly, not workflow state + # Process user-uploaded documents from userInput for combined analysis documents = [] if userInput.listFileId: try: documents = await self._processFileIds(userInput.listFileId, None) except Exception as e: - logger.warning(f"Failed to process user fileIds for complexity 
detection: {e}") + logger.warning(f"Failed to process user fileIds for combined analysis: {e}") - # Detect complexity (AI-based semantic understanding) using user input documents - # Also detects language for fast path responses - complexity, needsWorkflowHistory, detectedLanguage = await self.workflowProcessor.detectComplexity(userInput.prompt, documents) - logger.info(f"Request complexity detected: {complexity}, needsWorkflowHistory: {needsWorkflowHistory}, language: {detectedLanguage}") + # Phase 1+2: Kombinierte Analyse: Intent + Komplexität in einem AI-Call + analysisResult = await self._analyzeUserInputAndComplexity(userInput.prompt, documents) - # Set detected language for fast path (if detected) + # Extract results + detectedLanguage = analysisResult.get('detectedLanguage') + normalizedRequest = analysisResult.get('normalizedRequest') + intentText = analysisResult.get('intent') or userInput.prompt + contextItems = analysisResult.get('contextItems', []) + complexity = analysisResult.get('complexity', 'moderate') + needsWorkflowHistory = analysisResult.get('needsWorkflowHistory', False) + fastTrack = analysisResult.get('fastTrack', False) + + # Extract intent analysis fields and store as workflowIntent + workflowIntent = { + 'primaryGoal': analysisResult.get('primaryGoal'), + 'dataType': analysisResult.get('dataType', 'unknown'), + 'expectedFormats': analysisResult.get('expectedFormats', []), + 'qualityRequirements': analysisResult.get('qualityRequirements', {}), + 'successCriteria': analysisResult.get('successCriteria', []), + 'languageUserDetected': detectedLanguage, + 'needsWorkflowHistory': needsWorkflowHistory + } + + # Store needsWorkflowHistory in services + setattr(self.services, '_needsWorkflowHistory', bool(needsWorkflowHistory)) + + # Store workflowIntent in workflow object for reuse + if hasattr(self.services, 'workflow') and self.services.workflow: + self.services.workflow._workflowIntent = workflowIntent + + # Store normalized request and intent 
+ self.services.currentUserPrompt = intentText or userInput.prompt + self.services.currentUserPromptNormalized = normalizedRequest or intentText or userInput.prompt + if contextItems is not None: + self.services.currentUserContextItems = contextItems + + # Set detected language if detectedLanguage and isinstance(detectedLanguage, str): self._setUserLanguage(detectedLanguage) try: setattr(self.services, 'currentUserLanguage', detectedLanguage) except Exception: pass + + logger.info(f"Combined analysis: complexity={complexity}, needsWorkflowHistory={needsWorkflowHistory}, language={detectedLanguage}, fastTrack={fastTrack}") # Route to fast path for simple requests if history is not needed # Skip fast path for automation mode or if history is needed - if complexity == "simple" and not needsWorkflowHistory: + if not skipCombinedAnalysis and complexity == "simple" and not needsWorkflowHistory: logger.info("Routing to fast path for simple request") await self._executeFastPath(userInput, documents) return # Fast path completes the workflow - # Now send the first message (which will also process the documents again, but that's fine) - await self._sendFirstMessage(userInput) + # Now send the first message (use already analyzed data if available) + await self._sendFirstMessage(userInput, skipIntentionAnalysis=not skipCombinedAnalysis) # Route to full workflow for moderate/complex requests or automation mode - logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else "")) + logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipCombinedAnalysis else "")) taskPlan = await self._planTasks(userInput) await self._executeTasks(taskPlan) await self._processWorkflowResults() @@ -223,6 +259,143 @@ class WorkflowManager: # Helper functions + async def _analyzeUserInputAndComplexity( + self, + userPrompt: str, + documents: List[ChatDocument] + ) -> Dict[str, Any]: + """ + Phase 1+2: 
Kombinierte Analyse: Intent + Komplexität in einem AI-Call. + + Args: + userPrompt: User-Anfrage + documents: Liste der Dokumente + + Returns: + Dict mit: + - detectedLanguage: ISO 639-1 Sprachcode + - normalizedRequest: Vollständige, explizite Umformulierung + - intent: Kurze Kern-Anfrage + - contextItems: Große Datenblöcke als separate Dokumente + - complexity: "simple" | "moderate" | "complex" + - needsWorkflowHistory: bool + - fastTrack: bool + - primaryGoal: Hauptziel + - dataType: Datentyp + - expectedFormats: Erwartete Formate + - qualityRequirements: Qualitätsanforderungen + - successCriteria: Erfolgskriterien + """ + # Baue Dokument-Liste für Prompt + docListText = "" + if documents: + for i, doc in enumerate(documents, 1): + docListText += f"\n{i}. {doc.fileName} ({doc.mimeType}, {doc.fileSize} bytes)" + + analysisPrompt = f"""You are an input analyzer. From the user's message, perform ALL of the following in one pass: + +1. detectedLanguage: Detect ISO 639-1 language code (e.g., de, en, fr, it) +2. normalizedRequest: Full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details +3. intent: Concise single-paragraph core request in the detected language for high-level routing +4. contextItems: Supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content +5. complexity: "simple" | "moderate" | "complex" + - "simple": Only if NO documents AND NO web search required. Single question, straightforward answer (5-15s) + - "moderate": Multiple steps, some documents, structured response requiring some processing, or web search needed (30-60s) + - "complex": Multi-task workflow, many documents, research needed, content generation required, multi-step planning (60-120s) +6. 
needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work) +7. fastTrack: Boolean indicating if Fast Track is possible (simple requests without documents and without workflow history) +8. primaryGoal: The main objective the user wants to achieve +9. dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown) +10. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list [] +11. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}} +12. successCriteria: Specific success criteria that define completion (array of strings) + +Rules: +- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained +- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear +- Preserve critical references (URLs, filenames) in intent +- Normalize to the primary detected language if mixed-language +- Consider number of documents provided when determining complexity +- Consider need for external research or web search when determining complexity + +Documents provided: {len(documents)} document(s) +{docListText} + +Return ONLY JSON (no markdown) with this exact structure: +{{ + "detectedLanguage": "de|en|fr|it|...", + "normalizedRequest": "Full explicit instruction in detected language", + "intent": "Concise normalized request...", + "contextItems": [ + {{ + "title": "User context 1", + "mimeType": "text/plain", + "content": "Full extracted content block here" + }} + ], + "complexity": "simple" | "moderate" | "complex", + "needsWorkflowHistory": true|false, + 
"fastTrack": true|false, + "primaryGoal": "The main objective the user wants to achieve", + "dataType": "numbers|text|documents|analysis|code|unknown", + "expectedFormats": ["pdf", "docx", "xlsx", "txt", "json", "csv", "html", "md"], + "qualityRequirements": {{ + "accuracyThreshold": 0.0-1.0, + "completenessThreshold": 0.0-1.0 + }}, + "successCriteria": ["specific criterion 1", "specific criterion 2"] +}} + +## User Message +The following is the user's original input message. Analyze intent, normalize the request, determine complexity, and identify any large context blocks that should be moved to separate documents: + +################ USER INPUT START ################# +{userPrompt.replace('{', '{{').replace('}', '}}') if userPrompt else ''} +################ USER INPUT FINISH ################# +""" + + # AI-Call (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.services.ai.callAiPlanning( + prompt=analysisPrompt, + placeholders=None, + debugType="user_input_analysis" + ) + + # Parse Result + try: + jsonStart = aiResponse.find('{') if aiResponse else -1 + jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0 + if jsonStart != -1 and jsonEnd > jsonStart: + result = json.loads(aiResponse[jsonStart:jsonEnd]) + return result + else: + logger.warning("Could not parse combined analysis response, using defaults") + return self._getDefaultAnalysisResult() + except Exception as e: + logger.warning(f"Error parsing combined analysis response: {str(e)}, using defaults") + return self._getDefaultAnalysisResult() + + def _getDefaultAnalysisResult(self) -> Dict[str, Any]: + """Fallback Default-Werte wenn Parsing fehlschlägt.""" + return { + "detectedLanguage": "en", + "normalizedRequest": "", + "intent": "", + "contextItems": [], + "complexity": "moderate", + "needsWorkflowHistory": False, + "fastTrack": False, + "primaryGoal": None, + "dataType": "unknown", + "expectedFormats": [], + 
"qualityRequirements": { + "accuracyThreshold": 0.8, + "completenessThreshold": 0.8 + }, + "successCriteria": [] + } + async def _executeFastPath(self, userInput: UserInputRequest, documents: List[ChatDocument]) -> None: """Execute fast path for simple requests and deliver result to user""" try: @@ -330,7 +503,7 @@ class WorkflowManager: await self._executeTasks(taskPlan) await self._processWorkflowResults() - async def _sendFirstMessage(self, userInput: UserInputRequest) -> None: + async def _sendFirstMessage(self, userInput: UserInputRequest, skipIntentionAnalysis: bool = False) -> None: """Send first message to start workflow""" try: workflow = self.services.workflow @@ -360,21 +533,58 @@ class WorkflowManager: } # Analyze the user's input to detect language, normalize request, extract intent, and offload bulky context into documents - # SKIP user intention analysis for AUTOMATION mode - it uses predefined JSON plans + # SKIP user intention analysis if already done in combined analysis (skipIntentionAnalysis=True) + # or for AUTOMATION mode - it uses predefined JSON plans createdDocs = [] workflowMode = getattr(workflow, 'workflowMode', None) - skipIntentionAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION) + skipIntentionAnalysis = skipIntentionAnalysis or (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION) if skipIntentionAnalysis: - logger.info("Skipping user intention analysis for AUTOMATION mode - using direct user input") - # For automation mode, use user input directly without AI analysis - self.services.currentUserPrompt = userInput.prompt - # Always set currentUserPromptNormalized - use user input directly for automation mode - self.services.currentUserPromptNormalized = userInput.prompt - detectedLanguage = None - normalizedRequest = None - intentText = userInput.prompt - contextItems = [] + logger.info("Skipping user intention analysis (already done in combined analysis or AUTOMATION mode)") + # Use already analyzed data if 
available, otherwise use user input directly + detectedLanguage = getattr(self.services, 'currentUserLanguage', None) + normalizedRequest = getattr(self.services, 'currentUserPromptNormalized', None) or userInput.prompt + intentText = getattr(self.services, 'currentUserPrompt', None) or userInput.prompt + contextItems = getattr(self.services, 'currentUserContextItems', None) or [] + workflowIntent = getattr(workflow, '_workflowIntent', None) + + # Create documents for context items (if available from combined analysis) + if contextItems and isinstance(contextItems, list): + for idx, item in enumerate(contextItems): + try: + title = item.get('title') if isinstance(item, dict) else None + mime = item.get('mimeType') if isinstance(item, dict) else None + content = item.get('content') if isinstance(item, dict) else None + if not content: + continue + fileName = (title or f"user_context_{idx+1}.txt").strip() + mimeType = (mime or "text/plain").strip() + + # Neutralize content before storing if neutralization is enabled + contentBytes = content.encode('utf-8') + contentBytes = await self._neutralizeContentIfEnabled(contentBytes, mimeType) + + # Create file in component storage + fileItem = self.services.interfaceDbComponent.createFile( + name=fileName, + mimeType=mimeType, + content=contentBytes + ) + # Persist file data + self.services.interfaceDbComponent.createFileData(fileItem.id, contentBytes) + + # Collect file info + fileInfo = self.services.chat.getFileInfo(fileItem.id) + from modules.datamodels.datamodelChat import ChatDocument + doc = ChatDocument( + fileId=fileItem.id, + fileName=fileInfo.get("fileName", fileName) if fileInfo else fileName, + fileSize=fileInfo.get("size", len(contentBytes)) if fileInfo else len(contentBytes), + mimeType=fileInfo.get("mimeType", mimeType) if fileInfo else mimeType + ) + createdDocs.append(doc) + except Exception: + continue else: try: analyzerPrompt = ( diff --git a/tests/functional/test09_document_generation_formats.py 
b/tests/functional/test09_document_generation_formats.py index 0834f440..49860665 100644 --- a/tests/functional/test09_document_generation_formats.py +++ b/tests/functional/test09_document_generation_formats.py @@ -39,6 +39,7 @@ class DocumentGenerationFormatsTester: self.workflow = None self.testResults = {} self.generatedDocuments = {} + self.pdfFileId = None # Store PDF file ID for reuse async def initialize(self): """Initialize the test environment.""" @@ -53,17 +54,123 @@ class DocumentGenerationFormatsTester: print(f"Initialized test with user: {self.testUser.id}") print(f"Mandate ID: {self.testUser.mandateId}") print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}") + + # Upload PDF file for testing + await self.uploadPdfFile() + + async def uploadPdfFile(self): + """Upload the PDF file and store its file ID.""" + pdfPath = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "temp", "B2025-02c.pdf") + pdfPath = os.path.abspath(pdfPath) + + if not os.path.exists(pdfPath): + print(f"⚠️ Warning: PDF file not found at {pdfPath}") + print(" Test will continue without PDF attachment") + return + + try: + # Read PDF file + with open(pdfPath, "rb") as f: + pdfContent = f.read() + + # Create file using services.interfaceDbComponent + if not hasattr(self.services, 'interfaceDbComponent') or not self.services.interfaceDbComponent: + print("⚠️ Warning: interfaceDbComponent not available in services") + print(" Test will continue without PDF attachment") + return + + interfaceDbComponent = self.services.interfaceDbComponent + + fileItem = interfaceDbComponent.createFile( + name="B2025-02c.pdf", + mimeType="application/pdf", + content=pdfContent + ) + + # Store file data + interfaceDbComponent.createFileData(fileItem.id, pdfContent) + + self.pdfFileId = fileItem.id + print(f"✅ Uploaded PDF file: {fileItem.fileName} (ID: {self.pdfFileId}, Size: {len(pdfContent)} bytes)") + + except Exception as e: + import traceback + 
print(f"⚠️ Warning: Failed to upload PDF file: {str(e)}") + print(f" Traceback: {traceback.format_exc()}") + print(" Test will continue without PDF attachment") def createTestPrompt(self, format: str) -> str: - """Create a test prompt for document generation in the specified format.""" + """Create a unified test prompt for document generation in the specified format. + + The prompt requests: + - Extraction of images from the attached PDF + - Generation of a new image + - Document creation with both images + """ + basePrompt = ( + "Create a professional document about 'Fuel Station Receipt Analysis' with the following content:\n" + "1) A main title\n" + "2) An introduction paragraph explaining the receipt analysis\n" + "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n" + "4) A section analyzing the receipt data with bullet points\n" + "5) Generate a new image showing a visual representation of fuel consumption trends\n" + "6) A conclusion paragraph with recommendations\n\n" + "Make sure to include both: the image extracted from the PDF and the newly generated image.\n" + f"Format the output as {format.upper()}." + ) + return basePrompt + + def createRefactoringTestPrompt(self, testType: str, format: str = "html") -> str: + """Create test prompts for specific refactoring features. 
+ + Args: + testType: Type of refactoring test: + - "intent_analysis": Test DocumentIntent analysis + - "conditional_extraction": Test conditional extraction (extract vs render) + - "image_render": Test image rendering as asset + - "multi_document": Test multi-document rendering + - "metadata_preservation": Test metadata preservation + format: Output format (default: html) + """ prompts = { - "html": "Create a professional HTML document about 'The Future of Artificial Intelligence' with: 1) A main title, 2) An introduction paragraph, 3) Three key sections with headings, 4) A bullet list of benefits, 5) An image showing AI technology (generate it), 6) A conclusion paragraph. Format as HTML.", - "pdf": "Create a professional PDF report about 'Climate Change Impact Analysis' with: 1) A title page, 2) An executive summary, 3) Three main sections with data tables, 4) Charts/graphs described, 5) An image showing environmental impact (generate it), 6) Conclusions and recommendations. Format as PDF.", - "docx": "Create a comprehensive Word document about 'Project Management Best Practices' with: 1) A cover page with title, 2) Table of contents, 3) Five chapters with headings and paragraphs, 4) A table comparing methodologies, 5) An image illustrating project workflow (generate it), 6) Appendices. Format as DOCX.", - "xlsx": "Create an Excel workbook about 'Sales Performance Analysis' with: 1) A summary sheet with key metrics, 2) A detailed data sheet with sales data in a table format (columns: Month, Product, Sales, Units, Revenue), 3) A chart sheet with visualizations described, 4) An analysis sheet with calculations. Format as XLSX.", - "pptx": "Create a PowerPoint presentation about 'Digital Transformation Strategy' with: 1) A title slide, 2) An agenda slide, 3) Five content slides with bullet points, 4) A slide with an image showing transformation roadmap (generate it), 5) A conclusion slide. Format as PPTX." 
+ "intent_analysis": ( + "Create a document with the following requirements:\n" + "1) Extract text content from the attached PDF\n" + "2) Include images from the PDF as visual elements (render them, don't extract text from them)\n" + "3) Generate a summary document\n\n" + "This tests that the system correctly identifies which documents need extraction vs rendering." + ), + "conditional_extraction": ( + "Create a document that:\n" + "1) Extracts and uses text from the attached PDF\n" + "2) Renders images from the PDF as visual assets (not as extracted text)\n" + "3) Generates new content based on the extracted text\n\n" + "This tests conditional extraction - only extract what needs extraction, render what needs rendering." + ), + "image_render": ( + "Create a document that includes images from the attached PDF.\n" + "The images should be rendered as visual elements in the document, not extracted as text.\n" + "Include a title and description for each image.\n\n" + "This tests the image asset pipeline with render intent." + ), + "multi_document": ( + "Create multiple separate documents:\n" + "1) Document 1: Summary of the PDF content\n" + "2) Document 2: Analysis of the PDF content\n" + "3) Document 3: Recommendations based on the PDF content\n\n" + "Each document should be separate and complete.\n" + "This tests multi-document generation and rendering." + ), + "metadata_preservation": ( + "Create a document that extracts content from the attached PDF.\n" + "The document should clearly show which content came from which source document.\n" + "Include source references in the generated content.\n\n" + "This tests that metadata (documentId, mimeType) is preserved in the generation prompt." + ) } - return prompts.get(format.lower(), prompts["docx"]) + + prompt = prompts.get(testType, self.createTestPrompt(format)) + return f"{prompt}\n\nFormat the output as {format.upper()}." 
async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]: """Generate a document in the specified format using workflow.""" @@ -74,9 +181,18 @@ class DocumentGenerationFormatsTester: prompt = self.createTestPrompt(format) print(f"Prompt: {prompt[:200]}...") + # Create user input request with PDF file attachment + listFileId = [] + if self.pdfFileId: + listFileId = [self.pdfFileId] + print(f"Attaching PDF file (ID: {self.pdfFileId})") + else: + print("⚠️ No PDF file attached (file upload may have failed)") + # Create user input request userInput = UserInputRequest( prompt=prompt, + listFileId=listFileId, userLanguage="en" ) @@ -281,6 +397,166 @@ class DocumentGenerationFormatsTester: return verification + async def testRefactoringFeatures(self) -> Dict[str, Any]: + """Test specific refactoring features.""" + print("\n" + "="*80) + print("TESTING REFACTORING FEATURES") + print("="*80) + + refactoringTests = [ + ("intent_analysis", "html"), + ("conditional_extraction", "html"), + ("image_render", "html"), + ("multi_document", "html"), + ("metadata_preservation", "html") + ] + + results = {} + + for testType, format in refactoringTests: + try: + print(f"\n{'='*80}") + print(f"Testing Refactoring Feature: {testType}") + print(f"{'='*80}") + + prompt = self.createRefactoringTestPrompt(testType, format) + print(f"Prompt: {prompt[:200]}...") + + # Create user input request with PDF file attachment + listFileId = [] + if self.pdfFileId: + listFileId = [self.pdfFileId] + print(f"Attaching PDF file (ID: {self.pdfFileId})") + else: + print("⚠️ No PDF file attached (file upload may have failed)") + + userInput = UserInputRequest( + prompt=prompt, + listFileId=listFileId, + userLanguage="en" + ) + + # Start workflow + print(f"\nStarting workflow for {testType} test...") + workflow = await chatStart( + currentUser=self.testUser, + userInput=userInput, + workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC, + workflowId=None + ) + + if not workflow: + results[testType] = 
{ + "success": False, + "error": "Failed to start workflow" + } + continue + + self.workflow = workflow + print(f"Workflow started: {workflow.id}") + + # Wait for workflow completion + completed = await self.waitForWorkflowCompletion(timeout=300) + + if not completed: + results[testType] = { + "success": False, + "error": "Workflow did not complete within timeout", + "workflowId": workflow.id + } + continue + + # Analyze results + workflowResults = self.analyzeWorkflowResults() + + # Check for specific refactoring features + verification = self.verifyRefactoringFeature(testType, workflowResults) + + results[testType] = { + "success": True, + "workflowId": workflow.id, + "verification": verification, + "workflowResults": workflowResults + } + + print(f"\n✅ {testType} test completed!") + print(f" Verification: {'✅ PASS' if verification.get('passed', False) else '❌ FAIL'}") + if verification.get("details"): + for detail in verification["details"]: + print(f" - {detail}") + + await asyncio.sleep(2) + + except Exception as e: + import traceback + print(f"\n❌ Error testing {testType}: {str(e)}") + print(traceback.format_exc()) + results[testType] = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + + return results + + def verifyRefactoringFeature(self, testType: str, workflowResults: Dict[str, Any]) -> Dict[str, Any]: + """Verify that a refactoring feature works correctly.""" + documents = workflowResults.get("documents", []) + logs = workflowResults.get("logs", []) + + verification = { + "testType": testType, + "passed": False, + "details": [] + } + + if testType == "intent_analysis": + # Check that intent analysis was performed + intentLogs = [log for log in logs if "intent" in str(log).lower() or "analyzing document intent" in str(log).lower()] + if intentLogs: + verification["details"].append("Intent analysis logs found") + verification["passed"] = True + else: + verification["details"].append("No intent analysis logs found") + + 
elif testType == "conditional_extraction": + # Check that extraction and rendering both occurred + extractionLogs = [log for log in logs if "extract" in str(log).lower()] + renderLogs = [log for log in logs if "render" in str(log).lower() or "image" in str(log).lower()] + if extractionLogs and renderLogs: + verification["details"].append("Both extraction and rendering occurred") + verification["passed"] = True + else: + verification["details"].append(f"Missing logs: extraction={len(extractionLogs)}, render={len(renderLogs)}") + + elif testType == "image_render": + # Check that images were rendered (not extracted as text) + imageLogs = [log for log in logs if "image" in str(log).lower()] + if imageLogs: + verification["details"].append("Image rendering logs found") + verification["passed"] = True + else: + verification["details"].append("No image rendering logs found") + + elif testType == "multi_document": + # Check that multiple documents were generated + if len(documents) >= 2: + verification["details"].append(f"Multiple documents generated: {len(documents)}") + verification["passed"] = True + else: + verification["details"].append(f"Expected multiple documents, got {len(documents)}") + + elif testType == "metadata_preservation": + # Check that metadata was preserved (check logs for documentId references) + metadataLogs = [log for log in logs if "documentId" in str(log) or "SOURCE:" in str(log)] + if metadataLogs: + verification["details"].append("Metadata preservation logs found") + verification["passed"] = True + else: + verification["details"].append("No metadata preservation logs found") + + return verification + async def testAllFormats(self) -> Dict[str, Any]: """Test document generation in all formats.""" print("\n" + "="*80) @@ -334,8 +610,12 @@ class DocumentGenerationFormatsTester: return results - async def runTest(self): - """Run the complete test.""" + async def runTest(self, includeRefactoringTests: bool = True): + """Run the complete test. 
+ + Args: + includeRefactoringTests: If True, also run refactoring feature tests + """ print("\n" + "="*80) print("DOCUMENT GENERATION FORMATS TEST") print("="*80) @@ -344,18 +624,43 @@ class DocumentGenerationFormatsTester: # Initialize await self.initialize() + # Test refactoring features first (if enabled) + refactoringResults = {} + if includeRefactoringTests: + refactoringResults = await self.testRefactoringFeatures() + # Test all formats - results = await self.testAllFormats() + formatResults = await self.testAllFormats() # Summary print("\n" + "="*80) print("TEST SUMMARY") print("="*80) + # Refactoring tests summary + refactoringSuccessCount = 0 + refactoringFailCount = 0 + if includeRefactoringTests and refactoringResults: + print("\nRefactoring Features:") + for testType, result in refactoringResults.items(): + if result.get("success"): + refactoringSuccessCount += 1 + verification = result.get("verification", {}) + passed = verification.get("passed", False) + statusIcon = "✅" if passed else "⚠️" + print(f"{statusIcon} {testType:25s}: {'PASS' if passed else 'FAIL'}") + else: + refactoringFailCount += 1 + error = result.get("error", "Unknown error") + print(f"❌ {testType:25s}: FAIL - {error}") + print(f"Refactoring Tests: {refactoringSuccessCount} passed, {refactoringFailCount} failed out of {len(refactoringResults)} tests") + + # Format tests summary + print("\nFormat Tests:") successCount = 0 failCount = 0 - for format, result in results.items(): + for format, result in formatResults.items(): if result.get("success"): successCount += 1 status = "✅ PASS" @@ -369,14 +674,28 @@ class DocumentGenerationFormatsTester: error = result.get("error", "Unknown error") print(f"❌ {format.upper():6s}: FAIL - {error}") - print(f"\nTotal: {successCount} passed, {failCount} failed out of {len(results)} formats") + print(f"\nFormat Tests: {successCount} passed, {failCount} failed out of {len(formatResults)} formats") + + # Calculate totals + totalSuccess = successCount + 
refactoringSuccessCount if includeRefactoringTests else successCount + totalFail = failCount + refactoringFailCount if includeRefactoringTests else failCount self.testResults = { - "success": failCount == 0, - "successCount": successCount, - "failCount": failCount, - "totalFormats": len(results), - "results": results + "success": failCount == 0 and (not includeRefactoringTests or refactoringFailCount == 0), + "formatTests": { + "successCount": successCount, + "failCount": failCount, + "totalFormats": len(formatResults), + "results": formatResults + }, + "refactoringTests": { + "successCount": refactoringSuccessCount if includeRefactoringTests else 0, + "failCount": refactoringFailCount if includeRefactoringTests else 0, + "totalTests": len(refactoringResults) if includeRefactoringTests else 0, + "results": refactoringResults if includeRefactoringTests else {} + }, + "totalSuccess": totalSuccess, + "totalFail": totalFail } return self.testResults From 23bb1ff5d30e668d0e416ef90acb75b70a39c955 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Thu, 25 Dec 2025 00:24:53 +0100 Subject: [PATCH 08/21] fixed json identification for pre-extracted content --- modules/services/serviceAi/mainServiceAi.py | 28 +++++++++++-------- .../mainServiceGeneration.py | 28 +++++++++++++++++-- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 30e7cc88..331a3289 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -1202,21 +1202,24 @@ If no trackable items can be identified, return: {{"kpis": []}} return None # Check for ContentExtracted format + # Nur Format 1 (ActionDocument-Format mit validationMetadata) wird unterstützt documentData = None - if "parts" in jsonData and isinstance(jsonData.get("parts"), list): - # Direct ContentExtracted format: {"id": "...", "parts": [...], ...} - documentData = jsonData - else: - 
validationMetadata = jsonData.get("validationMetadata", {}) - actionType = validationMetadata.get("actionType") - if actionType == "context.extractContent": - # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} - documentData = jsonData.get("documentData") + + validationMetadata = jsonData.get("validationMetadata", {}) + actionType = validationMetadata.get("actionType") + if actionType == "context.extractContent": + # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} + documentData = jsonData.get("documentData") + logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}") if documentData: from modules.datamodels.datamodelExtraction import ContentExtracted try: + # Stelle sicher, dass "id" vorhanden ist + if "id" not in documentData: + documentData["id"] = document.id + contentExtracted = ContentExtracted(**documentData) if contentExtracted.parts: @@ -1235,8 +1238,8 @@ If no trackable items can be identified, return: {{"kpis": []}} if not originalMimeType and part.metadata.get("documentMimeType"): originalMimeType = part.metadata.get("documentMimeType") - # Falls nicht gefunden, verwende documentName aus ContentExtracted - if not originalFileName and hasattr(contentExtracted, 'id'): + # Falls nicht gefunden, versuche aus documentName zu extrahieren + if not originalFileName: # Versuche aus documentName zu extrahieren (z.B. 
"B2025-02c_28_extracted_...json" -> "B2025-02c_28.pdf") if document.fileName and "_extracted_" in document.fileName: originalFileName = document.fileName.split("_extracted_")[0] + ".pdf" @@ -1252,7 +1255,8 @@ If no trackable items can be identified, return: {{"kpis": []}} "parts": contentExtracted.parts } except Exception as parseError: - logger.debug(f"Could not parse ContentExtracted format: {str(parseError)}") + logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}") + logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}") return None return None diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index cababbeb..e08eaa81 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -58,11 +58,35 @@ class GenerationService: # Detect MIME without relying on a service center mime_type = detectMimeTypeFromContent(content, doc.documentName) + # WICHTIG: Für ActionDocuments mit validationMetadata (z.B. 
context.extractContent) + # müssen wir das gesamte ActionDocument serialisieren, nicht nur documentData + document_data = doc.documentData + if hasattr(doc, 'validationMetadata') and doc.validationMetadata: + # Wenn validationMetadata vorhanden ist, serialisiere das gesamte ActionDocument-Format + if mime_type == "application/json": + # Erstelle ActionDocument-Format mit validationMetadata und documentData + if hasattr(document_data, 'model_dump'): + # Pydantic v2 + document_data_dict = document_data.model_dump() + elif hasattr(document_data, 'dict'): + # Pydantic v1 + document_data_dict = document_data.dict() + elif isinstance(document_data, dict): + document_data_dict = document_data + else: + document_data_dict = {"data": str(document_data)} + + # Erstelle ActionDocument-Format + document_data = { + "validationMetadata": doc.validationMetadata, + "documentData": document_data_dict + } + return { 'fileName': doc.documentName, - 'fileSize': len(str(doc.documentData)), + 'fileSize': len(str(document_data)), 'mimeType': mime_type, - 'content': doc.documentData, + 'content': document_data, 'document': doc } except Exception as e: From a2315d6ace26832ad225db782d03904d6d5f91f5 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Thu, 25 Dec 2025 00:34:45 +0100 Subject: [PATCH 09/21] fixed vision for pre-extracted content --- modules/services/serviceAi/mainServiceAi.py | 102 ++++++++++++++++++-- 1 file changed, 93 insertions(+), 9 deletions(-) diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 331a3289..74b90346 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -1135,7 +1135,8 @@ If no trackable items can be identified, return: {{"kpis": []}} fileName=preExtracted["originalDocument"]["fileName"], mimeType=preExtracted["originalDocument"]["mimeType"], fileSize=preExtracted["originalDocument"].get("fileSize", doc.fileSize), - fileId=doc.fileId # Behalte fileId 
vom JSON + fileId=doc.fileId, # Behalte fileId vom JSON + messageId=doc.messageId if hasattr(doc, 'messageId') else None # Behalte messageId falls vorhanden ) resolvedDocuments.append(originalDoc) else: @@ -1264,6 +1265,39 @@ If no trackable items can be identified, return: {{"kpis": []}} logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}") return None + async def _extractTextFromImage(self, imagePart: ContentPart, extractionPrompt: str) -> Optional[str]: + """ + Extrahiere Text aus einem Image-Part mit Vision AI. + + Args: + imagePart: ContentPart mit typeGroup="image" + extractionPrompt: Prompt für die Text-Extraktion + + Returns: + Extrahierter Text oder None bei Fehler + """ + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions + + # Erstelle AI-Call-Request mit Image-Part + request = AiCallRequest( + prompt=extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting.", + context="", + options=AiCallOptions(operationType="extraction"), + contentParts=[imagePart] + ) + + # Verwende AI-Service für Vision AI-Verarbeitung + response = await self.services.ai.call(request) + + if response and response.content: + return response.content.strip() + + return None + except Exception as e: + logger.warning(f"Error extracting text from image {imagePart.id}: {str(e)}") + return None + def _buildIntentAnalysisPrompt( self, userPrompt: str, @@ -1420,12 +1454,22 @@ Return ONLY valid JSON following the structure above. # Wenn Intent "render" für Images hat, erstelle auch object Part if "render" in partIntent and part.typeGroup == "image" and part.data: # Image-Part mit render Intent: Erstelle sowohl extracted als auch object Part - # 1. 
Extracted Part (bereits vorhanden) - part.metadata["intent"] = "extract" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = True - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) + # 1. Extracted Part - prüfe ob "extract" Intent vorhanden ist + if "extract" in partIntent: + # Image hat sowohl extract als auch render Intent + # Extracted Part: Wird mit Vision AI verarbeitet (skipExtraction=False) + part.metadata["intent"] = "extract" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = False # WICHTIG: Vision AI-Verarbeitung nötig! + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) + else: + # Nur render Intent - kein Text-Extraktion nötig + part.metadata["intent"] = "render" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = True + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) # 2. Object Part für Rendering (base64 data ist bereits im extracted Part) objectPart = ContentPart( @@ -1444,11 +1488,51 @@ Return ONLY valid JSON following the structure above. 
} ) allContentParts.append(objectPart) + elif part.typeGroup == "image" and "extract" in partIntent: + # Image mit extract Intent: Vision AI-Verarbeitung nötig + # Verarbeite Image mit Vision AI, um Text zu extrahieren + try: + extractedText = await self._extractTextFromImage(part, intent.extractionPrompt if intent else "Extract all text content from this image") + if extractedText: + # Erstelle neuen Text-Part mit extrahiertem Text + textPart = ContentPart( + id=f"extracted_{part.id}", + label=f"Extracted text from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=extractedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedImagePartId": part.id, + "extractionPrompt": intent.extractionPrompt if intent else "Extract all text content from this image" + } + ) + allContentParts.append(textPart) + logger.info(f"✅ Extracted text from image {part.id} using Vision AI") + + # Wenn auch render Intent vorhanden, füge Image-Part hinzu + if "render" in partIntent: + part.metadata["intent"] = "render" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = True + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) + except Exception as e: + logger.warning(f"Failed to extract text from image {part.id}: {str(e)}, adding image as-is") + # Fallback: Füge Image-Part hinzu ohne Text-Extraktion + part.metadata["intent"] = "extract" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = False + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) else: - # Normales extracted Part + # Normales extracted Part (kein Image oder kein extract Intent) part.metadata["intent"] = partIntent[0] if partIntent else "extract" part.metadata["fromExtractContent"] = True - 
part.metadata["skipExtraction"] = True + part.metadata["skipExtraction"] = True # Bereits extrahiert part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] allContentParts.append(part) From 60a0543e865b212f0ecfe4ff07ae3a481c2a9042 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Thu, 25 Dec 2025 23:51:47 +0100 Subject: [PATCH 10/21] refactored ai service with chapter generation --- .../serviceAi/README_MODULE_STRUCTURE.md | 78 ++ .../services/serviceAi/REFACTORING_PLAN.md | 126 ++ modules/services/serviceAi/mainServiceAi.py | 1129 ++++++++++++++--- .../serviceAi/subContentExtraction.py | 670 ++++++++++ .../services/serviceAi/subDocumentIntents.py | 302 +++++ .../services/serviceAi/subResponseParsing.py | 275 ++++ .../services/serviceAi/subStructureFilling.py | 546 ++++++++ .../serviceAi/subStructureGeneration.py | 229 ++++ .../mainServiceExtraction.py | 26 +- .../renderers/rendererHtml.py | 97 +- .../test09_document_generation_formats.py | 21 +- 11 files changed, 3256 insertions(+), 243 deletions(-) create mode 100644 modules/services/serviceAi/README_MODULE_STRUCTURE.md create mode 100644 modules/services/serviceAi/REFACTORING_PLAN.md create mode 100644 modules/services/serviceAi/subContentExtraction.py create mode 100644 modules/services/serviceAi/subDocumentIntents.py create mode 100644 modules/services/serviceAi/subResponseParsing.py create mode 100644 modules/services/serviceAi/subStructureFilling.py create mode 100644 modules/services/serviceAi/subStructureGeneration.py diff --git a/modules/services/serviceAi/README_MODULE_STRUCTURE.md b/modules/services/serviceAi/README_MODULE_STRUCTURE.md new file mode 100644 index 00000000..d2fca8f5 --- /dev/null +++ b/modules/services/serviceAi/README_MODULE_STRUCTURE.md @@ -0,0 +1,78 @@ +# Module Structure - serviceAi + +## Übersicht + +Das `mainServiceAi.py` Modul wurde in mehrere Submodule aufgeteilt, um die Übersichtlichkeit zu verbessern. 
+ +## Modulstruktur + +### Hauptmodul +- **mainServiceAi.py** (~800 Zeilen) + - Initialisierung (`__init__`, `create`, `ensureAiObjectsInitialized`) + - Public API (`callAiPlanning`, `callAiContent`) + - Routing zu Submodulen + - Helper-Methoden + +### Submodule + +1. **subJsonResponseHandling.py** (bereits vorhanden) + - JSON Response Merging + - Section Merging + - Fragment Detection + +2. **subResponseParsing.py** (~200 Zeilen) + - `ResponseParser.extractSectionsFromResponse()` - Extrahiert Sections aus AI-Responses + - `ResponseParser.shouldContinueGeneration()` - Entscheidet ob Generation fortgesetzt werden soll + - `ResponseParser._isStuckInLoop()` - Loop-Detection + - `ResponseParser.extractDocumentMetadata()` - Extrahiert Metadaten + - `ResponseParser.buildFinalResultFromSections()` - Baut finales JSON + +3. **subDocumentIntents.py** (~300 Zeilen) + - `DocumentIntentAnalyzer.clarifyDocumentIntents()` - Analysiert Dokument-Intents + - `DocumentIntentAnalyzer.resolvePreExtractedDocument()` - Löst pre-extracted Dokumente auf + - `DocumentIntentAnalyzer._buildIntentAnalysisPrompt()` - Baut Intent-Analyse-Prompt + +4. **subContentExtraction.py** (~600 Zeilen) + - `ContentExtractor.extractAndPrepareContent()` - Extrahiert und bereitet Content vor + - `ContentExtractor.extractTextFromImage()` - Vision AI für Bilder + - `ContentExtractor.processTextContentWithAi()` - AI-Verarbeitung von Text + - `ContentExtractor._isBinary()` - Helper für Binary-Check + +5. **subStructureGeneration.py** (~200 Zeilen) + - `StructureGenerator.generateStructure()` - Generiert Dokument-Struktur + - `StructureGenerator._buildStructurePrompt()` - Baut Struktur-Prompt + +6. 
**subStructureFilling.py** (~400 Zeilen) + - `StructureFiller.fillStructure()` - Füllt Struktur mit Content + - `StructureFiller._buildSectionGenerationPrompt()` - Baut Section-Generation-Prompt + - `StructureFiller._findContentPartById()` - Helper für ContentPart-Suche + - `StructureFiller._needsAggregation()` - Entscheidet ob Aggregation nötig + +7. **subAiCallLooping.py** (~400 Zeilen) + - `AiCallLooper.callAiWithLooping()` - Haupt-Looping-Logik + - `AiCallLooper._defineKpisFromPrompt()` - KPI-Definition + +## Verwendung + +Alle Submodule werden über das Hauptmodul `AiService` verwendet: + +```python +# Initialisierung +aiService = await AiService.create(serviceCenter) + +# Submodule werden automatisch initialisiert +# aiService.responseParser +# aiService.intentAnalyzer +# aiService.contentExtractor +# etc. +``` + +## Migration + +Die öffentliche API bleibt unverändert. Interne Methoden wurden in Submodule verschoben: + +- `_extractSectionsFromResponse` → `responseParser.extractSectionsFromResponse` +- `_clarifyDocumentIntents` → `intentAnalyzer.clarifyDocumentIntents` +- `_extractAndPrepareContent` → `contentExtractor.extractAndPrepareContent` +- etc. + diff --git a/modules/services/serviceAi/REFACTORING_PLAN.md b/modules/services/serviceAi/REFACTORING_PLAN.md new file mode 100644 index 00000000..2ce7a717 --- /dev/null +++ b/modules/services/serviceAi/REFACTORING_PLAN.md @@ -0,0 +1,126 @@ +# Refactoring Plan für mainServiceAi.py + +## Ziel +Aufteilen des 3000-Zeilen-Moduls in überschaubare Submodule (~300-600 Zeilen pro Modul). + +## Vorgeschlagene Struktur + +### Bereits erstellt: +1. ✅ `subResponseParsing.py` - ResponseParser Klasse +2. ✅ `subDocumentIntents.py` - DocumentIntentAnalyzer Klasse + +### Noch zu erstellen: +3. `subContentExtraction.py` - ContentExtractor Klasse + - `extractAndPrepareContent()` (~490 Zeilen) + - `extractTextFromImage()` (~55 Zeilen) + - `processTextContentWithAi()` (~72 Zeilen) + - `_isBinary()` (~10 Zeilen) + +4. 
`subStructureGeneration.py` - StructureGenerator Klasse + - `generateStructure()` (~60 Zeilen) + - `_buildStructurePrompt()` (~130 Zeilen) + +5. `subStructureFilling.py` - StructureFiller Klasse + - `fillStructure()` (~290 Zeilen) + - `_buildSectionGenerationPrompt()` (~185 Zeilen) + - `_findContentPartById()` (~5 Zeilen) + - `_needsAggregation()` (~20 Zeilen) + +6. `subAiCallLooping.py` - AiCallLooper Klasse + - `callAiWithLooping()` (~405 Zeilen) + - `_defineKpisFromPrompt()` (~92 Zeilen) + +## Refactoring-Schritte für mainServiceAi.py + +### Schritt 1: Submodule-Initialisierung erweitern + +```python +def _initializeSubmodules(self): + """Initialize all submodules after aiObjects is ready.""" + if self.aiObjects is None: + raise RuntimeError("aiObjects must be initialized before initializing submodules") + + if self.extractionService is None: + logger.info("Initializing ExtractionService...") + self.extractionService = ExtractionService(self.services) + + # Neue Submodule initialisieren + from modules.services.serviceAi.subResponseParsing import ResponseParser + from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer + from modules.services.serviceAi.subContentExtraction import ContentExtractor + from modules.services.serviceAi.subStructureGeneration import StructureGenerator + from modules.services.serviceAi.subStructureFilling import StructureFiller + + if not hasattr(self, 'responseParser'): + self.responseParser = ResponseParser(self.services) + + if not hasattr(self, 'intentAnalyzer'): + self.intentAnalyzer = DocumentIntentAnalyzer(self.services, self) + + if not hasattr(self, 'contentExtractor'): + self.contentExtractor = ContentExtractor(self.services, self) + + if not hasattr(self, 'structureGenerator'): + self.structureGenerator = StructureGenerator(self.services, self) + + if not hasattr(self, 'structureFiller'): + self.structureFiller = StructureFiller(self.services, self) +``` + +### Schritt 2: Methoden durch Delegation 
ersetzen + +**Beispiel für Response Parsing:** +```python +# ALT: +def _extractSectionsFromResponse(self, ...): + # 100 Zeilen Code + ... + +# NEU: +def _extractSectionsFromResponse(self, ...): + return self.responseParser.extractSectionsFromResponse(...) +``` + +**Beispiel für Document Intents:** +```python +# ALT: +async def _clarifyDocumentIntents(self, ...): + # 100 Zeilen Code + ... + +# NEU: +async def _clarifyDocumentIntents(self, ...): + return await self.intentAnalyzer.clarifyDocumentIntents(...) +``` + +### Schritt 3: Helper-Methoden beibehalten + +Kleine Helper-Methoden bleiben im Hauptmodul: +- `_buildPromptWithPlaceholders()` +- `_getIntentForDocument()` +- `_shouldSkipContentPart()` +- `_determineDocumentName()` + +### Schritt 4: Public API unverändert lassen + +Die öffentliche API (`callAiPlanning`, `callAiContent`) bleibt unverändert. + +## Erwartete Ergebnis-Größen + +- `mainServiceAi.py`: ~800-1000 Zeilen (von 3016) +- `subResponseParsing.py`: ~200 Zeilen ✅ +- `subDocumentIntents.py`: ~300 Zeilen ✅ +- `subContentExtraction.py`: ~600 Zeilen +- `subStructureGeneration.py`: ~200 Zeilen +- `subStructureFilling.py`: ~400 Zeilen +- `subAiCallLooping.py`: ~500 Zeilen + +**Gesamt: ~3000 Zeilen** (gleich, aber besser organisiert) + +## Vorteile + +1. **Übersichtlichkeit**: Jedes Modul hat eine klare Verantwortlichkeit +2. **Wartbarkeit**: Änderungen sind lokalisiert +3. **Testbarkeit**: Module können einzeln getestet werden +4. 
**Wiederverwendbarkeit**: Module können in anderen Kontexten verwendet werden + diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 74b90346..f8ab4dad 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -50,6 +50,33 @@ class AiService: if self.extractionService is None: logger.info("Initializing ExtractionService...") self.extractionService = ExtractionService(self.services) + + # Initialize new submodules + from modules.services.serviceAi.subResponseParsing import ResponseParser + from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer + from modules.services.serviceAi.subContentExtraction import ContentExtractor + from modules.services.serviceAi.subStructureGeneration import StructureGenerator + from modules.services.serviceAi.subStructureFilling import StructureFiller + + if not hasattr(self, 'responseParser'): + logger.info("Initializing ResponseParser...") + self.responseParser = ResponseParser(self.services) + + if not hasattr(self, 'intentAnalyzer'): + logger.info("Initializing DocumentIntentAnalyzer...") + self.intentAnalyzer = DocumentIntentAnalyzer(self.services, self) + + if not hasattr(self, 'contentExtractor'): + logger.info("Initializing ContentExtractor...") + self.contentExtractor = ContentExtractor(self.services, self, self.intentAnalyzer) + + if not hasattr(self, 'structureGenerator'): + logger.info("Initializing StructureGenerator...") + self.structureGenerator = StructureGenerator(self.services, self) + + if not hasattr(self, 'structureFiller'): + logger.info("Initializing StructureFiller...") + self.structureFiller = StructureFiller(self.services, self) async def callAi(self, request: AiCallRequest, progressCallback=None): """Router: handles content parts via extractionService, text context via interface. 
@@ -684,6 +711,19 @@ If no trackable items can be identified, return: {{"kpis": []}} debugPrefix: str, allSections: List[Dict[str, Any]] = None, accumulationState: Optional[JsonAccumulationState] = None + ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]: + """Delegate to ResponseParser.""" + return self.responseParser.extractSectionsFromResponse( + result, iteration, debugPrefix, allSections, accumulationState + ) + + def _extractSectionsFromResponse_OLD( + self, + result: str, + iteration: int, + debugPrefix: str, + allSections: List[Dict[str, Any]] = None, + accumulationState: Optional[JsonAccumulationState] = None ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]: """ Extract sections from AI response, handling both valid and broken JSON. @@ -783,6 +823,18 @@ If no trackable items can be identified, return: {{"kpis": []}} iteration: int, wasJsonComplete: bool, rawResponse: str = None + ) -> bool: + """Delegate to ResponseParser.""" + return self.responseParser.shouldContinueGeneration( + allSections, iteration, wasJsonComplete, rawResponse + ) + + def _shouldContinueGeneration_OLD( + self, + allSections: List[Dict[str, Any]], + iteration: int, + wasJsonComplete: bool, + rawResponse: str = None ) -> bool: """ Determine if AI generation loop should continue. @@ -859,6 +911,13 @@ If no trackable items can be identified, return: {{"kpis": []}} def _extractDocumentMetadata( self, parsedResult: Dict[str, Any] + ) -> Optional[Dict[str, Any]]: + """Delegate to ResponseParser.""" + return self.responseParser.extractDocumentMetadata(parsedResult) + + def _extractDocumentMetadata_OLD( + self, + parsedResult: Dict[str, Any] ) -> Optional[Dict[str, Any]]: """ Extract document metadata (title, filename) from parsed AI response. 
@@ -885,6 +944,14 @@ If no trackable items can be identified, return: {{"kpis": []}} self, allSections: List[Dict[str, Any]], documentMetadata: Optional[Dict[str, Any]] = None + ) -> str: + """Delegate to ResponseParser.""" + return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata) + + def _buildFinalResultFromSections_OLD( + self, + allSections: List[Dict[str, Any]], + documentMetadata: Optional[Dict[str, Any]] = None ) -> str: """ Build final JSON result from accumulated sections. @@ -1090,6 +1157,18 @@ If no trackable items can be identified, return: {{"kpis": []}} userPrompt: str, actionParameters: Dict[str, Any], parentOperationId: str + ) -> List[DocumentIntent]: + """Delegate to DocumentIntentAnalyzer.""" + return await self.intentAnalyzer.clarifyDocumentIntents( + documents, userPrompt, actionParameters, parentOperationId + ) + + async def _clarifyDocumentIntents_OLD( + self, + documents: List[ChatDocument], + userPrompt: str, + actionParameters: Dict[str, Any], + parentOperationId: str ) -> List[DocumentIntent]: """ Phase 5A: Analysiert, welche Dokumente Extraktion vs Referenz benötigen. @@ -1189,6 +1268,7 @@ If no trackable items can be identified, return: {{"kpis": []}} Returns None wenn kein pre-extracted Format erkannt wird. 
""" if document.mimeType != "application/json": + logger.debug(f"Document {document.id} is not JSON (mimeType={document.mimeType}), skipping pre-extracted check") return None try: @@ -1208,10 +1288,14 @@ If no trackable items can be identified, return: {{"kpis": []}} validationMetadata = jsonData.get("validationMetadata", {}) actionType = validationMetadata.get("actionType") + logger.debug(f"JSON document {document.id}: validationMetadata.actionType={actionType}, keys={list(jsonData.keys())}") + if actionType == "context.extractContent": # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} documentData = jsonData.get("documentData") - logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}") + logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}, documentData keys: {list(documentData.keys()) if documentData else None}") + else: + logger.debug(f"JSON document {document.id} does not have actionType='context.extractContent' (got: {actionType})") if documentData: from modules.datamodels.datamodelExtraction import ContentExtracted @@ -1258,7 +1342,11 @@ If no trackable items can be identified, return: {{"kpis": []}} except Exception as parseError: logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}") logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}") + import traceback + logger.debug(f"Parse error traceback: {traceback.format_exc()}") return None + else: + logger.debug(f"JSON document {document.id} has no documentData (actionType={actionType})") return None except Exception as e: @@ -1277,26 +1365,122 @@ If no trackable items can be identified, return: {{"kpis": []}} Extrahierter Text oder None bei Fehler """ try: - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + # 
Final extraction prompt + finalPrompt = extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting." + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + finalPrompt, + f"content_extraction_prompt_image_{imagePart.id}" + ) # Erstelle AI-Call-Request mit Image-Part request = AiCallRequest( - prompt=extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting.", + prompt=finalPrompt, context="", - options=AiCallOptions(operationType="extraction"), + options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE), contentParts=[imagePart] ) # Verwende AI-Service für Vision AI-Verarbeitung - response = await self.services.ai.call(request) + response = await self.services.ai.callAi(request) + + # Debug-Log für Response (harmonisiert) + if response and response.content: + self.services.utils.writeDebugFile( + response.content, + f"content_extraction_response_image_{imagePart.id}" + ) if response and response.content: return response.content.strip() - return None + # Kein Content zurückgegeben - return error message für Debugging + errorMsg = f"Vision AI extraction failed: No content returned for image {imagePart.id}" + logger.warning(errorMsg) + return f"[ERROR: {errorMsg}]" except Exception as e: - logger.warning(f"Error extracting text from image {imagePart.id}: {str(e)}") - return None + errorMsg = f"Vision AI extraction failed for image {imagePart.id}: {str(e)}" + logger.error(errorMsg) + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Return error message statt None für Debugging + return f"[ERROR: {errorMsg}]" + + async def _processTextContentWithAi(self, textPart: ContentPart, extractionPrompt: str) -> Optional[str]: + """ + Verarbeite Text-Content mit AI basierend auf extractionPrompt. + + WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Text + (z.B. 
aus PDF-Text-Layer). Wenn "extract" Intent vorhanden ist, muss dieser Text mit AI + verarbeitet werden (Transformation, Strukturierung, etc.) basierend auf extractionPrompt. + + Args: + textPart: ContentPart mit typeGroup="text" (oder anderer Text-basierter Typ) + extractionPrompt: Prompt für die AI-Verarbeitung des Textes + + Returns: + AI-verarbeiteter Text oder None bei Fehler + """ + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + # Final extraction prompt + finalPrompt = extractionPrompt or "Process and extract the key information from the following text content." + + # Debug-Log (harmonisiert) - log prompt with text preview + textPreview = textPart.data[:500] + "..." if textPart.data and len(textPart.data) > 500 else (textPart.data or "") + promptWithContext = f"{finalPrompt}\n\n--- Text Content (preview) ---\n{textPreview}" + self.services.utils.writeDebugFile( + promptWithContext, + f"content_extraction_prompt_text_{textPart.id}" + ) + + # Erstelle Text-ContentPart für AI-Verarbeitung + # Verwende den vorhandenen Text als Input + textContentPart = ContentPart( + id=textPart.id, + label=textPart.label, + typeGroup="text", + mimeType="text/plain", + data=textPart.data if textPart.data else "", + metadata=textPart.metadata.copy() if textPart.metadata else {} + ) + + # Erstelle AI-Call-Request mit Text-Part + request = AiCallRequest( + prompt=finalPrompt, + context="", + options=AiCallOptions(operationType=OperationTypeEnum.DATA_EXTRACT), + contentParts=[textContentPart] + ) + + # Verwende AI-Service für Text-Verarbeitung + response = await self.services.ai.callAi(request) + + # Debug-Log für Response (harmonisiert) + if response and response.content: + self.services.utils.writeDebugFile( + response.content, + f"content_extraction_response_text_{textPart.id}" + ) + + if response and response.content: + return response.content.strip() + + # Kein Content zurückgegeben - return error message für Debugging + 
errorMsg = f"AI text processing failed: No content returned for text part {textPart.id}" + logger.warning(errorMsg) + return f"[ERROR: {errorMsg}]" + except Exception as e: + errorMsg = f"AI text processing failed for text part {textPart.id}: {str(e)}" + logger.error(errorMsg) + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Return error message statt None für Debugging + return f"[ERROR: {errorMsg}]" def _buildIntentAnalysisPrompt( self, @@ -1390,6 +1574,17 @@ Return ONLY valid JSON following the structure above. documents: List[ChatDocument], documentIntents: List[DocumentIntent], parentOperationId: str + ) -> List[ContentPart]: + """Delegate to ContentExtractor.""" + return await self.contentExtractor.extractAndPrepareContent( + documents, documentIntents, parentOperationId, self._getIntentForDocument + ) + + async def _extractAndPrepareContent_OLD( + self, + documents: List[ChatDocument], + documentIntents: List[DocumentIntent], + parentOperationId: str ) -> List[ContentPart]: """ Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor. @@ -1425,12 +1620,25 @@ Return ONLY valid JSON following the structure above. 
for document in documents: # Check if document is already a ContentExtracted document (pre-extracted JSON) + logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content") preExtracted = self._resolvePreExtractedDocument(document) if preExtracted: + logger.info(f"✅ Found pre-extracted document: {document.fileName} -> Original: {preExtracted['originalDocument']['fileName']}") + logger.info(f" Pre-extracted document ID: {document.id}, Original document ID: {preExtracted['originalDocument']['id']}") + logger.info(f" ContentParts count: {len(preExtracted['contentExtracted'].parts) if preExtracted['contentExtracted'].parts else 0}") + # Verwende bereits extrahierte ContentParts direkt contentExtracted = preExtracted["contentExtracted"] + + # WICHTIG: Intent muss für das JSON-Dokument gefunden werden, nicht für das Original + # (Intent-Analyse mappt bereits zurück zu JSON-Dokument-ID) intent = self._getIntentForDocument(document.id, documentIntents) + logger.info(f" Intent lookup for document {document.id}: found={intent is not None}") + if intent: + logger.info(f" Intent: {intent.intents}, extractionPrompt: {intent.extractionPrompt[:100] if intent.extractionPrompt else None}...") + else: + logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}") if contentExtracted.parts: for part in contentExtracted.parts: @@ -1445,96 +1653,267 @@ Return ONLY valid JSON following the structure above. 
# Ensure metadata is complete if "documentId" not in part.metadata: part.metadata["documentId"] = document.id - if "contentFormat" not in part.metadata: - part.metadata["contentFormat"] = "extracted" # WICHTIG: Prüfe Intent für dieses Part partIntent = intent.intents if intent else ["extract"] - # Wenn Intent "render" für Images hat, erstelle auch object Part - if "render" in partIntent and part.typeGroup == "image" and part.data: - # Image-Part mit render Intent: Erstelle sowohl extracted als auch object Part - # 1. Extracted Part - prüfe ob "extract" Intent vorhanden ist - if "extract" in partIntent: - # Image hat sowohl extract als auch render Intent - # Extracted Part: Wird mit Vision AI verarbeitet (skipExtraction=False) - part.metadata["intent"] = "extract" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = False # WICHTIG: Vision AI-Verarbeitung nötig! - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) - else: - # Nur render Intent - kein Text-Extraktion nötig - part.metadata["intent"] = "render" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = True - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) - - # 2. 
Object Part für Rendering (base64 data ist bereits im extracted Part) - objectPart = ContentPart( - id=f"obj_{document.id}_{part.id}", - label=f"Object: {part.label or 'Image'}", - typeGroup="image", - mimeType=part.mimeType or "image/jpeg", - data=part.data, # Base64 data ist bereits vorhanden + # Debug-Logging für Intent-Verarbeitung + logger.debug(f"Processing part {part.id}: typeGroup={part.typeGroup}, intents={partIntent}, hasData={bool(part.data)}, dataLength={len(str(part.data)) if part.data else 0}") + + # WICHTIG: Ein Part kann mehrere Intents haben - erstelle für jeden Intent einen ContentPart + # Generische Intent-Verarbeitung für ALLE Content-Typen + hasReferenceIntent = "reference" in partIntent + hasRenderIntent = "render" in partIntent + hasExtractIntent = "extract" in partIntent + hasPartData = bool(part.data) and (not isinstance(part.data, str) or len(part.data.strip()) > 0) + + logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}") + + # Track ob der originale Part bereits hinzugefügt wurde + originalPartAdded = False + + # 1. 
Reference Intent: Erstelle Reference ContentPart + if hasReferenceIntent: + referencePart = ContentPart( + id=f"ref_{document.id}_{part.id}", + label=f"Reference: {part.label or 'Content'}", + typeGroup="reference", + mimeType=part.mimeType or "application/octet-stream", + data="", # Leer - nur Referenz metadata={ - "contentFormat": "object", + "contentFormat": "reference", "documentId": document.id, - "intent": "render", - "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedExtractedPartId": part.id + "documentReference": f"docItem:{document.id}:{preExtracted['originalDocument']['fileName']}", + "intent": "reference", + "usageHint": f"Reference: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"] } ) - allContentParts.append(objectPart) - elif part.typeGroup == "image" and "extract" in partIntent: - # Image mit extract Intent: Vision AI-Verarbeitung nötig - # Verarbeite Image mit Vision AI, um Text zu extrahieren - try: - extractedText = await self._extractTextFromImage(part, intent.extractionPrompt if intent else "Extract all text content from this image") - if extractedText: - # Erstelle neuen Text-Part mit extrahiertem Text - textPart = ContentPart( - id=f"extracted_{part.id}", - label=f"Extracted text from {part.label or 'Image'}", - typeGroup="text", - mimeType="text/plain", - data=extractedText, - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedImagePartId": part.id, - "extractionPrompt": intent.extractionPrompt if intent else "Extract all text content from this image" - } - ) - allContentParts.append(textPart) - logger.info(f"✅ Extracted text from image {part.id} using Vision AI") + allContentParts.append(referencePart) + logger.debug(f"✅ Created reference 
ContentPart for {part.id}") + + # 2. Render Intent: Erstelle Object ContentPart (für Binary/Image Rendering) + if hasRenderIntent and hasPartData: + # Prüfe ob es ein Binary/Image ist (kann gerendert werden) + isRenderable = ( + part.typeGroup == "image" or + part.typeGroup == "binary" or + (part.mimeType and ( + part.mimeType.startswith("image/") or + part.mimeType.startswith("video/") or + part.mimeType.startswith("audio/") or + self._isBinary(part.mimeType) + )) + ) + + if isRenderable: + objectPart = ContentPart( + id=f"obj_{document.id}_{part.id}", + label=f"Object: {part.label or 'Content'}", + typeGroup=part.typeGroup, + mimeType=part.mimeType or "application/octet-stream", + data=part.data, # Base64/Binary data ist bereits vorhanden + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedExtractedPartId": f"extracted_{document.id}_{part.id}" if hasExtractIntent else None + } + ) + allContentParts.append(objectPart) + logger.debug(f"✅ Created object ContentPart for {part.id} (render intent)") + else: + logger.warning(f"⚠️ Part {part.id} has render intent but is not renderable (typeGroup={part.typeGroup}, mimeType={part.mimeType})") + elif hasRenderIntent and not hasPartData: + logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part") + + # 3. Extract Intent: Erstelle Extracted ContentPart (möglicherweise mit zusätzlicher Verarbeitung) + if hasExtractIntent: + # Spezielle Behandlung für Images: Vision AI für Text-Extraktion + if part.typeGroup == "image" and hasPartData: + logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)") + try: + extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. 
Return only the extracted text, no additional formatting." + extractedText = await self._extractTextFromImage(part, extractionPrompt) + if extractedText: + # Prüfe ob es ein Error-Message ist + isError = extractedText.startswith("[ERROR:") + + # Erstelle neuen Text-Part mit extrahiertem Text oder Error-Message + textPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=extractedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": extractionPrompt, + "extractionMethod": "vision", + "isError": isError + } + ) + allContentParts.append(textPart) + if isError: + logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}") + else: + logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars") + else: + # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) + errorMsg = f"Vision AI extraction failed: Unexpected empty response for image {part.id}" + logger.error(errorMsg) + errorPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Error extracting from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=f"[ERROR: {errorMsg}]", + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "extractionPrompt": extractionPrompt, + "extractionMethod": "vision", + "isError": True + } + ) + allContentParts.append(errorPart) + except Exception as e: + logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}") + import traceback + 
logger.debug(f"Traceback: {traceback.format_exc()}") + # Kein Fallback: Wenn render Intent vorhanden, haben wir bereits object Part + # Wenn nur extract Intent: Original Part ist kein Text, daher nicht als extracted hinzufügen + if not hasRenderIntent: + logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available") + else: + # Für alle anderen Content-Typen: Prüfe ob AI-Verarbeitung benötigt wird + # WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Content + # (z.B. Text aus PDF-Text-Layer, Tabellen, etc.). Wenn "extract" Intent vorhanden ist, + # muss dieser Content mit AI verarbeitet werden basierend auf extractionPrompt. - # Wenn auch render Intent vorhanden, füge Image-Part hinzu - if "render" in partIntent: - part.metadata["intent"] = "render" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = True - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) - except Exception as e: - logger.warning(f"Failed to extract text from image {part.id}: {str(e)}, adding image as-is") - # Fallback: Füge Image-Part hinzu ohne Text-Extraktion - part.metadata["intent"] = "extract" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = False - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) - else: - # Normales extracted Part (kein Image oder kein extract Intent) - part.metadata["intent"] = partIntent[0] if partIntent else "extract" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = True # Bereits extrahiert - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + # Prüfe ob Part Text-Content hat (kann mit AI verarbeitet werden) + isTextContent = ( + part.typeGroup == "text" or + part.typeGroup == "table" or + (part.data and isinstance(part.data, str) and 
len(part.data.strip()) > 0) + ) + + if isTextContent and intent and intent.extractionPrompt: + # Text-Content mit extractionPrompt: Verarbeite mit AI + logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)") + try: + extractionPrompt = intent.extractionPrompt + processedText = await self._processTextContentWithAi(part, extractionPrompt) + if processedText: + # Prüfe ob es ein Error-Message ist + isError = processedText.startswith("[ERROR:") + + # Erstelle neuen Text-Part mit AI-verarbeitetem Text oder Error-Message + processedPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}", + typeGroup="text", + mimeType="text/plain", + data=processedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": extractionPrompt, + "extractionMethod": "ai", + "sourcePartId": part.id, + "fromExtractContent": True, + "isError": isError + } + ) + allContentParts.append(processedPart) + originalPartAdded = True + if isError: + logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}") + else: + logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars") + else: + # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) + errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}" + logger.error(errorMsg) + errorPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Error processing {part.label or 'Content'}", + typeGroup="text", + mimeType="text/plain", + data=f"[ERROR: {errorMsg}]", + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": 
preExtracted["originalDocument"]["fileName"], + "extractionPrompt": extractionPrompt, + "extractionMethod": "ai", + "sourcePartId": part.id, + "isError": True + } + ) + allContentParts.append(errorPart) + originalPartAdded = True + except Exception as e: + logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}") + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Fallback: Verwende Original-Part + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None + }) + allContentParts.append(part) + originalPartAdded = True + else: + # Kein extractionPrompt oder kein Text-Content: Verwende Part direkt als extracted + # (Content ist bereits extrahiert von context.extractContent, keine weitere AI-Verarbeitung nötig) + # WICHTIG: Nur hinzufügen wenn noch nicht hinzugefügt (z.B. durch render Intent) + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, # Bereits extrahiert + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None + }) + # Stelle sicher dass contentFormat gesetzt ist + if "contentFormat" not in part.metadata: + part.metadata["contentFormat"] = "extracted" + allContentParts.append(part) + originalPartAdded = True + logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)") + + # 4. 
Fallback: Wenn kein Intent vorhanden oder Part wurde noch nicht hinzugefügt + # (sollte normalerweise nicht vorkommen, da default "extract" ist) + if not hasReferenceIntent and not hasRenderIntent and not hasExtractIntent and not originalPartAdded: + logger.warning(f"⚠️ Part {part.id} has no recognized intents, adding as extracted by default") + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, + "originalFileName": preExtracted["originalDocument"]["fileName"] + }) allContentParts.append(part) + originalPartAdded = True logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}") logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}") @@ -1715,6 +2094,18 @@ Return ONLY valid JSON following the structure above. contentParts: List[ContentPart], outputFormat: str, parentOperationId: str + ) -> Dict[str, Any]: + """Delegate to StructureGenerator.""" + return await self.structureGenerator.generateStructure( + userPrompt, contentParts, outputFormat, parentOperationId + ) + + async def _generateStructure_OLD( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str, + parentOperationId: str ) -> Dict[str, Any]: """ Phase 5C: Generiert Dokument-Struktur mit Sections. @@ -1782,17 +2173,36 @@ Return ONLY valid JSON following the structure above. 
# Baue ContentParts-Index - filtere leere Parts heraus contentPartsIndex = "" validParts = [] + filteredParts = [] + for part in contentParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + + # WICHTIG: Reference Parts haben absichtlich leere Daten - immer einschließen + if contentFormat == "reference": + validParts.append(part) + logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)") + continue + # Überspringe leere Parts (keine Daten oder nur Container ohne Inhalt) + # ABER: Reference Parts wurden bereits oben behandelt if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): # Überspringe Container-Parts ohne Daten if part.typeGroup == "container" and not part.data: + filteredParts.append((part.id, "container without data")) continue - # Überspringe andere leere Parts + # Überspringe andere leere Parts (aber nicht Reference, die wurden bereits behandelt) if not part.data: + filteredParts.append((part.id, f"no data (format: {contentFormat})")) continue validParts.append(part) + logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}") + + if filteredParts: + logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}") + + logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)") # Baue Index nur für gültige Parts for i, part in enumerate(validParts, 1): @@ -1825,11 +2235,14 @@ Return ONLY valid JSON following the structure above. elif contentFormat == "reference": dataPreview = part.metadata.get("documentReference", "reference") + originalFileName = part.metadata.get('originalFileName', 'N/A') + contentPartsIndex += f"\n{i}. 
ContentPart ID: {part.id}\n" contentPartsIndex += f" Format: {contentFormat}\n" contentPartsIndex += f" Type: {part.typeGroup}\n" contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n" contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n" + contentPartsIndex += f" Original file name: {originalFileName}\n" contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n" contentPartsIndex += f" Data preview: {dataPreview}\n" @@ -1896,6 +2309,18 @@ Return ONLY valid JSON following the structure above. contentParts: List[ContentPart], userPrompt: str, parentOperationId: str + ) -> Dict[str, Any]: + """Delegate to StructureFiller.""" + return await self.structureFiller.fillStructure( + structure, contentParts, userPrompt, parentOperationId + ) + + async def _fillStructure_OLD( + self, + structure: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + parentOperationId: str ) -> Dict[str, Any]: """ Phase 5D: Füllt Struktur mit tatsächlichem Content. @@ -1935,105 +2360,244 @@ Return ONLY valid JSON following the structure above. 
# Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) sections_to_process = [] + all_sections_list = [] # Für Kontext-Informationen for doc in filledStructure.get("documents", []): - for section in doc.get("sections", []): + doc_sections = doc.get("sections", []) + all_sections_list.extend(doc_sections) + for section in doc_sections: sections_to_process.append((doc, section)) # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) - for doc, section in sections_to_process: + for sectionIndex, (doc, section) in enumerate(sections_to_process): sectionId = section.get("id") contentPartIds = section.get("contentPartIds", []) contentFormats = section.get("contentFormats", {}) generationHint = section.get("generation_hint") + contentType = section.get("content_type", "paragraph") elements = [] - # Verarbeite ContentParts - for partId in contentPartIds: - part = self._findContentPartById(partId, contentParts) - if not part: - continue - - contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) - - if contentFormat == "reference": - # Füge Dokument-Referenz hinzu - elements.append({ - "type": "reference", - "documentReference": part.metadata.get("documentReference"), - "label": part.metadata.get("usageHint", part.label) - }) - - elif contentFormat == "object": - # Füge base64 Object hinzu - elements.append({ - "type": part.typeGroup, # "image", "binary", etc. 
- "base64Data": part.data, - "mimeType": part.mimeType, - "altText": part.metadata.get("usageHint", part.label) - }) - - elif contentFormat == "extracted": - # Füge extrahierten Text hinzu (kann in AI-Generierungs-Prompt verwendet werden) - elements.append({ - "type": "extracted_text", - "content": part.data, - "source": part.metadata.get("documentId"), - "extractionPrompt": part.metadata.get("extractionPrompt") - }) + # Prüfe ob Aggregation nötig ist + needsAggregation = self._needsAggregation( + contentType=contentType, + contentPartCount=len(contentPartIds) + ) - # Generiere AI-Content wenn nötig - if generationHint: - generationPrompt = self._buildSectionGenerationPrompt( - section=section, - contentParts=[self._findContentPartById(pid, contentParts) for pid in contentPartIds], - userPrompt=userPrompt, - generationHint=generationHint - ) + if needsAggregation and generationHint: + # Aggregation: Alle Parts zusammen verarbeiten + sectionParts = [ + self._findContentPartById(pid, contentParts) + for pid in contentPartIds + ] + sectionParts = [p for p in sectionParts if p is not None] - # Erstelle Operation-ID für Section-Generierung - # Debug-Logs werden bereits von callAiPlanning geschrieben - sectionOperationId = f"{fillOperationId}_section_{sectionId}" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - sectionOperationId, - "Section Generation", - "Section", - f"Generating section {sectionId}", - parentOperationId=fillOperationId - ) - - try: - # Generiere Content (verwende callAiPlanning für einfache JSON-Responses) - # Debug-Logs werden bereits von callAiPlanning geschrieben - aiResponse = await self.callAiPlanning( - prompt=generationPrompt, - debugType=f"section_generation_{sectionId}" - ) + if sectionParts: + # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) + extractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" + ] 
+ nonExtractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + ] - # Parse und füge zu elements hinzu - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) + # Verarbeite non-extracted Parts separat (reference, object) + for part in nonExtractedParts: + contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + elif contentFormat == "object": + elements.append({ + "type": part.typeGroup, + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) - # ChatLog abschließen - self.services.chat.progressLogFinish(sectionOperationId, True) - - except Exception as e: - # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) - self.services.chat.progressLogFinish(sectionOperationId, False) - elements.append({ - "type": "error", - "message": f"Error generating section {sectionId}: {str(e)}", - "sectionId": sectionId - }) - logger.error(f"Error generating section {sectionId}: {str(e)}") - # NICHT raise - Section wird mit Fehlermeldung gerendert + # Aggregiere extracted Parts mit AI + if extractedParts: + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=extractedParts, # ALLE PARTS für Aggregation! 
+ userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=True + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation (Aggregation)", + "Section", + f"Generating section {sectionId} with {len(extractedParts)} parts", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) + request = AiCallRequest( + prompt=generationPrompt, + contentParts=extractedParts, # ALLE PARTS! + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) 
+ self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + + else: + # Einzelverarbeitung: Jeder Part einzeln + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if not part: + continue + + contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + # Füge Dokument-Referenz hinzu + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "object": + # Füge base64 Object hinzu + elements.append({ + "type": part.typeGroup, # "image", "binary", etc. + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "extracted": + if generationHint: + # AI-Call mit einzelnen ContentPart + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[part], # EIN PART + userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=False + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation", + "Section", + f"Generating section {sectionId}", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + # Verwende callAi für ContentParts-Unterstützung + request = AiCallRequest( + prompt=generationPrompt, + 
contentParts=[part], + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) + self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + else: + # Füge extrahierten Text direkt hinzu (kein AI-Call) + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) section["elements"] = elements @@ -2052,35 +2616,185 @@ Return ONLY valid JSON following the structure above. 
section: Dict[str, Any], contentParts: List[Optional[ContentPart]], userPrompt: str, - generationHint: str + generationHint: str, + allSections: Optional[List[Dict[str, Any]]] = None, + sectionIndex: Optional[int] = None, + isAggregation: bool = False ) -> str: - """Baue Prompt für Section-Generierung.""" + """Baue Prompt für Section-Generierung mit vollständigem Kontext.""" # Filtere None-Werte validParts = [p for p in contentParts if p is not None] - contentPartsText = "" - for part in validParts: - contentFormat = part.metadata.get("contentFormat", "unknown") - contentPartsText += f"\n- ContentPart {part.id}:\n" - contentPartsText += f" Format: {contentFormat}\n" - if contentFormat == "extracted": - contentPartsText += f" Content: {part.data[:500]}...\n" if len(part.data) > 500 else f" Content: {part.data}\n" - elif contentFormat == "reference": - contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" - elif contentFormat == "object": - contentPartsText += f" Object: {part.typeGroup} ({part.mimeType})\n" + # Section-Metadaten + sectionId = section.get("id", "unknown") + contentType = section.get("content_type", "paragraph") - prompt = f"""USER REQUEST: + # Baue ContentParts-Beschreibung + contentPartsText = "" + if isAggregation: + # Aggregation: Zeige nur Metadaten, nicht Previews + contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n" + contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n" + contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n" + contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n" + contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. 
eine Tabelle)\n\n" + contentPartsText += f"ContentPart IDs:\n" + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}" + if part.metadata.get("originalFileName"): + contentPartsText += f", Source: {part.metadata.get('originalFileName')}" + contentPartsText += ")\n" + else: + # Einzelverarbeitung: Zeige Previews + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f"\n- ContentPart {part.id}:\n" + contentPartsText += f" Format: {contentFormat}\n" + contentPartsText += f" Type: {part.typeGroup}\n" + if part.metadata.get("originalFileName"): + contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n" + + if contentFormat == "extracted": + # Zeige Preview von extrahiertem Text (länger für besseren Kontext) + previewLength = 1000 + if part.data: + preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data + contentPartsText += f" Content preview:\n```\n{preview}\n```\n" + else: + contentPartsText += f" Content: (empty)\n" + elif contentFormat == "reference": + contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + contentPartsText += f" Object type: {part.typeGroup}\n" + contentPartsText += f" MIME type: {part.mimeType}\n" + contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" + + # Baue Section-Kontext (vorherige und nachfolgende Sections) + contextText = "" + if allSections and sectionIndex is not None: + prevSections = [] + nextSections = [] + + if sectionIndex > 0: + for i in range(max(0, 
sectionIndex - 2), sectionIndex): + prevSection = allSections[i] + prevSections.append({ + "id": prevSection.get("id"), + "content_type": prevSection.get("content_type"), + "generation_hint": prevSection.get("generation_hint", "")[:100] + }) + + if sectionIndex < len(allSections) - 1: + for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)): + nextSection = allSections[i] + nextSections.append({ + "id": nextSection.get("id"), + "content_type": nextSection.get("content_type"), + "generation_hint": nextSection.get("generation_hint", "")[:100] + }) + + if prevSections or nextSections: + contextText = "\n## DOCUMENT CONTEXT\n" + if prevSections: + contextText += "\nPrevious sections:\n" + for prev in prevSections: + contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n" + if nextSections: + contextText += "\nFollowing sections:\n" + for next in nextSections: + contextText += f"- {next['id']} ({next['content_type']}): {next['generation_hint']}\n" + + if isAggregation: + prompt = f"""# TASK: Generate Section Content (Aggregation) + +## SECTION METADATA +- Section ID: {sectionId} +- Content Type: {contentType} +- Generation Hint: {generationHint} +{contextText} + +## USER REQUEST (for context) +``` {userPrompt} +``` -SECTION TO GENERATE: -{generationHint} +## AVAILABLE CONTENT FOR THIS SECTION +{contentPartsText if contentPartsText else "(No content parts specified for this section)"} -AVAILABLE CONTENT FOR THIS SECTION: -{contentPartsText} +## INSTRUCTIONS +1. Generate content for section "{sectionId}" based on the generation hint above +2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) +3. For table content_type: Create a single table with headers and rows from all ContentParts +4. For bullet_list content_type: Create a single list with items from all ContentParts +5. Format appropriately based on content_type ({contentType}) +6. 
Ensure the generated content fits logically between previous and following sections +7. Return ONLY a JSON object with an "elements" array +8. Each element should match the content_type: {contentType} -CRITICAL: Return ONLY a JSON object with an "elements" array. -Jedes Element sollte dem content_type der Section entsprechen. +## OUTPUT FORMAT +Return a JSON object with this structure: +```json +{{ + "elements": [ + {{ + "type": "{contentType}", + "headers": [...], // if table + "rows": [...], // if table + "items": [...], // if bullet_list + "content": "..." // if paragraph + }} + ] +}} +``` + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +""" + else: + prompt = f"""# TASK: Generate Section Content + +## SECTION METADATA +- Section ID: {sectionId} +- Content Type: {contentType} +- Generation Hint: {generationHint} +{contextText} + +## USER REQUEST (for context) +``` +{userPrompt} +``` + +## AVAILABLE CONTENT FOR THIS SECTION +{contentPartsText if contentPartsText else "(No content parts specified for this section)"} + +## INSTRUCTIONS +1. Generate content for section "{sectionId}" based on the generation hint above +2. Use the available content parts to populate this section +3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data +4. For extracted text: Format appropriately based on content_type ({contentType}) +5. Ensure the generated content fits logically between previous and following sections +6. Return ONLY a JSON object with an "elements" array +7. Each element should match the content_type: {contentType} + +## OUTPUT FORMAT +Return a JSON object with this structure: +```json +{{ + "elements": [ + {{ + "type": "{contentType}", + "content": "..." + }} + ] +}} +``` + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ return prompt @@ -2091,6 +2805,35 @@ Jedes Element sollte dem content_type der Section entsprechen. 
return part return None + def _needsAggregation( + self, + contentType: str, + contentPartCount: int + ) -> bool: + """ + Bestimmt ob mehrere ContentParts aggregiert werden müssen. + + Aggregation nötig wenn: + - content_type erfordert Aggregation (table, bullet_list) + - UND mehrere ContentParts vorhanden sind (> 1) + + Args: + contentType: Section content_type + contentPartCount: Anzahl der ContentParts in dieser Section + + Returns: + True wenn Aggregation nötig, False sonst + """ + aggregationTypes = ["table", "bullet_list"] + + if contentType in aggregationTypes and contentPartCount > 1: + return True + + # Optional: Auch für paragraph wenn mehrere Parts vorhanden + # (z.B. Vergleich mehrerer Dokumente) + # Standard: Keine Aggregation für paragraph + return False + async def _renderResult( self, filledStructure: Dict[str, Any], diff --git a/modules/services/serviceAi/subContentExtraction.py b/modules/services/serviceAi/subContentExtraction.py new file mode 100644 index 00000000..229587f8 --- /dev/null +++ b/modules/services/serviceAi/subContentExtraction.py @@ -0,0 +1,670 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+""" +Content Extraction Module + +Handles content extraction and preparation, including: +- Extracting content from documents based on intents +- Processing pre-extracted documents +- Vision AI for image text extraction +- AI processing of text content +""" +import json +import logging +import base64 +from typing import Dict, Any, List, Optional + +from modules.datamodels.datamodelChat import ChatDocument +from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent + +logger = logging.getLogger(__name__) + + +class ContentExtractor: + """Handles content extraction and preparation.""" + + def __init__(self, services, aiService, intentAnalyzer): + """Initialize ContentExtractor with service center, AI service, and intent analyzer access.""" + self.services = services + self.aiService = aiService + self.intentAnalyzer = intentAnalyzer + + async def extractAndPrepareContent( + self, + documents: List[ChatDocument], + documentIntents: List[DocumentIntent], + parentOperationId: str, + getIntentForDocument: callable + ) -> List[ContentPart]: + """ + Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor. + Gibt Liste von ContentParts im passenden Format zurück. + + WICHTIG: Ein Dokument kann mehrere ContentParts erzeugen, wenn mehrere Intents vorhanden sind. + Beispiel: Bild mit intents=["extract", "render"] erzeugt: + - ContentPart(contentFormat="object", ...) für Rendering + - ContentPart(contentFormat="extracted", ...) 
für Text-Analyse + + Args: + documents: Liste der zu verarbeitenden Dokumente + documentIntents: Liste von DocumentIntent-Objekten + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + getIntentForDocument: Callable to get intent for document ID + + Returns: + Liste von ContentParts mit vollständigen Metadaten + """ + # Erstelle Operation-ID für Extraktion + extractionOperationId = f"{parentOperationId}_content_extraction" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + extractionOperationId, + "Content Extraction", + "Extraction", + f"Extracting from {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + allContentParts = [] + + for document in documents: + # Check if document is already a ContentExtracted document (pre-extracted JSON) + logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content") + preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(document) + + if preExtracted: + logger.info(f"✅ Found pre-extracted document: {document.fileName} -> Original: {preExtracted['originalDocument']['fileName']}") + logger.info(f" Pre-extracted document ID: {document.id}, Original document ID: {preExtracted['originalDocument']['id']}") + logger.info(f" ContentParts count: {len(preExtracted['contentExtracted'].parts) if preExtracted['contentExtracted'].parts else 0}") + + # Verwende bereits extrahierte ContentParts direkt + contentExtracted = preExtracted["contentExtracted"] + + # WICHTIG: Intent muss für das JSON-Dokument gefunden werden, nicht für das Original + # (Intent-Analyse mappt bereits zurück zu JSON-Dokument-ID) + intent = getIntentForDocument(document.id, documentIntents) + logger.info(f" Intent lookup for document {document.id}: found={intent is not None}") + if intent: + logger.info(f" Intent: {intent.intents}, extractionPrompt: {intent.extractionPrompt[:100] if intent.extractionPrompt else None}...") + else: 
+ logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}") + + if contentExtracted.parts: + for part in contentExtracted.parts: + # Überspringe leere Parts (Container ohne Daten) + if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + if part.typeGroup == "container": + continue # Überspringe leere Container + + if not part.metadata: + part.metadata = {} + + # Ensure metadata is complete + if "documentId" not in part.metadata: + part.metadata["documentId"] = document.id + + # WICHTIG: Prüfe Intent für dieses Part + partIntent = intent.intents if intent else ["extract"] + + # Debug-Logging für Intent-Verarbeitung + logger.debug(f"Processing part {part.id}: typeGroup={part.typeGroup}, intents={partIntent}, hasData={bool(part.data)}, dataLength={len(str(part.data)) if part.data else 0}") + + # WICHTIG: Ein Part kann mehrere Intents haben - erstelle für jeden Intent einen ContentPart + # Generische Intent-Verarbeitung für ALLE Content-Typen + hasReferenceIntent = "reference" in partIntent + hasRenderIntent = "render" in partIntent + hasExtractIntent = "extract" in partIntent + hasPartData = bool(part.data) and (not isinstance(part.data, str) or len(part.data.strip()) > 0) + + logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}") + + # Track ob der originale Part bereits hinzugefügt wurde + originalPartAdded = False + + # 1. 
Reference Intent: Erstelle Reference ContentPart + if hasReferenceIntent: + referencePart = ContentPart( + id=f"ref_{document.id}_{part.id}", + label=f"Reference: {part.label or 'Content'}", + typeGroup="reference", + mimeType=part.mimeType or "application/octet-stream", + data="", # Leer - nur Referenz + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{preExtracted['originalDocument']['fileName']}", + "intent": "reference", + "usageHint": f"Reference: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"] + } + ) + allContentParts.append(referencePart) + logger.debug(f"✅ Created reference ContentPart for {part.id}") + + # 2. Render Intent: Erstelle Object ContentPart (für Binary/Image Rendering) + if hasRenderIntent and hasPartData: + # Prüfe ob es ein Binary/Image ist (kann gerendert werden) + isRenderable = ( + part.typeGroup == "image" or + part.typeGroup == "binary" or + (part.mimeType and ( + part.mimeType.startswith("image/") or + part.mimeType.startswith("video/") or + part.mimeType.startswith("audio/") or + self._isBinary(part.mimeType) + )) + ) + + if isRenderable: + objectPart = ContentPart( + id=f"obj_{document.id}_{part.id}", + label=f"Object: {part.label or 'Content'}", + typeGroup=part.typeGroup, + mimeType=part.mimeType or "application/octet-stream", + data=part.data, # Base64/Binary data ist bereits vorhanden + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedExtractedPartId": f"extracted_{document.id}_{part.id}" if hasExtractIntent else None + } + ) + allContentParts.append(objectPart) + logger.debug(f"✅ Created object ContentPart for {part.id} (render intent)") + else: + logger.warning(f"⚠️ Part {part.id} has 
render intent but is not renderable (typeGroup={part.typeGroup}, mimeType={part.mimeType})") + elif hasRenderIntent and not hasPartData: + logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part") + + # 3. Extract Intent: Erstelle Extracted ContentPart (möglicherweise mit zusätzlicher Verarbeitung) + if hasExtractIntent: + # Spezielle Behandlung für Images: Vision AI für Text-Extraktion + if part.typeGroup == "image" and hasPartData: + logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)") + try: + extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. Return only the extracted text, no additional formatting." + extractedText = await self.extractTextFromImage(part, extractionPrompt) + if extractedText: + # Prüfe ob es ein Error-Message ist + isError = extractedText.startswith("[ERROR:") + + # Erstelle neuen Text-Part mit extrahiertem Text oder Error-Message + textPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=extractedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": extractionPrompt, + "extractionMethod": "vision", + "isError": isError + } + ) + allContentParts.append(textPart) + if isError: + logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}") + else: + logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars") + else: + # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) + errorMsg = f"Vision AI extraction failed: Unexpected empty 
response for image {part.id}" + logger.error(errorMsg) + errorPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Error extracting from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=f"[ERROR: {errorMsg}]", + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "extractionPrompt": extractionPrompt, + "extractionMethod": "vision", + "isError": True + } + ) + allContentParts.append(errorPart) + except Exception as e: + logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}") + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Kein Fallback: Wenn render Intent vorhanden, haben wir bereits object Part + # Wenn nur extract Intent: Original Part ist kein Text, daher nicht als extracted hinzufügen + if not hasRenderIntent: + logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available") + else: + # Für alle anderen Content-Typen: Prüfe ob AI-Verarbeitung benötigt wird + # WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Content + # (z.B. Text aus PDF-Text-Layer, Tabellen, etc.). Wenn "extract" Intent vorhanden ist, + # muss dieser Content mit AI verarbeitet werden basierend auf extractionPrompt. 
+ + # Prüfe ob Part Text-Content hat (kann mit AI verarbeitet werden) + isTextContent = ( + part.typeGroup == "text" or + part.typeGroup == "table" or + (part.data and isinstance(part.data, str) and len(part.data.strip()) > 0) + ) + + if isTextContent and intent and intent.extractionPrompt: + # Text-Content mit extractionPrompt: Verarbeite mit AI + logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)") + try: + extractionPrompt = intent.extractionPrompt + processedText = await self.processTextContentWithAi(part, extractionPrompt) + if processedText: + # Prüfe ob es ein Error-Message ist + isError = processedText.startswith("[ERROR:") + + # Erstelle neuen Text-Part mit AI-verarbeitetem Text oder Error-Message + processedPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}", + typeGroup="text", + mimeType="text/plain", + data=processedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": extractionPrompt, + "extractionMethod": "ai", + "sourcePartId": part.id, + "fromExtractContent": True, + "isError": isError + } + ) + allContentParts.append(processedPart) + originalPartAdded = True + if isError: + logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}") + else: + logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars") + else: + # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) + errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}" + logger.error(errorMsg) + errorPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Error processing {part.label or 'Content'}", 
+ typeGroup="text", + mimeType="text/plain", + data=f"[ERROR: {errorMsg}]", + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "extractionPrompt": extractionPrompt, + "extractionMethod": "ai", + "sourcePartId": part.id, + "isError": True + } + ) + allContentParts.append(errorPart) + originalPartAdded = True + except Exception as e: + logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}") + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Fallback: Verwende Original-Part + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None + }) + allContentParts.append(part) + originalPartAdded = True + else: + # Kein extractionPrompt oder kein Text-Content: Verwende Part direkt als extracted + # (Content ist bereits extrahiert von context.extractContent, keine weitere AI-Verarbeitung nötig) + # WICHTIG: Nur hinzufügen wenn noch nicht hinzugefügt (z.B. durch render Intent) + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, # Bereits extrahiert + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None + }) + # Stelle sicher dass contentFormat gesetzt ist + if "contentFormat" not in part.metadata: + part.metadata["contentFormat"] = "extracted" + allContentParts.append(part) + originalPartAdded = True + logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)") + + # 4. 
Fallback: Wenn kein Intent vorhanden oder Part wurde noch nicht hinzugefügt + # (sollte normalerweise nicht vorkommen, da default "extract" ist) + if not hasReferenceIntent and not hasRenderIntent and not hasExtractIntent and not originalPartAdded: + logger.warning(f"⚠️ Part {part.id} has no recognized intents, adding as extracted by default") + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, + "originalFileName": preExtracted["originalDocument"]["fileName"] + }) + allContentParts.append(part) + originalPartAdded = True + + logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}") + logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}") + continue # Skip normal extraction for this document + + # Check if it's standardized JSON format (has "documents" or "sections") + if document.mimeType == "application/json": + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if docBytes: + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): + logger.info(f"Document is already in standardized JSON format, using as reference") + # Create reference ContentPart for structured JSON + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="structure", + mimeType="application/json", + data=docData, + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "skipExtraction": True, + "intent": "reference" + } + ) + allContentParts.append(contentPart) + logger.info(f"✅ Using JSON document directly without extraction") + continue # Skip normal extraction for this document + except 
Exception as e: + logger.warning(f"Could not parse JSON document {document.fileName}, will extract normally: {str(e)}") + # Continue with normal extraction + + # Normal extraction path + intent = getIntentForDocument(document.id, documentIntents) + + if not intent: + # Default: extract für alle Dokumente ohne Intent + logger.warning(f"No intent found for document {document.id}, using default 'extract'") + intent = DocumentIntent( + documentId=document.id, + intents=["extract"], + extractionPrompt="Extract all content from the document", + reasoning="Default intent: no specific intent found" + ) + + # WICHTIG: Prüfe alle Intents - ein Dokument kann mehrere ContentParts erzeugen + + if "reference" in intent.intents: + # Erstelle Reference ContentPart + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="reference", + mimeType=document.mimeType, + data="", + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "intent": "reference", + "usageHint": f"Reference document: {document.fileName}" + } + ) + allContentParts.append(contentPart) + + # WICHTIG: "render" und "extract" können beide vorhanden sein! 
+ # In diesem Fall erzeugen wir BEIDE ContentParts + + if "render" in intent.intents: + # Für Images/Binary: extrahiere als Object + if document.mimeType.startswith("image/") or self._isBinary(document.mimeType): + try: + # Lade Binary-Daten (getFileData ist nicht async - keine await nötig) + binaryData = self.services.interfaceDbComponent.getFileData(document.fileId) + if not binaryData: + logger.warning(f"No binary data found for document {document.id}") + continue + base64Data = base64.b64encode(binaryData).decode('utf-8') + + contentPart = ContentPart( + id=f"obj_{document.id}", + label=f"Object: {document.fileName}", + typeGroup="image" if document.mimeType.startswith("image/") else "binary", + mimeType=document.mimeType, + data=base64Data, + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {document.fileName}", + "originalFileName": document.fileName, + # Verknüpfung zu extracted Part (falls vorhanden) + "relatedExtractedPartId": f"ext_{document.id}" if "extract" in intent.intents else None + } + ) + allContentParts.append(contentPart) + except Exception as e: + logger.error(f"Failed to load binary data for document {document.id}: {str(e)}") + + if "extract" in intent.intents: + # Extrahiere Content mit Extraction Service + extractionPrompt = intent.extractionPrompt or "Extract all content from the document" + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + extractionPrompt, + f"content_extraction_prompt_{document.id}" + ) + + # Führe Extraktion aus + from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy() + ) + + # extractContent ist nicht async - keine await nötig + extractedResults = self.services.extraction.extractContent( + [document], + extractionOptions, + operationId=extractionOperationId, + 
parentOperationId=extractionOperationId + ) + + # Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten + for extracted in extractedResults: + for part in extracted.parts: + # Markiere als extracted Format + part.metadata.update({ + "contentFormat": "extracted", + "documentId": document.id, + "extractionPrompt": extractionPrompt, + "intent": "extract", + "usageHint": f"Use extracted content from {document.fileName}", + # Verknüpfung zu object Part (falls vorhanden) + "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None + }) + # Stelle sicher, dass ID eindeutig ist (falls object Part existiert) + if "render" in intent.intents: + part.id = f"ext_{document.id}_{part.id}" + allContentParts.append(part) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps([part.dict() for part in allContentParts], indent=2, default=str), + "content_extraction_result" + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(extractionOperationId, True) + + return allContentParts + + except Exception as e: + self.services.chat.progressLogFinish(extractionOperationId, False) + logger.error(f"Error in extractAndPrepareContent: {str(e)}") + raise + + async def extractTextFromImage(self, imagePart: ContentPart, extractionPrompt: str) -> Optional[str]: + """ + Extrahiere Text aus einem Image-Part mit Vision AI. + + Args: + imagePart: ContentPart mit typeGroup="image" + extractionPrompt: Prompt für die Text-Extraktion + + Returns: + Extrahierter Text oder None bei Fehler + """ + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + # Final extraction prompt + finalPrompt = extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting." 
+ + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + finalPrompt, + f"content_extraction_prompt_image_{imagePart.id}" + ) + + # Erstelle AI-Call-Request mit Image-Part + request = AiCallRequest( + prompt=finalPrompt, + context="", + options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE), + contentParts=[imagePart] + ) + + # Verwende AI-Service für Vision AI-Verarbeitung + response = await self.aiService.callAi(request) + + # Debug-Log für Response (harmonisiert) + if response and response.content: + self.services.utils.writeDebugFile( + response.content, + f"content_extraction_response_image_{imagePart.id}" + ) + + if response and response.content: + return response.content.strip() + + # Kein Content zurückgegeben - return error message für Debugging + errorMsg = f"Vision AI extraction failed: No content returned for image {imagePart.id}" + logger.warning(errorMsg) + return f"[ERROR: {errorMsg}]" + except Exception as e: + errorMsg = f"Vision AI extraction failed for image {imagePart.id}: {str(e)}" + logger.error(errorMsg) + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Return error message statt None für Debugging + return f"[ERROR: {errorMsg}]" + + async def processTextContentWithAi(self, textPart: ContentPart, extractionPrompt: str) -> Optional[str]: + """ + Verarbeite Text-Content mit AI basierend auf extractionPrompt. + + WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Text + (z.B. aus PDF-Text-Layer). Wenn "extract" Intent vorhanden ist, muss dieser Text mit AI + verarbeitet werden (Transformation, Strukturierung, etc.) basierend auf extractionPrompt. 
+ + Args: + textPart: ContentPart mit typeGroup="text" (oder anderer Text-basierter Typ) + extractionPrompt: Prompt für die AI-Verarbeitung des Textes + + Returns: + AI-verarbeiteter Text oder None bei Fehler + """ + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + # Final extraction prompt + finalPrompt = extractionPrompt or "Process and extract the key information from the following text content." + + # Debug-Log (harmonisiert) - log prompt with text preview + textPreview = textPart.data[:500] + "..." if textPart.data and len(textPart.data) > 500 else (textPart.data or "") + promptWithContext = f"{finalPrompt}\n\n--- Text Content (preview) ---\n{textPreview}" + self.services.utils.writeDebugFile( + promptWithContext, + f"content_extraction_prompt_text_{textPart.id}" + ) + + # Erstelle Text-ContentPart für AI-Verarbeitung + # Verwende den vorhandenen Text als Input + textContentPart = ContentPart( + id=textPart.id, + label=textPart.label, + typeGroup="text", + mimeType="text/plain", + data=textPart.data if textPart.data else "", + metadata=textPart.metadata.copy() if textPart.metadata else {} + ) + + # Erstelle AI-Call-Request mit Text-Part + request = AiCallRequest( + prompt=finalPrompt, + context="", + options=AiCallOptions(operationType=OperationTypeEnum.DATA_EXTRACT), + contentParts=[textContentPart] + ) + + # Verwende AI-Service für Text-Verarbeitung + response = await self.aiService.callAi(request) + + # Debug-Log für Response (harmonisiert) + if response and response.content: + self.services.utils.writeDebugFile( + response.content, + f"content_extraction_response_text_{textPart.id}" + ) + + if response and response.content: + return response.content.strip() + + # Kein Content zurückgegeben - return error message für Debugging + errorMsg = f"AI text processing failed: No content returned for text part {textPart.id}" + logger.warning(errorMsg) + return f"[ERROR: {errorMsg}]" + except Exception as e: + 
errorMsg = f"AI text processing failed for text part {textPart.id}: {str(e)}" + logger.error(errorMsg) + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Return error message statt None für Debugging + return f"[ERROR: {errorMsg}]" + + def _isBinary(self, mimeType: str) -> bool: + """Prüfe ob MIME-Type binary ist.""" + binaryTypes = [ + "application/octet-stream", + "application/pdf", + "application/zip", + "application/x-zip-compressed" + ] + return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/") + diff --git a/modules/services/serviceAi/subDocumentIntents.py b/modules/services/serviceAi/subDocumentIntents.py new file mode 100644 index 00000000..c1faba39 --- /dev/null +++ b/modules/services/serviceAi/subDocumentIntents.py @@ -0,0 +1,302 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Document Intent Analysis Module + +Handles analysis of document intents, including: +- Clarifying which documents need extraction vs reference +- Resolving pre-extracted documents +- Building intent analysis prompts +""" +import json +import logging +from typing import Dict, Any, List, Optional + +from modules.datamodels.datamodelChat import ChatDocument +from modules.datamodels.datamodelExtraction import DocumentIntent + +logger = logging.getLogger(__name__) + + +class DocumentIntentAnalyzer: + """Handles document intent analysis and resolution.""" + + def __init__(self, services, aiService): + """Initialize DocumentIntentAnalyzer with service center and AI service access.""" + self.services = services + self.aiService = aiService + + async def clarifyDocumentIntents( + self, + documents: List[ChatDocument], + userPrompt: str, + actionParameters: Dict[str, Any], + parentOperationId: str + ) -> List[DocumentIntent]: + """ + Phase 5A: Analysiert, welche Dokumente Extraktion vs Referenz benötigen. + Gibt DocumentIntent für jedes Dokument zurück. 
+ + Args: + documents: Liste der zu verarbeitenden Dokumente + userPrompt: User-Anfrage + actionParameters: Action-spezifische Parameter (z.B. resultType, outputFormat) + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Liste von DocumentIntent-Objekten + """ + # Erstelle Operation-ID für Intent-Analyse + intentOperationId = f"{parentOperationId}_intent_analysis" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + intentOperationId, + "Document Intent Analysis", + "Intent Analysis", + f"Analyzing {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + # Mappe pre-extracted JSONs zu ursprünglichen Dokument-IDs für Intent-Analyse + documentMapping = {} # Maps original doc ID -> JSON doc ID + resolvedDocuments = [] + + for doc in documents: + preExtracted = self.resolvePreExtractedDocument(doc) + if preExtracted: + originalDocId = preExtracted["originalDocument"]["id"] + documentMapping[originalDocId] = doc.id + # Erstelle temporäres ChatDocument für ursprüngliches Dokument + originalDoc = ChatDocument( + id=originalDocId, + fileName=preExtracted["originalDocument"]["fileName"], + mimeType=preExtracted["originalDocument"]["mimeType"], + fileSize=preExtracted["originalDocument"].get("fileSize", doc.fileSize), + fileId=doc.fileId, # Behalte fileId vom JSON + messageId=doc.messageId if hasattr(doc, 'messageId') else None # Behalte messageId falls vorhanden + ) + resolvedDocuments.append(originalDoc) + else: + resolvedDocuments.append(doc) + + # Baue Intent-Analyse-Prompt mit ursprünglichen Dokumenten + intentPrompt = self._buildIntentAnalysisPrompt(userPrompt, resolvedDocuments, actionParameters) + + # AI-Call (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.aiService.callAiPlanning( + prompt=intentPrompt, + debugType="document_intent_analysis" + ) + + # Parse Result und mappe zurück zu 
def resolvePreExtractedDocument(self, document: ChatDocument) -> Optional[Dict[str, Any]]:
    """
    Detect a JSON document that stores the result of a "context.extractContent" action.

    Only the ActionDocument layout is recognised:
    {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}}

    Args:
        document: Document to inspect. Anything whose mimeType is not
            "application/json" is rejected without reading the file.

    Returns:
        None when the document is not JSON, has no file data, is not in the layout
        above, contains no parts, or cannot be read/parsed. Otherwise a dict with:
        - "originalDocument": dict with "id", "fileName", "mimeType", "fileSize"
          describing the source the parts were extracted from
        - "contentExtracted": the ContentExtracted object built from "documentData"
        - "parts": contentExtracted.parts
    """
    if document.mimeType != "application/json":
        logger.debug(f"Document {document.id} is not JSON (mimeType={document.mimeType}), skipping pre-extracted check")
        return None

    try:
        docBytes = self.services.interfaceDbComponent.getFileData(document.fileId)
        if not docBytes:
            return None

        jsonData = json.loads(docBytes.decode('utf-8'))
        if not isinstance(jsonData, dict):
            return None

        # "validationMetadata" may be absent, null or malformed; all of these mean
        # "no metadata" rather than an error.
        validationMetadata = jsonData.get("validationMetadata")
        if not isinstance(validationMetadata, dict):
            validationMetadata = {}
        actionType = validationMetadata.get("actionType")
        logger.debug(f"JSON document {document.id}: validationMetadata.actionType={actionType}, keys={list(jsonData.keys())}")

        documentData = None
        if actionType == "context.extractContent":
            documentData = jsonData.get("documentData")
            if documentData is not None and not isinstance(documentData, dict):
                # ContentExtracted(**documentData) below needs a mapping.
                logger.debug(f"JSON document {document.id} has non-object documentData ({type(documentData).__name__}), ignoring")
                documentData = None
            logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}, documentData keys: {list(documentData.keys()) if documentData else None}")
        else:
            logger.debug(f"JSON document {document.id} does not have actionType='context.extractContent' (got: {actionType})")

        if not documentData:
            logger.debug(f"JSON document {document.id} has no documentData (actionType={actionType})")
            return None

        from modules.datamodels.datamodelExtraction import ContentExtracted

        try:
            # ContentExtracted is built with an "id"; fall back to the JSON document's id.
            if "id" not in documentData:
                documentData["id"] = document.id

            contentExtracted = ContentExtracted(**documentData)

            if contentExtracted.parts:
                # Recover the source document's identity from part metadata;
                # the first non-empty value found for each field wins.
                originalDocId = None
                originalFileName = None
                originalMimeType = None

                for part in contentExtracted.parts:
                    partMetadata = part.metadata
                    if not partMetadata:
                        continue
                    originalDocId = originalDocId or partMetadata.get("documentId")
                    originalFileName = originalFileName or partMetadata.get("originalFileName")
                    originalMimeType = originalMimeType or partMetadata.get("documentMimeType")

                # Fallback: derive the name from "<name>_extracted_<...>.json".
                # NOTE(review): the ".pdf" suffix and the "application/pdf" default
                # below assume the source was a PDF - confirm this holds for all producers.
                if not originalFileName:
                    if document.fileName and "_extracted_" in document.fileName:
                        originalFileName = document.fileName.split("_extracted_")[0] + ".pdf"

                return {
                    "originalDocument": {
                        "id": originalDocId or document.id,
                        "fileName": originalFileName or document.fileName,
                        "mimeType": originalMimeType or "application/pdf",
                        "fileSize": document.fileSize
                    },
                    "contentExtracted": contentExtracted,
                    "parts": contentExtracted.parts
                }
        except Exception as parseError:
            logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}")
            logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}")
            import traceback
            logger.debug(f"Parse error traceback: {traceback.format_exc()}")
            return None

        # documentData parsed but contained no parts.
        return None
    except Exception as e:
        # Best-effort detection: read/decode/JSON errors mean "not pre-extracted".
        logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}")
        return None
{len(preExtracted['parts'])} pre-extracted parts: {', '.join([p.typeGroup for p in preExtracted['parts'] if p.data and len(str(p.data)) > 0])})" + docListText += f"\n{i}. Document ID: {originalDoc['id']}\n" + docListText += f" File Name: {originalDoc['fileName']}{partsInfo}\n" + docListText += f" MIME Type: {originalDoc['mimeType']}\n" + docListText += f" File Size: {originalDoc.get('fileSize', doc.fileSize)} bytes\n" + else: + # Normales Dokument + docListText += f"\n{i}. Document ID: {doc.id}\n" + docListText += f" File Name: {doc.fileName}\n" + docListText += f" MIME Type: {doc.mimeType}\n" + docListText += f" File Size: {doc.fileSize} bytes\n" + + outputFormat = actionParameters.get("outputFormat", "txt") + + prompt = f"""USER REQUEST: +{userPrompt} + +DOCUMENTS TO ANALYZE: +{docListText} + +TASK: For each document, determine its intents (can be multiple): +- "extract": Content extraction needed (text, structure, OCR, etc.) +- "render": Image/binary should be rendered as-is (visual element) +- "reference": Document reference/attachment (no extraction, just reference) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "intents": [ + {{ + "documentId": "doc_1", + "intents": ["extract"], # Array - can contain multiple! + "extractionPrompt": "Extract all text content, preserving structure", + "reasoning": "User needs text content for document generation" + }}, + {{ + "documentId": "doc_2", + "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering + "extractionPrompt": "Extract text content from image using vision AI", + "reasoning": "Image contains text that needs extraction, but also should be rendered visually" + }}, + {{ + "documentId": "doc_3", + "intents": ["reference"], + "extractionPrompt": null, + "reasoning": "Document is only used as reference, no extraction needed" + }} + ] +}} + +CRITICAL RULES: +1. 
For images (mimeType starts with "image/"): + - If user wants to "include" or "show" images → add "render" + - If user wants to "analyze", "read text", or "extract text" from images → add "extract" + - Can have BOTH "extract" and "render" if image needs both text extraction and visual rendering + +2. For text documents: + - If user mentions "template" or "structure" → "reference" or "extract" based on context + - If user mentions "reference" or "context" → "reference" + - Default → "extract" + +3. Consider output format: + - For formats like PDF, DOCX, PPTX: images usually need "render" + - For formats like CSV, JSON: usually "extract" only + - For HTML: can have both "extract" and "render" + +Return ONLY valid JSON following the structure above. +""" + return prompt + diff --git a/modules/services/serviceAi/subResponseParsing.py b/modules/services/serviceAi/subResponseParsing.py new file mode 100644 index 00000000..a2d568d9 --- /dev/null +++ b/modules/services/serviceAi/subResponseParsing.py @@ -0,0 +1,275 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
def extractSectionsFromResponse(
    self,
    result: str,
    iteration: int,
    debugPrefix: str,
    allSections: List[Dict[str, Any]] = None,
    accumulationState: Optional[JsonAccumulationState] = None
) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]:
    """
    Extract sections from one AI response, handling complete and broken JSON.

    Iteration 1: if the response parses and JsonResponseHandler.isJsonComplete
    accepts it, its sections are returned and no accumulation is started.
    Otherwise whatever sections can be recovered (from the parsed object or from
    repairBrokenJson) are returned together with a fresh accumulation state that
    holds the raw response. KPIs in that state are left empty for the caller to fill.

    Iteration > 1: the response is appended to the accumulated string via
    JsonResponseHandler.accumulateAndParseJsonFragments and the given state is
    updated in place.

    Args:
        result: Raw AI response text of this iteration.
        iteration: 1-based iteration counter.
        debugPrefix: Prefix for the debug file written for accumulated JSON.
        allSections: Sections collected so far; treated as empty when None.
        accumulationState: State from the previous iteration (iterations > 1).

    Returns:
        Tuple of:
        - sections: Sections extracted in this call
        - wasJsonComplete: True if the JSON is complete
        - parsedResult: Parsed JSON object, or None if nothing could be parsed
        - updatedAccumulationState: Updated state, or None when no accumulation
          is in progress
    """
    if allSections is None:
        allSections = []

    if iteration == 1:
        parsed = None
        try:
            parsed = json.loads(extractJsonString(result))

            if JsonResponseHandler.isJsonComplete(parsed):
                sections = extractSectionsFromDocument(parsed)
                logger.info("Iteration 1: Complete JSON detected, no accumulation needed")
                return sections, True, parsed, None  # No accumulation
        except Exception as parseError:
            # Expected for truncated responses; fall through to partial extraction.
            logger.debug(f"Iteration 1: Response is not complete JSON ({parseError}), trying partial extraction")

        logger.info("Iteration 1: Incomplete JSON detected, attempting to extract partial sections")

        # Best effort: a failure here must not abort the iteration, because the raw
        # response is still kept in the accumulation state below.
        partialSections = []
        try:
            if parsed:
                partialSections = extractSectionsFromDocument(parsed)
            else:
                repaired = repairBrokenJson(result)
                if repaired:
                    partialSections = extractSectionsFromDocument(repaired)
                    parsed = repaired  # Use repaired version for accumulation state
        except Exception as partialError:
            logger.debug(f"Iteration 1: Partial section extraction failed ({partialError}), continuing with empty sections", exc_info=True)

        # KPIs are defined by the caller (async context); start with none.
        accumulationState = JsonAccumulationState(
            accumulatedJsonString=result,
            isAccumulationMode=True,
            lastParsedResult=parsed,
            allSections=partialSections,
            kpis=[]
        )
        return partialSections, False, parsed, accumulationState

    if accumulationState and accumulationState.isAccumulationMode:
        accumulated, sections, isComplete, parsedResult = \
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulationState.accumulatedJsonString,
                result,
                allSections,
                iteration
            )

        # Update accumulation state in place.
        accumulationState.accumulatedJsonString = accumulated
        accumulationState.lastParsedResult = parsedResult
        accumulationState.allSections = (allSections + sections) if sections else allSections
        accumulationState.isAccumulationMode = not isComplete

        # Write the accumulated JSON for debugging.
        if parsedResult:
            accumulatedJsonStr = json.dumps(parsedResult, indent=2, ensure_ascii=False)
            self.services.utils.writeDebugFile(accumulatedJsonStr, f"{debugPrefix}_accumulated_json_iteration_{iteration}.json")

        return sections, isComplete, parsedResult, accumulationState

    # Later iteration without an active accumulation state - not expected.
    logger.warning(f"Iteration {iteration}: No accumulation state but iteration > 1")
    return [], False, None, None
+ """ + if len(allSections) == 0: + return True # No sections yet, continue + + # CRITERION 1: If JSON was incomplete/broken (parsing failed or incomplete) - continue to repair/complete + if not wasJsonComplete: + logger.info(f"Iteration {iteration}: JSON incomplete/broken - continuing to complete") + return True + + # CRITERION 2: JSON is complete (parsed successfully) - check for loop detection + if self._isStuckInLoop(allSections, iteration): + logger.warning(f"Iteration {iteration}: Detected potential infinite loop - stopping AI loop") + return False + + # JSON is complete and not stuck in loop - done + logger.info(f"Iteration {iteration}: JSON complete - AI loop done") + return False + + def _isStuckInLoop( + self, + allSections: List[Dict[str, Any]], + iteration: int + ) -> bool: + """ + Detect if we're stuck in a loop (same content being repeated). + + Generic approach: Check if recent iterations are adding minimal or duplicate content. + """ + if iteration < 3: + return False # Need at least 3 iterations to detect a loop + + if len(allSections) == 0: + return False + + # Check if last section is very small (might be stuck) + lastSection = allSections[-1] + elements = lastSection.get("elements", []) + + if isinstance(elements, list) and elements: + lastElem = elements[-1] if elements else {} + else: + lastElem = elements if isinstance(elements, dict) else {} + + # Check content size of last section + lastSectionSize = 0 + if isinstance(lastElem, dict): + for key, value in lastElem.items(): + if isinstance(value, str): + lastSectionSize += len(value) + elif isinstance(value, list): + lastSectionSize += len(str(value)) + + # If last section is very small and we've done many iterations, might be stuck + if lastSectionSize < 100 and iteration > 10: + logger.warning(f"Potential loop detected: iteration {iteration}, last section size {lastSectionSize}") + return True + + return False + + def extractDocumentMetadata( + self, + parsedResult: Dict[str, Any] + ) -> 
Optional[Dict[str, Any]]: + """ + Extract document metadata (title, filename) from parsed AI response. + Returns dict with 'title' and 'filename' keys if found, None otherwise. + """ + if not isinstance(parsedResult, dict): + return None + + # Try to get from documents array (preferred structure) + if "documents" in parsedResult and isinstance(parsedResult["documents"], list) and len(parsedResult["documents"]) > 0: + firstDoc = parsedResult["documents"][0] + if isinstance(firstDoc, dict): + title = firstDoc.get("title") + filename = firstDoc.get("filename") + if title or filename: + return { + "title": title, + "filename": filename + } + + return None + + def buildFinalResultFromSections( + self, + allSections: List[Dict[str, Any]], + documentMetadata: Optional[Dict[str, Any]] = None + ) -> str: + """ + Build final JSON result from accumulated sections. + Uses AI-provided metadata (title, filename) if available. + """ + if not allSections: + return "" + + # Extract metadata from AI response if available + title = "Generated Document" + filename = "document.json" + if documentMetadata: + if documentMetadata.get("title"): + title = documentMetadata["title"] + if documentMetadata.get("filename"): + filename = documentMetadata["filename"] + + # Build documents structure + # Assuming single document for now + documents = [{ + "id": "doc_1", + "title": title, + "filename": filename, + "sections": allSections + }] + + result = { + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }, + "documents": documents + } + + return json.dumps(result, indent=2) + diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py new file mode 100644 index 00000000..cc45b099 --- /dev/null +++ b/modules/services/serviceAi/subStructureFilling.py @@ -0,0 +1,546 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+""" +Structure Filling Module + +Handles filling document structure with content, including: +- Filling sections with content parts +- Building section generation prompts +- Aggregation logic +""" +import json +import logging +import copy +from typing import Dict, Any, List, Optional + +from modules.datamodels.datamodelExtraction import ContentPart +from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum + +logger = logging.getLogger(__name__) + + +class StructureFiller: + """Handles filling document structure with content.""" + + def __init__(self, services, aiService): + """Initialize StructureFiller with service center and AI service access.""" + self.services = services + self.aiService = aiService + + async def fillStructure( + self, + structure: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5D: Füllt Struktur mit tatsächlichem Content. 
+ Für jede Section: + - Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format + - Wenn generation_hint spezifiziert: Generiere AI-Content + + **Implementierungsdetails:** + - Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung) + - Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses) + + Args: + structure: Struktur-Dict mit documents und sections + contentParts: Alle vorbereiteten ContentParts + userPrompt: User-Anfrage + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Gefüllte Struktur mit elements in jeder Section + """ + # Erstelle Operation-ID für Struktur-Abfüllen + fillOperationId = f"{parentOperationId}_structure_filling" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + fillOperationId, + "Structure Filling", + "Filling", + f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", + parentOperationId=parentOperationId + ) + + try: + filledStructure = copy.deepcopy(structure) + + # Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) + sections_to_process = [] + all_sections_list = [] # Für Kontext-Informationen + for doc in filledStructure.get("documents", []): + doc_sections = doc.get("sections", []) + all_sections_list.extend(doc_sections) + for section in doc_sections: + sections_to_process.append((doc, section)) + + # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) + for sectionIndex, (doc, section) in enumerate(sections_to_process): + sectionId = section.get("id") + contentPartIds = section.get("contentPartIds", []) + contentFormats = section.get("contentFormats", {}) + generationHint = section.get("generation_hint") + contentType = section.get("content_type", "paragraph") + + elements = [] + + # Prüfe ob Aggregation nötig ist + needsAggregation = self._needsAggregation( + contentType=contentType, + 
contentPartCount=len(contentPartIds) + ) + + if needsAggregation and generationHint: + # Aggregation: Alle Parts zusammen verarbeiten + sectionParts = [ + self._findContentPartById(pid, contentParts) + for pid in contentPartIds + ] + sectionParts = [p for p in sectionParts if p is not None] + + if sectionParts: + # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) + extractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" + ] + nonExtractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + ] + + # Verarbeite non-extracted Parts separat (reference, object) + for part in nonExtractedParts: + contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + elif contentFormat == "object": + elements.append({ + "type": part.typeGroup, + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + # Aggregiere extracted Parts mit AI + if extractedParts: + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=extractedParts, # ALLE PARTS für Aggregation! 
+ userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=True + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation (Aggregation)", + "Section", + f"Generating section {sectionId} with {len(extractedParts)} parts", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) + request = AiCallRequest( + prompt=generationPrompt, + contentParts=extractedParts, # ALLE PARTS! + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) 
+ self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + + else: + # Einzelverarbeitung: Jeder Part einzeln + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if not part: + continue + + contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + # Füge Dokument-Referenz hinzu + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "object": + # Füge base64 Object hinzu + elements.append({ + "type": part.typeGroup, # "image", "binary", etc. + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "extracted": + if generationHint: + # AI-Call mit einzelnen ContentPart + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[part], # EIN PART + userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=False + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation", + "Section", + f"Generating section {sectionId}", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + # Verwende callAi für ContentParts-Unterstützung + request = AiCallRequest( + prompt=generationPrompt, + 
contentParts=[part], + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) + self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + else: + # Füge extrahierten Text direkt hinzu (kein AI-Call) + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) + + section["elements"] = elements + + # ChatLog abschließen + self.services.chat.progressLogFinish(fillOperationId, True) + + return filledStructure + + except Exception as e: + self.services.chat.progressLogFinish(fillOperationId, False) + logger.error(f"Error in fillStructure: {str(e)}") + raise + + def _buildSectionGenerationPrompt( + self, + section: Dict[str, Any], + contentParts: List[Optional[ContentPart]], + userPrompt: str, + generationHint: str, + allSections: Optional[List[Dict[str, Any]]] = None, + sectionIndex: 
Optional[int] = None, + isAggregation: bool = False + ) -> str: + """Baue Prompt für Section-Generierung mit vollständigem Kontext.""" + # Filtere None-Werte + validParts = [p for p in contentParts if p is not None] + + # Section-Metadaten + sectionId = section.get("id", "unknown") + contentType = section.get("content_type", "paragraph") + + # Baue ContentParts-Beschreibung + contentPartsText = "" + if isAggregation: + # Aggregation: Zeige nur Metadaten, nicht Previews + contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n" + contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n" + contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n" + contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n" + contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n" + contentPartsText += f"ContentPart IDs:\n" + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}" + if part.metadata.get("originalFileName"): + contentPartsText += f", Source: {part.metadata.get('originalFileName')}" + contentPartsText += ")\n" + else: + # Einzelverarbeitung: Zeige Previews + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f"\n- ContentPart {part.id}:\n" + contentPartsText += f" Format: {contentFormat}\n" + contentPartsText += f" Type: {part.typeGroup}\n" + if part.metadata.get("originalFileName"): + contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n" + + if contentFormat == "extracted": + # Zeige Preview von extrahiertem Text (länger für besseren Kontext) + previewLength = 1000 + if part.data: + preview = part.data[:previewLength] + "..." 
if len(part.data) > previewLength else part.data + contentPartsText += f" Content preview:\n```\n{preview}\n```\n" + else: + contentPartsText += f" Content: (empty)\n" + elif contentFormat == "reference": + contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + contentPartsText += f" Object type: {part.typeGroup}\n" + contentPartsText += f" MIME type: {part.mimeType}\n" + contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" + + # Baue Section-Kontext (vorherige und nachfolgende Sections) + contextText = "" + if allSections and sectionIndex is not None: + prevSections = [] + nextSections = [] + + if sectionIndex > 0: + for i in range(max(0, sectionIndex - 2), sectionIndex): + prevSection = allSections[i] + prevSections.append({ + "id": prevSection.get("id"), + "content_type": prevSection.get("content_type"), + "generation_hint": prevSection.get("generation_hint", "")[:100] + }) + + if sectionIndex < len(allSections) - 1: + for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)): + nextSection = allSections[i] + nextSections.append({ + "id": nextSection.get("id"), + "content_type": nextSection.get("content_type"), + "generation_hint": nextSection.get("generation_hint", "")[:100] + }) + + if prevSections or nextSections: + contextText = "\n## DOCUMENT CONTEXT\n" + if prevSections: + contextText += "\nPrevious sections:\n" + for prev in prevSections: + contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n" + if nextSections: + contextText += "\nFollowing sections:\n" + for next in nextSections: + contextText += f"- {next['id']} ({next['content_type']}): 
{next['generation_hint']}\n" + + if isAggregation: + prompt = f"""# TASK: Generate Section Content (Aggregation) + +## SECTION METADATA +- Section ID: {sectionId} +- Content Type: {contentType} +- Generation Hint: {generationHint} +{contextText} + +## USER REQUEST (for context) +``` +{userPrompt} +``` + +## AVAILABLE CONTENT FOR THIS SECTION +{contentPartsText if contentPartsText else "(No content parts specified for this section)"} + +## INSTRUCTIONS +1. Generate content for section "{sectionId}" based on the generation hint above +2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) +3. For table content_type: Create a single table with headers and rows from all ContentParts +4. For bullet_list content_type: Create a single list with items from all ContentParts +5. Format appropriately based on content_type ({contentType}) +6. Ensure the generated content fits logically between previous and following sections +7. Return ONLY a JSON object with an "elements" array +8. Each element should match the content_type: {contentType} + +## OUTPUT FORMAT +Return a JSON object with this structure: +```json +{{ + "elements": [ + {{ + "type": "{contentType}", + "headers": [...], // if table + "rows": [...], // if table + "items": [...], // if bullet_list + "content": "..." // if paragraph + }} + ] +}} +``` + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +""" + else: + prompt = f"""# TASK: Generate Section Content + +## SECTION METADATA +- Section ID: {sectionId} +- Content Type: {contentType} +- Generation Hint: {generationHint} +{contextText} + +## USER REQUEST (for context) +``` +{userPrompt} +``` + +## AVAILABLE CONTENT FOR THIS SECTION +{contentPartsText if contentPartsText else "(No content parts specified for this section)"} + +## INSTRUCTIONS +1. Generate content for section "{sectionId}" based on the generation hint above +2. 
Use the available content parts to populate this section +3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data +4. For extracted text: Format appropriately based on content_type ({contentType}) +5. Ensure the generated content fits logically between previous and following sections +6. Return ONLY a JSON object with an "elements" array +7. Each element should match the content_type: {contentType} + +## OUTPUT FORMAT +Return a JSON object with this structure: +```json +{{ + "elements": [ + {{ + "type": "{contentType}", + "content": "..." + }} + ] +}} +``` + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +""" + return prompt + + def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]: + """Finde ContentPart nach ID.""" + for part in contentParts: + if part.id == partId: + return part + return None + + def _needsAggregation( + self, + contentType: str, + contentPartCount: int + ) -> bool: + """ + Bestimmt ob mehrere ContentParts aggregiert werden müssen. + + Aggregation nötig wenn: + - content_type erfordert Aggregation (table, bullet_list) + - UND mehrere ContentParts vorhanden sind (> 1) + + Args: + contentType: Section content_type + contentPartCount: Anzahl der ContentParts in dieser Section + + Returns: + True wenn Aggregation nötig, False sonst + """ + aggregationTypes = ["table", "bullet_list"] + + if contentType in aggregationTypes and contentPartCount > 1: + return True + + # Optional: Auch für paragraph wenn mehrere Parts vorhanden + # (z.B. 
Vergleich mehrerer Dokumente) + # Standard: Keine Aggregation für paragraph + return False + diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py new file mode 100644 index 00000000..eb39fdd6 --- /dev/null +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -0,0 +1,229 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Structure Generation Module + +Handles document structure generation, including: +- Generating document structure with sections +- Building structure prompts +""" +import json +import logging +from typing import Dict, Any, List + +from modules.datamodels.datamodelExtraction import ContentPart + +logger = logging.getLogger(__name__) + + +class StructureGenerator: + """Handles document structure generation.""" + + def __init__(self, services, aiService): + """Initialize StructureGenerator with service center and AI service access.""" + self.services = services + self.aiService = aiService + + async def generateStructure( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5C: Generiert Dokument-Struktur mit Sections. + Jede Section spezifiziert: + - Welcher Content sollte in dieser Section sein + - Welche ContentParts zu verwenden sind + - Format für jeden ContentPart + + Args: + userPrompt: User-Anfrage + contentParts: Alle vorbereiteten ContentParts mit Metadaten + outputFormat: Ziel-Format (html, docx, pdf, etc.) 
+ parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Struktur-Dict mit documents und sections + """ + # Erstelle Operation-ID für Struktur-Generierung + structureOperationId = f"{parentOperationId}_structure_generation" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + structureOperationId, + "Structure Generation", + "Structure", + f"Generating structure for {outputFormat}", + parentOperationId=parentOperationId + ) + + try: + # Baue Struktur-Prompt mit Content-Index + structurePrompt = self._buildStructurePrompt( + userPrompt=userPrompt, + contentParts=contentParts, + outputFormat=outputFormat + ) + + # AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.aiService.callAiPlanning( + prompt=structurePrompt, + debugType="document_generation_structure" + ) + + # Parse Struktur + structure = json.loads(self.services.utils.jsonExtractString(aiResponse)) + + # ChatLog abschließen + self.services.chat.progressLogFinish(structureOperationId, True) + + return structure + + except Exception as e: + self.services.chat.progressLogFinish(structureOperationId, False) + logger.error(f"Error in generateStructure: {str(e)}") + raise + + def _buildStructurePrompt( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str + ) -> str: + """Baue Prompt für Struktur-Generierung.""" + # Baue ContentParts-Index - filtere leere Parts heraus + contentPartsIndex = "" + validParts = [] + filteredParts = [] + + for part in contentParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + + # WICHTIG: Reference Parts haben absichtlich leere Daten - immer einschließen + if contentFormat == "reference": + validParts.append(part) + logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)") + continue + + # Überspringe leere Parts (keine Daten oder nur 
Container ohne Inhalt) + # ABER: Reference Parts wurden bereits oben behandelt + if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + # Überspringe Container-Parts ohne Daten + if part.typeGroup == "container" and not part.data: + filteredParts.append((part.id, "container without data")) + continue + # Überspringe andere leere Parts (aber nicht Reference, die wurden bereits behandelt) + if not part.data: + filteredParts.append((part.id, f"no data (format: {contentFormat})")) + continue + + validParts.append(part) + logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}") + + if filteredParts: + logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}") + + logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)") + + # Baue Index nur für gültige Parts + for i, part in enumerate(validParts, 1): + contentFormat = part.metadata.get("contentFormat", "unknown") + dataPreview = "" + + if contentFormat == "extracted": + # Für Image-Parts: Zeige dass es ein Image ist + if part.typeGroup == "image": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "image" + dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content" + elif part.typeGroup == "container": + # Container ohne Daten überspringen wir bereits oben + dataPreview = "Container structure (no text content)" + else: + # Zeige Preview von extrahiertem Text + if part.data: + preview = part.data[:200] + "..." 
if len(part.data) > 200 else part.data + dataPreview = preview + else: + dataPreview = "(empty)" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "binary" + if part.typeGroup == "image": + dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)" + else: + dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)" + elif contentFormat == "reference": + dataPreview = part.metadata.get("documentReference", "reference") + + originalFileName = part.metadata.get('originalFileName', 'N/A') + + contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n" + contentPartsIndex += f" Format: {contentFormat}\n" + contentPartsIndex += f" Type: {part.typeGroup}\n" + contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n" + contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n" + contentPartsIndex += f" Original file name: {originalFileName}\n" + contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n" + contentPartsIndex += f" Data preview: {dataPreview}\n" + + if not contentPartsIndex: + contentPartsIndex = "\n(No content parts available)" + + prompt = f"""USER REQUEST: +{userPrompt} + +AVAILABLE CONTENT PARTS: +{contentPartsIndex} + +TASK: Generiere Dokument-Struktur mit Sections. +Für jede Section, spezifiziere: +- section id +- content_type (heading, paragraph, image, table, etc.) 
+- contentPartIds: [Liste von ContentPart-IDs zu verwenden] +- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist +- generation_hint: Was AI für diese Section generieren soll +- elements: [] (leer, wird in nächster Phase gefüllt) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "metadata": {{ + "title": "Document Title", + "language": "de" + }}, + "documents": [{{ + "id": "doc_1", + "title": "Document Title", + "filename": "document.{outputFormat}", + "sections": [ + {{ + "id": "section_1", + "content_type": "heading", + "generation_hint": "Main title", + "contentPartIds": [], + "contentFormats": {{}}, + "elements": [] + }}, + {{ + "id": "section_2", + "content_type": "paragraph", + "generation_hint": "Introduction paragraph", + "contentPartIds": ["part_ext_1"], + "contentFormats": {{ + "part_ext_1": "extracted" + }}, + "elements": [] + }} + ] + }}] +}} + +Return ONLY valid JSON following the structure above. +""" + return prompt + diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index a2972453..ba4bfb69 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -856,7 +856,10 @@ class ExtractionService: merged_parts = applyMerging(content_parts, merge_strategy) # Phase 6: Enhanced format with metadata preservation - # CRITICAL: For generation responses (JSON), don't add SOURCE markers - they interfere with JSON parsing + # CRITICAL: Don't add SOURCE markers for internal use - metadata is already preserved in ContentPart objects + # SOURCE markers should ONLY be added when content is returned directly to user for display/debugging + # For extraction content used in generation pipelines, metadata is in ContentPart.metadata, not in text markers + # Check if this is a generation response by looking at operationType or content structure 
isGenerationResponse = False if options and hasattr(options, 'operationType'): @@ -880,23 +883,14 @@ class ExtractionService: except: pass + # ROOT CAUSE FIX: Never add SOURCE markers - metadata is preserved in ContentPart.metadata + # SOURCE markers pollute content and cause issues when content is used in generation pipelines + # If traceability is needed, use ContentPart.metadata fields (documentId, documentMimeType, label, etc.) content_sections = [] for part in merged_parts: - if isGenerationResponse: - # For generation responses, return JSON directly without SOURCE markers - content_sections.append(part.data) - else: - # For extraction responses, include metadata in section header for traceability - doc_id = part.metadata.get("documentId", "unknown") - doc_mime = part.metadata.get("documentMimeType", "unknown") - label = part.label or "content" - - section = f""" -[SOURCE: documentId={doc_id}, mimeType={doc_mime}, label={label}] -{part.data} -[END SOURCE] -""" - content_sections.append(section) + # Always return clean content without SOURCE markers + # Metadata is available in ContentPart.metadata for traceability + content_sections.append(part.data if part.data else "") final_content = "\n\n".join(content_sections) diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 54c7e64b..213bf641 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -299,36 +299,14 @@ class RendererHtml(BaseRenderer): def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a single JSON section to HTML using AI-generated styles. Supports three content formats: reference, object (base64), extracted_text. + WICHTIG: Respektiert sectionType (content_type) für korrekte Rendering-Logik. 
""" try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) - # Check for three content formats from Phase 5D in elements - if isinstance(sectionData, list): - htmlParts = [] - for element in sectionData: - element_type = element.get("type", "") if isinstance(element, dict) else "" - - # Support three content formats from Phase 5D - if element_type == "reference": - # Document reference format - doc_ref = element.get("documentReference", "") - label = element.get("label", "Reference") - htmlParts.append(f'

[Reference: {label}]

') - continue - elif element_type == "extracted_text": - # Extracted text format - content = element.get("content", "") - source = element.get("source", "") - if content: - source_text = f' (Source: {source})' if source else '' - htmlParts.append(f'

{content}{source_text}

') - continue - - # If we processed reference/extracted_text elements, return them - if htmlParts: - return '\n'.join(htmlParts) + # WICHTIG: Respektiere sectionType (content_type) ZUERST, dann process elements entsprechend + # Process elements according to section's content_type, not just element types if sectionType == "table": # Process the section data to extract table structure @@ -339,8 +317,58 @@ class RendererHtml(BaseRenderer): processedData = self._processSectionByType(section) return self._renderJsonBulletList(processedData, styles) elif sectionType == "heading": + # Extract text from elements for heading rendering + if isinstance(sectionData, list): + # Extract text from heading elements + headingText = "" + for element in sectionData: + if isinstance(element, dict): + element_type = element.get("type", "") + if element_type == "heading": + headingText = element.get("content", element.get("text", "")) + break + elif element_type == "extracted_text": + # Use extracted text as heading if no heading element found + content = element.get("content", "") + if content and not headingText: + # Extract first line or title from extracted text + headingText = content.split('\n')[0].strip() + # Remove markdown formatting + headingText = headingText.replace('#', '').replace('**', '').strip() + break + elif "text" in element: + headingText = element.get("text", "") + break + if headingText: + return self._renderJsonHeading({"text": headingText, "level": 2}, styles) return self._renderJsonHeading(sectionData, styles) elif sectionType == "paragraph": + # Process paragraph elements, including extracted_text + if isinstance(sectionData, list): + htmlParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + if element_type == "reference": + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + htmlParts.append(f'

[Reference: {label}]

') + elif element_type == "extracted_text": + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f' (Source: {source})' if source else '' + htmlParts.append(f'

{content}{source_text}

') + elif isinstance(element, dict): + # Regular paragraph element + text = element.get("text", element.get("content", "")) + if text: + htmlParts.append(f'

{text}

') + elif isinstance(element, str): + htmlParts.append(f'

{element}

') + + if htmlParts: + return '\n'.join(htmlParts) return self._renderJsonParagraph(sectionData, styles) elif sectionType == "code_block": # Process the section data to extract code block structure @@ -351,6 +379,25 @@ class RendererHtml(BaseRenderer): processedData = self._processSectionByType(section) return self._renderJsonImage(processedData, styles) else: + # Fallback: Check for special element types first + if isinstance(sectionData, list): + htmlParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + if element_type == "reference": + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + htmlParts.append(f'

[Reference: {label}]

') + elif element_type == "extracted_text": + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f' (Source: {source})' if source else '' + htmlParts.append(f'

{content}{source_text}

') + + if htmlParts: + return '\n'.join(htmlParts) # Fallback to paragraph for unknown types return self._renderJsonParagraph(sectionData, styles) diff --git a/tests/functional/test09_document_generation_formats.py b/tests/functional/test09_document_generation_formats.py index 49860665..3e33c996 100644 --- a/tests/functional/test09_document_generation_formats.py +++ b/tests/functional/test09_document_generation_formats.py @@ -214,14 +214,14 @@ class DocumentGenerationFormatsTester: self.workflow = workflow print(f"Workflow started: {workflow.id}") - # Wait for workflow completion + # Wait for workflow completion (no timeout - wait indefinitely) print(f"Waiting for workflow completion...") - completed = await self.waitForWorkflowCompletion(timeout=300) # 5 minute timeout + completed = await self.waitForWorkflowCompletion(timeout=None) if not completed: return { "success": False, - "error": "Workflow did not complete within timeout", + "error": "Workflow did not complete", "workflowId": workflow.id, "status": workflow.status if workflow else "unknown" } @@ -243,7 +243,7 @@ class DocumentGenerationFormatsTester: "results": results } - async def waitForWorkflowCompletion(self, timeout: int = 300, checkInterval: int = 2) -> bool: + async def waitForWorkflowCompletion(self, timeout: Optional[int] = None, checkInterval: int = 2) -> bool: """Wait for workflow to complete.""" if not self.workflow: return False @@ -253,9 +253,12 @@ class DocumentGenerationFormatsTester: interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser) + if timeout is None: + print("Waiting indefinitely (no timeout)") + while True: - # Check timeout - if time.time() - startTime > timeout: + # Check timeout only if specified + if timeout is not None and time.time() - startTime > timeout: print(f"\n⏱️ Timeout after {timeout} seconds") return False @@ -455,13 +458,13 @@ class DocumentGenerationFormatsTester: self.workflow = workflow print(f"Workflow started: {workflow.id}") - # Wait for 
workflow completion - completed = await self.waitForWorkflowCompletion(timeout=300) + # Wait for workflow completion (no timeout - wait indefinitely) + completed = await self.waitForWorkflowCompletion(timeout=None) if not completed: results[testType] = { "success": False, - "error": "Workflow did not complete within timeout", + "error": "Workflow did not complete", "workflowId": workflow.id } continue From 9d4bd8ceef948b3891eb643b46caca706a116b6a Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Fri, 26 Dec 2025 00:16:08 +0100 Subject: [PATCH 11/21] refactored ai service container (3000 lines) with submodules, and enhanced generation part with dynamic chapters --- modules/services/serviceAi/mainServiceAi.py | 2377 +---------------- .../services/serviceAi/subAiCallLooping.py | 533 ++++ 2 files changed, 548 insertions(+), 2362 deletions(-) create mode 100644 modules/services/serviceAi/subAiCallLooping.py diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index f8ab4dad..777e6230 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -57,6 +57,7 @@ class AiService: from modules.services.serviceAi.subContentExtraction import ContentExtractor from modules.services.serviceAi.subStructureGeneration import StructureGenerator from modules.services.serviceAi.subStructureFilling import StructureFiller + from modules.services.serviceAi.subAiCallLooping import AiCallLooper if not hasattr(self, 'responseParser'): logger.info("Initializing ResponseParser...") @@ -77,6 +78,10 @@ class AiService: if not hasattr(self, 'structureFiller'): logger.info("Initializing StructureFiller...") self.structureFiller = StructureFiller(self.services, self) + + if not hasattr(self, 'aiCallLooper'): + logger.info("Initializing AiCallLooper...") + self.aiCallLooper = AiCallLooper(self.services, self, self.responseParser) async def callAi(self, request: AiCallRequest, progressCallback=None): 
"""Router: handles content parts via extractionService, text context via interface. @@ -214,402 +219,10 @@ Respond with ONLY a JSON object in this exact format: userPrompt: Optional[str] = None, contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content ) -> str: - """ - Shared core function for AI calls with repair-based looping system. - Automatically repairs broken JSON and continues generation seamlessly. - - Args: - prompt: The prompt to send to AI - options: AI call configuration options - debugPrefix: Prefix for debug file names - promptBuilder: Optional function to rebuild prompts for continuation - promptArgs: Optional arguments for prompt builder - operationId: Optional operation ID for progress tracking - - Returns: - Complete AI response after all iterations - """ - maxIterations = 50 # Prevent infinite loops - iteration = 0 - allSections = [] # Accumulate all sections across iterations - lastRawResponse = None # Store last raw JSON response for continuation - documentMetadata = None # Store document metadata (title, filename) from first iteration - accumulationState = None # Track accumulation state for string accumulation - - # Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID) - parentOperationId = operationId # Use the parent's operationId directly - - while iteration < maxIterations: - iteration += 1 - - # Create separate operation for each iteration with parent reference - iterationOperationId = None - if operationId: - iterationOperationId = f"{operationId}_iter_{iteration}" - self.services.chat.progressLogStart( - iterationOperationId, - "AI Call", - f"Iteration {iteration}", - "", - parentOperationId=parentOperationId - ) - - # Build iteration prompt - # CRITICAL: Build continuation prompt if we have sections OR if we have a previous response (even if broken) - # This ensures continuation prompts are built even when JSON is so broken that no sections 
can be extracted - if (len(allSections) > 0 or lastRawResponse) and promptBuilder and promptArgs: - # This is a continuation - build continuation context with raw JSON and rebuild prompt - continuationContext = buildContinuationContext(allSections, lastRawResponse) - if not lastRawResponse: - logger.warning(f"Iteration {iteration}: No previous response available for continuation!") - - # Filter promptArgs to only include parameters that buildGenerationPrompt accepts - # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services - filteredPromptArgs = { - k: v for k, v in promptArgs.items() - if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services'] - } - # Always include services if available - if not filteredPromptArgs.get('services') and hasattr(self, 'services'): - filteredPromptArgs['services'] = self.services - - # Rebuild prompt with continuation context using the provided prompt builder - iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext) - else: - # First iteration - use original prompt - iterationPrompt = prompt - - # Make AI call - try: - if iterationOperationId: - self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model") - # ARCHITECTURE: Pass ContentParts directly to AiCallRequest - # This allows model-aware chunking to handle large content properly - # ContentParts are only passed in first iteration (continuations don't need them) - request = AiCallRequest( - prompt=iterationPrompt, - context="", - options=options, - contentParts=contentParts if iteration == 1 else None # Only pass ContentParts in first iteration - ) - - # Write the ACTUAL prompt sent to AI - if iteration == 1: - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") - else: - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") - - response = await 
self.callAi(request) - result = response.content - - # Track bytes for progress reporting - bytesReceived = len(result.encode('utf-8')) if result else 0 - totalBytesSoFar = sum(len(section.get('content', '').encode('utf-8')) if isinstance(section.get('content'), str) else 0 for section in allSections) + bytesReceived - - # Update progress after AI call with byte information - if iterationOperationId: - # Format bytes for display (kB or MB) - if totalBytesSoFar < 1024: - bytesDisplay = f"{totalBytesSoFar}B" - elif totalBytesSoFar < 1024 * 1024: - bytesDisplay = f"{totalBytesSoFar / 1024:.1f}kB" - else: - bytesDisplay = f"{totalBytesSoFar / (1024 * 1024):.1f}MB" - self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})") - - # Write raw AI response to debug file - if iteration == 1: - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") - else: - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") - - # Emit stats for this iteration (only if workflow exists and has id) - if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id: - try: - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - f"ai.call.{debugPrefix}.iteration_{iteration}" - ) - except Exception as statError: - # Don't break the main loop if stat storage fails - logger.warning(f"Failed to store workflow stat: {str(statError)}") - - # Check for error response using generic error detection (errorCount > 0 or modelName == "error") - if hasattr(response, 'errorCount') and response.errorCount > 0: - errorMsg = f"Iteration {iteration}: Error response detected (errorCount={response.errorCount}), stopping loop: {result[:200] if result else 'empty'}" - logger.error(errorMsg) - break - - if hasattr(response, 'modelName') and response.modelName == "error": - errorMsg = f"Iteration {iteration}: Error response detected (modelName=error), stopping 
loop: {result[:200] if result else 'empty'}" - logger.error(errorMsg) - break - - if not result or not result.strip(): - logger.warning(f"Iteration {iteration}: Empty response, stopping") - break - - # Check if this is a text response (not document generation) - # Text responses don't need JSON parsing - return immediately after first successful response - isTextResponse = (promptBuilder is None and promptArgs is None) or debugPrefix == "text" - - if isTextResponse: - # For text responses, return the text immediately - no JSON parsing needed - logger.info(f"Iteration {iteration}: Text response received, returning immediately") - if iterationOperationId: - self.services.chat.progressLogFinish(iterationOperationId, True) - return result - - # Store raw response for continuation (even if broken) - lastRawResponse = result - - # Extract sections from response (handles both valid and broken JSON) - # Only for document generation (JSON responses) - # CRITICAL: Pass allSections and accumulationState to enable string accumulation - extractedSections, wasJsonComplete, parsedResult, accumulationState = self._extractSectionsFromResponse( - result, iteration, debugPrefix, allSections, accumulationState - ) - - # CRITICAL: Merge sections BEFORE KPI validation - # This ensures sections are preserved even if KPI validation fails - if extractedSections: - allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration) - - # Define KPIs if we just entered accumulation mode (iteration 1, incomplete JSON) - if accumulationState and accumulationState.isAccumulationMode and iteration == 1 and not accumulationState.kpis: - logger.info(f"Iteration {iteration}: Defining KPIs for accumulation tracking") - continuationContext = buildContinuationContext(allSections, result) - # Pass raw response string from first iteration for KPI definition - kpiDefinitions = await self._defineKpisFromPrompt( - userPrompt or prompt, - result, # Pass raw JSON string 
from first iteration - continuationContext, - debugPrefix - ) - # Initialize KPIs with currentValue = 0 - accumulationState.kpis = [{**kpi, "currentValue": 0} for kpi in kpiDefinitions] - logger.info(f"Defined {len(accumulationState.kpis)} KPIs: {[kpi.get('id') for kpi in accumulationState.kpis]}") - - # Extract and validate KPIs (if in accumulation mode with KPIs defined) - if accumulationState and accumulationState.isAccumulationMode and accumulationState.kpis: - # For KPI extraction, prefer accumulated JSON string over repaired JSON - # because repairBrokenJson may lose data (e.g., empty rows array when JSON is incomplete) - updatedKpis = [] - - # First try to extract from parsedResult (repaired JSON) - if parsedResult: - try: - updatedKpis = JsonResponseHandler.extractKpiValuesFromJson( - parsedResult, - accumulationState.kpis - ) - # Check if we got meaningful values (non-zero) - hasValidValues = any(kpi.get("currentValue", 0) > 0 for kpi in updatedKpis) - if not hasValidValues and accumulationState.accumulatedJsonString: - # Repaired JSON has empty values, try accumulated string - logger.debug("Repaired JSON has empty KPI values, trying accumulated JSON string") - updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( - accumulationState.accumulatedJsonString, - accumulationState.kpis - ) - except Exception as e: - logger.debug(f"Error extracting KPIs from parsedResult: {e}") - updatedKpis = [] - - # If no parsedResult or extraction failed, try accumulated string - if not updatedKpis and accumulationState.accumulatedJsonString: - try: - updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( - accumulationState.accumulatedJsonString, - accumulationState.kpis - ) - except Exception as e: - logger.debug(f"Error extracting KPIs from accumulated JSON string: {e}") - updatedKpis = [] - - if updatedKpis: - shouldProceed, reason = JsonResponseHandler.validateKpiProgression( - accumulationState, - updatedKpis - ) - - if not shouldProceed: 
- logger.warning(f"Iteration {iteration}: KPI validation failed: {reason}") - if iterationOperationId: - self.services.chat.progressLogFinish(iterationOperationId, False) - if operationId: - self.services.chat.progressLogUpdate(operationId, 0.9, f"KPI validation failed: {reason} ({iteration} iterations)") - break - - # Update KPIs in accumulation state - accumulationState.kpis = updatedKpis - logger.info(f"Iteration {iteration}: KPIs updated: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}") - - # Check if all KPIs completed - allCompleted = True - for kpi in updatedKpis: - targetValue = kpi.get("targetValue", 0) - currentValue = kpi.get("currentValue", 0) - if currentValue < targetValue: - allCompleted = False - break - - if allCompleted: - logger.info(f"Iteration {iteration}: All KPIs completed, finishing accumulation") - wasJsonComplete = True # Mark as complete to exit loop - - # CRITICAL: Handle JSON fragments (continuation content) - # Fragment merging happens inside _extractSectionsFromResponse - # If merge fails (returns wasJsonComplete=True), stop iterations and complete JSON - if not extractedSections and allSections: - if wasJsonComplete: - # Merge failed - stop iterations, complete JSON with available data - logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - Stopping iterations, completing JSON with available data") - if iterationOperationId: - self.services.chat.progressLogFinish(iterationOperationId, False) - if operationId: - self.services.chat.progressLogUpdate(operationId, 0.9, f"Merge failed, completing JSON ({iteration} iterations)") - break - - # Fragment was detected and merged successfully - logger.info(f"Iteration {iteration}: JSON fragment detected and merged, continuing") - # Don't break - fragment was merged, continue to get more content if needed - # Check if we should continue based on JSON completeness - shouldContinue = self._shouldContinueGeneration( - allSections, - iteration, - wasJsonComplete, - result - ) - 
if shouldContinue: - if iterationOperationId: - self.services.chat.progressLogUpdate(iterationOperationId, 0.8, "Fragment merged, continuing") - self.services.chat.progressLogFinish(iterationOperationId, True) - continue - else: - # Done - fragment was merged and JSON is complete - if iterationOperationId: - self.services.chat.progressLogFinish(iterationOperationId, True) - if operationId: - self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, fragment merged)") - logger.info(f"Generation complete after {iteration} iterations: fragment merged") - break - - # Extract document metadata from first iteration if available - if iteration == 1 and parsedResult and not documentMetadata: - documentMetadata = self._extractDocumentMetadata(parsedResult) - - # Update progress after parsing - if iterationOperationId: - if extractedSections: - self.services.chat.progressLogUpdate(iterationOperationId, 0.8, f"Extracted {len(extractedSections)} sections") - - if not extractedSections: - # CRITICAL: If JSON was incomplete/broken, continue even if no sections extracted - # This allows the AI to retry and complete the broken JSON - if not wasJsonComplete: - logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt") - continue - # If JSON was complete but no sections extracted - check if it was a fragment - # Fragments are handled above, so if we get here and it's complete, it's an error - logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping") - break - - # NOTE: Section merging now happens BEFORE KPI validation (see above) - # This ensures sections are preserved even if KPI validation fails - - # Calculate total bytes in merged content for progress display - merged_json_str = json.dumps(allSections, indent=2, ensure_ascii=False) - totalBytesGenerated = len(merged_json_str.encode('utf-8')) - - # Update main operation with byte progress - if 
operationId: - # Format bytes for display - if totalBytesGenerated < 1024: - bytesDisplay = f"{totalBytesGenerated}B" - elif totalBytesGenerated < 1024 * 1024: - bytesDisplay = f"{totalBytesGenerated / 1024:.1f}kB" - else: - bytesDisplay = f"{totalBytesGenerated / (1024 * 1024):.1f}MB" - # Estimate progress based on iterations (rough estimate) - estimatedProgress = min(0.9, 0.4 + (iteration * 0.1)) - self.services.chat.progressLogUpdate(operationId, estimatedProgress, f"Pipeline: {bytesDisplay} (iteration {iteration})") - - # Log merged sections for debugging - self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}") - - # Check if we should continue (completion detection) - # Simple logic: JSON completeness determines continuation - shouldContinue = self._shouldContinueGeneration( - allSections, - iteration, - wasJsonComplete, - result - ) - - if shouldContinue: - # Finish iteration operation (will continue with next iteration) - if iterationOperationId: - # Show byte progress in iteration completion - iterBytes = len(result.encode('utf-8')) if result else 0 - if iterBytes < 1024: - iterBytesDisplay = f"{iterBytes}B" - elif iterBytes < 1024 * 1024: - iterBytesDisplay = f"{iterBytes / 1024:.1f}kB" - else: - iterBytesDisplay = f"{iterBytes / (1024 * 1024):.1f}MB" - self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Completed ({iterBytesDisplay})") - self.services.chat.progressLogFinish(iterationOperationId, True) - continue - else: - # Done - finish iteration and update main operation - if iterationOperationId: - # Show final byte count - finalBytes = len(merged_json_str.encode('utf-8')) - if finalBytes < 1024: - finalBytesDisplay = f"{finalBytes}B" - elif finalBytes < 1024 * 1024: - finalBytesDisplay = f"{finalBytes / 1024:.1f}kB" - else: - finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB" - self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Complete ({finalBytesDisplay})") - 
self.services.chat.progressLogFinish(iterationOperationId, True) - if operationId: - # Show final size in main operation - finalBytes = len(merged_json_str.encode('utf-8')) - if finalBytes < 1024: - finalBytesDisplay = f"{finalBytes}B" - elif finalBytes < 1024 * 1024: - finalBytesDisplay = f"{finalBytes / 1024:.1f}kB" - else: - finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB" - self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete: {finalBytesDisplay} ({iteration} iterations, {len(allSections)} sections)") - logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections") - break - - except Exception as e: - logger.error(f"Error in AI call iteration {iteration}: {str(e)}") - if iterationOperationId: - self.services.chat.progressLogFinish(iterationOperationId, False) - break - - if iteration >= maxIterations: - logger.warning(f"AI call stopped after maximum iterations ({maxIterations})") - - # CRITICAL: Complete any incomplete structures in sections before building final result - # This ensures JSON is properly closed even if merge failed or iterations stopped early - allSections = JsonResponseHandler.completeIncompleteStructures(allSections) - - # Build final result from accumulated sections - final_result = self._buildFinalResultFromSections(allSections, documentMetadata) - - # Write final result to debug file - self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result") - - return final_result - - # JSON merging logic moved to subJsonResponseHandling.py + """Delegate to AiCallLooper.""" + return await self.aiCallLooper.callAiWithLooping( + prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts + ) async def _defineKpisFromPrompt( self, @@ -618,91 +231,12 @@ Respond with ONLY a JSON object in this exact format: continuationContext: Dict[str, Any], debugPrefix: str = "kpi" ) -> List[Dict[str, Any]]: - """ - Make separate AI call to define KPIs 
based on user prompt and incomplete JSON. - - Args: - userPrompt: Original user prompt - rawJsonString: Raw JSON string from first iteration response - continuationContext: Continuation context (not used for JSON, kept for compatibility) - - Returns: - List of KPI definitions: [{"id": str, "description": str, "jsonPath": str, "targetValue": int}, ...] - """ - # Use raw JSON string from first iteration response - if rawJsonString: - # Remove markdown code fences if present - from modules.shared.jsonUtils import stripCodeFences - incompleteJson = stripCodeFences(rawJsonString.strip()) - else: - incompleteJson = "Not available" - - kpiDefinitionPrompt = f"""Analyze the user request and incomplete JSON to define KPIs (Key Performance Indicators) for tracking progress. - -User Request: -{userPrompt} - -Delivered JSON part: -{incompleteJson} - -Task: Define which JSON items should be tracked to measure completion progress. - -IMPORTANT: Analyze the Delivered JSON part structure to understand what is being tracked: -1. Identify the structure type (table with rows, list with items, etc.) -2. Determine what the jsonPath actually counts (number of rows, number of items, etc.) -3. Calculate targetValue based on what is being tracked, NOT the total quantity requested - -For each trackable item, provide: -- id: Unique identifier (use descriptive name) -- description: What this KPI measures (be specific about what is counted) -- jsonPath: Path to extract value from JSON (use dot notation with array indices, e.g., "documents[0].sections[1].elements[0].rows") -- targetValue: Target value to reach (integer) - MUST match what jsonPath actually tracks (rows count, items count, etc.) 
- -Return ONLY valid JSON in this format: -{{ - "kpis": [ - {{ - "id": "unique_id", - "description": "Description of what is measured", - "jsonPath": "path.to.value", - "targetValue": 0 - }} - ] -}} - -If no trackable items can be identified, return: {{"kpis": []}} -""" - - try: - request = AiCallRequest( - prompt=kpiDefinitionPrompt, - options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.SPEED, - processingMode=ProcessingModeEnum.BASIC - ) - ) - - # Write KPI definition prompt to debug file - self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt") - - response = await self.callAi(request) - - # Write KPI definition response to debug file - self.services.utils.writeDebugFile(response.content, f"{debugPrefix}_kpi_definition_response") - - # Parse response - extracted = extractJsonString(response.content) - kpiResponse = json.loads(extracted) - - kpiDefinitions = kpiResponse.get("kpis", []) - logger.info(f"Defined {len(kpiDefinitions)} KPIs for tracking") - - return kpiDefinitions - - except Exception as e: - logger.warning(f"Failed to define KPIs: {e}, continuing without KPI tracking") - return [] + """Delegate to AiCallLooper.""" + return await self.aiCallLooper._defineKpisFromPrompt( + userPrompt, rawJsonString, continuationContext, debugPrefix + ) + + # JSON merging logic moved to subJsonResponseHandling.py def _extractSectionsFromResponse( self, @@ -717,106 +251,6 @@ If no trackable items can be identified, return: {{"kpis": []}} result, iteration, debugPrefix, allSections, accumulationState ) - def _extractSectionsFromResponse_OLD( - self, - result: str, - iteration: int, - debugPrefix: str, - allSections: List[Dict[str, Any]] = None, - accumulationState: Optional[JsonAccumulationState] = None - ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]: - """ - Extract sections from AI response, handling both valid and broken JSON. 
- - NEW BEHAVIOR: - - First iteration: Check if complete, if not start accumulation - - Subsequent iterations: Accumulate strings, parse when complete - - Returns: - Tuple of: - - sections: Extracted sections - - wasJsonComplete: True if JSON is complete - - parsedResult: Parsed JSON object - - updatedAccumulationState: Updated accumulation state (None if not in accumulation mode) - """ - if allSections is None: - allSections = [] - - if iteration == 1: - # First iteration - check if complete - parsed = None - try: - extracted = extractJsonString(result) - parsed = json.loads(extracted) - - # Check completeness - if JsonResponseHandler.isJsonComplete(parsed): - # Complete JSON - no accumulation needed - sections = extractSectionsFromDocument(parsed) - logger.info(f"Iteration 1: Complete JSON detected, no accumulation needed") - return sections, True, parsed, None # No accumulation - except Exception: - pass - - # Incomplete - try to extract partial sections from broken JSON - logger.info(f"Iteration 1: Incomplete JSON detected, attempting to extract partial sections") - - partialSections = [] - if parsed: - # Try to extract sections from parsed (even if incomplete) - partialSections = extractSectionsFromDocument(parsed) - else: - # Try to repair broken JSON and extract sections - try: - repaired = repairBrokenJson(result) - if repaired: - partialSections = extractSectionsFromDocument(repaired) - parsed = repaired # Use repaired version for accumulation state - except Exception: - pass # If repair fails, continue with empty sections - - - # Define KPIs (async call - need to handle this) - # For now, create accumulation state without KPIs, will be updated after async call - accumulationState = JsonAccumulationState( - accumulatedJsonString=result, - isAccumulationMode=True, - lastParsedResult=parsed, - allSections=partialSections, - kpis=[] - ) - - # Note: KPI definition will be done in the caller (async context) - return partialSections, False, parsed, 
accumulationState - - else: - # Subsequent iterations - accumulate - if accumulationState and accumulationState.isAccumulationMode: - accumulated, sections, isComplete, parsedResult = \ - JsonResponseHandler.accumulateAndParseJsonFragments( - accumulationState.accumulatedJsonString, - result, - allSections, - iteration - ) - - # Update accumulation state - accumulationState.accumulatedJsonString = accumulated - accumulationState.lastParsedResult = parsedResult - accumulationState.allSections = allSections + sections if sections else allSections - accumulationState.isAccumulationMode = not isComplete - - # Log accumulated JSON for debugging - if parsedResult: - accumulated_json_str = json.dumps(parsedResult, indent=2, ensure_ascii=False) - self.services.utils.writeDebugFile(accumulated_json_str, f"{debugPrefix}_accumulated_json_iteration_{iteration}.json") - - return sections, isComplete, parsedResult, accumulationState - else: - # No accumulation mode - process normally (shouldn't happen) - logger.warning(f"Iteration {iteration}: No accumulation state but iteration > 1") - return [], False, None, None - def _shouldContinueGeneration( self, allSections: List[Dict[str, Any]], @@ -829,85 +263,6 @@ If no trackable items can be identified, return: {{"kpis": []}} allSections, iteration, wasJsonComplete, rawResponse ) - def _shouldContinueGeneration_OLD( - self, - allSections: List[Dict[str, Any]], - iteration: int, - wasJsonComplete: bool, - rawResponse: str = None - ) -> bool: - """ - Determine if AI generation loop should continue. - - CRITICAL: This is ONLY about AI Loop Completion, NOT Action DoD! - Action DoD is checked AFTER the AI Loop completes in _refineDecide. - - Simple logic: - - If JSON parsing failed or incomplete → continue (needs more content) - - If JSON parses successfully and is complete → stop (all content delivered) - - Loop detection prevents infinite loops - - CRITICAL: JSON completeness is determined by parsing, NOT by last character check! 
- Returns True if we should continue, False if AI Loop is done. - """ - if len(allSections) == 0: - return True # No sections yet, continue - - # CRITERION 1: If JSON was incomplete/broken (parsing failed or incomplete) - continue to repair/complete - if not wasJsonComplete: - logger.info(f"Iteration {iteration}: JSON incomplete/broken - continuing to complete") - return True - - # CRITERION 2: JSON is complete (parsed successfully) - check for loop detection - if self._isStuckInLoop(allSections, iteration): - logger.warning(f"Iteration {iteration}: Detected potential infinite loop - stopping AI loop") - return False - - # JSON is complete and not stuck in loop - done - logger.info(f"Iteration {iteration}: JSON complete - AI loop done") - return False - - def _isStuckInLoop( - self, - allSections: List[Dict[str, Any]], - iteration: int - ) -> bool: - """ - Detect if we're stuck in a loop (same content being repeated). - - Generic approach: Check if recent iterations are adding minimal or duplicate content. 
- """ - if iteration < 3: - return False # Need at least 3 iterations to detect a loop - - if len(allSections) == 0: - return False - - # Check if last section is very small (might be stuck) - lastSection = allSections[-1] - elements = lastSection.get("elements", []) - - if isinstance(elements, list) and elements: - lastElem = elements[-1] if elements else {} - else: - lastElem = elements if isinstance(elements, dict) else {} - - # Check content size of last section - lastSectionSize = 0 - if isinstance(lastElem, dict): - for key, value in lastElem.items(): - if isinstance(value, str): - lastSectionSize += len(value) - elif isinstance(value, list): - lastSectionSize += len(str(value)) - - # If last section is very small and we've done many iterations, might be stuck - if lastSectionSize < 100 and iteration > 10: - logger.warning(f"Potential loop detected: iteration {iteration}, last section size {lastSectionSize}") - return True - - return False - def _extractDocumentMetadata( self, parsedResult: Dict[str, Any] @@ -915,31 +270,6 @@ If no trackable items can be identified, return: {{"kpis": []}} """Delegate to ResponseParser.""" return self.responseParser.extractDocumentMetadata(parsedResult) - def _extractDocumentMetadata_OLD( - self, - parsedResult: Dict[str, Any] - ) -> Optional[Dict[str, Any]]: - """ - Extract document metadata (title, filename) from parsed AI response. - Returns dict with 'title' and 'filename' keys if found, None otherwise. 
- """ - if not isinstance(parsedResult, dict): - return None - - # Try to get from documents array (preferred structure) - if "documents" in parsedResult and isinstance(parsedResult["documents"], list) and len(parsedResult["documents"]) > 0: - firstDoc = parsedResult["documents"][0] - if isinstance(firstDoc, dict): - title = firstDoc.get("title") - filename = firstDoc.get("filename") - if title or filename: - return { - "title": title, - "filename": filename - } - - return None - def _buildFinalResultFromSections( self, allSections: List[Dict[str, Any]], @@ -948,47 +278,6 @@ If no trackable items can be identified, return: {{"kpis": []}} """Delegate to ResponseParser.""" return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata) - def _buildFinalResultFromSections_OLD( - self, - allSections: List[Dict[str, Any]], - documentMetadata: Optional[Dict[str, Any]] = None - ) -> str: - """ - Build final JSON result from accumulated sections. - Uses AI-provided metadata (title, filename) if available. 
- """ - if not allSections: - return "" - - # Extract metadata from AI response if available - title = "Generated Document" - filename = "document.json" - if documentMetadata: - if documentMetadata.get("title"): - title = documentMetadata["title"] - if documentMetadata.get("filename"): - filename = documentMetadata["filename"] - - # Build documents structure - # Assuming single document for now - documents = [{ - "id": "doc_1", - "title": title, - "filename": filename, - "sections": allSections - }] - - result = { - "metadata": { - "split_strategy": "single_document", - "source_documents": [], - "extraction_method": "ai_generation" - }, - "documents": documents - } - - return json.dumps(result, indent=2) - # Public API Methods # Planning AI Call @@ -1163,412 +452,6 @@ If no trackable items can be identified, return: {{"kpis": []}} documents, userPrompt, actionParameters, parentOperationId ) - async def _clarifyDocumentIntents_OLD( - self, - documents: List[ChatDocument], - userPrompt: str, - actionParameters: Dict[str, Any], - parentOperationId: str - ) -> List[DocumentIntent]: - """ - Phase 5A: Analysiert, welche Dokumente Extraktion vs Referenz benötigen. - Gibt DocumentIntent für jedes Dokument zurück. - - Args: - documents: Liste der zu verarbeitenden Dokumente - userPrompt: User-Anfrage - actionParameters: Action-spezifische Parameter (z.B. 
resultType, outputFormat) - parentOperationId: Parent Operation-ID für ChatLog-Hierarchie - - Returns: - Liste von DocumentIntent-Objekten - """ - from modules.datamodels.datamodelChat import ChatDocument - - # Erstelle Operation-ID für Intent-Analyse - intentOperationId = f"{parentOperationId}_intent_analysis" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - intentOperationId, - "Document Intent Analysis", - "Intent Analysis", - f"Analyzing {len(documents)} documents", - parentOperationId=parentOperationId - ) - - try: - # Mappe pre-extracted JSONs zu ursprünglichen Dokument-IDs für Intent-Analyse - documentMapping = {} # Maps original doc ID -> JSON doc ID - resolvedDocuments = [] - - for doc in documents: - preExtracted = self._resolvePreExtractedDocument(doc) - if preExtracted: - originalDocId = preExtracted["originalDocument"]["id"] - documentMapping[originalDocId] = doc.id - # Erstelle temporäres ChatDocument für ursprüngliches Dokument - from modules.datamodels.datamodelChat import ChatDocument - originalDoc = ChatDocument( - id=originalDocId, - fileName=preExtracted["originalDocument"]["fileName"], - mimeType=preExtracted["originalDocument"]["mimeType"], - fileSize=preExtracted["originalDocument"].get("fileSize", doc.fileSize), - fileId=doc.fileId, # Behalte fileId vom JSON - messageId=doc.messageId if hasattr(doc, 'messageId') else None # Behalte messageId falls vorhanden - ) - resolvedDocuments.append(originalDoc) - else: - resolvedDocuments.append(doc) - - # Baue Intent-Analyse-Prompt mit ursprünglichen Dokumenten - intentPrompt = self._buildIntentAnalysisPrompt(userPrompt, resolvedDocuments, actionParameters) - - # AI-Call (verwende callAiPlanning für einfache JSON-Responses) - # Debug-Logs werden bereits von callAiPlanning geschrieben - aiResponse = await self.callAiPlanning( - prompt=intentPrompt, - debugType="document_intent_analysis" - ) - - # Parse Result und mappe zurück zu JSON-Dokument-IDs falls nötig - 
intentsData = json.loads(self.services.utils.jsonExtractString(aiResponse)) - documentIntents = [] - for intent in intentsData.get("intents", []): - docId = intent.get("documentId") - # Wenn Intent für ursprüngliches Dokument, mappe zurück zu JSON-Dokument-ID - if docId in documentMapping: - intent["documentId"] = documentMapping[docId] - documentIntents.append(DocumentIntent(**intent)) - - # Debug-Log (harmonisiert) - self.services.utils.writeDebugFile( - json.dumps([intent.dict() for intent in documentIntents], indent=2), - "document_intent_analysis_result" - ) - - # ChatLog abschließen - self.services.chat.progressLogFinish(intentOperationId, True) - - return documentIntents - - except Exception as e: - self.services.chat.progressLogFinish(intentOperationId, False) - logger.error(f"Error in _clarifyDocumentIntents: {str(e)}") - raise - - def _resolvePreExtractedDocument(self, document: ChatDocument) -> Optional[Dict[str, Any]]: - """ - Prüft ob ein JSON-Dokument bereits extrahierte ContentParts enthält. - Gibt Dict zurück mit: - - originalDocument: ChatDocument-Info des ursprünglichen Dokuments - - contentExtracted: ContentExtracted-Objekt mit Parts - - parts: Liste der ContentParts - - Returns None wenn kein pre-extracted Format erkannt wird. 
- """ - if document.mimeType != "application/json": - logger.debug(f"Document {document.id} is not JSON (mimeType={document.mimeType}), skipping pre-extracted check") - return None - - try: - docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) - if not docBytes: - return None - - docData = docBytes.decode('utf-8') - jsonData = json.loads(docData) - - if not isinstance(jsonData, dict): - return None - - # Check for ContentExtracted format - # Nur Format 1 (ActionDocument-Format mit validationMetadata) wird unterstützt - documentData = None - - validationMetadata = jsonData.get("validationMetadata", {}) - actionType = validationMetadata.get("actionType") - logger.debug(f"JSON document {document.id}: validationMetadata.actionType={actionType}, keys={list(jsonData.keys())}") - - if actionType == "context.extractContent": - # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} - documentData = jsonData.get("documentData") - logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}, documentData keys: {list(documentData.keys()) if documentData else None}") - else: - logger.debug(f"JSON document {document.id} does not have actionType='context.extractContent' (got: {actionType})") - - if documentData: - from modules.datamodels.datamodelExtraction import ContentExtracted - - try: - # Stelle sicher, dass "id" vorhanden ist - if "id" not in documentData: - documentData["id"] = document.id - - contentExtracted = ContentExtracted(**documentData) - - if contentExtracted.parts: - # Extrahiere ursprüngliche Dokument-Info aus den Parts - originalDocId = None - originalFileName = None - originalMimeType = None - - for part in contentExtracted.parts: - if part.metadata: - # Versuche ursprüngliche Dokument-Info zu finden - if not originalDocId and part.metadata.get("documentId"): - originalDocId = part.metadata.get("documentId") - if not originalFileName and 
part.metadata.get("originalFileName"): - originalFileName = part.metadata.get("originalFileName") - if not originalMimeType and part.metadata.get("documentMimeType"): - originalMimeType = part.metadata.get("documentMimeType") - - # Falls nicht gefunden, versuche aus documentName zu extrahieren - if not originalFileName: - # Versuche aus documentName zu extrahieren (z.B. "B2025-02c_28_extracted_...json" -> "B2025-02c_28.pdf") - if document.fileName and "_extracted_" in document.fileName: - originalFileName = document.fileName.split("_extracted_")[0] + ".pdf" - - return { - "originalDocument": { - "id": originalDocId or document.id, - "fileName": originalFileName or document.fileName, - "mimeType": originalMimeType or "application/pdf", - "fileSize": document.fileSize - }, - "contentExtracted": contentExtracted, - "parts": contentExtracted.parts - } - except Exception as parseError: - logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}") - logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}") - import traceback - logger.debug(f"Parse error traceback: {traceback.format_exc()}") - return None - else: - logger.debug(f"JSON document {document.id} has no documentData (actionType={actionType})") - - return None - except Exception as e: - logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}") - return None - - async def _extractTextFromImage(self, imagePart: ContentPart, extractionPrompt: str) -> Optional[str]: - """ - Extrahiere Text aus einem Image-Part mit Vision AI. - - Args: - imagePart: ContentPart mit typeGroup="image" - extractionPrompt: Prompt für die Text-Extraktion - - Returns: - Extrahierter Text oder None bei Fehler - """ - try: - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum - - # Final extraction prompt - finalPrompt = extractionPrompt or "Extract all text content from this image. 
Return only the extracted text, no additional formatting." - - # Debug-Log (harmonisiert) - self.services.utils.writeDebugFile( - finalPrompt, - f"content_extraction_prompt_image_{imagePart.id}" - ) - - # Erstelle AI-Call-Request mit Image-Part - request = AiCallRequest( - prompt=finalPrompt, - context="", - options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE), - contentParts=[imagePart] - ) - - # Verwende AI-Service für Vision AI-Verarbeitung - response = await self.services.ai.callAi(request) - - # Debug-Log für Response (harmonisiert) - if response and response.content: - self.services.utils.writeDebugFile( - response.content, - f"content_extraction_response_image_{imagePart.id}" - ) - - if response and response.content: - return response.content.strip() - - # Kein Content zurückgegeben - return error message für Debugging - errorMsg = f"Vision AI extraction failed: No content returned for image {imagePart.id}" - logger.warning(errorMsg) - return f"[ERROR: {errorMsg}]" - except Exception as e: - errorMsg = f"Vision AI extraction failed for image {imagePart.id}: {str(e)}" - logger.error(errorMsg) - import traceback - logger.debug(f"Traceback: {traceback.format_exc()}") - # Return error message statt None für Debugging - return f"[ERROR: {errorMsg}]" - - async def _processTextContentWithAi(self, textPart: ContentPart, extractionPrompt: str) -> Optional[str]: - """ - Verarbeite Text-Content mit AI basierend auf extractionPrompt. - - WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Text - (z.B. aus PDF-Text-Layer). Wenn "extract" Intent vorhanden ist, muss dieser Text mit AI - verarbeitet werden (Transformation, Strukturierung, etc.) basierend auf extractionPrompt. 
- - Args: - textPart: ContentPart mit typeGroup="text" (oder anderer Text-basierter Typ) - extractionPrompt: Prompt für die AI-Verarbeitung des Textes - - Returns: - AI-verarbeiteter Text oder None bei Fehler - """ - try: - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum - - # Final extraction prompt - finalPrompt = extractionPrompt or "Process and extract the key information from the following text content." - - # Debug-Log (harmonisiert) - log prompt with text preview - textPreview = textPart.data[:500] + "..." if textPart.data and len(textPart.data) > 500 else (textPart.data or "") - promptWithContext = f"{finalPrompt}\n\n--- Text Content (preview) ---\n{textPreview}" - self.services.utils.writeDebugFile( - promptWithContext, - f"content_extraction_prompt_text_{textPart.id}" - ) - - # Erstelle Text-ContentPart für AI-Verarbeitung - # Verwende den vorhandenen Text als Input - textContentPart = ContentPart( - id=textPart.id, - label=textPart.label, - typeGroup="text", - mimeType="text/plain", - data=textPart.data if textPart.data else "", - metadata=textPart.metadata.copy() if textPart.metadata else {} - ) - - # Erstelle AI-Call-Request mit Text-Part - request = AiCallRequest( - prompt=finalPrompt, - context="", - options=AiCallOptions(operationType=OperationTypeEnum.DATA_EXTRACT), - contentParts=[textContentPart] - ) - - # Verwende AI-Service für Text-Verarbeitung - response = await self.services.ai.callAi(request) - - # Debug-Log für Response (harmonisiert) - if response and response.content: - self.services.utils.writeDebugFile( - response.content, - f"content_extraction_response_text_{textPart.id}" - ) - - if response and response.content: - return response.content.strip() - - # Kein Content zurückgegeben - return error message für Debugging - errorMsg = f"AI text processing failed: No content returned for text part {textPart.id}" - logger.warning(errorMsg) - return f"[ERROR: {errorMsg}]" - except Exception as e: - 
errorMsg = f"AI text processing failed for text part {textPart.id}: {str(e)}" - logger.error(errorMsg) - import traceback - logger.debug(f"Traceback: {traceback.format_exc()}") - # Return error message statt None für Debugging - return f"[ERROR: {errorMsg}]" - - def _buildIntentAnalysisPrompt( - self, - userPrompt: str, - documents: List[ChatDocument], - actionParameters: Dict[str, Any] - ) -> str: - """Baue Prompt für Intent-Analyse.""" - # Baue Dokument-Liste - zeige ursprüngliche Dokumente für pre-extracted JSONs - docListText = "" - for i, doc in enumerate(documents, 1): - # Prüfe ob es ein pre-extracted JSON ist - preExtracted = self._resolvePreExtractedDocument(doc) - - if preExtracted: - # Zeige ursprüngliches Dokument statt JSON - originalDoc = preExtracted["originalDocument"] - partsInfo = f" (contains {len(preExtracted['parts'])} pre-extracted parts: {', '.join([p.typeGroup for p in preExtracted['parts'] if p.data and len(str(p.data)) > 0])})" - docListText += f"\n{i}. Document ID: {originalDoc['id']}\n" - docListText += f" File Name: {originalDoc['fileName']}{partsInfo}\n" - docListText += f" MIME Type: {originalDoc['mimeType']}\n" - docListText += f" File Size: {originalDoc.get('fileSize', doc.fileSize)} bytes\n" - else: - # Normales Dokument - docListText += f"\n{i}. Document ID: {doc.id}\n" - docListText += f" File Name: {doc.fileName}\n" - docListText += f" MIME Type: {doc.mimeType}\n" - docListText += f" File Size: {doc.fileSize} bytes\n" - - outputFormat = actionParameters.get("outputFormat", "txt") - - prompt = f"""USER REQUEST: -{userPrompt} - -DOCUMENTS TO ANALYZE: -{docListText} - -TASK: For each document, determine its intents (can be multiple): -- "extract": Content extraction needed (text, structure, OCR, etc.) 
-- "render": Image/binary should be rendered as-is (visual element) -- "reference": Document reference/attachment (no extraction, just reference) - -OUTPUT FORMAT: {outputFormat} - -RETURN JSON: -{{ - "intents": [ - {{ - "documentId": "doc_1", - "intents": ["extract"], # Array - can contain multiple! - "extractionPrompt": "Extract all text content, preserving structure", - "reasoning": "User needs text content for document generation" - }}, - {{ - "documentId": "doc_2", - "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering - "extractionPrompt": "Extract text content from image using vision AI", - "reasoning": "Image contains text that needs extraction, but also should be rendered visually" - }}, - {{ - "documentId": "doc_3", - "intents": ["reference"], - "extractionPrompt": null, - "reasoning": "Document is only used as reference, no extraction needed" - }} - ] -}} - -CRITICAL RULES: -1. For images (mimeType starts with "image/"): - - If user wants to "include" or "show" images → add "render" - - If user wants to "analyze", "read text", or "extract text" from images → add "extract" - - Can have BOTH "extract" and "render" if image needs both text extraction and visual rendering - -2. For text documents: - - If user mentions "template" or "structure" → "reference" or "extract" based on context - - If user mentions "reference" or "context" → "reference" - - Default → "extract" - -3. Consider output format: - - For formats like PDF, DOCX, PPTX: images usually need "render" - - For formats like CSV, JSON: usually "extract" only - - For HTML: can have both "extract" and "render" - -Return ONLY valid JSON following the structure above. -""" - return prompt - async def _extractAndPrepareContent( self, documents: List[ChatDocument], @@ -1580,514 +463,6 @@ Return ONLY valid JSON following the structure above. 
documents, documentIntents, parentOperationId, self._getIntentForDocument ) - async def _extractAndPrepareContent_OLD( - self, - documents: List[ChatDocument], - documentIntents: List[DocumentIntent], - parentOperationId: str - ) -> List[ContentPart]: - """ - Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor. - Gibt Liste von ContentParts im passenden Format zurück. - - WICHTIG: Ein Dokument kann mehrere ContentParts erzeugen, wenn mehrere Intents vorhanden sind. - Beispiel: Bild mit intents=["extract", "render"] erzeugt: - - ContentPart(contentFormat="object", ...) für Rendering - - ContentPart(contentFormat="extracted", ...) für Text-Analyse - - Args: - documents: Liste der zu verarbeitenden Dokumente - documentIntents: Liste von DocumentIntent-Objekten - parentOperationId: Parent Operation-ID für ChatLog-Hierarchie - - Returns: - Liste von ContentParts mit vollständigen Metadaten - """ - # Erstelle Operation-ID für Extraktion - extractionOperationId = f"{parentOperationId}_content_extraction" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - extractionOperationId, - "Content Extraction", - "Extraction", - f"Extracting from {len(documents)} documents", - parentOperationId=parentOperationId - ) - - try: - allContentParts = [] - - for document in documents: - # Check if document is already a ContentExtracted document (pre-extracted JSON) - logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content") - preExtracted = self._resolvePreExtractedDocument(document) - - if preExtracted: - logger.info(f"✅ Found pre-extracted document: {document.fileName} -> Original: {preExtracted['originalDocument']['fileName']}") - logger.info(f" Pre-extracted document ID: {document.id}, Original document ID: {preExtracted['originalDocument']['id']}") - logger.info(f" ContentParts count: {len(preExtracted['contentExtracted'].parts) if 
preExtracted['contentExtracted'].parts else 0}") - - # Verwende bereits extrahierte ContentParts direkt - contentExtracted = preExtracted["contentExtracted"] - - # WICHTIG: Intent muss für das JSON-Dokument gefunden werden, nicht für das Original - # (Intent-Analyse mappt bereits zurück zu JSON-Dokument-ID) - intent = self._getIntentForDocument(document.id, documentIntents) - logger.info(f" Intent lookup for document {document.id}: found={intent is not None}") - if intent: - logger.info(f" Intent: {intent.intents}, extractionPrompt: {intent.extractionPrompt[:100] if intent.extractionPrompt else None}...") - else: - logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}") - - if contentExtracted.parts: - for part in contentExtracted.parts: - # Überspringe leere Parts (Container ohne Daten) - if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): - if part.typeGroup == "container": - continue # Überspringe leere Container - - if not part.metadata: - part.metadata = {} - - # Ensure metadata is complete - if "documentId" not in part.metadata: - part.metadata["documentId"] = document.id - - # WICHTIG: Prüfe Intent für dieses Part - partIntent = intent.intents if intent else ["extract"] - - # Debug-Logging für Intent-Verarbeitung - logger.debug(f"Processing part {part.id}: typeGroup={part.typeGroup}, intents={partIntent}, hasData={bool(part.data)}, dataLength={len(str(part.data)) if part.data else 0}") - - # WICHTIG: Ein Part kann mehrere Intents haben - erstelle für jeden Intent einen ContentPart - # Generische Intent-Verarbeitung für ALLE Content-Typen - hasReferenceIntent = "reference" in partIntent - hasRenderIntent = "render" in partIntent - hasExtractIntent = "extract" in partIntent - hasPartData = bool(part.data) and (not isinstance(part.data, str) or len(part.data.strip()) > 0) - - logger.debug(f"Part {part.id}: 
reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}") - - # Track ob der originale Part bereits hinzugefügt wurde - originalPartAdded = False - - # 1. Reference Intent: Erstelle Reference ContentPart - if hasReferenceIntent: - referencePart = ContentPart( - id=f"ref_{document.id}_{part.id}", - label=f"Reference: {part.label or 'Content'}", - typeGroup="reference", - mimeType=part.mimeType or "application/octet-stream", - data="", # Leer - nur Referenz - metadata={ - "contentFormat": "reference", - "documentId": document.id, - "documentReference": f"docItem:{document.id}:{preExtracted['originalDocument']['fileName']}", - "intent": "reference", - "usageHint": f"Reference: {preExtracted['originalDocument']['fileName']}", - "originalFileName": preExtracted["originalDocument"]["fileName"] - } - ) - allContentParts.append(referencePart) - logger.debug(f"✅ Created reference ContentPart for {part.id}") - - # 2. Render Intent: Erstelle Object ContentPart (für Binary/Image Rendering) - if hasRenderIntent and hasPartData: - # Prüfe ob es ein Binary/Image ist (kann gerendert werden) - isRenderable = ( - part.typeGroup == "image" or - part.typeGroup == "binary" or - (part.mimeType and ( - part.mimeType.startswith("image/") or - part.mimeType.startswith("video/") or - part.mimeType.startswith("audio/") or - self._isBinary(part.mimeType) - )) - ) - - if isRenderable: - objectPart = ContentPart( - id=f"obj_{document.id}_{part.id}", - label=f"Object: {part.label or 'Content'}", - typeGroup=part.typeGroup, - mimeType=part.mimeType or "application/octet-stream", - data=part.data, # Base64/Binary data ist bereits vorhanden - metadata={ - "contentFormat": "object", - "documentId": document.id, - "intent": "render", - "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedExtractedPartId": 
f"extracted_{document.id}_{part.id}" if hasExtractIntent else None - } - ) - allContentParts.append(objectPart) - logger.debug(f"✅ Created object ContentPart for {part.id} (render intent)") - else: - logger.warning(f"⚠️ Part {part.id} has render intent but is not renderable (typeGroup={part.typeGroup}, mimeType={part.mimeType})") - elif hasRenderIntent and not hasPartData: - logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part") - - # 3. Extract Intent: Erstelle Extracted ContentPart (möglicherweise mit zusätzlicher Verarbeitung) - if hasExtractIntent: - # Spezielle Behandlung für Images: Vision AI für Text-Extraktion - if part.typeGroup == "image" and hasPartData: - logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)") - try: - extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. Return only the extracted text, no additional formatting." - extractedText = await self._extractTextFromImage(part, extractionPrompt) - if extractedText: - # Prüfe ob es ein Error-Message ist - isError = extractedText.startswith("[ERROR:") - - # Erstelle neuen Text-Part mit extrahiertem Text oder Error-Message - textPart = ContentPart( - id=f"extracted_{document.id}_{part.id}", - label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}", - typeGroup="text", - mimeType="text/plain", - data=extractedText, - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, - "extractionPrompt": extractionPrompt, - "extractionMethod": "vision", - "isError": isError - } - ) - allContentParts.append(textPart) - if isError: - logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}") - else: - 
logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars") - else: - # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) - errorMsg = f"Vision AI extraction failed: Unexpected empty response for image {part.id}" - logger.error(errorMsg) - errorPart = ContentPart( - id=f"extracted_{document.id}_{part.id}", - label=f"Error extracting from {part.label or 'Image'}", - typeGroup="text", - mimeType="text/plain", - data=f"[ERROR: {errorMsg}]", - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "extractionPrompt": extractionPrompt, - "extractionMethod": "vision", - "isError": True - } - ) - allContentParts.append(errorPart) - except Exception as e: - logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}") - import traceback - logger.debug(f"Traceback: {traceback.format_exc()}") - # Kein Fallback: Wenn render Intent vorhanden, haben wir bereits object Part - # Wenn nur extract Intent: Original Part ist kein Text, daher nicht als extracted hinzufügen - if not hasRenderIntent: - logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available") - else: - # Für alle anderen Content-Typen: Prüfe ob AI-Verarbeitung benötigt wird - # WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Content - # (z.B. Text aus PDF-Text-Layer, Tabellen, etc.). Wenn "extract" Intent vorhanden ist, - # muss dieser Content mit AI verarbeitet werden basierend auf extractionPrompt. 
- - # Prüfe ob Part Text-Content hat (kann mit AI verarbeitet werden) - isTextContent = ( - part.typeGroup == "text" or - part.typeGroup == "table" or - (part.data and isinstance(part.data, str) and len(part.data.strip()) > 0) - ) - - if isTextContent and intent and intent.extractionPrompt: - # Text-Content mit extractionPrompt: Verarbeite mit AI - logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)") - try: - extractionPrompt = intent.extractionPrompt - processedText = await self._processTextContentWithAi(part, extractionPrompt) - if processedText: - # Prüfe ob es ein Error-Message ist - isError = processedText.startswith("[ERROR:") - - # Erstelle neuen Text-Part mit AI-verarbeitetem Text oder Error-Message - processedPart = ContentPart( - id=f"extracted_{document.id}_{part.id}", - label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}", - typeGroup="text", - mimeType="text/plain", - data=processedText, - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, - "extractionPrompt": extractionPrompt, - "extractionMethod": "ai", - "sourcePartId": part.id, - "fromExtractContent": True, - "isError": isError - } - ) - allContentParts.append(processedPart) - originalPartAdded = True - if isError: - logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}") - else: - logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars") - else: - # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) - errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}" - logger.error(errorMsg) - errorPart = ContentPart( - id=f"extracted_{document.id}_{part.id}", - label=f"Error processing {part.label or 'Content'}", 
- typeGroup="text", - mimeType="text/plain", - data=f"[ERROR: {errorMsg}]", - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "extractionPrompt": extractionPrompt, - "extractionMethod": "ai", - "sourcePartId": part.id, - "isError": True - } - ) - allContentParts.append(errorPart) - originalPartAdded = True - except Exception as e: - logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}") - import traceback - logger.debug(f"Traceback: {traceback.format_exc()}") - # Fallback: Verwende Original-Part - if not originalPartAdded: - part.metadata.update({ - "contentFormat": "extracted", - "intent": "extract", - "fromExtractContent": True, - "skipExtraction": True, - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None - }) - allContentParts.append(part) - originalPartAdded = True - else: - # Kein extractionPrompt oder kein Text-Content: Verwende Part direkt als extracted - # (Content ist bereits extrahiert von context.extractContent, keine weitere AI-Verarbeitung nötig) - # WICHTIG: Nur hinzufügen wenn noch nicht hinzugefügt (z.B. durch render Intent) - if not originalPartAdded: - part.metadata.update({ - "contentFormat": "extracted", - "intent": "extract", - "fromExtractContent": True, - "skipExtraction": True, # Bereits extrahiert - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None - }) - # Stelle sicher dass contentFormat gesetzt ist - if "contentFormat" not in part.metadata: - part.metadata["contentFormat"] = "extracted" - allContentParts.append(part) - originalPartAdded = True - logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)") - - # 4. 
Fallback: Wenn kein Intent vorhanden oder Part wurde noch nicht hinzugefügt - # (sollte normalerweise nicht vorkommen, da default "extract" ist) - if not hasReferenceIntent and not hasRenderIntent and not hasExtractIntent and not originalPartAdded: - logger.warning(f"⚠️ Part {part.id} has no recognized intents, adding as extracted by default") - part.metadata.update({ - "contentFormat": "extracted", - "intent": "extract", - "fromExtractContent": True, - "skipExtraction": True, - "originalFileName": preExtracted["originalDocument"]["fileName"] - }) - allContentParts.append(part) - originalPartAdded = True - - logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}") - logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}") - continue # Skip normal extraction for this document - - # Check if it's standardized JSON format (has "documents" or "sections") - if document.mimeType == "application/json": - try: - docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) - if docBytes: - docData = docBytes.decode('utf-8') - jsonData = json.loads(docData) - - if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): - logger.info(f"Document is already in standardized JSON format, using as reference") - # Create reference ContentPart for structured JSON - contentPart = ContentPart( - id=f"ref_{document.id}", - label=f"Reference: {document.fileName}", - typeGroup="structure", - mimeType="application/json", - data=docData, - metadata={ - "contentFormat": "reference", - "documentId": document.id, - "documentReference": f"docItem:{document.id}:{document.fileName}", - "skipExtraction": True, - "intent": "reference" - } - ) - allContentParts.append(contentPart) - logger.info(f"✅ Using JSON document directly without extraction") - continue # Skip normal extraction for this document - except 
Exception as e: - logger.warning(f"Could not parse JSON document {document.fileName}, will extract normally: {str(e)}") - # Continue with normal extraction - - # Normal extraction path - intent = self._getIntentForDocument(document.id, documentIntents) - - if not intent: - # Default: extract für alle Dokumente ohne Intent - logger.warning(f"No intent found for document {document.id}, using default 'extract'") - intent = DocumentIntent( - documentId=document.id, - intents=["extract"], - extractionPrompt="Extract all content from the document", - reasoning="Default intent: no specific intent found" - ) - - # WICHTIG: Prüfe alle Intents - ein Dokument kann mehrere ContentParts erzeugen - - if "reference" in intent.intents: - # Erstelle Reference ContentPart - contentPart = ContentPart( - id=f"ref_{document.id}", - label=f"Reference: {document.fileName}", - typeGroup="reference", - mimeType=document.mimeType, - data="", - metadata={ - "contentFormat": "reference", - "documentId": document.id, - "documentReference": f"docItem:{document.id}:{document.fileName}", - "intent": "reference", - "usageHint": f"Reference document: {document.fileName}" - } - ) - allContentParts.append(contentPart) - - # WICHTIG: "render" und "extract" können beide vorhanden sein! 
- # In diesem Fall erzeugen wir BEIDE ContentParts - - if "render" in intent.intents: - # Für Images/Binary: extrahiere als Object - if document.mimeType.startswith("image/") or self._isBinary(document.mimeType): - try: - # Lade Binary-Daten (getFileData ist nicht async - keine await nötig) - binaryData = self.services.interfaceDbComponent.getFileData(document.fileId) - if not binaryData: - logger.warning(f"No binary data found for document {document.id}") - continue - base64Data = base64.b64encode(binaryData).decode('utf-8') - - contentPart = ContentPart( - id=f"obj_{document.id}", - label=f"Object: {document.fileName}", - typeGroup="image" if document.mimeType.startswith("image/") else "binary", - mimeType=document.mimeType, - data=base64Data, - metadata={ - "contentFormat": "object", - "documentId": document.id, - "intent": "render", - "usageHint": f"Render as visual element: {document.fileName}", - "originalFileName": document.fileName, - # Verknüpfung zu extracted Part (falls vorhanden) - "relatedExtractedPartId": f"ext_{document.id}" if "extract" in intent.intents else None - } - ) - allContentParts.append(contentPart) - except Exception as e: - logger.error(f"Failed to load binary data for document {document.id}: {str(e)}") - - if "extract" in intent.intents: - # Extrahiere Content mit Extraction Service - extractionPrompt = intent.extractionPrompt or "Extract all content from the document" - - # Debug-Log (harmonisiert) - self.services.utils.writeDebugFile( - extractionPrompt, - f"content_extraction_prompt_{document.id}" - ) - - # Führe Extraktion aus - from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy - - extractionOptions = ExtractionOptions( - prompt=extractionPrompt, - mergeStrategy=MergeStrategy() - ) - - # extractContent ist nicht async - keine await nötig - extractedResults = self.services.extraction.extractContent( - [document], - extractionOptions, - operationId=extractionOperationId, - 
parentOperationId=extractionOperationId - ) - - # Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten - for extracted in extractedResults: - for part in extracted.parts: - # Markiere als extracted Format - part.metadata.update({ - "contentFormat": "extracted", - "documentId": document.id, - "extractionPrompt": extractionPrompt, - "intent": "extract", - "usageHint": f"Use extracted content from {document.fileName}", - # Verknüpfung zu object Part (falls vorhanden) - "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None - }) - # Stelle sicher, dass ID eindeutig ist (falls object Part existiert) - if "render" in intent.intents: - part.id = f"ext_{document.id}_{part.id}" - allContentParts.append(part) - - # Debug-Log (harmonisiert) - self.services.utils.writeDebugFile( - json.dumps([part.dict() for part in allContentParts], indent=2, default=str), - "content_extraction_result" - ) - - # ChatLog abschließen - self.services.chat.progressLogFinish(extractionOperationId, True) - - return allContentParts - - except Exception as e: - self.services.chat.progressLogFinish(extractionOperationId, False) - logger.error(f"Error in _extractAndPrepareContent: {str(e)}") - raise - - def _isBinary(self, mimeType: str) -> bool: - """Prüfe ob MIME-Type binary ist.""" - binaryTypes = [ - "application/octet-stream", - "application/pdf", - "application/zip", - "application/x-zip-compressed" - ] - return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/") - async def _generateStructure( self, userPrompt: str, @@ -2100,209 +475,6 @@ Return ONLY valid JSON following the structure above. userPrompt, contentParts, outputFormat, parentOperationId ) - async def _generateStructure_OLD( - self, - userPrompt: str, - contentParts: List[ContentPart], - outputFormat: str, - parentOperationId: str - ) -> Dict[str, Any]: - """ - Phase 5C: Generiert Dokument-Struktur mit Sections. 
- Jede Section spezifiziert: - - Welcher Content sollte in dieser Section sein - - Welche ContentParts zu verwenden sind - - Format für jeden ContentPart - - Args: - userPrompt: User-Anfrage - contentParts: Alle vorbereiteten ContentParts mit Metadaten - outputFormat: Ziel-Format (html, docx, pdf, etc.) - parentOperationId: Parent Operation-ID für ChatLog-Hierarchie - - Returns: - Struktur-Dict mit documents und sections - """ - # Erstelle Operation-ID für Struktur-Generierung - structureOperationId = f"{parentOperationId}_structure_generation" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - structureOperationId, - "Structure Generation", - "Structure", - f"Generating structure for {outputFormat}", - parentOperationId=parentOperationId - ) - - try: - # Baue Struktur-Prompt mit Content-Index - structurePrompt = self._buildStructurePrompt( - userPrompt=userPrompt, - contentParts=contentParts, - outputFormat=outputFormat - ) - - # AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses) - # Debug-Logs werden bereits von callAiPlanning geschrieben - aiResponse = await self.callAiPlanning( - prompt=structurePrompt, - debugType="document_generation_structure" - ) - - # Parse Struktur - structure = json.loads(self.services.utils.jsonExtractString(aiResponse)) - - # ChatLog abschließen - self.services.chat.progressLogFinish(structureOperationId, True) - - return structure - - except Exception as e: - self.services.chat.progressLogFinish(structureOperationId, False) - logger.error(f"Error in _generateStructure: {str(e)}") - raise - - def _buildStructurePrompt( - self, - userPrompt: str, - contentParts: List[ContentPart], - outputFormat: str - ) -> str: - """Baue Prompt für Struktur-Generierung.""" - # Baue ContentParts-Index - filtere leere Parts heraus - contentPartsIndex = "" - validParts = [] - filteredParts = [] - - for part in contentParts: - contentFormat = part.metadata.get("contentFormat", "unknown") - 
- # WICHTIG: Reference Parts haben absichtlich leere Daten - immer einschließen - if contentFormat == "reference": - validParts.append(part) - logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)") - continue - - # Überspringe leere Parts (keine Daten oder nur Container ohne Inhalt) - # ABER: Reference Parts wurden bereits oben behandelt - if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): - # Überspringe Container-Parts ohne Daten - if part.typeGroup == "container" and not part.data: - filteredParts.append((part.id, "container without data")) - continue - # Überspringe andere leere Parts (aber nicht Reference, die wurden bereits behandelt) - if not part.data: - filteredParts.append((part.id, f"no data (format: {contentFormat})")) - continue - - validParts.append(part) - logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}") - - if filteredParts: - logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}") - - logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)") - - # Baue Index nur für gültige Parts - for i, part in enumerate(validParts, 1): - contentFormat = part.metadata.get("contentFormat", "unknown") - dataPreview = "" - - if contentFormat == "extracted": - # Für Image-Parts: Zeige dass es ein Image ist - if part.typeGroup == "image": - dataLength = len(part.data) if part.data else 0 - mimeType = part.mimeType or "image" - dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content" - elif part.typeGroup == "container": - # Container ohne Daten überspringen wir bereits oben - dataPreview = "Container structure (no text content)" - else: - # Zeige Preview von extrahiertem Text - if part.data: - preview = part.data[:200] + "..." 
if len(part.data) > 200 else part.data - dataPreview = preview - else: - dataPreview = "(empty)" - elif contentFormat == "object": - dataLength = len(part.data) if part.data else 0 - mimeType = part.mimeType or "binary" - if part.typeGroup == "image": - dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)" - else: - dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)" - elif contentFormat == "reference": - dataPreview = part.metadata.get("documentReference", "reference") - - originalFileName = part.metadata.get('originalFileName', 'N/A') - - contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n" - contentPartsIndex += f" Format: {contentFormat}\n" - contentPartsIndex += f" Type: {part.typeGroup}\n" - contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n" - contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n" - contentPartsIndex += f" Original file name: {originalFileName}\n" - contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n" - contentPartsIndex += f" Data preview: {dataPreview}\n" - - if not contentPartsIndex: - contentPartsIndex = "\n(No content parts available)" - - prompt = f"""USER REQUEST: -{userPrompt} - -AVAILABLE CONTENT PARTS: -{contentPartsIndex} - -TASK: Generiere Dokument-Struktur mit Sections. -Für jede Section, spezifiziere: -- section id -- content_type (heading, paragraph, image, table, etc.) 
-- contentPartIds: [Liste von ContentPart-IDs zu verwenden] -- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist -- generation_hint: Was AI für diese Section generieren soll -- elements: [] (leer, wird in nächster Phase gefüllt) - -OUTPUT FORMAT: {outputFormat} - -RETURN JSON: -{{ - "metadata": {{ - "title": "Document Title", - "language": "de" - }}, - "documents": [{{ - "id": "doc_1", - "title": "Document Title", - "filename": "document.{outputFormat}", - "sections": [ - {{ - "id": "section_1", - "content_type": "heading", - "generation_hint": "Main title", - "contentPartIds": [], - "contentFormats": {{}}, - "elements": [] - }}, - {{ - "id": "section_2", - "content_type": "paragraph", - "generation_hint": "Introduction paragraph", - "contentPartIds": ["part_ext_1"], - "contentFormats": {{ - "part_ext_1": "extracted" - }}, - "elements": [] - }} - ] - }}] -}} - -Return ONLY valid JSON following the structure above. -""" - return prompt - async def _fillStructure( self, structure: Dict[str, Any], @@ -2315,525 +487,6 @@ Return ONLY valid JSON following the structure above. structure, contentParts, userPrompt, parentOperationId ) - async def _fillStructure_OLD( - self, - structure: Dict[str, Any], - contentParts: List[ContentPart], - userPrompt: str, - parentOperationId: str - ) -> Dict[str, Any]: - """ - Phase 5D: Füllt Struktur mit tatsächlichem Content. 
- Für jede Section: - - Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format - - Wenn generation_hint spezifiziert: Generiere AI-Content - - **Implementierungsdetails:** - - Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung) - - Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses) - - Args: - structure: Struktur-Dict mit documents und sections - contentParts: Alle vorbereiteten ContentParts - userPrompt: User-Anfrage - parentOperationId: Parent Operation-ID für ChatLog-Hierarchie - - Returns: - Gefüllte Struktur mit elements in jeder Section - """ - import copy - - # Erstelle Operation-ID für Struktur-Abfüllen - fillOperationId = f"{parentOperationId}_structure_filling" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - fillOperationId, - "Structure Filling", - "Filling", - f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", - parentOperationId=parentOperationId - ) - - try: - filledStructure = copy.deepcopy(structure) - - # Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) - sections_to_process = [] - all_sections_list = [] # Für Kontext-Informationen - for doc in filledStructure.get("documents", []): - doc_sections = doc.get("sections", []) - all_sections_list.extend(doc_sections) - for section in doc_sections: - sections_to_process.append((doc, section)) - - # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) - for sectionIndex, (doc, section) in enumerate(sections_to_process): - sectionId = section.get("id") - contentPartIds = section.get("contentPartIds", []) - contentFormats = section.get("contentFormats", {}) - generationHint = section.get("generation_hint") - contentType = section.get("content_type", "paragraph") - - elements = [] - - # Prüfe ob Aggregation nötig ist - needsAggregation = self._needsAggregation( - 
contentType=contentType, - contentPartCount=len(contentPartIds) - ) - - if needsAggregation and generationHint: - # Aggregation: Alle Parts zusammen verarbeiten - sectionParts = [ - self._findContentPartById(pid, contentParts) - for pid in contentPartIds - ] - sectionParts = [p for p in sectionParts if p is not None] - - if sectionParts: - # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) - extractedParts = [ - p for p in sectionParts - if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" - ] - nonExtractedParts = [ - p for p in sectionParts - if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" - ] - - # Verarbeite non-extracted Parts separat (reference, object) - for part in nonExtractedParts: - contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) - - if contentFormat == "reference": - elements.append({ - "type": "reference", - "documentReference": part.metadata.get("documentReference"), - "label": part.metadata.get("usageHint", part.label) - }) - elif contentFormat == "object": - elements.append({ - "type": part.typeGroup, - "base64Data": part.data, - "mimeType": part.mimeType, - "altText": part.metadata.get("usageHint", part.label) - }) - - # Aggregiere extracted Parts mit AI - if extractedParts: - generationPrompt = self._buildSectionGenerationPrompt( - section=section, - contentParts=extractedParts, # ALLE PARTS für Aggregation! 
- userPrompt=userPrompt, - generationHint=generationHint, - allSections=all_sections_list, - sectionIndex=sectionIndex, - isAggregation=True - ) - - # Erstelle Operation-ID für Section-Generierung - sectionOperationId = f"{fillOperationId}_section_{sectionId}" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - sectionOperationId, - "Section Generation (Aggregation)", - "Section", - f"Generating section {sectionId} with {len(extractedParts)} parts", - parentOperationId=fillOperationId - ) - - try: - # Debug: Log Prompt - self.services.utils.writeDebugFile( - generationPrompt, - f"section_content_{sectionId}_prompt" - ) - - # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) - request = AiCallRequest( - prompt=generationPrompt, - contentParts=extractedParts, # ALLE PARTS! - options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.BALANCED, - processingMode=ProcessingModeEnum.DETAILED - ) - ) - aiResponse = await self.callAi(request) - - # Debug: Log Response - self.services.utils.writeDebugFile( - aiResponse.content, - f"section_content_{sectionId}_response" - ) - - # Parse und füge zu elements hinzu - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) - - # ChatLog abschließen - self.services.chat.progressLogFinish(sectionOperationId, True) - - except Exception as e: - # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) 
- self.services.chat.progressLogFinish(sectionOperationId, False) - elements.append({ - "type": "error", - "message": f"Error generating section {sectionId}: {str(e)}", - "sectionId": sectionId - }) - logger.error(f"Error generating section {sectionId}: {str(e)}") - # NICHT raise - Section wird mit Fehlermeldung gerendert - - else: - # Einzelverarbeitung: Jeder Part einzeln - for partId in contentPartIds: - part = self._findContentPartById(partId, contentParts) - if not part: - continue - - contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) - - if contentFormat == "reference": - # Füge Dokument-Referenz hinzu - elements.append({ - "type": "reference", - "documentReference": part.metadata.get("documentReference"), - "label": part.metadata.get("usageHint", part.label) - }) - - elif contentFormat == "object": - # Füge base64 Object hinzu - elements.append({ - "type": part.typeGroup, # "image", "binary", etc. - "base64Data": part.data, - "mimeType": part.mimeType, - "altText": part.metadata.get("usageHint", part.label) - }) - - elif contentFormat == "extracted": - if generationHint: - # AI-Call mit einzelnen ContentPart - generationPrompt = self._buildSectionGenerationPrompt( - section=section, - contentParts=[part], # EIN PART - userPrompt=userPrompt, - generationHint=generationHint, - allSections=all_sections_list, - sectionIndex=sectionIndex, - isAggregation=False - ) - - # Erstelle Operation-ID für Section-Generierung - sectionOperationId = f"{fillOperationId}_section_{sectionId}" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - sectionOperationId, - "Section Generation", - "Section", - f"Generating section {sectionId}", - parentOperationId=fillOperationId - ) - - try: - # Debug: Log Prompt - self.services.utils.writeDebugFile( - generationPrompt, - f"section_content_{sectionId}_prompt" - ) - - # Verwende callAi für ContentParts-Unterstützung - request = AiCallRequest( - prompt=generationPrompt, - 
contentParts=[part], - options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.BALANCED, - processingMode=ProcessingModeEnum.DETAILED - ) - ) - aiResponse = await self.callAi(request) - - # Debug: Log Response - self.services.utils.writeDebugFile( - aiResponse.content, - f"section_content_{sectionId}_response" - ) - - # Parse und füge zu elements hinzu - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) - - # ChatLog abschließen - self.services.chat.progressLogFinish(sectionOperationId, True) - - except Exception as e: - # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) - self.services.chat.progressLogFinish(sectionOperationId, False) - elements.append({ - "type": "error", - "message": f"Error generating section {sectionId}: {str(e)}", - "sectionId": sectionId - }) - logger.error(f"Error generating section {sectionId}: {str(e)}") - # NICHT raise - Section wird mit Fehlermeldung gerendert - else: - # Füge extrahierten Text direkt hinzu (kein AI-Call) - elements.append({ - "type": "extracted_text", - "content": part.data, - "source": part.metadata.get("documentId"), - "extractionPrompt": part.metadata.get("extractionPrompt") - }) - - section["elements"] = elements - - # ChatLog abschließen - self.services.chat.progressLogFinish(fillOperationId, True) - - return filledStructure - - except Exception as e: - self.services.chat.progressLogFinish(fillOperationId, False) - logger.error(f"Error in _fillStructure: {str(e)}") - raise - - def _buildSectionGenerationPrompt( - self, - section: Dict[str, Any], - contentParts: List[Optional[ContentPart]], - userPrompt: str, - generationHint: str, - allSections: Optional[List[Dict[str, Any]]] = None, - sectionIndex: Optional[int] 
= None, - isAggregation: bool = False - ) -> str: - """Baue Prompt für Section-Generierung mit vollständigem Kontext.""" - # Filtere None-Werte - validParts = [p for p in contentParts if p is not None] - - # Section-Metadaten - sectionId = section.get("id", "unknown") - contentType = section.get("content_type", "paragraph") - - # Baue ContentParts-Beschreibung - contentPartsText = "" - if isAggregation: - # Aggregation: Zeige nur Metadaten, nicht Previews - contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n" - contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n" - contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n" - contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n" - contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n" - contentPartsText += f"ContentPart IDs:\n" - for part in validParts: - contentFormat = part.metadata.get("contentFormat", "unknown") - contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}" - if part.metadata.get("originalFileName"): - contentPartsText += f", Source: {part.metadata.get('originalFileName')}" - contentPartsText += ")\n" - else: - # Einzelverarbeitung: Zeige Previews - for part in validParts: - contentFormat = part.metadata.get("contentFormat", "unknown") - contentPartsText += f"\n- ContentPart {part.id}:\n" - contentPartsText += f" Format: {contentFormat}\n" - contentPartsText += f" Type: {part.typeGroup}\n" - if part.metadata.get("originalFileName"): - contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n" - - if contentFormat == "extracted": - # Zeige Preview von extrahiertem Text (länger für besseren Kontext) - previewLength = 1000 - if part.data: - preview = part.data[:previewLength] + "..." 
if len(part.data) > previewLength else part.data - contentPartsText += f" Content preview:\n```\n{preview}\n```\n" - else: - contentPartsText += f" Content: (empty)\n" - elif contentFormat == "reference": - contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" - if part.metadata.get("usageHint"): - contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" - elif contentFormat == "object": - dataLength = len(part.data) if part.data else 0 - contentPartsText += f" Object type: {part.typeGroup}\n" - contentPartsText += f" MIME type: {part.mimeType}\n" - contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n" - if part.metadata.get("usageHint"): - contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" - - # Baue Section-Kontext (vorherige und nachfolgende Sections) - contextText = "" - if allSections and sectionIndex is not None: - prevSections = [] - nextSections = [] - - if sectionIndex > 0: - for i in range(max(0, sectionIndex - 2), sectionIndex): - prevSection = allSections[i] - prevSections.append({ - "id": prevSection.get("id"), - "content_type": prevSection.get("content_type"), - "generation_hint": prevSection.get("generation_hint", "")[:100] - }) - - if sectionIndex < len(allSections) - 1: - for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)): - nextSection = allSections[i] - nextSections.append({ - "id": nextSection.get("id"), - "content_type": nextSection.get("content_type"), - "generation_hint": nextSection.get("generation_hint", "")[:100] - }) - - if prevSections or nextSections: - contextText = "\n## DOCUMENT CONTEXT\n" - if prevSections: - contextText += "\nPrevious sections:\n" - for prev in prevSections: - contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n" - if nextSections: - contextText += "\nFollowing sections:\n" - for next in nextSections: - contextText += f"- {next['id']} ({next['content_type']}): 
{next['generation_hint']}\n" - - if isAggregation: - prompt = f"""# TASK: Generate Section Content (Aggregation) - -## SECTION METADATA -- Section ID: {sectionId} -- Content Type: {contentType} -- Generation Hint: {generationHint} -{contextText} - -## USER REQUEST (for context) -``` -{userPrompt} -``` - -## AVAILABLE CONTENT FOR THIS SECTION -{contentPartsText if contentPartsText else "(No content parts specified for this section)"} - -## INSTRUCTIONS -1. Generate content for section "{sectionId}" based on the generation hint above -2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) -3. For table content_type: Create a single table with headers and rows from all ContentParts -4. For bullet_list content_type: Create a single list with items from all ContentParts -5. Format appropriately based on content_type ({contentType}) -6. Ensure the generated content fits logically between previous and following sections -7. Return ONLY a JSON object with an "elements" array -8. Each element should match the content_type: {contentType} - -## OUTPUT FORMAT -Return a JSON object with this structure: -```json -{{ - "elements": [ - {{ - "type": "{contentType}", - "headers": [...], // if table - "rows": [...], // if table - "items": [...], // if bullet_list - "content": "..." // if paragraph - }} - ] -}} -``` - -CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. -""" - else: - prompt = f"""# TASK: Generate Section Content - -## SECTION METADATA -- Section ID: {sectionId} -- Content Type: {contentType} -- Generation Hint: {generationHint} -{contextText} - -## USER REQUEST (for context) -``` -{userPrompt} -``` - -## AVAILABLE CONTENT FOR THIS SECTION -{contentPartsText if contentPartsText else "(No content parts specified for this section)"} - -## INSTRUCTIONS -1. Generate content for section "{sectionId}" based on the generation hint above -2. 
Use the available content parts to populate this section -3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data -4. For extracted text: Format appropriately based on content_type ({contentType}) -5. Ensure the generated content fits logically between previous and following sections -6. Return ONLY a JSON object with an "elements" array -7. Each element should match the content_type: {contentType} - -## OUTPUT FORMAT -Return a JSON object with this structure: -```json -{{ - "elements": [ - {{ - "type": "{contentType}", - "content": "..." - }} - ] -}} -``` - -CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. -""" - return prompt - - def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]: - """Finde ContentPart nach ID.""" - for part in contentParts: - if part.id == partId: - return part - return None - - def _needsAggregation( - self, - contentType: str, - contentPartCount: int - ) -> bool: - """ - Bestimmt ob mehrere ContentParts aggregiert werden müssen. - - Aggregation nötig wenn: - - content_type erfordert Aggregation (table, bullet_list) - - UND mehrere ContentParts vorhanden sind (> 1) - - Args: - contentType: Section content_type - contentPartCount: Anzahl der ContentParts in dieser Section - - Returns: - True wenn Aggregation nötig, False sonst - """ - aggregationTypes = ["table", "bullet_list"] - - if contentType in aggregationTypes and contentPartCount > 1: - return True - - # Optional: Auch für paragraph wenn mehrere Parts vorhanden - # (z.B. 
Vergleich mehrerer Dokumente) - # Standard: Keine Aggregation für paragraph - return False - async def _renderResult( self, filledStructure: Dict[str, Any], diff --git a/modules/services/serviceAi/subAiCallLooping.py b/modules/services/serviceAi/subAiCallLooping.py new file mode 100644 index 00000000..8ebafd23 --- /dev/null +++ b/modules/services/serviceAi/subAiCallLooping.py @@ -0,0 +1,533 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +AI Call Looping Module + +Handles AI calls with looping and repair logic, including: +- Looping with JSON repair and continuation +- KPI definition and tracking +- Progress tracking and iteration management +""" +import json +import logging +from typing import Dict, Any, List, Optional, Callable + +from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, JsonAccumulationState +from modules.datamodels.datamodelExtraction import ContentPart +from modules.shared.jsonUtils import buildContinuationContext, extractJsonString +from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler + +logger = logging.getLogger(__name__) + + +class AiCallLooper: + """Handles AI calls with looping and repair logic.""" + + def __init__(self, services, aiService, responseParser): + """Initialize AiCallLooper with service center, AI service, and response parser access.""" + self.services = services + self.aiService = aiService + self.responseParser = responseParser + + async def callAiWithLooping( + self, + prompt: str, + options: AiCallOptions, + debugPrefix: str = "ai_call", + promptBuilder: Optional[Callable] = None, + promptArgs: Optional[Dict[str, Any]] = None, + operationId: Optional[str] = None, + userPrompt: Optional[str] = None, + contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content + ) -> str: + """ + Shared core function for AI calls with repair-based looping system. 
+ Automatically repairs broken JSON and continues generation seamlessly. + + Args: + prompt: The prompt to send to AI + options: AI call configuration options + debugPrefix: Prefix for debug file names + promptBuilder: Optional function to rebuild prompts for continuation + promptArgs: Optional arguments for prompt builder + operationId: Optional operation ID for progress tracking + userPrompt: Optional user prompt for KPI definition + contentParts: Optional content parts for first iteration + + Returns: + Complete AI response after all iterations + """ + maxIterations = 50 # Prevent infinite loops + iteration = 0 + allSections = [] # Accumulate all sections across iterations + lastRawResponse = None # Store last raw JSON response for continuation + documentMetadata = None # Store document metadata (title, filename) from first iteration + accumulationState = None # Track accumulation state for string accumulation + + # Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID) + parentOperationId = operationId # Use the parent's operationId directly + + while iteration < maxIterations: + iteration += 1 + + # Create separate operation for each iteration with parent reference + iterationOperationId = None + if operationId: + iterationOperationId = f"{operationId}_iter_{iteration}" + self.services.chat.progressLogStart( + iterationOperationId, + "AI Call", + f"Iteration {iteration}", + "", + parentOperationId=parentOperationId + ) + + # Build iteration prompt + # CRITICAL: Build continuation prompt if we have sections OR if we have a previous response (even if broken) + # This ensures continuation prompts are built even when JSON is so broken that no sections can be extracted + if (len(allSections) > 0 or lastRawResponse) and promptBuilder and promptArgs: + # This is a continuation - build continuation context with raw JSON and rebuild prompt + continuationContext = buildContinuationContext(allSections, lastRawResponse) + if 
not lastRawResponse: + logger.warning(f"Iteration {iteration}: No previous response available for continuation!") + + # Filter promptArgs to only include parameters that buildGenerationPrompt accepts + # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services + filteredPromptArgs = { + k: v for k, v in promptArgs.items() + if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services'] + } + # Always include services if available + if not filteredPromptArgs.get('services') and hasattr(self, 'services'): + filteredPromptArgs['services'] = self.services + + # Rebuild prompt with continuation context using the provided prompt builder + iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext) + else: + # First iteration - use original prompt + iterationPrompt = prompt + + # Make AI call + try: + if iterationOperationId: + self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model") + # ARCHITECTURE: Pass ContentParts directly to AiCallRequest + # This allows model-aware chunking to handle large content properly + # ContentParts are only passed in first iteration (continuations don't need them) + request = AiCallRequest( + prompt=iterationPrompt, + context="", + options=options, + contentParts=contentParts if iteration == 1 else None # Only pass ContentParts in first iteration + ) + + # Write the ACTUAL prompt sent to AI + if iteration == 1: + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") + else: + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") + + response = await self.aiService.callAi(request) + result = response.content + + # Track bytes for progress reporting + bytesReceived = len(result.encode('utf-8')) if result else 0 + totalBytesSoFar = sum(len(section.get('content', '').encode('utf-8')) if isinstance(section.get('content'), str) else 0 for 
section in allSections) + bytesReceived + + # Update progress after AI call with byte information + if iterationOperationId: + # Format bytes for display (kB or MB) + if totalBytesSoFar < 1024: + bytesDisplay = f"{totalBytesSoFar}B" + elif totalBytesSoFar < 1024 * 1024: + bytesDisplay = f"{totalBytesSoFar / 1024:.1f}kB" + else: + bytesDisplay = f"{totalBytesSoFar / (1024 * 1024):.1f}MB" + self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})") + + # Write raw AI response to debug file + if iteration == 1: + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") + else: + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") + + # Emit stats for this iteration (only if workflow exists and has id) + if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id: + try: + self.services.chat.storeWorkflowStat( + self.services.workflow, + response, + f"ai.call.{debugPrefix}.iteration_{iteration}" + ) + except Exception as statError: + # Don't break the main loop if stat storage fails + logger.warning(f"Failed to store workflow stat: {str(statError)}") + + # Check for error response using generic error detection (errorCount > 0 or modelName == "error") + if hasattr(response, 'errorCount') and response.errorCount > 0: + errorMsg = f"Iteration {iteration}: Error response detected (errorCount={response.errorCount}), stopping loop: {result[:200] if result else 'empty'}" + logger.error(errorMsg) + break + + if hasattr(response, 'modelName') and response.modelName == "error": + errorMsg = f"Iteration {iteration}: Error response detected (modelName=error), stopping loop: {result[:200] if result else 'empty'}" + logger.error(errorMsg) + break + + if not result or not result.strip(): + logger.warning(f"Iteration {iteration}: Empty response, stopping") + break + + # Check if this is a text response (not document generation) + # Text responses 
don't need JSON parsing - return immediately after first successful response + isTextResponse = (promptBuilder is None and promptArgs is None) or debugPrefix == "text" + + if isTextResponse: + # For text responses, return the text immediately - no JSON parsing needed + logger.info(f"Iteration {iteration}: Text response received, returning immediately") + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, True) + return result + + # Store raw response for continuation (even if broken) + lastRawResponse = result + + # Extract sections from response (handles both valid and broken JSON) + # Only for document generation (JSON responses) + # CRITICAL: Pass allSections and accumulationState to enable string accumulation + extractedSections, wasJsonComplete, parsedResult, accumulationState = self.responseParser.extractSectionsFromResponse( + result, iteration, debugPrefix, allSections, accumulationState + ) + + # CRITICAL: Merge sections BEFORE KPI validation + # This ensures sections are preserved even if KPI validation fails + if extractedSections: + allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration) + + # Define KPIs if we just entered accumulation mode (iteration 1, incomplete JSON) + if accumulationState and accumulationState.isAccumulationMode and iteration == 1 and not accumulationState.kpis: + logger.info(f"Iteration {iteration}: Defining KPIs for accumulation tracking") + continuationContext = buildContinuationContext(allSections, result) + # Pass raw response string from first iteration for KPI definition + kpiDefinitions = await self._defineKpisFromPrompt( + userPrompt or prompt, + result, # Pass raw JSON string from first iteration + continuationContext, + debugPrefix + ) + # Initialize KPIs with currentValue = 0 + accumulationState.kpis = [{**kpi, "currentValue": 0} for kpi in kpiDefinitions] + logger.info(f"Defined {len(accumulationState.kpis)} KPIs: {[kpi.get('id') for 
kpi in accumulationState.kpis]}") + + # Extract and validate KPIs (if in accumulation mode with KPIs defined) + if accumulationState and accumulationState.isAccumulationMode and accumulationState.kpis: + # For KPI extraction, prefer accumulated JSON string over repaired JSON + # because repairBrokenJson may lose data (e.g., empty rows array when JSON is incomplete) + updatedKpis = [] + + # First try to extract from parsedResult (repaired JSON) + if parsedResult: + try: + updatedKpis = JsonResponseHandler.extractKpiValuesFromJson( + parsedResult, + accumulationState.kpis + ) + # Check if we got meaningful values (non-zero) + hasValidValues = any(kpi.get("currentValue", 0) > 0 for kpi in updatedKpis) + if not hasValidValues and accumulationState.accumulatedJsonString: + # Repaired JSON has empty values, try accumulated string + logger.debug("Repaired JSON has empty KPI values, trying accumulated JSON string") + updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( + accumulationState.accumulatedJsonString, + accumulationState.kpis + ) + except Exception as e: + logger.debug(f"Error extracting KPIs from parsedResult: {e}") + updatedKpis = [] + + # If no parsedResult or extraction failed, try accumulated string + if not updatedKpis and accumulationState.accumulatedJsonString: + try: + updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( + accumulationState.accumulatedJsonString, + accumulationState.kpis + ) + except Exception as e: + logger.debug(f"Error extracting KPIs from accumulated JSON string: {e}") + updatedKpis = [] + + if updatedKpis: + shouldProceed, reason = JsonResponseHandler.validateKpiProgression( + accumulationState, + updatedKpis + ) + + if not shouldProceed: + logger.warning(f"Iteration {iteration}: KPI validation failed: {reason}") + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, False) + if operationId: + self.services.chat.progressLogUpdate(operationId, 0.9, f"KPI validation 
failed: {reason} ({iteration} iterations)") + break + + # Update KPIs in accumulation state + accumulationState.kpis = updatedKpis + logger.info(f"Iteration {iteration}: KPIs updated: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}") + + # Check if all KPIs completed + allCompleted = True + for kpi in updatedKpis: + targetValue = kpi.get("targetValue", 0) + currentValue = kpi.get("currentValue", 0) + if currentValue < targetValue: + allCompleted = False + break + + if allCompleted: + logger.info(f"Iteration {iteration}: All KPIs completed, finishing accumulation") + wasJsonComplete = True # Mark as complete to exit loop + + # CRITICAL: Handle JSON fragments (continuation content) + # Fragment merging happens inside extractSectionsFromResponse + # If merge fails (returns wasJsonComplete=True), stop iterations and complete JSON + if not extractedSections and allSections: + if wasJsonComplete: + # Merge failed - stop iterations, complete JSON with available data + logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - Stopping iterations, completing JSON with available data") + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, False) + if operationId: + self.services.chat.progressLogUpdate(operationId, 0.9, f"Merge failed, completing JSON ({iteration} iterations)") + break + + # Fragment was detected and merged successfully + logger.info(f"Iteration {iteration}: JSON fragment detected and merged, continuing") + # Don't break - fragment was merged, continue to get more content if needed + # Check if we should continue based on JSON completeness + shouldContinue = self.responseParser.shouldContinueGeneration( + allSections, + iteration, + wasJsonComplete, + result + ) + if shouldContinue: + if iterationOperationId: + self.services.chat.progressLogUpdate(iterationOperationId, 0.8, "Fragment merged, continuing") + self.services.chat.progressLogFinish(iterationOperationId, True) + continue + else: + # Done - fragment 
was merged and JSON is complete + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, True) + if operationId: + self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, fragment merged)") + logger.info(f"Generation complete after {iteration} iterations: fragment merged") + break + + # Extract document metadata from first iteration if available + if iteration == 1 and parsedResult and not documentMetadata: + documentMetadata = self.responseParser.extractDocumentMetadata(parsedResult) + + # Update progress after parsing + if iterationOperationId: + if extractedSections: + self.services.chat.progressLogUpdate(iterationOperationId, 0.8, f"Extracted {len(extractedSections)} sections") + + if not extractedSections: + # CRITICAL: If JSON was incomplete/broken, continue even if no sections extracted + # This allows the AI to retry and complete the broken JSON + if not wasJsonComplete: + logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt") + continue + # If JSON was complete but no sections extracted - check if it was a fragment + # Fragments are handled above, so if we get here and it's complete, it's an error + logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping") + break + + # NOTE: Section merging now happens BEFORE KPI validation (see above) + # This ensures sections are preserved even if KPI validation fails + + # Calculate total bytes in merged content for progress display + merged_json_str = json.dumps(allSections, indent=2, ensure_ascii=False) + totalBytesGenerated = len(merged_json_str.encode('utf-8')) + + # Update main operation with byte progress + if operationId: + # Format bytes for display + if totalBytesGenerated < 1024: + bytesDisplay = f"{totalBytesGenerated}B" + elif totalBytesGenerated < 1024 * 1024: + bytesDisplay = f"{totalBytesGenerated / 1024:.1f}kB" + else: + bytesDisplay = 
f"{totalBytesGenerated / (1024 * 1024):.1f}MB" + # Estimate progress based on iterations (rough estimate) + estimatedProgress = min(0.9, 0.4 + (iteration * 0.1)) + self.services.chat.progressLogUpdate(operationId, estimatedProgress, f"Pipeline: {bytesDisplay} (iteration {iteration})") + + # Log merged sections for debugging + self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}") + + # Check if we should continue (completion detection) + # Simple logic: JSON completeness determines continuation + shouldContinue = self.responseParser.shouldContinueGeneration( + allSections, + iteration, + wasJsonComplete, + result + ) + + if shouldContinue: + # Finish iteration operation (will continue with next iteration) + if iterationOperationId: + # Show byte progress in iteration completion + iterBytes = len(result.encode('utf-8')) if result else 0 + if iterBytes < 1024: + iterBytesDisplay = f"{iterBytes}B" + elif iterBytes < 1024 * 1024: + iterBytesDisplay = f"{iterBytes / 1024:.1f}kB" + else: + iterBytesDisplay = f"{iterBytes / (1024 * 1024):.1f}MB" + self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Completed ({iterBytesDisplay})") + self.services.chat.progressLogFinish(iterationOperationId, True) + continue + else: + # Done - finish iteration and update main operation + if iterationOperationId: + # Show final byte count + finalBytes = len(merged_json_str.encode('utf-8')) + if finalBytes < 1024: + finalBytesDisplay = f"{finalBytes}B" + elif finalBytes < 1024 * 1024: + finalBytesDisplay = f"{finalBytes / 1024:.1f}kB" + else: + finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB" + self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Complete ({finalBytesDisplay})") + self.services.chat.progressLogFinish(iterationOperationId, True) + if operationId: + # Show final size in main operation + finalBytes = len(merged_json_str.encode('utf-8')) + if finalBytes < 1024: + finalBytesDisplay = 
f"{finalBytes}B" + elif finalBytes < 1024 * 1024: + finalBytesDisplay = f"{finalBytes / 1024:.1f}kB" + else: + finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB" + self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete: {finalBytesDisplay} ({iteration} iterations, {len(allSections)} sections)") + logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections") + break + + except Exception as e: + logger.error(f"Error in AI call iteration {iteration}: {str(e)}") + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, False) + break + + if iteration >= maxIterations: + logger.warning(f"AI call stopped after maximum iterations ({maxIterations})") + + # CRITICAL: Complete any incomplete structures in sections before building final result + # This ensures JSON is properly closed even if merge failed or iterations stopped early + allSections = JsonResponseHandler.completeIncompleteStructures(allSections) + + # Build final result from accumulated sections + final_result = self.responseParser.buildFinalResultFromSections(allSections, documentMetadata) + + # Write final result to debug file + self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result") + + return final_result + + async def _defineKpisFromPrompt( + self, + userPrompt: str, + rawJsonString: Optional[str], + continuationContext: Dict[str, Any], + debugPrefix: str = "kpi" + ) -> List[Dict[str, Any]]: + """ + Make separate AI call to define KPIs based on user prompt and incomplete JSON. + + Args: + userPrompt: Original user prompt + rawJsonString: Raw JSON string from first iteration response + continuationContext: Continuation context (not used for JSON, kept for compatibility) + debugPrefix: Prefix for debug file names + + Returns: + List of KPI definitions: [{"id": str, "description": str, "jsonPath": str, "targetValue": int}, ...] 
+ """ + # Use raw JSON string from first iteration response + if rawJsonString: + # Remove markdown code fences if present + from modules.shared.jsonUtils import stripCodeFences + incompleteJson = stripCodeFences(rawJsonString.strip()) + else: + incompleteJson = "Not available" + + kpiDefinitionPrompt = f"""Analyze the user request and incomplete JSON to define KPIs (Key Performance Indicators) for tracking progress. + +User Request: +{userPrompt} + +Delivered JSON part: +{incompleteJson} + +Task: Define which JSON items should be tracked to measure completion progress. + +IMPORTANT: Analyze the Delivered JSON part structure to understand what is being tracked: +1. Identify the structure type (table with rows, list with items, etc.) +2. Determine what the jsonPath actually counts (number of rows, number of items, etc.) +3. Calculate targetValue based on what is being tracked, NOT the total quantity requested + +For each trackable item, provide: +- id: Unique identifier (use descriptive name) +- description: What this KPI measures (be specific about what is counted) +- jsonPath: Path to extract value from JSON (use dot notation with array indices, e.g., "documents[0].sections[1].elements[0].rows") +- targetValue: Target value to reach (integer) - MUST match what jsonPath actually tracks (rows count, items count, etc.) 
+ +Return ONLY valid JSON in this format: +{{ + "kpis": [ + {{ + "id": "unique_id", + "description": "Description of what is measured", + "jsonPath": "path.to.value", + "targetValue": 0 + }} + ] +}} + +If no trackable items can be identified, return: {{"kpis": []}} +""" + + try: + request = AiCallRequest( + prompt=kpiDefinitionPrompt, + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.SPEED, + processingMode=ProcessingModeEnum.BASIC + ) + ) + + # Write KPI definition prompt to debug file + self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt") + + response = await self.aiService.callAi(request) + + # Write KPI definition response to debug file + self.services.utils.writeDebugFile(response.content, f"{debugPrefix}_kpi_definition_response") + + # Parse response + extracted = extractJsonString(response.content) + kpiResponse = json.loads(extracted) + + kpiDefinitions = kpiResponse.get("kpis", []) + logger.info(f"Defined {len(kpiDefinitions)} KPIs for tracking") + + return kpiDefinitions + + except Exception as e: + logger.warning(f"Failed to define KPIs: {e}, continuing without KPI tracking") + return [] + From 723f98ea7a1d3413a48f9df8e9bbb1f888ae511f Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sun, 28 Dec 2025 11:43:42 +0100 Subject: [PATCH 12/21] enhanced generation engine with chapters as structure, renderers to render a pipeline and deliver 1..n documents --- modules/datamodels/datamodelDocument.py | 12 + modules/services/serviceAi/mainServiceAi.py | 98 ++- .../services/serviceAi/subStructureFilling.py | 605 ++++++++++++++++-- .../serviceAi/subStructureGeneration.py | 88 +-- .../mainServiceGeneration.py | 96 ++- .../renderers/rendererBaseTemplate.py | 39 +- .../renderers/rendererCsv.py | 22 +- .../renderers/rendererDocx.py | 43 +- .../renderers/rendererHtml.py | 134 +++- .../renderers/rendererImage.py | 32 +- .../renderers/rendererJson.py | 31 +- 
.../renderers/rendererMarkdown.py | 31 +- .../renderers/rendererPdf.py | 44 +- .../renderers/rendererPptx.py | 41 +- .../renderers/rendererText.py | 31 +- .../renderers/rendererXlsx.py | 34 +- .../processing/adaptive/contentValidator.py | 24 + 17 files changed, 1141 insertions(+), 264 deletions(-) diff --git a/modules/datamodels/datamodelDocument.py b/modules/datamodels/datamodelDocument.py index 3f2f8f8e..2f5af99a 100644 --- a/modules/datamodels/datamodelDocument.py +++ b/modules/datamodels/datamodelDocument.py @@ -107,5 +107,17 @@ class StructuredDocument(BaseModel): +class RenderedDocument(BaseModel): + """A single rendered document from a renderer.""" + documentData: bytes = Field(description="Document content as bytes") + mimeType: str = Field(description="MIME type of the document (e.g., 'text/html', 'application/pdf')") + filename: str = Field(description="Filename for the document (e.g., 'report.html', 'image.png')") + + class Config: + json_encoders = { + bytes: lambda v: v.decode('utf-8', errors='replace') if isinstance(v, bytes) else v + } + + # Update forward references ListItem.model_rebuild() diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 777e6230..9839093d 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -11,6 +11,7 @@ from modules.services.serviceExtraction.mainServiceExtraction import ExtractionS from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData +from modules.datamodels.datamodelDocument import RenderedDocument from modules.interfaces.interfaceAiObjects import AiObjects from modules.shared.jsonUtils import ( extractJsonString, @@ -50,7 +51,7 @@ class AiService: if 
self.extractionService is None: logger.info("Initializing ExtractionService...") self.extractionService = ExtractionService(self.services) - + # Initialize new submodules from modules.services.serviceAi.subResponseParsing import ResponseParser from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer @@ -277,7 +278,7 @@ Respond with ONLY a JSON object in this exact format: ) -> str: """Delegate to ResponseParser.""" return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata) - + # Public API Methods # Planning AI Call @@ -494,20 +495,21 @@ Respond with ONLY a JSON object in this exact format: title: str, userPrompt: str, parentOperationId: str - ) -> Tuple[bytes, str]: + ) -> List[RenderedDocument]: """ Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. - Unterstützt Multi-Dokument-Rendering: Alle Dokumente werden gerendert. + Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben. Args: filledStructure: Gefüllte Struktur mit elements - outputFormat: Ziel-Format (pdf, docx, html, etc.) + outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet title: Dokument-Titel userPrompt: User-Anfrage parentOperationId: Parent Operation-ID für ChatLog-Hierarchie - + Returns: - Tuple von (renderedContent, mimeType) + List of RenderedDocument objects. 
+ Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei) """ # Erstelle Operation-ID für Rendering renderOperationId = f"{parentOperationId}_rendering" @@ -526,51 +528,21 @@ Respond with ONLY a JSON object in this exact format: generationService = GenerationService(self.services) - # Multi-Dokument-Rendering - documents = filledStructure.get("documents", []) - - if len(documents) == 1: - # Einzelnes Dokument - wie bisher - renderedContent, mimeType, images = await generationService.renderReport( - filledStructure, - outputFormat, - title, - userPrompt, - self, - parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie - ) - else: - # Mehrere Dokumente - rendere alle - # Option: Alle Sections zusammenführen und als ein Dokument rendern - all_sections = [] - for doc in documents: - if "sections" in doc: - all_sections.extend(doc.get("sections", [])) - - # Erstelle temporäres Dokument mit allen Sections - merged_document = { - "metadata": filledStructure["metadata"], - "documents": [{ - "id": "merged", - "title": title, - "filename": f"{title}.{outputFormat}", - "sections": all_sections - }] - } - - renderedContent, mimeType, images = await generationService.renderReport( - merged_document, - outputFormat, - title, - userPrompt, - self, - parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie - ) + # renderReport verarbeitet jetzt jedes Dokument einzeln + # und gibt Liste von (documentData, mimeType, filename) zurück + renderedDocuments = await generationService.renderReport( + filledStructure, + outputFormat, + title, + userPrompt, + self, + parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie + ) # ChatLog abschließen self.services.chat.progressLogFinish(renderOperationId, True) - return renderedContent, mimeType + return renderedDocuments except Exception as e: self.services.chat.progressLogFinish(renderOperationId, False) @@ -712,7 +684,8 @@ Respond 
with ONLY a JSON object in this exact format: ) # Schritt 5E: Rendere Resultat - renderedContent, mimeType = await self._renderResult( + # Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder) + renderedDocuments = await self._renderResult( filledStructure, outputFormat, title or "Generated Document", @@ -720,15 +693,24 @@ Respond with ONLY a JSON object in this exact format: aiOperationId ) - # Baue Response - documentName = self._determineDocumentName(filledStructure, outputFormat, title) + # Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData + documentDataList = [] + for renderedDoc in renderedDocuments: + try: + # Erstelle DocumentData für jedes gerenderte Dokument + docDataObj = DocumentData( + documentName=renderedDoc.filename, + documentData=renderedDoc.documentData, + mimeType=renderedDoc.mimeType, + sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument + ) + documentDataList.append(docDataObj) + logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})") + except Exception as e: + logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}") - docData = DocumentData( - documentName=documentName, - documentData=renderedContent, - mimeType=mimeType, - sourceJson=filledStructure - ) + if not documentDataList: + raise ValueError("No documents were rendered") metadata = AiResponseMetadata( title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"), @@ -746,7 +728,7 @@ Respond with ONLY a JSON object in this exact format: return AiResponse( content=json.dumps(filledStructure), metadata=metadata, - documents=[docData] + documents=documentDataList ) except Exception as e: diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index cc45b099..d93264af 100644 --- 
a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -35,65 +35,184 @@ class StructureFiller: parentOperationId: str ) -> Dict[str, Any]: """ - Phase 5D: Füllt Struktur mit tatsächlichem Content. - Für jede Section: - - Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format - - Wenn generation_hint spezifiziert: Generiere AI-Content + Phase 5D: Chapter-Content-Generierung (Zwei-Phasen-Ansatz). - **Implementierungsdetails:** - - Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung) - - Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses) + Phase 5D.1: Generiert Sections-Struktur für jedes Chapter + Phase 5D.2: Füllt Sections mit ContentParts Args: - structure: Struktur-Dict mit documents und sections + structure: Struktur-Dict mit documents und chapters (nicht sections!) contentParts: Alle vorbereiteten ContentParts userPrompt: User-Anfrage parentOperationId: Parent Operation-ID für ChatLog-Hierarchie Returns: - Gefüllte Struktur mit elements in jeder Section + Gefüllte Struktur mit elements in jeder Section (nach Flattening) """ # Erstelle Operation-ID für Struktur-Abfüllen fillOperationId = f"{parentOperationId}_structure_filling" + # Prüfe ob Struktur Chapters oder Sections hat + hasChapters = False + for doc in structure.get("documents", []): + if "chapters" in doc: + hasChapters = True + break + + if not hasChapters: + # Fallback: Alte Struktur mit Sections direkt - verwende alte Logik + logger.warning("Structure has no chapters, using legacy section-based filling") + return await self._fillStructureLegacy(structure, contentParts, userPrompt, fillOperationId) + # Starte ChatLog mit Parent-Referenz + chapterCount = sum(len(doc.get("chapters", [])) for doc in structure.get("documents", [])) self.services.chat.progressLogStart( fillOperationId, - "Structure Filling", + "Chapter Content Generation", "Filling", - 
f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", + f"Processing {chapterCount} chapters", parentOperationId=parentOperationId ) try: filledStructure = copy.deepcopy(structure) - # Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) - sections_to_process = [] - all_sections_list = [] # Für Kontext-Informationen - for doc in filledStructure.get("documents", []): - doc_sections = doc.get("sections", []) - all_sections_list.extend(doc_sections) - for section in doc_sections: - sections_to_process.append((doc, section)) + # Phase 5D.1: Sections-Struktur für jedes Chapter generieren + filledStructure = await self._generateChapterSectionsStructure( + filledStructure, contentParts, userPrompt, fillOperationId + ) - # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) - for sectionIndex, (doc, section) in enumerate(sections_to_process): - sectionId = section.get("id") - contentPartIds = section.get("contentPartIds", []) - contentFormats = section.get("contentFormats", {}) - generationHint = section.get("generation_hint") - contentType = section.get("content_type", "paragraph") + # Phase 5D.2: Sections mit ContentParts füllen + filledStructure = await self._fillChapterSections( + filledStructure, contentParts, userPrompt, fillOperationId + ) + + # Flattening: Chapters zu Sections konvertieren + flattenedStructure = self._flattenChaptersToSections(filledStructure) + + # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung) + flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts) + + # ChatLog abschließen + self.services.chat.progressLogFinish(fillOperationId, True) + + return flattenedStructure + + except Exception as e: + self.services.chat.progressLogFinish(fillOperationId, False) + logger.error(f"Error in fillStructure: {str(e)}") + raise + + async def _generateChapterSectionsStructure( + self, + chapterStructure: Dict[str, Any], + 
contentParts: List[ContentPart], + userPrompt: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content). + Sections enthalten: content_type, contentPartIds, generationHint, useAiCall + """ + for doc in chapterStructure.get("documents", []): + for chapter in doc.get("chapters", []): + chapterId = chapter.get("id", "unknown") + chapterLevel = chapter.get("level", 1) + chapterTitle = chapter.get("title", "") + generationHint = chapter.get("generationHint", "") + contentPartIds = chapter.get("contentPartIds", []) + contentPartInstructions = chapter.get("contentPartInstructions", {}) - elements = [] - - # Prüfe ob Aggregation nötig ist - needsAggregation = self._needsAggregation( - contentType=contentType, - contentPartCount=len(contentPartIds) + chapterPrompt = self._buildChapterSectionsStructurePrompt( + chapterId=chapterId, + chapterLevel=chapterLevel, + chapterTitle=chapterTitle, + generationHint=generationHint, + contentPartIds=contentPartIds, + contentPartInstructions=contentPartInstructions, + contentParts=contentParts, + userPrompt=userPrompt ) - if needsAggregation and generationHint: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + chapterPrompt, + f"chapter_structure_{chapterId}_prompt" + ) + + aiResponse = await self.aiService.callAiPlanning( + prompt=chapterPrompt, + debugType=f"chapter_structure_{chapterId}" + ) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse, + f"chapter_structure_{chapterId}_response" + ) + + sectionsStructure = json.loads( + self.services.utils.jsonExtractString(aiResponse) + ) + + chapter["sections"] = sectionsStructure.get("sections", []) + + # Setze useAiCall Flag (falls nicht von AI gesetzt) + for section in chapter["sections"]: + if "useAiCall" not in section: + contentType = section.get("content_type", "paragraph") + useAiCall = contentType != "paragraph" + + # Prüfe contentPartInstructions + if not useAiCall: + 
for partId in section.get("contentPartIds", []): + instruction = contentPartInstructions.get(partId, {}).get("instruction", "") + if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]: + useAiCall = True + break + + section["useAiCall"] = useAiCall + + return chapterStructure + + async def _fillChapterSections( + self, + chapterStructure: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5D.2: Füllt Sections mit ContentParts. + """ + # Sammle alle Sections für sequenzielle Verarbeitung + sections_to_process = [] + all_sections_list = [] # Für Kontext-Informationen + for doc in chapterStructure.get("documents", []): + for chapter in doc.get("chapters", []): + for section in chapter.get("sections", []): + all_sections_list.append(section) + sections_to_process.append((doc, chapter, section)) + + # Sequenzielle Section-Generierung + fillOperationId = parentOperationId + for sectionIndex, (doc, chapter, section) in enumerate(sections_to_process): + sectionId = section.get("id") + contentPartIds = section.get("contentPartIds", []) + contentFormats = section.get("contentFormats", {}) + generationHint = section.get("generation_hint") + contentType = section.get("content_type", "paragraph") + useAiCall = section.get("useAiCall", False) + + elements = [] + + # Prüfe ob Aggregation nötig ist + needsAggregation = self._needsAggregation( + contentType=contentType, + contentPartCount=len(contentPartIds) + ) + + if needsAggregation and useAiCall: # Aggregation: Alle Parts zusammen verarbeiten sectionParts = [ self._findContentPartById(pid, contentParts) @@ -201,8 +320,8 @@ class StructureFiller: }) logger.error(f"Error generating section {sectionId}: {str(e)}") # NICHT raise - Section wird mit Fehlermeldung gerendert - - else: + + else: # Einzelverarbeitung: Jeder Part einzeln for partId in contentPartIds: part = 
self._findContentPartById(partId, contentParts) @@ -308,19 +427,429 @@ class StructureFiller: "source": part.metadata.get("documentId"), "extractionPrompt": part.metadata.get("extractionPrompt") }) + + section["elements"] = elements + + return chapterStructure + + def _addContentPartsMetadata( + self, + structure: Dict[str, Any], + contentParts: List[ContentPart] + ) -> Dict[str, Any]: + """ + Fügt ContentParts-Metadaten zur Struktur hinzu, wenn contentPartIds vorhanden sind. + Dies hilft der Validierung, den Kontext der ContentParts zu verstehen. + """ + # Erstelle Mapping von ContentPart-ID zu Metadaten + contentPartsMap = {} + for part in contentParts: + contentPartsMap[part.id] = { + "id": part.id, + "format": part.metadata.get("contentFormat", "unknown"), + "type": part.typeGroup, + "mimeType": part.mimeType, + "originalFileName": part.metadata.get("originalFileName"), + "usageHint": part.metadata.get("usageHint"), + "documentId": part.metadata.get("documentId"), + "dataSize": len(str(part.data)) if part.data else 0 + } + + # Füge Metadaten zu Sections hinzu, die contentPartIds haben + for doc in structure.get("documents", []): + # Prüfe ob Chapters vorhanden sind (neue Struktur) + if "chapters" in doc: + for chapter in doc.get("chapters", []): + # Füge Metadaten zu Chapter-Level contentPartIds hinzu + chapterContentPartIds = chapter.get("contentPartIds", []) + if chapterContentPartIds: + chapter["contentPartsMetadata"] = [] + for partId in chapterContentPartIds: + if partId in contentPartsMap: + chapter["contentPartsMetadata"].append(contentPartsMap[partId]) + + # Füge Metadaten zu Sections hinzu + for section in chapter.get("sections", []): + contentPartIds = section.get("contentPartIds", []) + if contentPartIds: + section["contentPartsMetadata"] = [] + for partId in contentPartIds: + if partId in contentPartsMap: + section["contentPartsMetadata"].append(contentPartsMap[partId]) + + # Prüfe ob Sections direkt vorhanden sind (Legacy-Struktur) + elif 
"sections" in doc: + for section in doc.get("sections", []): + contentPartIds = section.get("contentPartIds", []) + if contentPartIds: + section["contentPartsMetadata"] = [] + for partId in contentPartIds: + if partId in contentPartsMap: + section["contentPartsMetadata"].append(contentPartsMap[partId]) + + return structure + + def _flattenChaptersToSections( + self, + chapterStructure: Dict[str, Any] + ) -> Dict[str, Any]: + """ + Flattening: Konvertiert Chapters zu finaler Section-Struktur. + Jedes Chapter wird zu einer Heading-Section + dessen Sections. + """ + result = { + "metadata": chapterStructure.get("metadata", {}), + "documents": [] + } + + for doc in chapterStructure.get("documents", []): + flattened_doc = { + "id": doc.get("id"), + "title": doc.get("title"), + "filename": doc.get("filename"), + "sections": [] + } + + for chapter in doc.get("chapters", []): + # 1. Vordefinierte Heading-Section für Chapter-Title + heading_section = { + "id": f"{chapter['id']}_heading", + "content_type": "heading", + "elements": [{ + "type": "heading", + "content": chapter.get("title"), + "level": chapter.get("level", 1) + }] + } + flattened_doc["sections"].append(heading_section) + + # 2. Generierte Sections + flattened_doc["sections"].extend(chapter.get("sections", [])) + + result["documents"].append(flattened_doc) + + return result + + async def _fillStructureLegacy( + self, + structure: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + fillOperationId: str + ) -> Dict[str, Any]: + """ + Legacy: Füllt Struktur mit Sections direkt (für Rückwärtskompatibilität). 
+ """ + # Starte ChatLog + self.services.chat.progressLogStart( + fillOperationId, + "Structure Filling (Legacy)", + "Filling", + f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", + parentOperationId=fillOperationId + ) + + try: + filledStructure = copy.deepcopy(structure) + + # Sammle alle Sections + sections_to_process = [] + all_sections_list = [] + for doc in filledStructure.get("documents", []): + doc_sections = doc.get("sections", []) + all_sections_list.extend(doc_sections) + for section in doc_sections: + sections_to_process.append((doc, section)) + + # Verarbeite Sections (bestehende Logik) + for sectionIndex, (doc, section) in enumerate(sections_to_process): + sectionId = section.get("id") + contentPartIds = section.get("contentPartIds", []) + contentFormats = section.get("contentFormats", {}) + generationHint = section.get("generation_hint") + contentType = section.get("content_type", "paragraph") + + elements = [] + + # Prüfe ob Aggregation nötig ist + needsAggregation = self._needsAggregation( + contentType=contentType, + contentPartCount=len(contentPartIds) + ) + + if needsAggregation and generationHint: + # Aggregation: Alle Parts zusammen verarbeiten + sectionParts = [ + self._findContentPartById(pid, contentParts) + for pid in contentPartIds + ] + sectionParts = [p for p in sectionParts if p is not None] + + if sectionParts: + # Filtere nur extracted Parts für Aggregation + extractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" + ] + nonExtractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + ] + + # Verarbeite non-extracted Parts separat + for part in nonExtractedParts: + contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + elements.append({ + "type": "reference", + "documentReference": 
part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + elif contentFormat == "object": + elements.append({ + "type": part.typeGroup, + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + # Aggregiere extracted Parts mit AI + if extractedParts: + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=extractedParts, + userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=True + ) + + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation (Aggregation)", + "Section", + f"Generating section {sectionId} with {len(extractedParts)} parts", + parentOperationId=fillOperationId + ) + + try: + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + request = AiCallRequest( + prompt=generationPrompt, + contentParts=extractedParts, + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + 
"sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + + else: + # Einzelverarbeitung: Jeder Part einzeln + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if not part: + continue + + contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "object": + elements.append({ + "type": part.typeGroup, + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "extracted": + if generationHint: + # AI-Call mit einzelnen ContentPart + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[part], + userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=False + ) + + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation", + "Section", + f"Generating section {sectionId}", + parentOperationId=fillOperationId + ) + + try: + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + request = AiCallRequest( + prompt=generationPrompt, + contentParts=[part], + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + 
elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + else: + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) section["elements"] = elements - # ChatLog abschließen - self.services.chat.progressLogFinish(fillOperationId, True) + # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung) + filledStructure = self._addContentPartsMetadata(filledStructure, contentParts) + self.services.chat.progressLogFinish(fillOperationId, True) return filledStructure except Exception as e: self.services.chat.progressLogFinish(fillOperationId, False) - logger.error(f"Error in fillStructure: {str(e)}") + logger.error(f"Error in _fillStructureLegacy: {str(e)}") raise + def _buildChapterSectionsStructurePrompt( + self, + chapterId: str, + chapterLevel: int, + chapterTitle: str, + generationHint: str, + contentPartIds: List[str], + contentPartInstructions: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str + ) -> str: + """Baue Prompt für Chapter-Sections-Struktur-Generierung.""" + # Baue ContentParts-Index (nur IDs, keine Previews!) 
+ contentPartsIndex = "" + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if not part: + continue + + contentFormat = part.metadata.get("contentFormat", "unknown") + instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed") + + contentPartsIndex += f"\n- ContentPart ID: {partId}\n" + contentPartsIndex += f" Format: {contentFormat}\n" + contentPartsIndex += f" Type: {part.typeGroup}\n" + contentPartsIndex += f" Instruction: {instruction}\n" + + if not contentPartsIndex: + contentPartsIndex = "\n(No content parts specified for this chapter)" + + prompt = f"""TASK: Generate Chapter Sections Structure + +CHAPTER METADATA: +- Chapter ID: {chapterId} +- Chapter Level: {chapterLevel} +- Chapter Title: {chapterTitle} +- Generation Hint: {generationHint} + +WICHTIG: Chapter hat bereits vordefinierte Heading-Section. +Generiere NICHT eine Heading-Section für Chapter-Title! + +AVAILABLE CONTENT PARTS: +{contentPartsIndex} + +STANDARD JSON SCHEMA FOR SECTIONS: +Supported content_types: table, bullet_list, heading, paragraph, code_block, image + +Return JSON: +{{ + "sections": [ + {{ + "id": "section_1", + "content_type": "paragraph", + "contentPartIds": ["part_ext_1"], + "generationHint": "...", + "useAiCall": false, + "elements": [] + }} + ] +}} + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +""" + return prompt + def _buildSectionGenerationPrompt( self, section: Dict[str, Any], diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py index eb39fdd6..a4d7a19e 100644 --- a/modules/services/serviceAi/subStructureGeneration.py +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -32,11 +32,12 @@ class StructureGenerator: parentOperationId: str ) -> Dict[str, Any]: """ - Phase 5C: Generiert Dokument-Struktur mit Sections. 
- Jede Section spezifiziert: - - Welcher Content sollte in dieser Section sein - - Welche ContentParts zu verwenden sind - - Format für jeden ContentPart + Phase 5C: Generiert Chapter-Struktur (Table of Contents). + Definiert für jedes Chapter: + - Level, Title + - contentPartIds + - contentPartInstructions + - generationHint Args: userPrompt: User-Anfrage @@ -45,7 +46,7 @@ class StructureGenerator: parentOperationId: Parent Operation-ID für ChatLog-Hierarchie Returns: - Struktur-Dict mit documents und sections + Struktur-Dict mit documents und chapters (nicht sections!) """ # Erstelle Operation-ID für Struktur-Generierung structureOperationId = f"{parentOperationId}_structure_generation" @@ -53,25 +54,36 @@ class StructureGenerator: # Starte ChatLog mit Parent-Referenz self.services.chat.progressLogStart( structureOperationId, - "Structure Generation", + "Chapter Structure Generation", "Structure", - f"Generating structure for {outputFormat}", + f"Generating chapter structure for {outputFormat}", parentOperationId=parentOperationId ) try: - # Baue Struktur-Prompt mit Content-Index - structurePrompt = self._buildStructurePrompt( + # Baue Chapter-Struktur-Prompt mit Content-Index + structurePrompt = self._buildChapterStructurePrompt( userPrompt=userPrompt, contentParts=contentParts, outputFormat=outputFormat ) - # AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses) - # Debug-Logs werden bereits von callAiPlanning geschrieben + # Debug: Log Prompt + self.services.utils.writeDebugFile( + structurePrompt, + "chapter_structure_generation_prompt" + ) + + # AI-Call für Chapter-Struktur-Generierung aiResponse = await self.aiService.callAiPlanning( prompt=structurePrompt, - debugType="document_generation_structure" + debugType="chapter_structure_generation" + ) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse, + "chapter_structure_generation_response" ) # Parse Struktur @@ -87,13 +99,13 @@ class 
StructureGenerator: logger.error(f"Error in generateStructure: {str(e)}") raise - def _buildStructurePrompt( + def _buildChapterStructurePrompt( self, userPrompt: str, contentParts: List[ContentPart], outputFormat: str ) -> str: - """Baue Prompt für Struktur-Generierung.""" + """Baue Prompt für Chapter-Struktur-Generierung.""" # Baue ContentParts-Index - filtere leere Parts heraus contentPartsIndex = "" validParts = [] @@ -179,14 +191,19 @@ class StructureGenerator: AVAILABLE CONTENT PARTS: {contentPartsIndex} -TASK: Generiere Dokument-Struktur mit Sections. -Für jede Section, spezifiziere: -- section id -- content_type (heading, paragraph, image, table, etc.) -- contentPartIds: [Liste von ContentPart-IDs zu verwenden] -- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist -- generation_hint: Was AI für diese Section generieren soll -- elements: [] (leer, wird in nächster Phase gefüllt) +TASK: Generiere Chapter-Struktur für die zu generierenden Dokumente. + +Für jedes Chapter: +- chapter id +- level (1, 2, 3, etc.) 
+- title +- contentPartIds: [Liste von ContentPart-IDs] +- contentPartInstructions: {{ + "partId": {{ + "instruction": "Wie Content strukturiert werden soll" + }} +}} +- generationHint: Beschreibung des Inhalts OUTPUT FORMAT: {outputFormat} @@ -200,24 +217,19 @@ RETURN JSON: "id": "doc_1", "title": "Document Title", "filename": "document.{outputFormat}", - "sections": [ + "chapters": [ {{ - "id": "section_1", - "content_type": "heading", - "generation_hint": "Main title", - "contentPartIds": [], - "contentFormats": {{}}, - "elements": [] - }}, - {{ - "id": "section_2", - "content_type": "paragraph", - "generation_hint": "Introduction paragraph", + "id": "chapter_1", + "level": 1, + "title": "Introduction", "contentPartIds": ["part_ext_1"], - "contentFormats": {{ - "part_ext_1": "extracted" + "contentPartInstructions": {{ + "part_ext_1": {{ + "instruction": "Use full extracted text" + }} }}, - "elements": [] + "generationHint": "Create introduction section", + "sections": [] }} ] }}] diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index e08eaa81..828f1033 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -5,6 +5,7 @@ import uuid import base64 import traceback from typing import Any, Dict, List, Optional, Callable +from modules.datamodels.datamodelDocument import RenderedDocument from modules.datamodels.datamodelChat import ChatDocument from modules.services.serviceGeneration.subDocumentUtility import ( getFileExtension, @@ -345,31 +346,31 @@ class GenerationService: 'workflowId': 'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, 
title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]: """ Render extracted JSON content to the specified output format. - Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering). - Always uses unified "documents" array format. - Supports three content formats: reference, object (base64), extracted_text. + Processes EACH document separately and calls renderer for each. + Each renderer can return 1..n documents (e.g., HTML + images). Args: - extractedContent: Structured JSON document from AI extraction + extractedContent: Structured JSON document with documents array outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx) + In future, each document can have its own format title: Report title userPrompt: User's original prompt for report generation aiService: AI service instance for generation prompt creation parentOperationId: Optional parent operation ID for hierarchical logging Returns: - tuple: (rendered_content, mime_type, images_list) - images_list: List of image dicts with base64Data, altText, caption, etc. + List of RenderedDocument objects. 
+ Each RenderedDocument represents one rendered file (main document or supporting file) """ try: # Validate JSON input if not isinstance(extractedContent, dict): raise ValueError("extractedContent must be a JSON dictionary") - # Unified approach: Always expect "documents" array (single doc = n=1) + # Unified approach: Always expect "documents" array if "documents" not in extractedContent: raise ValueError("extractedContent must contain 'documents' array") @@ -377,56 +378,45 @@ class GenerationService: if len(documents) == 0: raise ValueError("No documents found in 'documents' array") - # Phase 5: Multi-Dokument-Rendering - if len(documents) == 1: - # Single document - use existing logic - single_doc = documents[0] - if "sections" not in single_doc: - raise ValueError("Document must contain 'sections' field") + metadata = extractedContent.get("metadata", {}) + allRenderedDocuments = [] + + # Process EACH document separately + for docIndex, doc in enumerate(documents): + if not isinstance(doc, dict): + logger.warning(f"Skipping invalid document at index {docIndex}") + continue - # Pass standardized schema to renderer (maintains architecture) - contentToRender = extractedContent # Pass full standardized schema - else: - # Multiple documents - merge all sections into one document for rendering - # Option: Merge all sections from all documents into a single document - all_sections = [] - for doc in documents: - if isinstance(doc, dict) and "sections" in doc: - sections = doc.get("sections", []) - if isinstance(sections, list): - all_sections.extend(sections) + if "sections" not in doc: + logger.warning(f"Document {doc.get('id', docIndex)} has no sections, skipping") + continue - if not all_sections: - raise ValueError("No sections found in any document") + # Determine format for this document + # TODO: In future, each document can have its own format field + # For now, use the global outputFormat + docFormat = doc.get("format", outputFormat) - # Create merged document 
with all sections - merged_document = { - "metadata": extractedContent.get("metadata", {}), - "documents": [{ - "id": "merged", - "title": title, - "filename": f"{title}.{outputFormat}", - "sections": all_sections - }] + # Get renderer for this document's format + renderer = self._getFormatRenderer(docFormat) + if not renderer: + logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping") + continue + + # Create JSON structure with single document (preserving metadata) + singleDocContent = { + "metadata": metadata, + "documents": [doc] # Only this document } - contentToRender = merged_document - logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections") - - # Get the appropriate renderer for the format - renderer = self._getFormatRenderer(outputFormat) - if not renderer: - raise ValueError(f"Unsupported output format: {outputFormat}") + + # Use document title or fallback to provided title + docTitle = doc.get("title", title) + + # Render this document (can return multiple files, e.g., HTML + images) + renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService) + allRenderedDocuments.extend(renderedDocs) - # Render the JSON content directly (AI generation handled by main service) - # Renderer receives standardized schema and extracts what it needs - renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService) - - # Get images from renderer if available - images = [] - if hasattr(renderer, 'getRenderedImages'): - images = renderer.getRenderedImages() - - return renderedContent, mimeType, images + logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)") + return allRenderedDocuments except Exception as e: logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py 
b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index e9693680..e15e0711 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -5,8 +5,9 @@ Base renderer class for all format renderers. """ from abc import ABC, abstractmethod -from typing import Dict, Any, Tuple, List +from typing import Dict, Any, List from modules.datamodels.datamodelJson import supportedSectionTypes +from modules.datamodels.datamodelDocument import RenderedDocument import json import logging import re @@ -50,21 +51,49 @@ class BaseRenderer(ABC): return 0 @abstractmethod - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """ - Render extracted JSON content to the target format. + Render extracted JSON content to multiple documents. + Each renderer must implement this method. + Can return 1..n documents (e.g., HTML + images). Args: - extractedContent: Structured JSON content with sections and metadata + extractedContent: Structured JSON content with sections and metadata (contains single document) title: Report title userPrompt: Original user prompt for context aiService: AI service instance for additional processing Returns: - tuple: (renderedContent, mimeType) + List of RenderedDocument objects. + First document is the main document, additional documents are supporting files (e.g., images). + Even if only one document is returned, it must be wrapped in a list. 
""" pass + def _determineFilename(self, title: str, mimeType: str) -> str: + """Determine filename from title and mimeType.""" + import re + # Get extension from mimeType + extensionMap = { + "text/html": "html", + "application/pdf": "pdf", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx", + "text/plain": "txt", + "text/markdown": "md", + "application/json": "json", + "text/csv": "csv" + } + extension = extensionMap.get(mimeType, "txt") + + # Sanitize title for filename + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if not sanitized: + sanitized = "document" + + return f"{sanitized}.{extension}" + def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: """ Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py index c18d7481..52e2933d 100644 --- a/modules/services/serviceGeneration/renderers/rendererCsv.py +++ b/modules/services/serviceGeneration/renderers/rendererCsv.py @@ -5,7 +5,8 @@ CSV renderer for report generation. 
""" from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List class RendererCsv(BaseRenderer): """Renders content to CSV format with format-specific extraction.""" @@ -25,13 +26,28 @@ class RendererCsv(BaseRenderer): """Return priority for CSV renderer.""" return 70 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """Render extracted JSON content to CSV format.""" try: # Generate CSV directly from JSON (no styling needed for CSV) csvContent = await self._generateCsvFromJson(extractedContent, title) - return csvContent, "text/csv" + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "text/csv") + else: + filename = self._determineFilename(title, "text/csv") + + return [ + RenderedDocument( + documentData=csvContent.encode('utf-8'), + mimeType="text/csv", + filename=filename + ) + ] except Exception as e: self.logger.error(f"Error rendering CSV: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index f62935d8..ee88369f 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -5,7 +5,8 @@ DOCX renderer for report generation using python-docx. 
""" from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List import io import base64 import re @@ -38,7 +39,7 @@ class RendererDocx(BaseRenderer): """Return priority for DOCX renderer.""" return 115 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """Render extracted JSON content to DOCX format using AI-analyzed styling.""" self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER") try: @@ -46,18 +47,48 @@ class RendererDocx(BaseRenderer): # Fallback to HTML if python-docx not available from .rendererHtml import RendererHtml htmlRenderer = RendererHtml() - htmlContent, _ = await htmlRenderer.render(extractedContent, title) - return htmlContent, "text/html" + return await htmlRenderer.render(extractedContent, title, userPrompt, aiService) # Generate DOCX using AI-analyzed styling docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService) - return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document") + else: + filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document") + + # Convert DOCX content to bytes if it's a string (base64) + if isinstance(docx_content, str): + try: + 
docx_bytes = base64.b64decode(docx_content) + except Exception: + docx_bytes = docx_content.encode('utf-8') + else: + docx_bytes = docx_content + + return [ + RenderedDocument( + documentData=docx_bytes, + mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document", + filename=filename + ) + ] except Exception as e: self.logger.error(f"Error rendering DOCX: {str(e)}") # Return minimal fallback - return f"DOCX Generation Error: {str(e)}", "text/plain" + fallbackContent = f"DOCX Generation Error: {str(e)}" + return [ + RenderedDocument( + documentData=fallbackContent.encode('utf-8'), + mimeType="text/plain", + filename=self._determineFilename(title, "text/plain") + ) + ] async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate DOCX content from structured JSON document.""" diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 213bf641..dba6a03f 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -5,7 +5,8 @@ HTML renderer for report generation. 
""" from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List class RendererHtml(BaseRenderer): """Renders content to HTML format with format-specific extraction.""" @@ -25,29 +26,66 @@ class RendererHtml(BaseRenderer): """Return priority for HTML renderer.""" return 100 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: - """Render extracted JSON content to HTML format using AI-analyzed styling.""" - try: - # Extract images first - images = self._extractImages(extractedContent) + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + """ + Render HTML document with images as separate files. + Returns list of documents: [HTML document, image1, image2, ...] + """ + import base64 + + # Extract images first + images = self._extractImages(extractedContent) + + # Store images in instance for later retrieval + self._renderedImages = images + + # Generate HTML using AI-analyzed styling + htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) + + # Replace base64 data URIs with relative file paths if images exist + if images: + htmlContent = self._replaceImageDataUris(htmlContent, images) + + # Determine HTML filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + htmlFilename = documents[0].get("filename") + if not htmlFilename: + htmlFilename = self._determineFilename(title, "text/html") + else: + htmlFilename = self._determineFilename(title, "text/html") + + # Start with HTML document + resultDocuments = [ + RenderedDocument( + documentData=htmlContent.encode('utf-8'), + mimeType="text/html", + filename=htmlFilename + ) + ] + + # Add images as separate 
documents + for img in images: + base64Data = img.get("base64Data", "") + filename = img.get("filename", f"image_{len(resultDocuments)}.png") + mimeType = img.get("mimeType", "image/png") - # Store images in instance for later retrieval - self._renderedImages = images - - # Generate HTML using AI-analyzed styling - htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) - - # Replace base64 data URIs with relative file paths if images exist - if images: - htmlContent = self._replaceImageDataUris(htmlContent, images) - - return htmlContent, "text/html" - - except Exception as e: - self.logger.error(f"Error rendering HTML: {str(e)}") - # Return minimal HTML fallback - self._renderedImages = [] # Initialize empty list on error - return f"{title}

{title}

Error rendering report: {str(e)}

", "text/html" + if base64Data: + try: + # Decode base64 to bytes + imageBytes = base64.b64decode(base64Data) + resultDocuments.append( + RenderedDocument( + documentData=imageBytes, + mimeType=mimeType, + filename=filename + ) + ) + self.logger.debug(f"Added image file: {filename} ({len(imageBytes)} bytes)") + except Exception as e: + self.logger.warning(f"Error creating image file {filename}: {str(e)}") + + return resultDocuments async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate HTML content from structured JSON document using AI-generated styling.""" @@ -597,8 +635,31 @@ class RendererHtml(BaseRenderer): if base64Data: sectionId = section.get("id", "unknown") + + # Bestimme MIME-Type und Extension + mimeType = element.get("mimeType", "image/png") + if not mimeType or mimeType == "unknown": + # Versuche MIME-Type aus base64 zu erkennen + if base64Data.startswith("/9j/"): + mimeType = "image/jpeg" + elif base64Data.startswith("iVBORw0KGgo"): + mimeType = "image/png" + else: + mimeType = "image/png" # Default + + # Bestimme Extension basierend auf MIME-Type + extension = "png" + if mimeType == "image/jpeg" or mimeType == "image/jpg": + extension = "jpg" + elif mimeType == "image/png": + extension = "png" + elif mimeType == "image/gif": + extension = "gif" + elif mimeType == "image/webp": + extension = "webp" + # Generate filename from section ID - filename = f"{sectionId}.png" + filename = f"{sectionId}.{extension}" # Clean filename (remove invalid characters) filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename) @@ -607,7 +668,8 @@ class RendererHtml(BaseRenderer): "altText": element.get("altText", "Image"), "caption": element.get("caption"), "sectionId": sectionId, - "filename": filename + "filename": filename, + "mimeType": mimeType }) self.logger.debug(f"Extracted image from section {sectionId}: {filename}") @@ -633,8 +695,9 @@ class 
RendererHtml(BaseRenderer): import base64 import re - # Find all image data URIs in HTML - dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)' + # Find all image data URIs in HTML (verschiedene MIME-Types unterstützen) + # Pattern: data:image/[type];base64, + dataUriPattern = r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)' def replaceDataUri(match): base64Data = match.group(1) @@ -642,7 +705,9 @@ class RendererHtml(BaseRenderer): # Find matching image in images list matchingImage = None for img in images: - if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]): + imgBase64 = img.get("base64Data", "") + # Vergleiche base64-Daten (kann unterschiedliche Längen haben durch Padding) + if imgBase64 == base64Data or imgBase64.startswith(base64Data[:100]) or base64Data.startswith(imgBase64[:100]): matchingImage = img break @@ -650,20 +715,25 @@ class RendererHtml(BaseRenderer): # Use filename from image data (generated from section ID) filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png") - # Replace with relative path + # Replace with relative path (ohne Pfad, nur Dateiname) altText = matchingImage.get("altText", "Image") caption = matchingImage.get("caption", "") + # Entferne IMAGE_MARKER Kommentar falls vorhanden + imgTag = f'{altText}' + if caption: - return f'
{altText}
{caption}
' + return f'
{imgTag}
{caption}
' else: - return f'{altText}' + return imgTag else: # Keep original if no match found return match.group(0) - # Replace all data URIs + # Replace all data URIs (auch IMAGE_MARKER Kommentare entfernen) updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent) + # Entferne IMAGE_MARKER Kommentare die übrig geblieben sind + updatedHtml = re.sub(r'', '', updatedHtml) return updatedHtml diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py index ad83673b..7d317131 100644 --- a/modules/services/serviceGeneration/renderers/rendererImage.py +++ b/modules/services/serviceGeneration/renderers/rendererImage.py @@ -5,8 +5,10 @@ Image renderer for report generation using AI image generation. """ from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List import logging +import base64 logger = logging.getLogger(__name__) @@ -28,13 +30,37 @@ class RendererImage(BaseRenderer): """Return priority for image renderer.""" return 90 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """Render extracted JSON content to image format using AI image generation.""" try: # Generate AI image from content imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService) - return imageContent, "image/png" + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "image/png") + else: + filename = self._determineFilename(title, "image/png") + + 
# Convert image content to bytes (base64 string or bytes) + if isinstance(imageContent, str): + try: + imageBytes = base64.b64decode(imageContent) + except Exception: + imageBytes = imageContent.encode('utf-8') + else: + imageBytes = imageContent + + return [ + RenderedDocument( + documentData=imageBytes, + mimeType="image/png", + filename=filename + ) + ] except Exception as e: self.logger.error(f"Error rendering image: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererJson.py b/modules/services/serviceGeneration/renderers/rendererJson.py index a7f3d644..04196cf4 100644 --- a/modules/services/serviceGeneration/renderers/rendererJson.py +++ b/modules/services/serviceGeneration/renderers/rendererJson.py @@ -5,7 +5,8 @@ JSON renderer for report generation. """ from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List import json class RendererJson(BaseRenderer): @@ -26,14 +27,29 @@ class RendererJson(BaseRenderer): """Return priority for JSON renderer.""" return 80 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """Render extracted JSON content to JSON format.""" try: # The extracted content should already be JSON from the AI # Just validate and format it jsonContent = self._cleanJsonContent(extractedContent, title) - return jsonContent, "application/json" + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "application/json") + else: + filename = self._determineFilename(title, 
"application/json") + + return [ + RenderedDocument( + documentData=jsonContent.encode('utf-8'), + mimeType="application/json", + filename=filename + ) + ] except Exception as e: self.logger.error(f"Error rendering JSON: {str(e)}") @@ -43,7 +59,14 @@ class RendererJson(BaseRenderer): "sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}], "metadata": {"error": str(e)} } - return json.dumps(fallbackData, indent=2), "application/json" + fallbackContent = json.dumps(fallbackData, indent=2) + return [ + RenderedDocument( + documentData=fallbackContent.encode('utf-8'), + mimeType="application/json", + filename=self._determineFilename(title, "application/json") + ) + ] def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str: """Clean and validate JSON content from AI.""" diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index dfe2bda2..7b23eb25 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -5,7 +5,8 @@ Markdown renderer for report generation. 
""" from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List class RendererMarkdown(BaseRenderer): """Renders content to Markdown format with format-specific extraction.""" @@ -25,18 +26,40 @@ class RendererMarkdown(BaseRenderer): """Return priority for markdown renderer.""" return 95 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """Render extracted JSON content to Markdown format.""" try: # Generate markdown from JSON structure markdownContent = self._generateMarkdownFromJson(extractedContent, title) - return markdownContent, "text/markdown" + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "text/markdown") + else: + filename = self._determineFilename(title, "text/markdown") + + return [ + RenderedDocument( + documentData=markdownContent.encode('utf-8'), + mimeType="text/markdown", + filename=filename + ) + ] except Exception as e: self.logger.error(f"Error rendering markdown: {str(e)}") # Return minimal markdown fallback - return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown" + fallbackContent = f"# {title}\n\nError rendering report: {str(e)}" + return [ + RenderedDocument( + documentData=fallbackContent.encode('utf-8'), + mimeType="text/markdown", + filename=self._determineFilename(title, "text/markdown") + ) + ] def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate markdown content from structured JSON document.""" diff --git 
a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index 128e84d3..9767449e 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -5,7 +5,8 @@ PDF renderer for report generation using reportlab. """ from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List import io import base64 @@ -38,25 +39,56 @@ class RendererPdf(BaseRenderer): """Return priority for PDF renderer.""" return 120 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """Render extracted JSON content to PDF format using AI-analyzed styling.""" try: if not REPORTLAB_AVAILABLE: # Fallback to HTML if reportlab not available from .rendererHtml import RendererHtml html_renderer = RendererHtml() - html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService) - return html_content, "text/html" + return await html_renderer.render(extractedContent, title, userPrompt, aiService) # Generate PDF using AI-analyzed styling pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService) - return pdf_content, "application/pdf" + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "application/pdf") + else: + filename = self._determineFilename(title, "application/pdf") + + # Convert PDF content to bytes if it's a string (base64) + if isinstance(pdf_content, 
str): + # Try to decode as base64, otherwise encode as UTF-8 + try: + pdf_bytes = base64.b64decode(pdf_content) + except Exception: + pdf_bytes = pdf_content.encode('utf-8') + else: + pdf_bytes = pdf_content + + return [ + RenderedDocument( + documentData=pdf_bytes, + mimeType="application/pdf", + filename=filename + ) + ] except Exception as e: self.logger.error(f"Error rendering PDF: {str(e)}") # Return minimal fallback - return f"PDF Generation Error: {str(e)}", "text/plain" + fallbackContent = f"PDF Generation Error: {str(e)}" + return [ + RenderedDocument( + documentData=fallbackContent.encode('utf-8'), + mimeType="text/plain", + filename=self._determineFilename(title, "text/plain") + ) + ] async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate PDF content from structured JSON document using AI-generated styling.""" diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index e9ad334c..d12048c7 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -6,8 +6,9 @@ import io import json import re from datetime import datetime, UTC -from typing import Dict, Any, Optional, Tuple, List +from typing import Dict, Any, Optional, List from .rendererBaseTemplate import BaseRenderer +from modules.datamodels.datamodelDocument import RenderedDocument logger = logging.getLogger(__name__) @@ -25,7 +26,7 @@ class RendererPptx(BaseRenderer): """Get list of supported output formats.""" return ["pptx", "ppt"] - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """ Render content as PowerPoint presentation from JSON 
data. @@ -204,14 +205,44 @@ class RendererPptx(BaseRenderer): pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8') logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes") - return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation" + + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation") + else: + filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation") + + return [ + RenderedDocument( + documentData=pptx_bytes, + mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation", + filename=filename + ) + ] except ImportError: logger.error("python-pptx library not installed. 
Install with: pip install python-pptx") - return "python-pptx library not installed", "text/plain" + fallbackContent = "python-pptx library not installed" + return [ + RenderedDocument( + documentData=fallbackContent.encode('utf-8'), + mimeType="text/plain", + filename=self._determineFilename(title, "text/plain") + ) + ] except Exception as e: logger.error(f"Error rendering PowerPoint presentation: {str(e)}") - return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain" + fallbackContent = f"Error rendering PowerPoint presentation: {str(e)}" + return [ + RenderedDocument( + documentData=fallbackContent.encode('utf-8'), + mimeType="text/plain", + filename=self._determineFilename(title, "text/plain") + ) + ] def _parseContentToSlides(self, content: str, title: str) -> list: """ diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index acbeaaf9..1948b29f 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -5,7 +5,8 @@ Text renderer for report generation. 
""" from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List class RendererText(BaseRenderer): """Renders content to plain text format with format-specific extraction.""" @@ -47,18 +48,40 @@ class RendererText(BaseRenderer): """Return priority for text renderer.""" return 90 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """Render extracted JSON content to plain text format.""" try: # Generate text from JSON structure textContent = self._generateTextFromJson(extractedContent, title) - return textContent, "text/plain" + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "text/plain") + else: + filename = self._determineFilename(title, "text/plain") + + return [ + RenderedDocument( + documentData=textContent.encode('utf-8'), + mimeType="text/plain", + filename=filename + ) + ] except Exception as e: self.logger.error(f"Error rendering text: {str(e)}") # Return minimal text fallback - return f"{title}\n\nError rendering report: {str(e)}", "text/plain" + fallbackContent = f"{title}\n\nError rendering report: {str(e)}" + return [ + RenderedDocument( + documentData=fallbackContent.encode('utf-8'), + mimeType="text/plain", + filename=self._determineFilename(title, "text/plain") + ) + ] def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate text content from structured JSON document.""" diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py 
b/modules/services/serviceGeneration/renderers/rendererXlsx.py index a8cffd56..d8d23065 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -5,7 +5,8 @@ Excel renderer for report generation using openpyxl. """ from .rendererBaseTemplate import BaseRenderer -from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List import io import base64 from datetime import datetime, UTC @@ -37,20 +38,43 @@ class RendererXlsx(BaseRenderer): """Return priority for Excel renderer.""" return 110 - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """Render extracted JSON content to Excel format using AI-analyzed styling.""" try: if not OPENPYXL_AVAILABLE: # Fallback to CSV if openpyxl not available from .rendererCsv import RendererCsv csvRenderer = RendererCsv() - csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService) - return csvContent, "text/csv" + return await csvRenderer.render(extractedContent, title, userPrompt, aiService) # Generate Excel using AI-analyzed styling excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService) - return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + # Determine filename from document or title + documents = extractedContent.get("documents", []) + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") + else: + filename = self._determineFilename(title, 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") + + # Convert Excel content to bytes if it's a string (base64) + if isinstance(excelContent, str): + try: + excel_bytes = base64.b64decode(excelContent) + except Exception: + excel_bytes = excelContent.encode('utf-8') + else: + excel_bytes = excelContent + + return [ + RenderedDocument( + documentData=excel_bytes, + mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + filename=filename + ) + ] except Exception as e: self.logger.error(f"Error rendering Excel: {str(e)}") diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index b1de9f98..4e405630 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -106,6 +106,18 @@ class ContentValidator: if section.get("textPreview"): sectionSummary["textPreview"] = section.get("textPreview") + # Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu + contentPartIds = section.get("contentPartIds", []) + if contentPartIds and not elements: + # Prüfe ob contentPartsMetadata vorhanden ist + contentPartsMetadata = section.get("contentPartsMetadata", []) + if contentPartsMetadata: + sectionSummary["contentPartsMetadata"] = contentPartsMetadata + else: + # Fallback: Zeige nur IDs wenn Metadaten nicht verfügbar + sectionSummary["contentPartIds"] = contentPartIds + sectionSummary["note"] = "ContentParts referenced but metadata not available" + # Include any additional fields from section (generic approach) # This ensures all action-specific fields are preserved for key, value in section.items(): @@ -141,6 +153,18 @@ class ContentValidator: sectionSummary["rowCount"] = len(rows) sectionSummary["headers"] = headers + # Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu + contentPartIds = 
section.get("contentPartIds", []) + if contentPartIds and not elements: + # Prüfe ob contentPartsMetadata vorhanden ist + contentPartsMetadata = section.get("contentPartsMetadata", []) + if contentPartsMetadata: + sectionSummary["contentPartsMetadata"] = contentPartsMetadata + else: + # Fallback: Zeige nur IDs wenn Metadaten nicht verfügbar + sectionSummary["contentPartIds"] = contentPartIds + sectionSummary["note"] = "ContentParts referenced but metadata not available" + # Include any additional fields from section (generic approach) for key, value in section.items(): if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately From db456f166722f2e6aa66e2fdbbb942e38f4953cd Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sun, 28 Dec 2025 13:51:19 +0100 Subject: [PATCH 13/21] fixed generation issue and ai calls only for extracted content --- .../services/serviceAi/subStructureFilling.py | 121 ++++++++++++------ .../serviceAi/subStructureGeneration.py | 42 +----- .../renderers/rendererBaseTemplate.py | 47 +++++-- .../renderers/rendererHtml.py | 23 ++-- 4 files changed, 138 insertions(+), 95 deletions(-) diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index d93264af..548cf128 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -134,23 +134,13 @@ class StructureFiller: userPrompt=userPrompt ) - # Debug: Log Prompt - self.services.utils.writeDebugFile( - chapterPrompt, - f"chapter_structure_{chapterId}_prompt" - ) - + # AI-Call für Chapter-Struktur-Generierung + # Note: Debug logging is handled by callAiPlanning aiResponse = await self.aiService.callAiPlanning( prompt=chapterPrompt, debugType=f"chapter_structure_{chapterId}" ) - # Debug: Log Response - self.services.utils.writeDebugFile( - aiResponse, - f"chapter_structure_{chapterId}_response" - ) - sectionsStructure = json.loads( 
self.services.utils.jsonExtractString(aiResponse) ) @@ -158,20 +148,39 @@ class StructureFiller: chapter["sections"] = sectionsStructure.get("sections", []) # Setze useAiCall Flag (falls nicht von AI gesetzt) + # WICHTIG: useAiCall kann nur true sein, wenn mindestens ein ContentPart Format "extracted" hat! + # "object" und "reference" Formate werden direkt als Elemente hinzugefügt, benötigen kein AI. for section in chapter["sections"]: if "useAiCall" not in section: contentType = section.get("content_type", "paragraph") - useAiCall = contentType != "paragraph" + contentPartIds = section.get("contentPartIds", []) - # Prüfe contentPartInstructions - if not useAiCall: - for partId in section.get("contentPartIds", []): - instruction = contentPartInstructions.get(partId, {}).get("instruction", "") - if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]: - useAiCall = True + # Prüfe ob mindestens ein ContentPart Format "extracted" hat + hasExtractedPart = False + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if part: + contentFormat = part.metadata.get("contentFormat", "unknown") + if contentFormat == "extracted": + hasExtractedPart = True break + # useAiCall kann nur true sein, wenn extracted Parts vorhanden sind + useAiCall = False + if hasExtractedPart: + # Prüfe ob Transformation nötig ist + useAiCall = contentType != "paragraph" + + # Prüfe contentPartInstructions für Transformation + if not useAiCall: + for partId in contentPartIds: + instruction = contentPartInstructions.get(partId, {}).get("instruction", "") + if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]: + useAiCall = True + break + section["useAiCall"] = useAiCall + logger.debug(f"Section {section.get('id')}: useAiCall={useAiCall} (hasExtractedPart={hasExtractedPart}, contentType={contentType})") return chapterStructure @@ -200,10 
+209,16 @@ class StructureFiller: sectionId = section.get("id") contentPartIds = section.get("contentPartIds", []) contentFormats = section.get("contentFormats", {}) - generationHint = section.get("generation_hint") + # Check both camelCase and snake_case for generationHint + generationHint = section.get("generationHint") or section.get("generation_hint") contentType = section.get("content_type", "paragraph") useAiCall = section.get("useAiCall", False) + # WICHTIG: Wenn keine ContentParts vorhanden sind, kann kein AI-Call gemacht werden + if len(contentPartIds) == 0: + useAiCall = False + logger.debug(f"Section {sectionId}: No content parts, setting useAiCall=False") + elements = [] # Prüfe ob Aggregation nötig ist @@ -212,6 +227,8 @@ class StructureFiller: contentPartCount=len(contentPartIds) ) + logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") + if needsAggregation and useAiCall: # Aggregation: Alle Parts zusammen verarbeiten sectionParts = [ @@ -251,6 +268,7 @@ class StructureFiller: # Aggregiere extracted Parts mit AI if extractedParts: + logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=extractedParts, # ALLE PARTS für Aggregation! @@ -279,6 +297,7 @@ class StructureFiller: generationPrompt, f"section_content_{sectionId}_prompt" ) + logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt (aggregation)") # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) 
request = AiCallRequest( @@ -297,6 +316,7 @@ class StructureFiller: aiResponse.content, f"section_content_{sectionId}_response" ) + logger.debug(f"Logged section response: section_content_{sectionId}_response (aggregation)") # Parse und füge zu elements hinzu generatedElements = json.loads( @@ -348,8 +368,10 @@ class StructureFiller: }) elif contentFormat == "extracted": - if generationHint: + # WICHTIG: Prüfe sowohl useAiCall als auch generationHint + if useAiCall and generationHint: # AI-Call mit einzelnen ContentPart + logger.debug(f"Processing section {sectionId}: Single extracted part with AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)})") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=[part], # EIN PART @@ -378,6 +400,7 @@ class StructureFiller: generationPrompt, f"section_content_{sectionId}_prompt" ) + logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") # Verwende callAi für ContentParts-Unterstützung request = AiCallRequest( @@ -396,6 +419,7 @@ class StructureFiller: aiResponse.content, f"section_content_{sectionId}_response" ) + logger.debug(f"Logged section response: section_content_{sectionId}_response") # Parse und füge zu elements hinzu generatedElements = json.loads( @@ -421,6 +445,7 @@ class StructureFiller: # NICHT raise - Section wird mit Fehlermeldung gerendert else: # Füge extrahierten Text direkt hinzu (kein AI-Call) + logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly") elements.append({ "type": "extracted_text", "content": part.data, @@ -566,8 +591,15 @@ class StructureFiller: sectionId = section.get("id") contentPartIds = section.get("contentPartIds", []) contentFormats = section.get("contentFormats", {}) - generationHint = section.get("generation_hint") + # Check both camelCase and snake_case for generationHint + generationHint = 
section.get("generationHint") or section.get("generation_hint") contentType = section.get("content_type", "paragraph") + useAiCall = section.get("useAiCall", False) + + # WICHTIG: Wenn keine ContentParts vorhanden sind, kann kein AI-Call gemacht werden + if len(contentPartIds) == 0: + useAiCall = False + logger.debug(f"Section {sectionId} (legacy): No content parts, setting useAiCall=False") elements = [] @@ -577,7 +609,9 @@ class StructureFiller: contentPartCount=len(contentPartIds) ) - if needsAggregation and generationHint: + logger.info(f"Processing section {sectionId} (legacy): contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") + + if needsAggregation and useAiCall and generationHint: # Aggregation: Alle Parts zusammen verarbeiten sectionParts = [ self._findContentPartById(pid, contentParts) @@ -702,8 +736,10 @@ class StructureFiller: }) elif contentFormat == "extracted": - if generationHint: + # WICHTIG: Prüfe sowohl useAiCall als auch generationHint + if useAiCall and generationHint: # AI-Call mit einzelnen ContentPart + logger.debug(f"Processing section {sectionId}: Single extracted part with AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)})") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=[part], @@ -729,6 +765,7 @@ class StructureFiller: generationPrompt, f"section_content_{sectionId}_prompt" ) + logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") request = AiCallRequest( prompt=generationPrompt, @@ -745,6 +782,7 @@ class StructureFiller: aiResponse.content, f"section_content_{sectionId}_response" ) + logger.debug(f"Logged section response: section_content_{sectionId}_response") generatedElements = json.loads( self.services.utils.jsonExtractString(aiResponse.content) @@ -765,6 +803,8 @@ class StructureFiller: }) logger.error(f"Error generating section 
{sectionId}: {str(e)}") else: + # Füge extrahierten Text direkt hinzu (kein AI-Call) + logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly") elements.append({ "type": "extracted_text", "content": part.data, @@ -817,35 +857,44 @@ class StructureFiller: prompt = f"""TASK: Generate Chapter Sections Structure -CHAPTER METADATA: -- Chapter ID: {chapterId} -- Chapter Level: {chapterLevel} -- Chapter Title: {chapterTitle} -- Generation Hint: {generationHint} +CHAPTER: {chapterTitle} (Level {chapterLevel}, ID: {chapterId}) +GENERATION HINT: {generationHint} -WICHTIG: Chapter hat bereits vordefinierte Heading-Section. -Generiere NICHT eine Heading-Section für Chapter-Title! +NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title. AVAILABLE CONTENT PARTS: {contentPartsIndex} -STANDARD JSON SCHEMA FOR SECTIONS: -Supported content_types: table, bullet_list, heading, paragraph, code_block, image +CONTENT TYPES: table, bullet_list, heading, paragraph, code_block, image -Return JSON: +useAiCall RULES: +- useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed +- useAiCall: false if Format is "object" or "reference" (direct insertion) +- useAiCall: false if Format is "extracted" AND simple "include full text" instruction + +RETURN JSON: {{ "sections": [ {{ "id": "section_1", "content_type": "paragraph", - "contentPartIds": ["part_ext_1"], - "generationHint": "...", + "contentPartIds": ["extracted_part_1"], + "generationHint": "Include full text", "useAiCall": false, "elements": [] }} ] }} +EXAMPLES (all content types): +- paragraph: {{"id": "s1", "content_type": "paragraph", "contentPartIds": ["extracted_1"], "generationHint": "Include full text", "useAiCall": false, "elements": []}} +- bullet_list: {{"id": "s2", "content_type": "bullet_list", "contentPartIds": ["extracted_1"], 
"generationHint": "Create bullet list", "useAiCall": true, "elements": []}} +- table: {{"id": "s3", "content_type": "table", "contentPartIds": ["extracted_1", "extracted_2"], "generationHint": "Create table", "useAiCall": true, "elements": []}} +- heading: {{"id": "s4", "content_type": "heading", "contentPartIds": ["extracted_1"], "generationHint": "Extract heading", "useAiCall": true, "elements": []}} +- code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}} +- image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "useAiCall": false, "elements": []}} +- reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}} + CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ return prompt diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py index a4d7a19e..b8db20a1 100644 --- a/modules/services/serviceAi/subStructureGeneration.py +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -68,24 +68,13 @@ class StructureGenerator: outputFormat=outputFormat ) - # Debug: Log Prompt - self.services.utils.writeDebugFile( - structurePrompt, - "chapter_structure_generation_prompt" - ) - # AI-Call für Chapter-Struktur-Generierung + # Note: Debug logging is handled by callAiPlanning aiResponse = await self.aiService.callAiPlanning( prompt=structurePrompt, debugType="chapter_structure_generation" ) - # Debug: Log Response - self.services.utils.writeDebugFile( - aiResponse, - "chapter_structure_generation_response" - ) - # Parse Struktur structure = json.loads(self.services.utils.jsonExtractString(aiResponse)) @@ -143,34 +132,6 @@ class StructureGenerator: # Baue Index nur für gültige Parts for i, part in enumerate(validParts, 
1): contentFormat = part.metadata.get("contentFormat", "unknown") - dataPreview = "" - - if contentFormat == "extracted": - # Für Image-Parts: Zeige dass es ein Image ist - if part.typeGroup == "image": - dataLength = len(part.data) if part.data else 0 - mimeType = part.mimeType or "image" - dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content" - elif part.typeGroup == "container": - # Container ohne Daten überspringen wir bereits oben - dataPreview = "Container structure (no text content)" - else: - # Zeige Preview von extrahiertem Text - if part.data: - preview = part.data[:200] + "..." if len(part.data) > 200 else part.data - dataPreview = preview - else: - dataPreview = "(empty)" - elif contentFormat == "object": - dataLength = len(part.data) if part.data else 0 - mimeType = part.mimeType or "binary" - if part.typeGroup == "image": - dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)" - else: - dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)" - elif contentFormat == "reference": - dataPreview = part.metadata.get("documentReference", "reference") - originalFileName = part.metadata.get('originalFileName', 'N/A') contentPartsIndex += f"\n{i}. 
ContentPart ID: {part.id}\n" @@ -180,7 +141,6 @@ class StructureGenerator: contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n" contentPartsIndex += f" Original file name: {originalFileName}\n" contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n" - contentPartsIndex += f" Data preview: {dataPreview}\n" if not contentPartsIndex: contentPartsIndex = "\n(No content parts available)" diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index e15e0711..ebd37885 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -5,7 +5,7 @@ Base renderer class for all format renderers. """ from abc import ABC, abstractmethod -from typing import Dict, Any, List +from typing import Dict, Any, List, Tuple from modules.datamodels.datamodelJson import supportedSectionTypes from modules.datamodels.datamodelDocument import RenderedDocument import json @@ -201,9 +201,15 @@ class BaseRenderer(ABC): def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: """Extract table headers and rows from section data.""" # Normalize when elements array was passed in - if isinstance(sectionData, list) and sectionData: - candidate = sectionData[0] - sectionData = candidate if isinstance(candidate, dict) else {} + if isinstance(sectionData, list): + if sectionData and isinstance(sectionData[0], dict): + sectionData = sectionData[0] + else: + # Empty list or invalid structure - return empty table + return [], [] + # Ensure sectionData is a dict before calling .get() + if not isinstance(sectionData, dict): + return [], [] headers = sectionData.get("headers", []) rows = sectionData.get("rows", []) return headers, rows @@ -227,8 +233,15 @@ class BaseRenderer(ABC): def _extractHeadingData(self, sectionData: Dict[str, 
Any]) -> Tuple[int, str]: """Extract heading level and text from section data.""" # Normalize when elements array was passed in - if isinstance(sectionData, list) and sectionData: - sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + if isinstance(sectionData, list): + if sectionData and isinstance(sectionData[0], dict): + sectionData = sectionData[0] + else: + # Empty list or invalid structure - return default + return 1, "" + # Ensure sectionData is a dict before calling .get() + if not isinstance(sectionData, dict): + return 1, "" level = sectionData.get("level", 1) text = sectionData.get("text", "") return level, text @@ -249,8 +262,15 @@ class BaseRenderer(ABC): def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: """Extract code and language from section data.""" # Normalize when elements array was passed in - if isinstance(sectionData, list) and sectionData: - sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + if isinstance(sectionData, list): + if sectionData and isinstance(sectionData[0], dict): + sectionData = sectionData[0] + else: + # Empty list or invalid structure - return default + return "", "" + # Ensure sectionData is a dict before calling .get() + if not isinstance(sectionData, dict): + return "", "" code = sectionData.get("code", "") language = sectionData.get("language", "") return code, language @@ -258,8 +278,15 @@ class BaseRenderer(ABC): def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: """Extract base64 data and alt text from section data.""" # Normalize when elements array was passed in - if isinstance(sectionData, list) and sectionData: - sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + if isinstance(sectionData, list): + if sectionData and isinstance(sectionData[0], dict): + sectionData = sectionData[0] + else: + # Empty list or invalid structure - return default + return "", "Image" + # Ensure sectionData is a 
dict before calling .get() + if not isinstance(sectionData, dict): + return "", "Image" base64Data = sectionData.get("base64Data", "") altText = sectionData.get("altText", "Image") return base64Data, altText diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index dba6a03f..275302b6 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -396,7 +396,7 @@ class RendererHtml(BaseRenderer): source = element.get("source", "") if content: source_text = f' (Source: {source})' if source else '' - htmlParts.append(f'

{content}{source_text}

') + htmlParts.append(f'

{content}{source_text}

') elif isinstance(element, dict): # Regular paragraph element text = element.get("text", element.get("content", "")) @@ -432,7 +432,7 @@ class RendererHtml(BaseRenderer): source = element.get("source", "") if content: source_text = f' (Source: {source})' if source else '' - htmlParts.append(f'

{content}{source_text}

') + htmlParts.append(f'

{content}{source_text}

') if htmlParts: return '\n'.join(htmlParts) @@ -577,18 +577,23 @@ class RendererHtml(BaseRenderer): def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON image to HTML with placeholder for later replacement.""" try: + import html base64Data = imageData.get("base64Data", "") altText = imageData.get("altText", "Image") caption = imageData.get("caption", "") + # Escape HTML in altText and caption to prevent injection + altTextEscaped = html.escape(str(altText)) + captionEscaped = html.escape(str(caption)) if caption else "" + if base64Data: # Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris # Include a marker so we can find and replace it - imageMarker = f"" - imgTag = f'{altText}' + imageMarker = f"" + imgTag = f'{altTextEscaped}' - if caption: - return f'{imageMarker}
{imgTag}
{caption}
' + if captionEscaped: + return f'{imageMarker}
{imgTag}
{captionEscaped}
' else: return f'{imageMarker}{imgTag}' @@ -712,12 +717,14 @@ class RendererHtml(BaseRenderer): break if matchingImage: + import html # Use filename from image data (generated from section ID) filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png") # Replace with relative path (ohne Pfad, nur Dateiname) - altText = matchingImage.get("altText", "Image") - caption = matchingImage.get("caption", "") + # Escape HTML in altText and caption to prevent injection + altText = html.escape(str(matchingImage.get("altText", "Image"))) + caption = html.escape(str(matchingImage.get("caption", ""))) if matchingImage.get("caption") else "" # Entferne IMAGE_MARKER Kommentar falls vorhanden imgTag = f'{altText}' From 02808799686c1fc455639f14b774c6fa1e98b5e2 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sun, 28 Dec 2025 14:08:28 +0100 Subject: [PATCH 14/21] module tests completed --- .../mainServiceExtraction.py | 2 +- .../renderers/rendererHtml.py | 30 ++++++++++++------- .../serviceGeneration/subDocumentUtility.py | 10 +++++++ 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index ba4bfb69..33edb6c7 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -240,7 +240,7 @@ class ExtractionService: partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]" extractionSummary["parts"].append(partSummary) - writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}") + writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}.txt") except Exception as e: logger.debug(f"Failed to write extraction debug file: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py 
b/modules/services/serviceGeneration/renderers/rendererHtml.py index 275302b6..17ac25b3 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -700,12 +700,19 @@ class RendererHtml(BaseRenderer): import base64 import re - # Find all image data URIs in HTML (verschiedene MIME-Types unterstützen) - # Pattern: data:image/[type];base64, - dataUriPattern = r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)' + # Find entire img tags with data URIs and replace them + # Pattern: + imgTagPattern = r']*>' - def replaceDataUri(match): - base64Data = match.group(1) + def replaceImgTag(match): + imgTag = match.group(0) + + # Extract base64 data from the img tag + base64Match = re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)', imgTag) + if not base64Match: + return imgTag # Return original if no base64 found + + base64Data = base64Match.group(1) # Find matching image in images list matchingImage = None @@ -721,12 +728,13 @@ class RendererHtml(BaseRenderer): # Use filename from image data (generated from section ID) filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png") - # Replace with relative path (ohne Pfad, nur Dateiname) - # Escape HTML in altText and caption to prevent injection - altText = html.escape(str(matchingImage.get("altText", "Image"))) + # Extract existing alt text or use from matchingImage + altMatch = re.search(r'alt="([^"]*)"', imgTag) + existingAlt = altMatch.group(1) if altMatch else "" + altText = html.escape(str(matchingImage.get("altText", existingAlt or "Image"))) caption = html.escape(str(matchingImage.get("caption", ""))) if matchingImage.get("caption") else "" - # Entferne IMAGE_MARKER Kommentar falls vorhanden + # Create new img tag with filename imgTag = f'{altText}' if caption: @@ -737,8 +745,8 @@ class RendererHtml(BaseRenderer): # Keep original if no match found return match.group(0) - # Replace all data URIs (auch 
IMAGE_MARKER Kommentare entfernen) - updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent) + # Replace all img tags with data URIs (auch IMAGE_MARKER Kommentare entfernen) + updatedHtml = re.sub(imgTagPattern, replaceImgTag, htmlContent) # Entferne IMAGE_MARKER Kommentare die übrig geblieben sind updatedHtml = re.sub(r'', '', updatedHtml) diff --git a/modules/services/serviceGeneration/subDocumentUtility.py b/modules/services/serviceGeneration/subDocumentUtility.py index abef95da..329f09f6 100644 --- a/modules/services/serviceGeneration/subDocumentUtility.py +++ b/modules/services/serviceGeneration/subDocumentUtility.py @@ -180,6 +180,16 @@ def convertDocumentDataToString(document_data: Any, file_extension: str) -> str: try: if document_data is None: return "" + if isinstance(document_data, bytes): + # WICHTIG: Decode bytes to string for text files (HTML, text, etc.) + try: + return document_data.decode('utf-8') + except UnicodeDecodeError: + # Fallback: try latin1 or return with error replacement + try: + return document_data.decode('latin1') + except Exception: + return document_data.decode('utf-8', errors='replace') if isinstance(document_data, str): return document_data if isinstance(document_data, dict): From 3e7c75335a8528242156c3c0573f914cda1193c6 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sun, 28 Dec 2025 23:34:32 +0100 Subject: [PATCH 15/21] fixed handovers from generator to renderers --- modules/aicore/aicorePluginOpenai.py | 9 +- modules/datamodels/datamodelDocument.py | 4 + .../services/serviceAi/subStructureFilling.py | 611 ++++++++---------- .../mainServiceExtraction.py | 5 +- .../renderers/rendererBaseTemplate.py | 96 ++- .../renderers/rendererCsv.py | 47 +- .../renderers/rendererDocx.py | 162 +++-- .../renderers/rendererHtml.py | 136 +++- .../renderers/rendererImage.py | 8 +- .../renderers/rendererJson.py | 14 +- .../renderers/rendererMarkdown.py | 61 +- .../renderers/rendererPdf.py | 238 +++++-- .../renderers/rendererPptx.py | 231 
+++++-- .../renderers/rendererText.py | 62 +- .../renderers/rendererXlsx.py | 406 +++++++++--- .../processing/adaptive/contentValidator.py | 117 +++- .../processing/shared/placeholderFactory.py | 28 +- .../test10_document_generation_formats.py | 541 ++++++++++++++++ 18 files changed, 2067 insertions(+), 709 deletions(-) create mode 100644 tests/functional/test10_document_generation_formats.py diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index 89ffdccf..026be18b 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -354,10 +354,11 @@ class AiOpenai(BaseConnectorAi): if response.status_code != 200: logger.error(f"DALL-E API error: {response.status_code} - {response.text}") - return { - "success": False, - "error": f"DALL-E API error: {response.status_code} - {response.text}" - } + return AiModelResponse( + content="", + success=False, + error=f"DALL-E API error: {response.status_code} - {response.text}" + ) responseJson = response.json() diff --git a/modules/datamodels/datamodelDocument.py b/modules/datamodels/datamodelDocument.py index 2f5af99a..a5cd6b0c 100644 --- a/modules/datamodels/datamodelDocument.py +++ b/modules/datamodels/datamodelDocument.py @@ -13,6 +13,8 @@ class DocumentMetadata(BaseModel): sourceDocuments: List[str] = Field(default_factory=list, description="Source document IDs") extractionMethod: str = Field(default="ai_extraction", description="Method used for extraction") version: str = Field(default="1.0", description="Document version") + documentType: Optional[str] = Field(default=None, description="Type of document (e.g., 'report', 'invoice', 'analysis')") + styles: Optional[Dict[str, Any]] = Field(default=None, description="Document styling configuration") class TableData(BaseModel): @@ -112,6 +114,8 @@ class RenderedDocument(BaseModel): documentData: bytes = Field(description="Document content as bytes") mimeType: str = Field(description="MIME type of the 
document (e.g., 'text/html', 'application/pdf')") filename: str = Field(description="Filename for the document (e.g., 'report.html', 'image.png')") + documentType: Optional[str] = Field(default=None, description="Type of document (e.g., 'report', 'invoice', 'analysis')") + metadata: Optional[Dict[str, Any]] = Field(default=None, description="Document metadata (title, author, etc.)") class Config: json_encoders = { diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index 548cf128..af1e51f6 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -52,7 +52,7 @@ class StructureFiller: # Erstelle Operation-ID für Struktur-Abfüllen fillOperationId = f"{parentOperationId}_structure_filling" - # Prüfe ob Struktur Chapters oder Sections hat + # Validate structure has chapters hasChapters = False for doc in structure.get("documents", []): if "chapters" in doc: @@ -60,9 +60,9 @@ class StructureFiller: break if not hasChapters: - # Fallback: Alte Struktur mit Sections direkt - verwende alte Logik - logger.warning("Structure has no chapters, using legacy section-based filling") - return await self._fillStructureLegacy(structure, contentParts, userPrompt, fillOperationId) + error_msg = "Structure must have chapters. Legacy section-based structure is not supported." 
+ logger.error(error_msg) + raise ValueError(error_msg) # Starte ChatLog mit Parent-Referenz chapterCount = sum(len(doc.get("chapters", [])) for doc in structure.get("documents", [])) @@ -214,10 +214,11 @@ class StructureFiller: contentType = section.get("content_type", "paragraph") useAiCall = section.get("useAiCall", False) - # WICHTIG: Wenn keine ContentParts vorhanden sind, kann kein AI-Call gemacht werden - if len(contentPartIds) == 0: + # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden + # Aber: Wenn generationHint vorhanden ist, kann AI auch ohne ContentParts generieren (z.B. Executive Summary) + if len(contentPartIds) == 0 and not generationHint: useAiCall = False - logger.debug(f"Section {sectionId}: No content parts, setting useAiCall=False") + logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") elements = [] @@ -259,12 +260,25 @@ class StructureFiller: "label": part.metadata.get("usageHint", part.label) }) elif contentFormat == "object": - elements.append({ - "type": part.typeGroup, - "base64Data": part.data, - "mimeType": part.mimeType, - "altText": part.metadata.get("usageHint", part.label) - }) + # Nested content structure for objects + if part.typeGroup == "image": + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": part.metadata.get("caption", "") + } + }) + else: + elements.append({ + "type": part.typeGroup, + "content": { + "data": part.data, + "mimeType": part.mimeType, + "label": part.metadata.get("usageHint", part.label) + } + }) # Aggregiere extracted Parts mit AI if extractedParts: @@ -300,11 +314,24 @@ class StructureFiller: logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt (aggregation)") # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) 
+ # Use IMAGE_GENERATE for image content type + operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE + + # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) + if operationType == OperationTypeEnum.IMAGE_GENERATE: + maxPromptLength = 4000 + if len(generationPrompt) > maxPromptLength: + logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") + # Keep the beginning (task, metadata, generation hint) and truncate from end + generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline + + # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks + contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else extractedParts request = AiCallRequest( prompt=generationPrompt, - contentParts=extractedParts, # ALLE PARTS! + contentParts=contentPartsForCall, # Empty for IMAGE_GENERATE, all parts for others options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, + operationType=operationType, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.DETAILED ) @@ -318,14 +345,39 @@ class StructureFiller: ) logger.debug(f"Logged section response: section_content_{sectionId}_response (aggregation)") - # Parse und füge zu elements hinzu - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) + # Handle IMAGE_GENERATE differently - returns image data directly + if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: + import base64 + # Convert image data to base64 string if needed + if isinstance(aiResponse.content, bytes): + base64Data = 
base64.b64encode(aiResponse.content).decode('utf-8') + elif isinstance(aiResponse.content, str): + # Already base64 string or data URI + if aiResponse.content.startswith("data:image/"): + # Extract base64 from data URI + base64Data = aiResponse.content.split(",", 1)[1] + else: + base64Data = aiResponse.content + else: + base64Data = "" + + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + else: + # Parse JSON response for other content types + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) @@ -342,6 +394,117 @@ class StructureFiller: # NICHT raise - Section wird mit Fehlermeldung gerendert else: + # Einzelverarbeitung: Jeder Part einzeln ODER Generation ohne ContentParts + # Handle case where no content parts but generationHint exists (e.g., Executive Summary) + if len(contentPartIds) == 0 and useAiCall and generationHint: + # Generate content from scratch using only generationHint + logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only") + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[], # NO PARTS + userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=False + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation", + "Section", + f"Generating section {sectionId} from 
generationHint", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") + + # Verwende callAi ohne ContentParts + operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE + + # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) + if operationType == OperationTypeEnum.IMAGE_GENERATE: + maxPromptLength = 4000 + if len(generationPrompt) > maxPromptLength: + logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") + # Keep the beginning (task, metadata, generation hint) and truncate from end + generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline + + request = AiCallRequest( + prompt=generationPrompt, + contentParts=[], # NO PARTS + options=AiCallOptions( + operationType=operationType, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + logger.debug(f"Logged section response: section_content_{sectionId}_response") + + # Handle IMAGE_GENERATE differently - returns image data directly + if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: + import base64 + # Convert image data to base64 string if needed + if isinstance(aiResponse.content, bytes): + base64Data = base64.b64encode(aiResponse.content).decode('utf-8') + elif isinstance(aiResponse.content, str): + # Already base64 string or data URI + if aiResponse.content.startswith("data:image/"): + # Extract base64 from data URI + base64Data = aiResponse.content.split(",", 1)[1] + else: + base64Data = aiResponse.content + else: + 
base64Data = "" + + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + else: + # Parse JSON response for other content types + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) + self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # Einzelverarbeitung: Jeder Part einzeln for partId in contentPartIds: part = self._findContentPartById(partId, contentParts) @@ -359,13 +522,26 @@ class StructureFiller: }) elif contentFormat == "object": - # Füge base64 Object hinzu - elements.append({ - "type": part.typeGroup, # "image", "binary", etc. 
- "base64Data": part.data, - "mimeType": part.mimeType, - "altText": part.metadata.get("usageHint", part.label) - }) + # Füge base64 Object hinzu (nested in content structure) + if part.typeGroup == "image": + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": part.metadata.get("caption", "") + } + }) + else: + # For other object types, use generic structure + elements.append({ + "type": part.typeGroup, + "content": { + "data": part.data, + "mimeType": part.mimeType, + "label": part.metadata.get("usageHint", part.label) + } + }) elif contentFormat == "extracted": # WICHTIG: Prüfe sowohl useAiCall als auch generationHint @@ -403,11 +579,24 @@ class StructureFiller: logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") # Verwende callAi für ContentParts-Unterstützung + # Use IMAGE_GENERATE for image content type + operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE + + # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) + if operationType == OperationTypeEnum.IMAGE_GENERATE: + maxPromptLength = 4000 + if len(generationPrompt) > maxPromptLength: + logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") + # Keep the beginning (task, metadata, generation hint) and truncate from end + generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline + + # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks + contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else [part] request = AiCallRequest( prompt=generationPrompt, - contentParts=[part], + contentParts=contentPartsForCall, options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, + operationType=operationType, priority=PriorityEnum.BALANCED, 
processingMode=ProcessingModeEnum.DETAILED ) @@ -421,14 +610,39 @@ class StructureFiller: ) logger.debug(f"Logged section response: section_content_{sectionId}_response") - # Parse und füge zu elements hinzu - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) + # Handle IMAGE_GENERATE differently - returns image data directly + if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: + import base64 + # Convert image data to base64 string if needed + if isinstance(aiResponse.content, bytes): + base64Data = base64.b64encode(aiResponse.content).decode('utf-8') + elif isinstance(aiResponse.content, str): + # Already base64 string or data URI + if aiResponse.content.startswith("data:image/"): + # Extract base64 from data URI + base64Data = aiResponse.content.split(",", 1)[1] + else: + base64Data = aiResponse.content + else: + base64Data = "" + + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + else: + # Parse JSON response for other content types + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) @@ -502,16 +716,6 @@ class StructureFiller: if partId in contentPartsMap: section["contentPartsMetadata"].append(contentPartsMap[partId]) - # Prüfe ob Sections direkt vorhanden sind (Legacy-Struktur) - elif "sections" in doc: - for section in doc.get("sections", 
[]): - contentPartIds = section.get("contentPartIds", []) - if contentPartIds: - section["contentPartsMetadata"] = [] - for partId in contentPartIds: - if partId in contentPartsMap: - section["contentPartsMetadata"].append(contentPartsMap[partId]) - return structure def _flattenChaptersToSections( @@ -542,8 +746,10 @@ class StructureFiller: "content_type": "heading", "elements": [{ "type": "heading", - "content": chapter.get("title"), - "level": chapter.get("level", 1) + "content": { + "text": chapter.get("title", ""), + "level": chapter.get("level", 1) + } }] } flattened_doc["sections"].append(heading_section) @@ -555,276 +761,6 @@ class StructureFiller: return result - async def _fillStructureLegacy( - self, - structure: Dict[str, Any], - contentParts: List[ContentPart], - userPrompt: str, - fillOperationId: str - ) -> Dict[str, Any]: - """ - Legacy: Füllt Struktur mit Sections direkt (für Rückwärtskompatibilität). - """ - # Starte ChatLog - self.services.chat.progressLogStart( - fillOperationId, - "Structure Filling (Legacy)", - "Filling", - f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", - parentOperationId=fillOperationId - ) - - try: - filledStructure = copy.deepcopy(structure) - - # Sammle alle Sections - sections_to_process = [] - all_sections_list = [] - for doc in filledStructure.get("documents", []): - doc_sections = doc.get("sections", []) - all_sections_list.extend(doc_sections) - for section in doc_sections: - sections_to_process.append((doc, section)) - - # Verarbeite Sections (bestehende Logik) - for sectionIndex, (doc, section) in enumerate(sections_to_process): - sectionId = section.get("id") - contentPartIds = section.get("contentPartIds", []) - contentFormats = section.get("contentFormats", {}) - # Check both camelCase and snake_case for generationHint - generationHint = section.get("generationHint") or section.get("generation_hint") - contentType = section.get("content_type", "paragraph") - useAiCall = 
section.get("useAiCall", False) - - # WICHTIG: Wenn keine ContentParts vorhanden sind, kann kein AI-Call gemacht werden - if len(contentPartIds) == 0: - useAiCall = False - logger.debug(f"Section {sectionId} (legacy): No content parts, setting useAiCall=False") - - elements = [] - - # Prüfe ob Aggregation nötig ist - needsAggregation = self._needsAggregation( - contentType=contentType, - contentPartCount=len(contentPartIds) - ) - - logger.info(f"Processing section {sectionId} (legacy): contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") - - if needsAggregation and useAiCall and generationHint: - # Aggregation: Alle Parts zusammen verarbeiten - sectionParts = [ - self._findContentPartById(pid, contentParts) - for pid in contentPartIds - ] - sectionParts = [p for p in sectionParts if p is not None] - - if sectionParts: - # Filtere nur extracted Parts für Aggregation - extractedParts = [ - p for p in sectionParts - if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" - ] - nonExtractedParts = [ - p for p in sectionParts - if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" - ] - - # Verarbeite non-extracted Parts separat - for part in nonExtractedParts: - contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) - - if contentFormat == "reference": - elements.append({ - "type": "reference", - "documentReference": part.metadata.get("documentReference"), - "label": part.metadata.get("usageHint", part.label) - }) - elif contentFormat == "object": - elements.append({ - "type": part.typeGroup, - "base64Data": part.data, - "mimeType": part.mimeType, - "altText": part.metadata.get("usageHint", part.label) - }) - - # Aggregiere extracted Parts mit AI - if extractedParts: - generationPrompt = self._buildSectionGenerationPrompt( - section=section, - contentParts=extractedParts, - 
userPrompt=userPrompt, - generationHint=generationHint, - allSections=all_sections_list, - sectionIndex=sectionIndex, - isAggregation=True - ) - - sectionOperationId = f"{fillOperationId}_section_{sectionId}" - - self.services.chat.progressLogStart( - sectionOperationId, - "Section Generation (Aggregation)", - "Section", - f"Generating section {sectionId} with {len(extractedParts)} parts", - parentOperationId=fillOperationId - ) - - try: - self.services.utils.writeDebugFile( - generationPrompt, - f"section_content_{sectionId}_prompt" - ) - - request = AiCallRequest( - prompt=generationPrompt, - contentParts=extractedParts, - options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.BALANCED, - processingMode=ProcessingModeEnum.DETAILED - ) - ) - aiResponse = await self.aiService.callAi(request) - - self.services.utils.writeDebugFile( - aiResponse.content, - f"section_content_{sectionId}_response" - ) - - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) - - self.services.chat.progressLogFinish(sectionOperationId, True) - - except Exception as e: - self.services.chat.progressLogFinish(sectionOperationId, False) - elements.append({ - "type": "error", - "message": f"Error generating section {sectionId}: {str(e)}", - "sectionId": sectionId - }) - logger.error(f"Error generating section {sectionId}: {str(e)}") - - else: - # Einzelverarbeitung: Jeder Part einzeln - for partId in contentPartIds: - part = self._findContentPartById(partId, contentParts) - if not part: - continue - - contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) - - if contentFormat == "reference": - elements.append({ - "type": "reference", - "documentReference": 
part.metadata.get("documentReference"), - "label": part.metadata.get("usageHint", part.label) - }) - - elif contentFormat == "object": - elements.append({ - "type": part.typeGroup, - "base64Data": part.data, - "mimeType": part.mimeType, - "altText": part.metadata.get("usageHint", part.label) - }) - - elif contentFormat == "extracted": - # WICHTIG: Prüfe sowohl useAiCall als auch generationHint - if useAiCall and generationHint: - # AI-Call mit einzelnen ContentPart - logger.debug(f"Processing section {sectionId}: Single extracted part with AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)})") - generationPrompt = self._buildSectionGenerationPrompt( - section=section, - contentParts=[part], - userPrompt=userPrompt, - generationHint=generationHint, - allSections=all_sections_list, - sectionIndex=sectionIndex, - isAggregation=False - ) - - sectionOperationId = f"{fillOperationId}_section_{sectionId}" - - self.services.chat.progressLogStart( - sectionOperationId, - "Section Generation", - "Section", - f"Generating section {sectionId}", - parentOperationId=fillOperationId - ) - - try: - self.services.utils.writeDebugFile( - generationPrompt, - f"section_content_{sectionId}_prompt" - ) - logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") - - request = AiCallRequest( - prompt=generationPrompt, - contentParts=[part], - options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.BALANCED, - processingMode=ProcessingModeEnum.DETAILED - ) - ) - aiResponse = await self.aiService.callAi(request) - - self.services.utils.writeDebugFile( - aiResponse.content, - f"section_content_{sectionId}_response" - ) - logger.debug(f"Logged section response: section_content_{sectionId}_response") - - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) 
and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) - - self.services.chat.progressLogFinish(sectionOperationId, True) - - except Exception as e: - self.services.chat.progressLogFinish(sectionOperationId, False) - elements.append({ - "type": "error", - "message": f"Error generating section {sectionId}: {str(e)}", - "sectionId": sectionId - }) - logger.error(f"Error generating section {sectionId}: {str(e)}") - else: - # Füge extrahierten Text direkt hinzu (kein AI-Call) - logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly") - elements.append({ - "type": "extracted_text", - "content": part.data, - "source": part.metadata.get("documentId"), - "extractionPrompt": part.metadata.get("extractionPrompt") - }) - - section["elements"] = elements - - # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung) - filledStructure = self._addContentPartsMetadata(filledStructure, contentParts) - - self.services.chat.progressLogFinish(fillOperationId, True) - return filledStructure - - except Exception as e: - self.services.chat.progressLogFinish(fillOperationId, False) - logger.error(f"Error in _fillStructureLegacy: {str(e)}") - raise - def _buildChapterSectionsStructurePrompt( self, chapterId: str, @@ -899,6 +835,18 @@ CRITICAL: Return ONLY valid JSON. 
Do not include any explanatory text outside th """ return prompt + def _getContentStructureExample(self, contentType: str) -> str: + """Get the JSON structure example for a specific content type.""" + structures = { + "table": '{{"headers": ["Column1", "Column2"], "rows": [["Value1", "Value2"], ["Value3", "Value4"]]}}', + "bullet_list": '{{"items": ["Item 1", "Item 2", "Item 3"]}}', + "heading": '{{"text": "Section Title", "level": 2}}', + "paragraph": '{{"text": "This is paragraph text."}}', + "code_block": '{{"code": "function example() {{ return true; }}", "language": "javascript"}}', + "image": '{{"base64Data": "", "altText": "Description", "caption": "Optional caption"}}' + } + return structures.get(contentType, '{{"text": ""}}') + def _buildSectionGenerationPrompt( self, section: Dict[str, Any], @@ -998,6 +946,8 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th for next in nextSections: contextText += f"- {next['id']} ({next['content_type']}): {next['generation_hint']}\n" + contentStructureExample = self._getContentStructureExample(contentType) + if isAggregation: prompt = f"""# TASK: Generate Section Content (Aggregation) @@ -1027,21 +977,17 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th ## OUTPUT FORMAT Return a JSON object with this structure: -```json + {{ "elements": [ {{ "type": "{contentType}", - "headers": [...], // if table - "rows": [...], // if table - "items": [...], // if bullet_list - "content": "..." // if paragraph + "content": {contentStructureExample} }} ] }} -``` -CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ else: prompt = f"""# TASK: Generate Section Content @@ -1071,18 +1017,17 @@ CRITICAL: Return ONLY valid JSON. 
Do not include any explanatory text outside th ## OUTPUT FORMAT Return a JSON object with this structure: -```json + {{ "elements": [ {{ "type": "{contentType}", - "content": "..." + "content": {contentStructureExample} }} ] }} -``` -CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ return prompt diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index 33edb6c7..06877968 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -1129,8 +1129,9 @@ class ExtractionService: logger.warning(f"⚠️ Content part ({contentTokens:.0f} tokens est.) exceeds available space ({availableContentBytes/TOKEN_SAFETY_FACTOR:.0f} tokens est.), chunking required") # If either condition fails, chunk the content - if totalTokens > maxTotalTokens or partSize > availableContentBytes: - # Part too large or total exceeds limit - chunk it + # CRITICAL: IMAGE_GENERATE operations should NOT use chunking - they generate images from prompts, not process content chunks + if (totalTokens > maxTotalTokens or partSize > availableContentBytes) and options.operationType != OperationTypeEnum.IMAGE_GENERATE: + # Part too large or total exceeds limit - chunk it (but not for image generation) chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt) if not chunks: raise ValueError(f"Failed to chunk content part for model {model.name}") diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index ebd37885..ee16c5a4 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ 
b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -199,29 +199,40 @@ class BaseRenderer(ABC): return "unknown" def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: - """Extract table headers and rows from section data.""" + """Extract table headers and rows from section data. Expects nested content structure.""" # Normalize when elements array was passed in if isinstance(sectionData, list): if sectionData and isinstance(sectionData[0], dict): sectionData = sectionData[0] else: - # Empty list or invalid structure - return empty table return [], [] - # Ensure sectionData is a dict before calling .get() + # Ensure sectionData is a dict if not isinstance(sectionData, dict): return [], [] - headers = sectionData.get("headers", []) - rows = sectionData.get("rows", []) + # Extract from nested content structure + content = sectionData.get("content", {}) + if not isinstance(content, dict): + return [], [] + headers = content.get("headers", []) + rows = content.get("rows", []) return headers, rows def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]: - """Extract bullet list items from section data.""" - # Normalize when elements array or raw list was passed in + """Extract bullet list items from section data. 
Expects nested content structure.""" + # Normalize when elements array was passed in if isinstance(sectionData, list): - # Already a list of items (strings or dicts) - items = sectionData - else: - items = sectionData.get("items", []) + if sectionData and isinstance(sectionData[0], dict): + sectionData = sectionData[0] + else: + return [] + # Ensure sectionData is a dict + if not isinstance(sectionData, dict): + return [] + # Extract from nested content structure + content = sectionData.get("content", {}) + if not isinstance(content, dict): + return [] + items = content.get("items", []) result = [] for item in items: if isinstance(item, str): @@ -231,64 +242,89 @@ class BaseRenderer(ABC): return result def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]: - """Extract heading level and text from section data.""" + """Extract heading level and text from section data. Expects nested content structure.""" # Normalize when elements array was passed in if isinstance(sectionData, list): if sectionData and isinstance(sectionData[0], dict): sectionData = sectionData[0] else: - # Empty list or invalid structure - return default return 1, "" - # Ensure sectionData is a dict before calling .get() + # Ensure sectionData is a dict if not isinstance(sectionData, dict): return 1, "" - level = sectionData.get("level", 1) - text = sectionData.get("text", "") + # Extract from nested content structure + content = sectionData.get("content", {}) + if not isinstance(content, dict): + return 1, "" + level = content.get("level", 1) + text = content.get("text", "") return level, text def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str: - """Extract paragraph text from section data.""" + """Extract paragraph text from section data. 
Expects nested content structure.""" if isinstance(sectionData, list): # Join multiple paragraph elements if provided as a list texts = [] for el in sectionData: - if isinstance(el, dict) and "text" in el: - texts.append(el["text"]) + if isinstance(el, dict): + content = el.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + if text: + texts.append(text) elif isinstance(el, str): texts.append(el) return "\n".join(texts) - return sectionData.get("text", "") + # Extract from nested content structure + if not isinstance(sectionData, dict): + return "" + content = sectionData.get("content", {}) + if isinstance(content, dict): + return content.get("text", "") + elif isinstance(content, str): + return content + return "" def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: - """Extract code and language from section data.""" + """Extract code and language from section data. Expects nested content structure.""" # Normalize when elements array was passed in if isinstance(sectionData, list): if sectionData and isinstance(sectionData[0], dict): sectionData = sectionData[0] else: - # Empty list or invalid structure - return default return "", "" - # Ensure sectionData is a dict before calling .get() + # Ensure sectionData is a dict if not isinstance(sectionData, dict): return "", "" - code = sectionData.get("code", "") - language = sectionData.get("language", "") + # Extract from nested content structure + content = sectionData.get("content", {}) + if not isinstance(content, dict): + return "", "" + code = content.get("code", "") + language = content.get("language", "") return code, language def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: - """Extract base64 data and alt text from section data.""" + """Extract base64 data and alt text from section data. 
Expects nested content structure.""" # Normalize when elements array was passed in if isinstance(sectionData, list): if sectionData and isinstance(sectionData[0], dict): sectionData = sectionData[0] else: - # Empty list or invalid structure - return default return "", "Image" - # Ensure sectionData is a dict before calling .get() + # Ensure sectionData is a dict if not isinstance(sectionData, dict): return "", "Image" - base64Data = sectionData.get("base64Data", "") - altText = sectionData.get("altText", "Image") + # Extract from nested content structure + content = sectionData.get("content", {}) + if not isinstance(content, dict): + return "", "Image" + base64Data = content.get("base64Data", "") + altText = content.get("altText", "Image") return base64Data, altText def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any: diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py index 52e2933d..83ca41c1 100644 --- a/modules/services/serviceGeneration/renderers/rendererCsv.py +++ b/modules/services/serviceGeneration/renderers/rendererCsv.py @@ -41,11 +41,17 @@ class RendererCsv(BaseRenderer): else: filename = self._determineFilename(title, "text/csv") + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + return [ RenderedDocument( documentData=csvContent.encode('utf-8'), mimeType="text/csv", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -130,8 +136,12 @@ class RendererCsv(BaseRenderer): def _renderJsonTableToCsv(self, tableData: Dict[str, Any]) -> List[List[str]]: """Render a JSON table to CSV rows.""" try: - headers = tableData.get("headers", []) - rows = tableData.get("rows", []) + # Extract from 
nested content structure + content = tableData.get("content", {}) + if not isinstance(content, dict): + return [] + headers = content.get("headers", []) + rows = content.get("rows", []) csvRows = [] @@ -150,7 +160,11 @@ class RendererCsv(BaseRenderer): def _renderJsonListToCsv(self, listData: Dict[str, Any]) -> List[List[str]]: """Render a JSON list to CSV rows.""" try: - items = listData.get("items", []) + # Extract from nested content structure + content = listData.get("content", {}) + if not isinstance(content, dict): + return [] + items = content.get("items", []) csvRows = [] for item in items: @@ -177,8 +191,12 @@ class RendererCsv(BaseRenderer): def _renderJsonHeadingToCsv(self, headingData: Dict[str, Any]) -> List[List[str]]: """Render a JSON heading to CSV rows.""" try: - text = headingData.get("text", "") - level = headingData.get("level", 1) + # Extract from nested content structure + content = headingData.get("content", {}) + if not isinstance(content, dict): + return [] + text = content.get("text", "") + level = content.get("level", 1) if text: # Use # symbols for heading levels @@ -194,7 +212,14 @@ class RendererCsv(BaseRenderer): def _renderJsonParagraphToCsv(self, paragraphData: Dict[str, Any]) -> List[List[str]]: """Render a JSON paragraph to CSV rows.""" try: - text = paragraphData.get("text", "") + # Extract from nested content structure + content = paragraphData.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" if text: # Split long paragraphs into multiple rows if needed @@ -229,8 +254,12 @@ class RendererCsv(BaseRenderer): def _renderJsonCodeToCsv(self, codeData: Dict[str, Any]) -> List[List[str]]: """Render a JSON code block to CSV rows.""" try: - code = codeData.get("code", "") - language = codeData.get("language", "") + # Extract from nested content structure + content = codeData.get("content", {}) + if not isinstance(content, dict): + 
return [] + code = content.get("code", "") + language = content.get("language", "") csvRows = [] diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index ee88369f..43c85c47 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -52,6 +52,10 @@ class RendererDocx(BaseRenderer): # Generate DOCX using AI-analyzed styling docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService) + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + # Determine filename from document or title documents = extractedContent.get("documents", []) if documents and isinstance(documents[0], dict): @@ -74,7 +78,9 @@ class RendererDocx(BaseRenderer): RenderedDocument( documentData=docx_bytes, mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -82,11 +88,15 @@ class RendererDocx(BaseRenderer): self.logger.error(f"Error rendering DOCX: {str(e)}") # Return minimal fallback fallbackContent = f"DOCX Generation Error: {str(e)}" + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=fallbackContent.encode('utf-8'), mimeType="text/plain", - filename=self._determineFilename(title, "text/plain") + filename=self._determineFilename(title, "text/plain"), + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -96,8 +106,8 @@ class RendererDocx(BaseRenderer): # Create new document doc = 
Document() - # Get style set: default styles, enhanced with AI if style instructions present - styleSet = await self._getStyleSet(userPrompt, aiService) + # Get style set: use styles from metadata if available, otherwise enhance with AI + styleSet = await self._getStyleSet(json_content, userPrompt, aiService) # Setup basic document styles and create all styles from style set self._setupBasicDocumentStyles(doc) @@ -137,12 +147,17 @@ class RendererDocx(BaseRenderer): self.logger.error(f"Error generating DOCX from JSON: {str(e)}") raise Exception(f"DOCX generation failed: {str(e)}") - async def _getStyleSet(self, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - default styles, enhanced with AI if userPrompt provided. + async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: + """Get style set - use styles from document generation metadata if available, + otherwise enhance default styles with AI if userPrompt provided. + + WICHTIG: In a dynamic scalable AI system, styling should come from document generation, + not be generated separately by renderers. Only fall back to AI if styles not provided. 
Args: + extractedContent: Document content with metadata (may contain styles) userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if userPrompt provided) + aiService: AI service (used only if styles not in metadata and userPrompt provided) templateName: Name of template style set (None = default) Returns: @@ -156,10 +171,18 @@ class RendererDocx(BaseRenderer): else: defaultStyleSet = self._getDefaultStyleSet() - # Enhance with AI if userPrompt provided (AI handles multilingual style detection) + # FIRST: Check if styles are provided in document generation metadata (preferred approach) + if extractedContent: + metadata = extractedContent.get("metadata", {}) + if isinstance(metadata, dict): + styles = metadata.get("styles") + if styles and isinstance(styles, dict): + self.logger.debug("Using styles from document generation metadata") + return self._validateStylesContrast(styles) + + # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) if userPrompt and aiService: - # AI will naturally detect style instructions in any language - self.logger.info(f"Enhancing styles with AI based on user prompt...") + self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) return self._validateStylesContrast(enhancedStyleSet) else: @@ -264,6 +287,10 @@ class RendererDocx(BaseRenderer): section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # If no elements, skip this section (it has no content to render) + if not elements: + return + # Process each element in the section for element in elements: element_type = element.get("type", "") @@ -286,22 +313,36 @@ class RendererDocx(BaseRenderer): para.add_run(f" (Source: {source})").italic = True continue - # Standard section types - if section_type == "table": + # Check element type, not 
section type (elements can have different types than section) + if element_type == "table": self._renderJsonTable(doc, element, styles) - elif section_type == "bullet_list": + elif element_type == "bullet_list": self._renderJsonBulletList(doc, element, styles) - elif section_type == "heading": + elif element_type == "heading": self._renderJsonHeading(doc, element, styles) - elif section_type == "paragraph": + elif element_type == "paragraph": self._renderJsonParagraph(doc, element, styles) - elif section_type == "code_block": + elif element_type == "code_block": self._renderJsonCodeBlock(doc, element, styles) - elif section_type == "image": + elif element_type == "image": self._renderJsonImage(doc, element, styles) else: - # Fallback to paragraph for unknown types - self._renderJsonParagraph(doc, element, styles) + # Fallback: if element_type not set, use section_type + if section_type == "table": + self._renderJsonTable(doc, element, styles) + elif section_type == "bullet_list": + self._renderJsonBulletList(doc, element, styles) + elif section_type == "heading": + self._renderJsonHeading(doc, element, styles) + elif section_type == "paragraph": + self._renderJsonParagraph(doc, element, styles) + elif section_type == "code_block": + self._renderJsonCodeBlock(doc, element, styles) + elif section_type == "image": + self._renderJsonImage(doc, element, styles) + else: + # Fallback to paragraph for unknown types + self._renderJsonParagraph(doc, element, styles) except Exception as e: self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") @@ -311,8 +352,12 @@ class RendererDocx(BaseRenderer): def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON table to DOCX using AI-generated styles.""" try: - headers = table_data.get("headers", []) - rows = table_data.get("rows", []) + # Extract from nested content structure + content = table_data.get("content", {}) + if not 
isinstance(content, dict): + return + headers = content.get("headers", []) + rows = content.get("rows", []) if not headers or not rows: return @@ -467,7 +512,11 @@ class RendererDocx(BaseRenderer): def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON bullet list to DOCX using AI-generated styles.""" try: - items = list_data.get("items", []) + # Extract from nested content structure + content = list_data.get("content", {}) + if not isinstance(content, dict): + return + items = content.get("items", []) bullet_style = styles["bullet_list"] for item in items: @@ -482,8 +531,12 @@ class RendererDocx(BaseRenderer): def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON heading to DOCX using AI-generated styles.""" try: - level = heading_data.get("level", 1) - text = heading_data.get("text", "") + # Extract from nested content structure + content = heading_data.get("content", {}) + if not isinstance(content, dict): + return + text = content.get("text", "") + level = content.get("level", 1) if text: level = max(1, min(6, level)) @@ -495,7 +548,25 @@ class RendererDocx(BaseRenderer): def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON paragraph to DOCX using AI-generated styles.""" try: - text = paragraph_data.get("text", "") + # Extract from nested content structure + content = paragraph_data.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + + # CRITICAL: Prevent rendering base64 image data as text + # Base64 image data typically starts with /9j/ (JPEG) or iVBORw0KGgo (PNG) + if text and (text.startswith("/9j/") or text.startswith("iVBORw0KGgo") or + (len(text) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in 
text[:100]))): + # This looks like base64 data - don't render as text + self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})") + para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]") + if para.runs: + para.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error + return if text: para = doc.add_paragraph(text) @@ -506,8 +577,12 @@ class RendererDocx(BaseRenderer): def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON code block to DOCX using AI-generated styles.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + # Extract from nested content structure + content = code_data.get("content", {}) + if not isinstance(content, dict): + return + code = content.get("code", "") + language = content.get("language", "") if code: if language: @@ -525,20 +600,33 @@ class RendererDocx(BaseRenderer): def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON image to DOCX.""" try: - base64_data = image_data.get("base64Data", "") - alt_text = image_data.get("altText", "Image") + # Extract from nested content structure + content = image_data.get("content", {}) + if not isinstance(content, dict): + return + base64_data = content.get("base64Data", "") + alt_text = content.get("altText", "Image") if base64_data: - image_bytes = base64.b64decode(base64_data) - doc.add_picture(io.BytesIO(image_bytes), width=Inches(4)) - - if alt_text: - caption_para = doc.add_paragraph(f"Figure: {alt_text}") - caption_para.runs[0].italic = True + try: + image_bytes = base64.b64decode(base64_data) + doc.add_picture(io.BytesIO(image_bytes), width=Inches(4)) + + if alt_text: + caption_para = doc.add_paragraph(f"Figure: {alt_text}") + caption_para.runs[0].italic = True + except Exception as embedError: + # Image decoding or 
embedding failed + raise Exception(f"Failed to decode or embed image: {str(embedError)}") + else: + raise Exception("No image data provided (base64Data is empty)") except Exception as e: - self.logger.warning(f"Error rendering image: {str(e)}") - doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]") + self.logger.error(f"Error embedding image in DOCX: {str(e)}") + errorMsg = f"[Error: Could not embed image '{image_data.get('altText', 'Image')}'. {str(e)}]" + errorPara = doc.add_paragraph(errorMsg) + if errorPara.runs: + errorPara.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]: """Extract document structure from user prompt.""" diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 17ac25b3..4d7dafe0 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -55,12 +55,18 @@ class RendererHtml(BaseRenderer): else: htmlFilename = self._determineFilename(title, "text/html") + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + # Start with HTML document resultDocuments = [ RenderedDocument( documentData=htmlContent.encode('utf-8'), mimeType="text/html", - filename=htmlFilename + filename=htmlFilename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -90,8 +96,8 @@ class RendererHtml(BaseRenderer): async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate HTML content from structured JSON document using AI-generated styling.""" try: - # Get style set: default styles, enhanced with AI if 
userPrompt provided - styles = await self._getStyleSet(userPrompt, aiService) + # Get style set: use styles from metadata if available, otherwise enhance with AI + styles = await self._getStyleSet(jsonContent, userPrompt, aiService) # Validate JSON structure if not self._validateJsonStructure(jsonContent): @@ -148,12 +154,17 @@ class RendererHtml(BaseRenderer): self.logger.error(f"Error generating HTML from JSON: {str(e)}") raise Exception(f"HTML generation failed: {str(e)}") - async def _getStyleSet(self, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - default styles, enhanced with AI if userPrompt provided. + async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: + """Get style set - use styles from document generation metadata if available, + otherwise enhance default styles with AI if userPrompt provided. + + WICHTIG: In a dynamic scalable AI system, styling should come from document generation, + not be generated separately by renderers. Only fall back to AI if styles not provided. 
Args: + extractedContent: Document content with metadata (may contain styles) userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if userPrompt provided) + aiService: AI service (used only if styles not in metadata and userPrompt provided) templateName: Name of template style set (None = default) Returns: @@ -162,10 +173,18 @@ class RendererHtml(BaseRenderer): # Get default style set defaultStyleSet = self._getDefaultStyleSet() - # Enhance with AI if userPrompt provided (AI handles multilingual style detection) + # FIRST: Check if styles are provided in document generation metadata (preferred approach) + if extractedContent: + metadata = extractedContent.get("metadata", {}) + if isinstance(metadata, dict): + styles = metadata.get("styles") + if styles and isinstance(styles, dict): + self.logger.debug("Using styles from document generation metadata") + return self._validateStylesContrast(styles) + + # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) if userPrompt and aiService: - # AI will naturally detect style instructions in any language - self.logger.info(f"Enhancing styles with AI based on user prompt...") + self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) return self._validateStylesContrast(enhancedStyleSet) else: @@ -446,8 +465,12 @@ class RendererHtml(BaseRenderer): def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON table to HTML using AI-generated styles.""" try: - headers = tableData.get("headers", []) - rows = tableData.get("rows", []) + # Extract from nested content structure + content = tableData.get("content", {}) + if not isinstance(content, dict): + return "" + headers = content.get("headers", []) + rows = content.get("rows", []) if not headers or not rows: return "" @@ -477,9 
+500,13 @@ class RendererHtml(BaseRenderer): return "" def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a JSON bullet list to HTML using AI-generated styles.""" + """Render a JSON bullet list to HTML using AI-generated styles. Expects nested content structure.""" try: - items = listData.get("items", []) + # Extract from nested content structure + content = listData.get("content", {}) + if not isinstance(content, dict): + return "" + items = content.get("items", []) if not items: return "" @@ -513,8 +540,12 @@ class RendererHtml(BaseRenderer): elif not isinstance(headingData, dict): return "" - level = headingData.get("level", 1) - text = headingData.get("text", "") + # Extract from nested content structure + content = headingData.get("content", {}) + if not isinstance(content, dict): + return "" + text = content.get("text", "") + level = content.get("level", 1) if text: level = max(1, min(6, level)) @@ -531,11 +562,19 @@ class RendererHtml(BaseRenderer): try: # Normalize inputs - paragraphData is typically a list of elements from _getSectionData if isinstance(paragraphData, list): - # Extract text from all paragraph elements + # Extract text from all paragraph elements (expects nested content structure) texts = [] for el in paragraphData: - if isinstance(el, dict) and "text" in el: - texts.append(el["text"]) + if isinstance(el, dict): + content = el.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + if text: + texts.append(text) elif isinstance(el, str): texts.append(el) if texts: @@ -545,7 +584,15 @@ class RendererHtml(BaseRenderer): elif isinstance(paragraphData, str): return f'

{paragraphData}

' elif isinstance(paragraphData, dict): - text = paragraphData.get("text", "") + # Handle nested content structure: element.content vs element.text + # Extract from nested content structure + content = paragraphData.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" if text: return f'

{text}

' return "" @@ -557,10 +604,14 @@ class RendererHtml(BaseRenderer): return "" def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a JSON code block to HTML using AI-generated styles.""" + """Render a JSON code block to HTML using AI-generated styles. Expects nested content structure.""" try: - code = codeData.get("code", "") - language = codeData.get("language", "") + # Extract from nested content structure + content = codeData.get("content", {}) + if not isinstance(content, dict): + return "" + code = content.get("code", "") + language = content.get("language", "") if code: if language: @@ -575,12 +626,16 @@ class RendererHtml(BaseRenderer): return "" def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a JSON image to HTML with placeholder for later replacement.""" + """Render a JSON image to HTML with placeholder for later replacement. Expects nested content structure.""" try: import html - base64Data = imageData.get("base64Data", "") - altText = imageData.get("altText", "Image") - caption = imageData.get("caption", "") + # Extract from nested content structure + content = imageData.get("content", {}) + if not isinstance(content, dict): + return "" + base64Data = content.get("base64Data", "") + altText = content.get("altText", "Image") + caption = content.get("caption", "") # Escape HTML in altText and caption to prevent injection altTextEscaped = html.escape(str(altText)) @@ -600,8 +655,10 @@ class RendererHtml(BaseRenderer): return "" except Exception as e: - self.logger.warning(f"Error rendering image: {str(e)}") - return f'
[Image: {imageData.get("altText", "Image")}]
' + self.logger.error(f"Error embedding image in HTML: {str(e)}") + altText = imageData.get("altText", "Image") + errorMsg = html.escape(f"[Error: Could not embed image '{altText}'. {str(e)}]") + return f'
{errorMsg}
' def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]: """ @@ -626,12 +683,24 @@ class RendererHtml(BaseRenderer): if section.get("content_type") == "image": elements = section.get("elements", []) for element in elements: - base64Data = element.get("base64Data", "") + # Extract from nested content structure + content = element.get("content", {}) + base64Data = "" - # If base64Data not found, try extracting from url data URI + if isinstance(content, dict): + base64Data = content.get("base64Data", "") + elif isinstance(content, str): + # Content might be base64 string directly (shouldn't happen) + pass + + # If base64Data not found in content, try direct element fields (fallback) if not base64Data: - url = element.get("url", "") - if url.startswith("data:image/"): + base64Data = element.get("base64Data", "") + + # If base64Data still not found, try extracting from url data URI + if not base64Data: + url = element.get("url", "") or (content.get("url", "") if isinstance(content, dict) else "") + if url and isinstance(url, str) and url.startswith("data:image/"): # Extract base64 from data URI: data:image/png;base64, import re match = re.match(r'data:image/[^;]+;base64,(.+)', url) @@ -642,7 +711,8 @@ class RendererHtml(BaseRenderer): sectionId = section.get("id", "unknown") # Bestimme MIME-Type und Extension - mimeType = element.get("mimeType", "image/png") + mimeType = element.get("mimeType", "") or (content.get("mimeType", "") if isinstance(content, dict) else "") + if not mimeType or mimeType == "unknown": if not mimeType or mimeType == "unknown": # Versuche MIME-Type aus base64 zu erkennen if base64Data.startswith("/9j/"): diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py index 7d317131..479881df 100644 --- a/modules/services/serviceGeneration/renderers/rendererImage.py +++ b/modules/services/serviceGeneration/renderers/rendererImage.py @@ -54,11 +54,17 
@@ class RendererImage(BaseRenderer): else: imageBytes = imageContent + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + return [ RenderedDocument( documentData=imageBytes, mimeType="image/png", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] diff --git a/modules/services/serviceGeneration/renderers/rendererJson.py b/modules/services/serviceGeneration/renderers/rendererJson.py index 04196cf4..91e8342d 100644 --- a/modules/services/serviceGeneration/renderers/rendererJson.py +++ b/modules/services/serviceGeneration/renderers/rendererJson.py @@ -43,11 +43,17 @@ class RendererJson(BaseRenderer): else: filename = self._determineFilename(title, "application/json") + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + return [ RenderedDocument( documentData=jsonContent.encode('utf-8'), mimeType="application/json", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -60,11 +66,15 @@ class RendererJson(BaseRenderer): "metadata": {"error": str(e)} } fallbackContent = json.dumps(fallbackData, indent=2) + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=fallbackContent.encode('utf-8'), mimeType="application/json", - filename=self._determineFilename(title, "application/json") + filename=self._determineFilename(title, "application/json"), + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] 
diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index 7b23eb25..d491c8c2 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -41,11 +41,17 @@ class RendererMarkdown(BaseRenderer): else: filename = self._determineFilename(title, "text/markdown") + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + return [ RenderedDocument( documentData=markdownContent.encode('utf-8'), mimeType="text/markdown", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -53,11 +59,15 @@ class RendererMarkdown(BaseRenderer): self.logger.error(f"Error rendering markdown: {str(e)}") # Return minimal markdown fallback fallbackContent = f"# {title}\n\nError rendering report: {str(e)}" + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=fallbackContent.encode('utf-8'), mimeType="text/markdown", - filename=self._determineFilename(title, "text/markdown") + filename=self._determineFilename(title, "text/markdown"), + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -164,8 +174,12 @@ class RendererMarkdown(BaseRenderer): def _renderJsonTable(self, tableData: Dict[str, Any]) -> str: """Render a JSON table to markdown.""" try: - headers = tableData.get("headers", []) - rows = tableData.get("rows", []) + # Extract from nested content structure + content = tableData.get("content", {}) + if not isinstance(content, dict): + return "" + headers = content.get("headers", []) + 
rows = content.get("rows", []) if not headers or not rows: return "" @@ -194,7 +208,11 @@ class RendererMarkdown(BaseRenderer): def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str: """Render a JSON bullet list to markdown.""" try: - items = listData.get("items", []) + # Extract from nested content structure + content = listData.get("content", {}) + if not isinstance(content, dict): + return "" + items = content.get("items", []) if not items: return "" @@ -215,8 +233,12 @@ class RendererMarkdown(BaseRenderer): def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str: """Render a JSON heading to markdown.""" try: - level = headingData.get("level", 1) - text = headingData.get("text", "") + # Extract from nested content structure + content = headingData.get("content", {}) + if not isinstance(content, dict): + return "" + text = content.get("text", "") + level = content.get("level", 1) if text: level = max(1, min(6, level)) @@ -231,7 +253,14 @@ class RendererMarkdown(BaseRenderer): def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str: """Render a JSON paragraph to markdown.""" try: - text = paragraphData.get("text", "") + # Extract from nested content structure + content = paragraphData.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" return text if text else "" except Exception as e: @@ -241,8 +270,12 @@ class RendererMarkdown(BaseRenderer): def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str: """Render a JSON code block to markdown.""" try: - code = codeData.get("code", "") - language = codeData.get("language", "") + # Extract from nested content structure + content = codeData.get("content", {}) + if not isinstance(content, dict): + return "" + code = content.get("code", "") + language = content.get("language", "") if code: if language: @@ -259,8 +292,12 @@ class RendererMarkdown(BaseRenderer): def 
_renderJsonImage(self, imageData: Dict[str, Any]) -> str: """Render a JSON image to markdown.""" try: - altText = imageData.get("altText", "Image") - base64Data = imageData.get("base64Data", "") + # Extract from nested content structure + content = imageData.get("content", {}) + if not isinstance(content, dict): + return "" + altText = content.get("altText", "Image") + base64Data = content.get("base64Data", "") if base64Data: # For base64 images, we can't embed them directly in markdown diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index 9767449e..a6583a33 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -51,6 +51,10 @@ class RendererPdf(BaseRenderer): # Generate PDF using AI-analyzed styling pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService) + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + # Determine filename from document or title documents = extractedContent.get("documents", []) if documents and isinstance(documents[0], dict): @@ -74,7 +78,9 @@ class RendererPdf(BaseRenderer): RenderedDocument( documentData=pdf_bytes, mimeType="application/pdf", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -93,8 +99,8 @@ class RendererPdf(BaseRenderer): async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate PDF content from structured JSON document using AI-generated styling.""" try: - # Get style set: default styles, enhanced with AI if userPrompt provided - styles = await self._getStyleSet(userPrompt, aiService) + 
# Get style set: use styles from metadata if available, otherwise enhance with AI + styles = await self._getStyleSet(json_content, userPrompt, aiService) # Validate JSON structure if not self._validateJsonStructure(json_content): @@ -157,12 +163,17 @@ class RendererPdf(BaseRenderer): self.logger.error(f"Error generating PDF from JSON: {str(e)}") raise Exception(f"PDF generation failed: {str(e)}") - async def _getStyleSet(self, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - default styles, enhanced with AI if userPrompt provided. + async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: + """Get style set - use styles from document generation metadata if available, + otherwise enhance default styles with AI if userPrompt provided. + + WICHTIG: In a dynamic scalable AI system, styling should come from document generation, + not be generated separately by renderers. Only fall back to AI if styles not provided. 
Args: + extractedContent: Document content with metadata (may contain styles) userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if userPrompt provided) + aiService: AI service (used only if styles not in metadata and userPrompt provided) templateName: Name of template style set (None = default) Returns: @@ -171,10 +182,19 @@ class RendererPdf(BaseRenderer): # Get default style set defaultStyleSet = self._getDefaultStyleSet() - # Enhance with AI if userPrompt provided (AI handles multilingual style detection) + # FIRST: Check if styles are provided in document generation metadata (preferred approach) + if extractedContent: + metadata = extractedContent.get("metadata", {}) + if isinstance(metadata, dict): + styles = metadata.get("styles") + if styles and isinstance(styles, dict): + self.logger.debug("Using styles from document generation metadata") + enhancedStyleSet = self._convertColorsFormat(styles) + return self._validateStylesContrast(enhancedStyleSet) + + # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) if userPrompt and aiService: - # AI will naturally detect style instructions in any language - self.logger.info(f"Enhancing styles with AI based on user prompt...") + self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) # Convert colors to PDF format after getting styles enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet) @@ -545,22 +565,36 @@ class RendererPdf(BaseRenderer): all_elements.append(Spacer(1, 6)) continue - # Standard section types - if section_type == "table": + # Check element type, not section type (elements can have different types than section) + if element_type == "table": all_elements.extend(self._renderJsonTable(element, styles)) - elif section_type == "bullet_list": + elif element_type == "bullet_list": 
all_elements.extend(self._renderJsonBulletList(element, styles)) - elif section_type == "heading": + elif element_type == "heading": all_elements.extend(self._renderJsonHeading(element, styles)) - elif section_type == "paragraph": + elif element_type == "paragraph": all_elements.extend(self._renderJsonParagraph(element, styles)) - elif section_type == "code_block": + elif element_type == "code_block": all_elements.extend(self._renderJsonCodeBlock(element, styles)) - elif section_type == "image": + elif element_type == "image": all_elements.extend(self._renderJsonImage(element, styles)) else: - # Fallback to paragraph for unknown types - all_elements.extend(self._renderJsonParagraph(element, styles)) + # Fallback: if element_type not set, use section_type as fallback + if section_type == "table": + all_elements.extend(self._renderJsonTable(element, styles)) + elif section_type == "bullet_list": + all_elements.extend(self._renderJsonBulletList(element, styles)) + elif section_type == "heading": + all_elements.extend(self._renderJsonHeading(element, styles)) + elif section_type == "paragraph": + all_elements.extend(self._renderJsonParagraph(element, styles)) + elif section_type == "code_block": + all_elements.extend(self._renderJsonCodeBlock(element, styles)) + elif section_type == "image": + all_elements.extend(self._renderJsonImage(element, styles)) + else: + # Final fallback to paragraph for unknown types + all_elements.extend(self._renderJsonParagraph(element, styles)) return all_elements @@ -571,8 +605,13 @@ class RendererPdf(BaseRenderer): def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON table to PDF elements using AI-generated styles.""" try: - headers = table_data.get("headers", []) - rows = table_data.get("rows", []) + # Handle nested content structure: element.content.headers vs element.headers + # Extract from nested content structure + content = table_data.get("content", {}) + if not 
isinstance(content, dict): + return [] + headers = content.get("headers", []) + rows = content.get("rows", []) if not headers or not rows: return [] @@ -588,13 +627,13 @@ class RendererPdf(BaseRenderer): table_cell_style = styles.get("table_cell", {}) table_style = [ - ('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))), - ('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))), + ('BACKGROUND', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))), + ('TEXTCOLOR', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("text_color", "#FFFFFF"))), ('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))), ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'), ('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)), ('BOTTOMPADDING', (0, 0), (-1, 0), 12), - ('BACKGROUND', (0, 1), (-1, -1), self._hex_to_color(table_cell_style.get("background", "#FFFFFF"))), + ('BACKGROUND', (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))), ('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)), ('GRID', (0, 0), (-1, -1), 1, colors.black) ] @@ -610,7 +649,11 @@ class RendererPdf(BaseRenderer): def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON bullet list to PDF elements using AI-generated styles.""" try: - items = list_data.get("items", []) + # Extract from nested content structure + content = list_data.get("content", {}) + if not isinstance(content, dict): + return [] + items = content.get("items", []) bullet_style_def = styles.get("bullet_list", {}) elements = [] @@ -632,8 +675,12 @@ class RendererPdf(BaseRenderer): def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON heading to PDF elements 
using AI-generated styles.""" try: - level = heading_data.get("level", 1) - text = heading_data.get("text", "") + # Extract from nested content structure + content = heading_data.get("content", {}) + if not isinstance(content, dict): + return [] + text = content.get("text", "") + level = content.get("level", 1) if text: level = max(1, min(6, level)) @@ -649,7 +696,14 @@ class RendererPdf(BaseRenderer): def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON paragraph to PDF elements using AI-generated styles.""" try: - text = paragraph_data.get("text", "") + # Extract from nested content structure + content = paragraph_data.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" if text: return [Paragraph(text, self._createNormalStyle(styles))] @@ -663,8 +717,12 @@ class RendererPdf(BaseRenderer): def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON code block to PDF elements using AI-generated styles.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + # Extract from nested content structure + content = code_data.get("content", {}) + if not isinstance(content, dict): + return [] + code = content.get("code", "") + language = content.get("language", "") code_style_def = styles.get("code_block", {}) if code: @@ -700,14 +758,34 @@ class RendererPdf(BaseRenderer): def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON image to PDF elements using reportlab.""" try: - base64_data = image_data.get("base64Data", "") - alt_text = image_data.get("altText", "Image") - caption = image_data.get("caption", "") + # Extract from nested content structure + content = image_data.get("content", {}) + base64_data = "" + alt_text = "Image" + caption = "" - # If base64Data not 
found, try extracting from url data URI + if isinstance(content, dict): + # Nested content structure + base64_data = content.get("base64Data", "") + alt_text = content.get("altText", "Image") + caption = content.get("caption", "") + elif isinstance(content, str): + # Content might be base64 string directly (shouldn't happen, but handle it) + self.logger.warning("Image content is a string, not a dict. This should not happen.") + return [Paragraph(f"[Image: Invalid format]", self._createNormalStyle(styles))] + + # If base64Data not found in content, try direct element fields (fallback) if not base64_data: - url = image_data.get("url", "") - if url.startswith("data:image/"): + base64_data = image_data.get("base64Data", "") + if not alt_text or alt_text == "Image": + alt_text = image_data.get("altText", "Image") + if not caption: + caption = image_data.get("caption", "") + + # If base64Data still not found, try extracting from url data URI + if not base64_data: + url = image_data.get("url", "") or (content.get("url", "") if isinstance(content, dict) else "") + if url and isinstance(url, str) and url.startswith("data:image/"): # Extract base64 from data URI: data:image/png;base64, import re match = re.match(r'data:image/[^;]+;base64,(.+)', url) @@ -715,8 +793,18 @@ class RendererPdf(BaseRenderer): base64_data = match.group(1) if not base64_data: + self.logger.warning(f"No base64 data found for image. 
Alt text: {alt_text}") return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))] + # Validate that base64_data is actually base64 (not the entire element rendered as text) + if len(base64_data) > 10000: # Very long string might be entire element JSON + self.logger.warning(f"Base64 data seems too long ({len(base64_data)} chars), might be incorrectly extracted") + + # Ensure base64_data is a string, not bytes or other type + if not isinstance(base64_data, str): + self.logger.warning(f"Base64 data is not a string: {type(base64_data)}") + return [Paragraph(f"[Image: {alt_text} - Invalid data type]", self._createNormalStyle(styles))] + try: from reportlab.platypus import Image as ReportLabImage from reportlab.lib.units import inch @@ -731,25 +819,61 @@ class RendererPdf(BaseRenderer): # Try to get image dimensions from PIL try: from PIL import Image as PILImage - pilImage = PILImage.open(imageStream) - imgWidth, imgHeight = pilImage.size + from reportlab.lib.pagesizes import A4 - # Scale to fit page (max width 6 inches, maintain aspect ratio) - maxWidth = 6 * inch - if imgWidth > maxWidth: - scale = maxWidth / imgWidth - imgWidth = maxWidth + pilImage = PILImage.open(imageStream) + originalWidth, originalHeight = pilImage.size + + # Calculate available page dimensions (A4 with margins: 72pt left/right, 72pt top, 18pt bottom) + pageWidth = A4[0] # 595.27 points + pageHeight = A4[1] # 841.89 points + leftMargin = 72 + rightMargin = 72 + topMargin = 72 + bottomMargin = 18 + + # Use actual frame dimensions from SimpleDocTemplate + # Frame is smaller than page minus margins due to internal spacing + # From error message: frame is 439.27559055118115 x 739.8897637795277 + # Use conservative values with safety margin + availableWidth = 430.0 # Slightly smaller than frame width for safety + availableHeight = 730.0 # Slightly smaller than frame height for safety + + # Convert original image size from pixels to points (assuming 72 DPI) + # If image DPI is 
different, PIL will provide correct size + # For safety, use a conservative conversion + imgWidthPoints = originalWidth * (inch / 72) # Convert to inches, then to points + imgHeightPoints = originalHeight * (inch / 72) + + # Scale to fit within available page dimensions while maintaining aspect ratio + widthScale = availableWidth / imgWidthPoints if imgWidthPoints > 0 else 1.0 + heightScale = availableHeight / imgHeightPoints if imgHeightPoints > 0 else 1.0 + + # Use the smaller scale to ensure image fits both width and height + scale = min(widthScale, heightScale, 1.0) # Don't scale up, only down + + imgWidth = imgWidthPoints * scale + imgHeight = imgHeightPoints * scale + + # Additional safety check: ensure dimensions don't exceed available space + if imgWidth > availableWidth: + scale = availableWidth / imgWidth + imgWidth = availableWidth imgHeight = imgHeight * scale - else: - imgWidth = imgWidth * (inch / 72) # Convert pixels to inches (assuming 72 DPI) - imgHeight = imgHeight * (inch / 72) + + if imgHeight > availableHeight: + scale = availableHeight / imgHeight + imgHeight = availableHeight + imgWidth = imgWidth * scale # Reset stream for reportlab imageStream.seek(0) - except Exception: - # Fallback: use default size - imgWidth = 4 * inch - imgHeight = 3 * inch + except Exception as e: + # Fallback: use default size that fits page + self.logger.warning(f"Error calculating image size: {str(e)}, using safe default") + # Use 80% of available width as safe default + imgWidth = 4 * inch # ~288 points, safe for ~451pt available width + imgHeight = 3 * inch # ~216 points, safe for ~751pt available height imageStream.seek(0) # Create reportlab Image @@ -773,10 +897,16 @@ class RendererPdf(BaseRenderer): return elements except Exception as imgError: - self.logger.warning(f"Error embedding image in PDF: {str(imgError)}") - # Fallback to placeholder - return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))] + self.logger.error(f"Error embedding 
image in PDF: {str(imgError)}") + # Return error message instead of placeholder + errorStyle = self._createNormalStyle(styles) + errorStyle.textColor = self._hexToColor("#FF0000") # Red color for error + errorMsg = f"[Error: Could not embed image '{alt_text}'. {str(imgError)}]" + return [Paragraph(errorMsg, errorStyle)] except Exception as e: - self.logger.warning(f"Error rendering image: {str(e)}") - return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._createNormalStyle(styles))] \ No newline at end of file + self.logger.error(f"Error rendering image: {str(e)}") + errorStyle = self._createNormalStyle(styles) + errorStyle.textColor = self._hexToColor("#FF0000") # Red color for error + errorMsg = f"[Error: Could not render image '{image_data.get('altText', 'Image')}'. {str(e)}]" + return [Paragraph(errorMsg, errorStyle)] \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index d12048c7..850a59a4 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -48,8 +48,8 @@ class RendererPptx(BaseRenderer): from pptx.dml.color import RGBColor import re - # Get style set: default styles, enhanced with AI if userPrompt provided - styles = await self._getStyleSet(userPrompt, aiService) + # Get style set: use styles from metadata if available, otherwise enhance with AI + styles = await self._getStyleSet(extractedContent, userPrompt, aiService) # Create new presentation prs = Presentation() @@ -99,7 +99,7 @@ class RendererPptx(BaseRenderer): if title_shape.text_frame.paragraphs[0].font: title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) - title_color = self._get_safe_color(title_style.get("color", (31, 78, 121))) + title_color = 
self._getSafeColor(title_style.get("color", (31, 78, 121))) title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) # Handle images first (if present) @@ -133,7 +133,7 @@ class RendererPptx(BaseRenderer): heading_style = styles.get("heading", {}) p.font.size = Pt(heading_style.get("font_size", 32)) p.font.bold = heading_style.get("bold", True) - heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47))) + heading_color = self._getSafeColor(heading_style.get("color", (47, 47, 47))) p.font.color.rgb = RGBColor(*heading_color) elif paragraph.startswith('##'): # Subheader @@ -141,7 +141,7 @@ class RendererPptx(BaseRenderer): subheading_style = styles.get("subheading", {}) p.font.size = Pt(subheading_style.get("font_size", 24)) p.font.bold = subheading_style.get("bold", True) - subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79))) + subheading_color = self._getSafeColor(subheading_style.get("color", (79, 79, 79))) p.font.color.rgb = RGBColor(*subheading_color) elif paragraph.startswith('*') and paragraph.endswith('*'): # Bold text @@ -149,14 +149,14 @@ class RendererPptx(BaseRenderer): paragraph_style = styles.get("paragraph", {}) p.font.size = Pt(paragraph_style.get("font_size", 18)) p.font.bold = True - paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) + paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) p.font.color.rgb = RGBColor(*paragraph_color) else: # Regular text paragraph_style = styles.get("paragraph", {}) p.font.size = Pt(paragraph_style.get("font_size", 18)) p.font.bold = paragraph_style.get("bold", False) - paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) + paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) p.font.color.rgb = RGBColor(*paragraph_color) # Apply alignment @@ -181,7 +181,7 @@ class RendererPptx(BaseRenderer): if 
title_shape.text_frame.paragraphs[0].font: title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 48)) title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) - title_color = self._get_safe_color(title_style.get("color", (31, 78, 121))) + title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) subtitle_shape = slide.placeholders[1] @@ -215,32 +215,46 @@ class RendererPptx(BaseRenderer): else: filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation") + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + return [ RenderedDocument( documentData=pptx_bytes, mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] except ImportError: logger.error("python-pptx library not installed. 
Install with: pip install python-pptx") fallbackContent = "python-pptx library not installed" + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=fallbackContent.encode('utf-8'), mimeType="text/plain", - filename=self._determineFilename(title, "text/plain") + filename=self._determineFilename(title, "text/plain"), + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] except Exception as e: logger.error(f"Error rendering PowerPoint presentation: {str(e)}") fallbackContent = f"Error rendering PowerPoint presentation: {str(e)}" + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=fallbackContent.encode('utf-8'), mimeType="text/plain", - filename=self._determineFilename(title, "text/plain") + filename=self._determineFilename(title, "text/plain"), + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -349,12 +363,17 @@ class RendererPptx(BaseRenderer): """Get MIME type for rendered output.""" return self.outputMimeType - async def _getStyleSet(self, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - default styles, enhanced with AI if userPrompt provided. + async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: + """Get style set - use styles from document generation metadata if available, + otherwise enhance default styles with AI if userPrompt provided. + + WICHTIG: In a dynamic scalable AI system, styling should come from document generation, + not be generated separately by renderers. Only fall back to AI if styles not provided. 
Args: + extractedContent: Document content with metadata (may contain styles) userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if userPrompt provided) + aiService: AI service (used only if styles not in metadata and userPrompt provided) templateName: Name of template style set (None = default) Returns: @@ -363,10 +382,19 @@ class RendererPptx(BaseRenderer): # Get default style set defaultStyleSet = self._getDefaultStyleSet() - # Enhance with AI if userPrompt provided (AI handles multilingual style detection) + # FIRST: Check if styles are provided in document generation metadata (preferred approach) + if extractedContent: + metadata = extractedContent.get("metadata", {}) + if isinstance(metadata, dict): + styles = metadata.get("styles") + if styles and isinstance(styles, dict): + self.logger.debug("Using styles from document generation metadata") + enhancedStyleSet = self._convertColorsFormat(styles) + return self._validateStylesReadability(enhancedStyleSet) + + # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) if userPrompt and aiService: - # AI will naturally detect style instructions in any language - self.logger.info(f"Enhancing styles with AI based on user prompt...") + self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) # Convert colors to PPTX format after getting styles enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet) @@ -690,15 +718,28 @@ JSON ONLY. 
NO OTHER TEXT.""" # Handle image sections specially if content_type == "image": - # Extract image data + # Extract image data from nested content structure images = [] for element in elements: - if element.get("base64Data"): - images.append({ - "base64Data": element.get("base64Data"), - "altText": element.get("altText", "Image"), - "caption": element.get("caption") - }) + if isinstance(element, dict): + # Extract from nested content structure + content = element.get("content", {}) + if isinstance(content, dict): + base64Data = content.get("base64Data") + altText = content.get("altText", "Image") + caption = content.get("caption", "") + else: + # Fallback to direct element fields + base64Data = element.get("base64Data") + altText = element.get("altText", "Image") + caption = element.get("caption", "") + + if base64Data: + images.append({ + "base64Data": base64Data, + "altText": altText, + "caption": caption + }) return { "title": section_title or (elements[0].get("altText", "Image") if elements else "Image"), @@ -719,7 +760,7 @@ JSON ONLY. NO OTHER TEXT.""" elif content_type == "code": content_parts.append(self._formatCodeForSlide(elements)) else: - content_parts.append(self._format_paragraph_for_slide(elements)) + content_parts.append(self._formatParagraphForSlide(elements)) # Combine content parts slide_content = "\n\n".join(filter(None, content_parts)) @@ -734,17 +775,20 @@ JSON ONLY. 
NO OTHER TEXT.""" logger.warning(f"Error creating slide from section: {str(e)}") return None - def _formatTableForSlide(self, elements: List[Dict[str, Any]]) -> str: + def _formatTableForSlide(self, element: Dict[str, Any]) -> str: """Format table data for slide presentation.""" try: - # Extract table data from elements array - headers = [] - rows = [] - for element in elements: - if isinstance(element, dict) and "headers" in element and "rows" in element: - headers = element.get("headers", []) - rows = element.get("rows", []) - break + # Extract table data from element - handle nested content structure + if not isinstance(element, dict): + return "" + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return "" + + headers = content.get("headers", []) + rows = content.get("rows", []) if not headers: return "" @@ -778,7 +822,11 @@ JSON ONLY. NO OTHER TEXT.""" def _formatListForSlide(self, list_data: Dict[str, Any]) -> str: """Format list data for slide presentation.""" try: - items = list_data.get("items", []) + # Extract from nested content structure + content = list_data.get("content", {}) + if not isinstance(content, dict): + return "" + items = content.get("items", []) if not items: return "" @@ -810,8 +858,12 @@ JSON ONLY. NO OTHER TEXT.""" def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str: """Format heading data for slide presentation.""" try: - text = heading_data.get("text", "") - level = heading_data.get("level", 1) + # Extract from nested content structure + content = heading_data.get("content", {}) + if not isinstance(content, dict): + return "" + text = content.get("text", "") + level = content.get("level", 1) if text: return f"{'#' * level} {text}" @@ -825,7 +877,14 @@ JSON ONLY. 
NO OTHER TEXT.""" def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str: """Format paragraph data for slide presentation.""" try: - text = paragraph_data.get("text", "") + # Extract from nested content structure + content = paragraph_data.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" if text: # Limit paragraph length based on content density @@ -844,8 +903,12 @@ JSON ONLY. NO OTHER TEXT.""" def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str: """Format code data for slide presentation.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + # Extract from nested content structure + content = code_data.get("content", {}) + if not isinstance(content, dict): + return "" + code = content.get("code", "") + language = content.get("language", "") if code: # Limit code length based on content density @@ -912,6 +975,10 @@ JSON ONLY. NO OTHER TEXT.""" section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Skip sections with no elements (unless they're headings that should create new slides) + if not elements and section_type != "heading": + continue + if section_type == "heading": # If we have accumulated content, create a slide if current_slide_content: @@ -923,10 +990,26 @@ JSON ONLY. 
NO OTHER TEXT.""" current_slide_content = [] # Start new slide with heading as title + heading_found = False for element in elements: - if isinstance(element, dict) and "text" in element: - current_slide_title = element.get("text", "Untitled Section") - break + if isinstance(element, dict): + # Extract from nested content structure + content = element.get("content", {}) + if isinstance(content, dict): + heading_text = content.get("text", "") + elif isinstance(content, str): + heading_text = content + else: + heading_text = "" + + if heading_text: + current_slide_title = heading_text + heading_found = True + break + + # If no heading text found but this is a heading section, use section ID or default + if not heading_found: + current_slide_title = section.get("id", "Untitled Section") elif section_type == "image": # Create separate slide for image if current_slide_content: @@ -940,12 +1023,25 @@ JSON ONLY. NO OTHER TEXT.""" # Extract image data imageData = [] for element in elements: - if element.get("base64Data"): - imageData.append({ - "base64Data": element.get("base64Data"), - "altText": element.get("altText", "Image"), - "caption": element.get("caption") - }) + if isinstance(element, dict): + # Extract from nested content structure + content = element.get("content", {}) + if isinstance(content, dict): + base64Data = content.get("base64Data") + altText = content.get("altText", "Image") + caption = content.get("caption", "") + else: + # Fallback to direct element fields + base64Data = element.get("base64Data") + altText = element.get("altText", "Image") + caption = element.get("caption", "") + + if base64Data: + imageData.append({ + "base64Data": base64Data, + "altText": altText, + "caption": caption + }) slides.append({ "title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"), @@ -986,17 +1082,17 @@ JSON ONLY. 
NO OTHER TEXT.""" content_parts = [] for element in elements: if content_type == "table": - content_parts.append(self._formatTableForSlide([element])) - elif content_type == "list": - content_parts.append(self._formatListForSlide([element])) + content_parts.append(self._formatTableForSlide(element)) + elif content_type == "bullet_list" or content_type == "list": + content_parts.append(self._formatListForSlide(element)) elif content_type == "heading": - content_parts.append(self._formatHeadingForSlide([element])) + content_parts.append(self._formatHeadingForSlide(element)) elif content_type == "paragraph": - content_parts.append(self._formatParagraphForSlide([element])) - elif content_type == "code": - content_parts.append(self._formatCodeForSlide([element])) + content_parts.append(self._formatParagraphForSlide(element)) + elif content_type == "code_block" or content_type == "code": + content_parts.append(self._formatCodeForSlide(element)) else: - content_parts.append(self._format_paragraph_for_slide([element])) + content_parts.append(self._formatParagraphForSlide(element)) return "\n\n".join(filter(None, content_parts)) @@ -1009,6 +1105,7 @@ JSON ONLY. NO OTHER TEXT.""" try: from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor import base64 import io @@ -1106,7 +1203,25 @@ JSON ONLY. NO OTHER TEXT.""" slide.shapes.add_picture(imageStream, left, top, width=imgWidth, height=imgHeight) except Exception as e: - logger.warning(f"Error adding images to slide: {str(e)}") + logger.error(f"Error embedding images in PPTX slide: {str(e)}") + # Add error message text box to slide + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + errorMsg = f"[Error: Could not embed image(s). 
{str(e)}]" + errorBox = slide.shapes.add_textbox( + Inches(0.5), + Inches(2), + slideWidth - Inches(1), + Inches(0.5) + ) + errorFrame = errorBox.text_frame + errorFrame.text = errorMsg + errorFrame.paragraphs[0].font.size = Pt(12) + errorFrame.paragraphs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color + errorFrame.paragraphs[0].alignment = PP_ALIGN.LEFT + except Exception as errorBoxError: + logger.error(f"Could not add error message to slide: {str(errorBoxError)}") def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index 1948b29f..340e55e4 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -63,11 +63,17 @@ class RendererText(BaseRenderer): else: filename = self._determineFilename(title, "text/plain") + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + return [ RenderedDocument( documentData=textContent.encode('utf-8'), mimeType="text/plain", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -75,11 +81,15 @@ class RendererText(BaseRenderer): self.logger.error(f"Error rendering text: {str(e)}") # Return minimal text fallback fallbackContent = f"{title}\n\nError rendering report: {str(e)}" + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=fallbackContent.encode('utf-8'), mimeType="text/plain", - filename=self._determineFilename(title, "text/plain") + filename=self._determineFilename(title, 
"text/plain"), + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] @@ -201,8 +211,12 @@ class RendererText(BaseRenderer): def _renderJsonTable(self, tableData: Dict[str, Any]) -> str: """Render a JSON table to text.""" try: - headers = tableData.get("headers", []) - rows = tableData.get("rows", []) + # Extract from nested content structure + content = tableData.get("content", {}) + if not isinstance(content, dict): + return "" + headers = content.get("headers", []) + rows = content.get("rows", []) if not headers or not rows: return "" @@ -231,7 +245,11 @@ class RendererText(BaseRenderer): def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str: """Render a JSON bullet list to text.""" try: - items = listData.get("items", []) + # Extract from nested content structure + content = listData.get("content", {}) + if not isinstance(content, dict): + return "" + items = content.get("items", []) if not items: return "" @@ -252,8 +270,12 @@ class RendererText(BaseRenderer): def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str: """Render a JSON heading to text.""" try: - level = headingData.get("level", 1) - text = headingData.get("text", "") + # Extract from nested content structure + content = headingData.get("content", {}) + if not isinstance(content, dict): + return "" + text = content.get("text", "") + level = content.get("level", 1) if text: level = max(1, min(6, level)) @@ -273,7 +295,14 @@ class RendererText(BaseRenderer): def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str: """Render a JSON paragraph to text.""" try: - text = paragraphData.get("text", "") + # Extract from nested content structure + content = paragraphData.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" return text if text else "" except Exception as e: @@ -283,8 +312,12 @@ class RendererText(BaseRenderer): def 
_renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str: """Render a JSON code block to text.""" try: - code = codeData.get("code", "") - language = codeData.get("language", "") + # Extract from nested content structure + content = codeData.get("content", {}) + if not isinstance(content, dict): + return "" + code = content.get("code", "") + language = content.get("language", "") if code: if language: @@ -301,9 +334,14 @@ class RendererText(BaseRenderer): def _renderJsonImage(self, imageData: Dict[str, Any]) -> str: """Render a JSON image to text.""" try: - altText = imageData.get("altText", "Image") + # Extract from nested content structure + content = imageData.get("content", {}) + if isinstance(content, dict): + altText = content.get("altText", "Image") + else: + altText = imageData.get("altText", "Image") return f"[Image: {altText}]" except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") - return f"[Image: {imageData.get('altText', 'Image')}]" + return f"[Image: Image]" diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index d8d23065..3ff49788 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -50,6 +50,10 @@ class RendererXlsx(BaseRenderer): # Generate Excel using AI-analyzed styling excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService) + # Extract metadata for document type and other info + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + # Determine filename from document or title documents = extractedContent.get("documents", []) if documents and isinstance(documents[0], dict): @@ -72,14 +76,27 @@ class RendererXlsx(BaseRenderer): RenderedDocument( documentData=excel_bytes, 
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - filename=filename + filename=filename, + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None ) ] except Exception as e: self.logger.error(f"Error rendering Excel: {str(e)}") - # Return CSV fallback - return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv" + # Return CSV fallback with metadata + metadata = extractedContent.get("metadata", {}) if extractedContent else {} + documentType = metadata.get("documentType") if isinstance(metadata, dict) else None + fallbackContent = f"Title,Content\n{title},Error rendering Excel report: {str(e)}" + return [ + RenderedDocument( + documentData=fallbackContent.encode('utf-8'), + mimeType="text/csv", + filename=self._determineFilename(title, "text/csv"), + documentType=documentType, + metadata=metadata if isinstance(metadata, dict) else None + ) + ] def _generateExcel(self, content: str, title: str) -> str: """Generate Excel content using openpyxl.""" @@ -231,8 +248,8 @@ class RendererXlsx(BaseRenderer): self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER") - # Get style set: default styles, enhanced with AI if userPrompt provided - styles = await self._getStyleSet(userPrompt, aiService) + # Get style set: use styles from metadata if available, otherwise enhance with AI + styles = await self._getStyleSet(jsonContent, userPrompt, aiService) # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) if not self._validateJsonStructure(jsonContent): @@ -275,12 +292,17 @@ class RendererXlsx(BaseRenderer): self.logger.error(f"Error generating Excel from JSON: {str(e)}") raise Exception(f"Excel generation failed: {str(e)}") - async def 
_getStyleSet(self, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - default styles, enhanced with AI if userPrompt provided. + async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: + """Get style set - use styles from document generation metadata if available, + otherwise enhance default styles with AI if userPrompt provided. + + WICHTIG: In a dynamic scalable AI system, styling should come from document generation, + not be generated separately by renderers. Only fall back to AI if styles not provided. Args: + extractedContent: Document content with metadata (may contain styles) userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if userPrompt provided) + aiService: AI service (used only if styles not in metadata and userPrompt provided) templateName: Name of template style set (None = default) Returns: @@ -289,10 +311,19 @@ class RendererXlsx(BaseRenderer): # Get default style set defaultStyleSet = self._getDefaultStyleSet() - # Enhance with AI if userPrompt provided (AI handles multilingual style detection) + # FIRST: Check if styles are provided in document generation metadata (preferred approach) + if extractedContent: + metadata = extractedContent.get("metadata", {}) + if isinstance(metadata, dict): + styles = metadata.get("styles") + if styles and isinstance(styles, dict): + self.logger.debug("Using styles from document generation metadata") + enhancedStyleSet = self._convertColorsFormat(styles) + return self._validateStylesContrast(enhancedStyleSet) + + # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) if userPrompt and aiService: - # AI will naturally detect style instructions in any language - self.logger.info(f"Enhancing styles with AI based on user prompt...") + self.logger.info(f"Styles not in metadata, 
enhancing with AI based on user prompt...") enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) # Convert colors to Excel format after getting styles enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet) @@ -462,86 +493,119 @@ class RendererXlsx(BaseRenderer): # Create sheets for i, sheetName in enumerate(sheetNames): + # Sanitize sheet name before creating + sanitized_name = self._sanitizeSheetName(sheetName) if i == 0: # Use the default sheet for the first sheet sheet = wb.active - sheet.title = sheetName + sheet.title = sanitized_name else: # Create additional sheets - sheet = wb.create_sheet(sheetName, i) - sheets[sheetName.lower()] = sheet + sheet = wb.create_sheet(sanitized_name, i) + # Use sanitized name as key (lowercase for lookup) + sheets[sanitized_name.lower()] = sheet return sheets + def _sanitizeSheetName(self, name: str) -> str: + """Sanitize sheet name: remove invalid characters and ensure valid length.""" + if not name: + return "Sheet" + # Remove invalid characters: [ ] : * ? 
/ \ + invalid_chars = ['[', ']', ':', '*', '?', '/', '\\'] + sanitized = name + for char in invalid_chars: + sanitized = sanitized.replace(char, '') + # Remove leading/trailing spaces and apostrophes + sanitized = sanitized.strip().strip("'") + # Ensure not empty + if not sanitized: + sanitized = "Sheet" + # Excel sheet name limit is 31 characters + return sanitized[:31] + def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]: - """Generate sheet names based on actual content structure.""" + """Generate sheet names: each heading section creates a new tab.""" sections = self._extractSections(jsonContent) # If no sections, create a single sheet if not sections: return ["Content"] - # Generate sheet names based on content structure + # Simple logic: each heading section creates a new tab sheetNames = [] - - # Check if we have multiple table sections - tableSections = [s for s in sections if s.get("content_type") == "table"] - - if len(tableSections) > 1: - # Create separate sheets for each table - for i, section in enumerate(tableSections, 1): - # Try to get caption from table element first, then section title, then fallback - sectionTitle = None + for section in sections: + if section.get("content_type") == "heading": + # Extract heading text from elements elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: - tableElement = elements[0] - sectionTitle = tableElement.get("caption") - - if not sectionTitle: - sectionTitle = section.get("title") - - if not sectionTitle: - sectionTitle = f"Table {i}" - - sheetNames.append(sectionTitle[:31]) # Excel sheet name limit - else: - # Single table or mixed content - create only main sheet + headingElement = elements[0] + content = headingElement.get("content", {}) + if isinstance(content, dict): + headingText = content.get("text", "") + elif isinstance(content, str): + headingText = content + else: + headingText = "" + + if headingText: + 
sanitized_name = self._sanitizeSheetName(headingText) + # Ensure unique sheet names + if sanitized_name not in sheetNames: + sheetNames.append(sanitized_name) + else: + # Add number suffix for duplicates + counter = 1 + base_name = sanitized_name[:28] # Leave room for " (1)" + while f"{base_name} ({counter})" in sheetNames: + counter += 1 + sheetNames.append(f"{base_name} ({counter})"[:31]) + + # If no headings found, use document title + if not sheetNames: documentTitle = jsonContent.get("metadata", {}).get("title", "Document") - sheetNames.append(documentTitle[:31]) # Excel sheet name limit + sheetNames.append(self._sanitizeSheetName(documentTitle)) return sheetNames def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None: - """Populate Excel sheets with content from JSON based on actual sheet names.""" + """Populate Excel sheets: each heading creates a new tab, all following content goes in that tab.""" try: - # Get the actual sheet names that were created + # Get the actual sheet names that were created (keys are lowercase) sheetNames = list(sheets.keys()) if not sheetNames: return sections = self._extractSections(jsonContent) - tableSections = [s for s in sections if s.get("content_type") == "table"] - if len(tableSections) > 1: - # Multiple tables - populate each sheet with its corresponding table - for i, section in enumerate(tableSections): - if i < len(sheetNames): - sheetName = sheetNames[i] - sheet = sheets[sheetName] - # Use the caption from table element as sheet title, or fallback to sheet name - sheetTitle = sheetName - elements = section.get("elements", []) - if elements and isinstance(elements, list) and len(elements) > 0: - tableElement = elements[0] - caption = tableElement.get("caption") - if caption: - sheetTitle = caption - self._populateTableSheet(sheet, section, styles, sheetTitle) - else: - # Single table or mixed content - populate only main sheet - firstSheetName = sheetNames[0] 
- self._populateMainSheet(sheets[firstSheetName], jsonContent, styles) + # Simple logic: iterate through sections, each heading creates a new tab + currentSheetIndex = 0 + currentSheet = None + currentRow = 1 + + for section in sections: + contentType = section.get("content_type", "paragraph") + + # Heading section: switch to next sheet + if contentType == "heading": + if currentSheetIndex < len(sheetNames): + sheetName = sheetNames[currentSheetIndex] + currentSheet = sheets[sheetName] # sheets dict uses lowercase keys + currentSheetIndex += 1 + currentRow = 1 # Start at row 1 for new sheet + else: + # More headings than sheets - use last sheet + if sheetNames: + currentSheet = sheets[sheetNames[-1]] + + # Render content in current sheet (or first sheet if no headings yet) + if currentSheet is None and sheetNames: + currentSheet = sheets[sheetNames[0]] + + if currentSheet: + currentRow = self._addSectionToSheet(currentSheet, section, styles, currentRow) + currentRow += 1 # Add spacing between sections except Exception as e: self.logger.warning(f"Could not populate Excel sheets: {str(e)}") @@ -558,9 +622,15 @@ class RendererXlsx(BaseRenderer): # Get table data from elements (canonical JSON format) elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: - table_data = elements[0] - headers = table_data.get("headers", []) - rows = table_data.get("rows", []) + table_element = elements[0] + # Extract from nested content structure + content = table_element.get("content", {}) + if not isinstance(content, dict): + headers = [] + rows = [] + else: + headers = content.get("headers", []) + rows = content.get("rows", []) else: headers = [] rows = [] @@ -578,11 +648,28 @@ class RendererXlsx(BaseRenderer): if header_style.get("background"): cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid") - # Add rows + # Add rows - 
handle both array format and cells object format cell_style = styles.get("table_cell", {}) for row_idx, row_data in enumerate(rows, 4): - for col_idx, cell_value in enumerate(row_data, 1): - cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value) + # Handle different row formats + if isinstance(row_data, list): + # Array format: [value1, value2, ...] + cell_values = row_data + elif isinstance(row_data, dict) and "cells" in row_data: + # Cells object format: {"cells": [{"value": ...}, ...]} + cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] + else: + # Unknown format, skip + continue + + for col_idx, cell_value in enumerate(cell_values, 1): + # Extract value if it's a dict with "value" key + if isinstance(cell_value, dict): + actual_value = cell_value.get("value", "") + else: + actual_value = cell_value + + cell = sheet.cell(row=row_idx, column=col_idx, value=actual_value) if cell_style.get("text_color"): cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) @@ -714,18 +801,33 @@ class RendererXlsx(BaseRenderer): # Handle all section types using elements array elements = section.get("elements", []) for element in elements: - if section_type == "table": + # Check element type, not section type (elements can have different types than section) + element_type = element.get("type", "") if isinstance(element, dict) else "" + + if element_type == "table": startRow = self._addTableToExcel(sheet, element, styles, startRow) - elif section_type == "bullet_list" or section_type == "list": + elif element_type == "bullet_list" or element_type == "list": startRow = self._addListToExcel(sheet, element, styles, startRow) - elif section_type == "paragraph": + elif element_type == "paragraph": startRow = self._addParagraphToExcel(sheet, element, styles, startRow) - elif section_type == "heading": + elif element_type == "heading": startRow = self._addHeadingToExcel(sheet, element, styles, startRow) - elif section_type == 
"image": + elif element_type == "image": startRow = self._addImageToExcel(sheet, element, styles, startRow) else: - startRow = self._addParagraphToExcel(sheet, element, styles, startRow) + # Fallback: if element_type not set, use section_type + if section_type == "table": + startRow = self._addTableToExcel(sheet, element, styles, startRow) + elif section_type == "bullet_list" or section_type == "list": + startRow = self._addListToExcel(sheet, element, styles, startRow) + elif section_type == "paragraph": + startRow = self._addParagraphToExcel(sheet, element, styles, startRow) + elif section_type == "heading": + startRow = self._addHeadingToExcel(sheet, element, styles, startRow) + elif section_type == "image": + startRow = self._addImageToExcel(sheet, element, styles, startRow) + else: + startRow = self._addParagraphToExcel(sheet, element, styles, startRow) return startRow @@ -733,36 +835,114 @@ class RendererXlsx(BaseRenderer): self.logger.warning(f"Could not add section to sheet: {str(e)}") return startRow + 1 + def _sanitizeCellValue(self, value: Any) -> str: + """Sanitize cell value: remove markdown, convert to string, handle None.""" + if value is None: + return "" + if isinstance(value, dict): + # Extract value from dict if present + return str(value.get("value", "")) + if isinstance(value, (int, float)): + return value # Keep numbers as-is + # Convert to string and remove markdown formatting + text = str(value) + # Remove markdown bold (**text**) + text = text.replace("**", "") + # Remove markdown italic (*text*) + text = text.replace("*", "") + # Remove other markdown + text = text.replace("__", "").replace("_", "") + return text.strip() + def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: - """Add a table element to Excel sheet.""" + """Add a table element to Excel sheet with proper formatting and borders.""" try: - # In canonical JSON format, table elements have headers and rows directly - headers = 
element.get("headers", []) - rows = element.get("rows", []) + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return startRow + headers = content.get("headers", []) + rows = content.get("rows", []) if not headers and not rows: return startRow - # Add headers + # Define border style + thin_border = Border( + left=Side(style='thin'), + right=Side(style='thin'), + top=Side(style='thin'), + bottom=Side(style='thin') + ) + + headerRow = startRow header_style = styles.get("table_header", {}) + + # Add headers with formatting for col, header in enumerate(headers, 1): - cell = sheet.cell(row=startRow, column=col, value=header) - if header_style.get("bold"): - cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000"))) + sanitized_header = self._sanitizeCellValue(header) + cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) + + # Font styling + cell.font = Font( + bold=header_style.get("bold", True), + color=self._getSafeColor(header_style.get("text_color", "FF000000")) + ) + + # Background color if header_style.get("background"): - cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid") + cell.fill = PatternFill( + start_color=self._getSafeColor(header_style["background"]), + end_color=self._getSafeColor(header_style["background"]), + fill_type="solid" + ) + + # Alignment + cell.alignment = Alignment( + horizontal=header_style.get("align", "left"), + vertical="center" + ) + + # Border + cell.border = thin_border startRow += 1 - # Add rows + # Add rows with formatting cell_style = styles.get("table_cell", {}) for row_data in rows: - for col, cell_value in enumerate(row_data, 1): - cell = sheet.cell(row=startRow, column=col, value=cell_value) + # Handle different row formats + if isinstance(row_data, list): + cell_values = row_data + elif 
isinstance(row_data, dict) and "cells" in row_data: + cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] + else: + continue + + for col, cell_value in enumerate(cell_values, 1): + sanitized_value = self._sanitizeCellValue(cell_value) + cell = sheet.cell(row=startRow, column=col, value=sanitized_value) + + # Font styling if cell_style.get("text_color"): cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) + + # Alignment + cell.alignment = Alignment( + horizontal=cell_style.get("align", "left"), + vertical="center" + ) + + # Border + cell.border = thin_border + startRow += 1 + # Auto-adjust column widths + for col in range(1, len(headers) + 1): + column_letter = get_column_letter(col) + sheet.column_dimensions[column_letter].width = 20 + return startRow except Exception as e: @@ -770,9 +950,13 @@ class RendererXlsx(BaseRenderer): return startRow + 1 def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: - """Add a list element to Excel sheet.""" + """Add a list element to Excel sheet. Expects nested content structure.""" try: - list_items = element.get("items", []) + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return startRow + list_items = content.get("items", []) list_style = styles.get("bullet_list", {}) for item in list_items: @@ -788,9 +972,16 @@ class RendererXlsx(BaseRenderer): return startRow + 1 def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: - """Add a paragraph element to Excel sheet.""" + """Add a paragraph element to Excel sheet. 
Expects nested content structure.""" try: - text = element.get("text", "") + # Extract from nested content structure + content = element.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" if text: sheet.cell(row=startRow, column=1, value=text) @@ -807,10 +998,14 @@ class RendererXlsx(BaseRenderer): return startRow + 1 def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: - """Add a heading element to Excel sheet.""" + """Add a heading element to Excel sheet. Expects nested content structure.""" try: - text = element.get("text", "") - level = element.get("level", 1) + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return startRow + text = content.get("text", "") + level = content.get("level", 1) if text: sheet.cell(row=startRow, column=1, value=text) @@ -835,11 +1030,15 @@ class RendererXlsx(BaseRenderer): return startRow + 1 def _addImageToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: - """Add an image element to Excel sheet using openpyxl.""" + """Add an image element to Excel sheet using openpyxl. 
Expects nested content structure.""" try: - base64Data = element.get("base64Data", "") - altText = element.get("altText", "Image") - caption = element.get("caption", "") + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return startRow + base64Data = content.get("base64Data", "") + altText = content.get("altText", "Image") + caption = content.get("caption", "") if not base64Data: # No image data - add placeholder text @@ -891,16 +1090,23 @@ class RendererXlsx(BaseRenderer): return startRow + 1 except ImportError: - self.logger.warning("openpyxl.drawing.image not available, using placeholder") - sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]") + self.logger.error("openpyxl.drawing.image not available, cannot embed image") + errorMsg = f"[Error: Image embedding not available. Image: {altText}]" + errorCell = sheet.cell(row=startRow, column=1, value=errorMsg) + errorCell.font = Font(color="FFFF0000", italic=True) # Red color return startRow + 1 except Exception as imgError: - self.logger.warning(f"Error embedding image in Excel: {str(imgError)}") - sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]") + self.logger.error(f"Error embedding image in Excel: {str(imgError)}") + errorMsg = f"[Error: Could not embed image '{altText}'. {str(imgError)}]" + errorCell = sheet.cell(row=startRow, column=1, value=errorMsg) + errorCell.font = Font(color="FFFF0000", italic=True) # Red color return startRow + 1 except Exception as e: - self.logger.warning(f"Could not add image to Excel: {str(e)}") + self.logger.error(f"Error adding image to Excel: {str(e)}") + errorMsg = f"[Error: Could not process image. 
{str(e)}]" + errorCell = sheet.cell(row=startRow, column=1, value=errorMsg) + errorCell.font = Font(color="FFFF0000", italic=True) # Red color return startRow + 1 def _formatTimestamp(self) -> str: diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 4e405630..1eb453ee 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -213,10 +213,21 @@ class ContentValidator: sourceJson = getattr(doc, 'sourceJson', None) data = getattr(doc, 'documentData', None) + # WICHTIG: For rendered documents (HTML, PDF, DOCX, etc.), jsonStructure is METADATA about the structure, + # NOT the actual rendered content. The actual content is in documentData. + # Include both: jsonStructure for structure metadata, and contentPreview for actual content check if sourceJson and isinstance(sourceJson, dict): # Use source JSON for structure analysis (for rendered documents like xlsx/docx/pdf) jsonSummary = self._summarizeJsonStructure(sourceJson) summary["jsonStructure"] = jsonSummary + # Add note that this is metadata, not actual content + summary["note"] = "jsonStructure contains metadata about document structure. Actual rendered content is in documentData." 
+ + # For rendered documents, also check actual content + if data is not None: + contentPreview = self._getContentPreview(data, formatExt, mimeType) + if contentPreview: + summary["contentPreview"] = contentPreview elif data is not None: # Fallback: try to parse documentData as JSON (for non-rendered documents) if isinstance(data, dict): @@ -227,6 +238,11 @@ class ContentValidator: # Handle list of documents jsonSummary = self._summarizeJsonStructure(data[0]) summary["jsonStructure"] = jsonSummary + else: + # For non-JSON data (e.g., rendered HTML), get content preview + contentPreview = self._getContentPreview(data, formatExt, mimeType) + if contentPreview: + summary["contentPreview"] = contentPreview summaries.append(summary) except Exception as e: @@ -295,6 +311,73 @@ class ContentValidator: bytes /= 1024.0 return f"{bytes:.1f} TB" + def _getContentPreview(self, data: Any, formatExt: str, mimeType: str) -> Optional[Dict[str, Any]]: + """Get structural validation info for rendered documents (generic, NO content preview for security/privacy) + + Returns metadata about document structure to help validation distinguish between: + - Structure metadata (jsonStructure) - describes what should be rendered + - Actual rendered content (documentData) - the actual document file + + Does NOT expose actual content, only structural indicators. + """ + try: + if data is None: + return None + + preview = {} + + # Generic content type detection + if isinstance(data, bytes): + preview["dataType"] = "bytes" + preview["contentLength"] = len(data) + # Check if it's likely text-based (for text formats like HTML, TXT, etc.) 
+ try: + # Try to decode as UTF-8 to check if it's text-based + decoded = data.decode('utf-8', errors='strict') + preview["isTextBased"] = True + preview["contentLength"] = len(decoded) + + # For text-based formats, check if it looks like rendered content vs JSON metadata + # JSON metadata typically starts with { or [ and contains structure keywords + trimmed = decoded.strip() + looksLikeJson = (trimmed.startswith('{') or trimmed.startswith('[')) and \ + ('"sections"' in trimmed or '"contentPartIds"' in trimmed or '"generationHint"' in trimmed) + preview["looksLikeRenderedContent"] = not looksLikeJson + + except UnicodeDecodeError: + # Not valid UTF-8, likely binary (PDF, DOCX, images, etc.) + preview["isTextBased"] = False + preview["isBinary"] = True + # Binary files with content are rendered (not metadata) + preview["looksLikeRenderedContent"] = True + + elif isinstance(data, str): + preview["dataType"] = "string" + preview["isTextBased"] = True + preview["contentLength"] = len(data) + + # Check if it looks like rendered content vs JSON metadata + trimmed = data.strip() + looksLikeJson = (trimmed.startswith('{') or trimmed.startswith('[')) and \ + ('"sections"' in trimmed or '"contentPartIds"' in trimmed or '"generationHint"' in trimmed) + preview["looksLikeRenderedContent"] = not looksLikeJson + + elif isinstance(data, (dict, list)): + # If documentData is still a dict/list, it's likely structure metadata, not rendered content + preview["dataType"] = "json" + preview["isTextBased"] = True + preview["looksLikeRenderedContent"] = False + preview["note"] = "documentData is JSON structure, not rendered document file" + else: + preview["dataType"] = type(data).__name__ + preview["contentLength"] = len(str(data)) if hasattr(data, '__len__') else 0 + + return preview if preview else None + + except Exception as e: + logger.warning(f"Error getting content structure info: {str(e)}") + return None + def _isFormatCompatible(self, deliveredFormat: str, expectedFormat: str) 
-> bool: """ @@ -445,31 +528,23 @@ EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContex === VALIDATION INSTRUCTIONS === -IMPORTANT: Different formats can represent the same data structure. Do not reject a format just because it differs from expected - check the structure summary for actual content. +CRITICAL: Validate ONLY metadata/structure. Documents may be binary (PDF, DOCX, images) or very large (200MB+). NEVER try to read or validate actual content values. VALIDATION RULES: -1. Use structure summary (sections, statistics, counts) as PRIMARY evidence for DATA-ORIENTED criteria. Trust structure over format claims. -2. Use ACTION HISTORY as PRIMARY evidence for PROCESS-ORIENTED criteria (e.g., "internet search performed", "sources cited"). Document metadata may only reflect the last action, not the entire workflow. -3. For each criterion in criteriaMapping: evaluate ONLY that criterion. Do not mention other criteria. -4. Priority: Data completeness > Format compatibility. Missing data is more critical than format mismatch. -5. Format understanding: Different formats can represent equivalent data structures. Focus on content, not format name. -6. Multi-step workflow awareness: If ACTION HISTORY is present, consider the workflow as a whole. Document metadata (e.g., extraction_method) describes how data was EXTRACTED in the last step, not necessarily how it was OBTAINED in the workflow. -7. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help. -8. 
CRITICAL - Data vs Data Description: When criteria require specific data types (e.g., images, tables, charts, files), distinguish between: - - ACTUAL DATA: The actual data itself (binary data, structured data, embedded content) - - DATA DESCRIPTIONS: Text fields that describe or specify what data should be created (e.g., "image_description", "table_description", "chart_specification") - these are TEXT METADATA, NOT the actual data - - If only descriptions/specifications exist but no actual data, the criterion is NOT met. Descriptions are instructions for creating data, not the data itself. - - Check content types in sections/elements: if content_type matches the required data type (e.g., "image" for images, "table" for tables), actual data exists. If only text fields describing the data exist, the data is missing. - - Check document statistics: if counts for the required data type are 0, the data is missing even if descriptions exist. +1. METADATA ONLY: Use jsonStructure (sections, contentPartIds, content_type, statistics) and contentPreview (dataType, contentLength, looksLikeRenderedContent) for validation. These are METADATA indicators, NOT actual content. +2. FORMAT VALIDATION: Check mimeType/format metadata only. Do NOT inspect content to determine format. Format mismatch = wrong_format gap. +3. CONTENT EXISTENCE: Use contentPreview.looksLikeRenderedContent=true to confirm content exists. Use jsonStructure.content_type to confirm data types exist (e.g., "image" section = image exists). Do NOT validate content quality, accuracy, or completeness of actual data values. +4. STRUCTURE VALIDATION: Use jsonStructure.sections, statistics (counts, rowCount, columnCount) as evidence. Trust structure metadata over format claims. +5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done"). +6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria. 
VALIDATION STEPS: -- Check ACTION HISTORY first (if present) for PROCESS-ORIENTED criteria (e.g., "search performed", "sources used", "verification done") -- Check ACTION VALIDATION METADATA (if present) - this contains action-specific context for the LAST action only -- Check structure summary for quantities, counts, statistics (for DATA-ORIENTED criteria) -- Compare found values with required values from criteria -- If structure unavailable, use metadata only (format, filename, size) -- Classify gaps: missing_data (less than required), incomplete_data (partial), wrong_structure (wrong organization), wrong_format (format mismatch but data present) -- Assess if documents contain the required data: If structure shows documents lack the data, note this in findings - data must be generated or obtained elsewhere, not re-extracted from same documents +- Check ACTION HISTORY for process-oriented criteria +- Check jsonStructure metadata (sections, content_type, statistics) for structure validation +- Check contentPreview.looksLikeRenderedContent for content existence (not quality) +- Check mimeType/format for format validation +- NEVER try to read actual content values (binary files, large files, data accuracy) +- Classify gaps: missing_data, incomplete_data, wrong_structure, wrong_format SCORING: - Data complete + structure matches → qualityScore: 0.9-1.0 diff --git a/modules/workflows/processing/shared/placeholderFactory.py b/modules/workflows/processing/shared/placeholderFactory.py index 797352ab..c8920247 100644 --- a/modules/workflows/processing/shared/placeholderFactory.py +++ b/modules/workflows/processing/shared/placeholderFactory.py @@ -379,8 +379,34 @@ def extractLearningsAndImprovements(context: Any) -> str: return "No learnings available yet" def extractLatestRefinementFeedback(context: Any) -> str: - """Extract the latest refinement feedback. Maps to {{KEY:LATEST_REFINEMENT_FEEDBACK}}""" + """Extract the latest refinement feedback. 
Maps to {{KEY:LATEST_REFINEMENT_FEEDBACK}} + + CRITICAL: If ERROR level logs are found, refinement should stop processing. + """ try: + # First check for ERROR level logs in workflow + if hasattr(context, 'workflow') and context.workflow: + try: + import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects + from modules.interfaces.interfaceDbAppObjects import getRootInterface + rootInterface = getRootInterface() + interfaceDbChat = interfaceDbChatObjects.getInterface(rootInterface.currentUser) + + # Get workflow logs + chatData = interfaceDbChat.getUnifiedChatData(context.workflow.id, None) + logs = chatData.get("logs", []) + + # Check for ERROR level logs + for log in logs: + if isinstance(log, dict): + log_level = log.get("level", "").upper() + log_message = str(log.get("message", "")) + if log_level == "ERROR" or "ERROR" in log_message.upper(): + return f"CRITICAL: Processing stopped due to ERROR in logs: {log_message[:200]}" + except Exception as log_check_error: + # If we can't check logs, continue with normal feedback extraction + logger.warning(f"Could not check for ERROR logs: {str(log_check_error)}") + if not hasattr(context, 'previousReviewResult') or not context.previousReviewResult or not isinstance(context.previousReviewResult, list): return "No previous refinement feedback available" diff --git a/tests/functional/test10_document_generation_formats.py b/tests/functional/test10_document_generation_formats.py new file mode 100644 index 00000000..941034ba --- /dev/null +++ b/tests/functional/test10_document_generation_formats.py @@ -0,0 +1,541 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Document Generation Formats Test 10 - Tests document generation in DOCX, XLSX, PPTX, and PDF formats +Tests professional document formats with various content types including tables, images, and structured data. 
+""" + +import asyncio +import json +import sys +import os +import time +import base64 +from typing import Dict, Any, List, Optional + +# Add the gateway to path (go up 2 levels from tests/functional/) +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +# Import the service initialization +from modules.services import getInterface as getServices +from modules.datamodels.datamodelChat import UserInputRequest, WorkflowModeEnum +from modules.datamodels.datamodelUam import User +from modules.features.workflow import chatStart +import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects + + +class DocumentGenerationFormatsTester10: + def __init__(self): + # Use root user for testing (has full access to everything) + from modules.interfaces.interfaceDbAppObjects import getRootInterface + rootInterface = getRootInterface() + self.testUser = rootInterface.currentUser + + # Initialize services using the existing system + self.services = getServices(self.testUser, None) # Test user, no workflow + self.workflow = None + self.testResults = {} + self.generatedDocuments = {} + self.pdfFileId = None # Store PDF file ID for reuse + + async def initialize(self): + """Initialize the test environment.""" + # Enable debug file logging for tests + from modules.shared.configuration import APP_CONFIG + APP_CONFIG.set("APP_DEBUG_CHAT_WORKFLOW_ENABLED", True) + + # Set logging level to INFO to see workflow progress + import logging + logging.getLogger().setLevel(logging.INFO) + + print(f"Initialized test with user: {self.testUser.id}") + print(f"Mandate ID: {self.testUser.mandateId}") + print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}") + + # Upload PDF file for testing + await self.uploadPdfFile() + + async def uploadPdfFile(self): + """Upload the PDF file and store its file ID.""" + pdfPath = 
os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "temp", "B2025-02c.pdf") + pdfPath = os.path.abspath(pdfPath) + + if not os.path.exists(pdfPath): + print(f"⚠️ Warning: PDF file not found at {pdfPath}") + print(" Test will continue without PDF attachment") + return + + try: + # Read PDF file + with open(pdfPath, "rb") as f: + pdfContent = f.read() + + # Create file using services.interfaceDbComponent + if not hasattr(self.services, 'interfaceDbComponent') or not self.services.interfaceDbComponent: + print("⚠️ Warning: interfaceDbComponent not available in services") + print(" Test will continue without PDF attachment") + return + + interfaceDbComponent = self.services.interfaceDbComponent + + fileItem = interfaceDbComponent.createFile( + name="B2025-02c.pdf", + mimeType="application/pdf", + content=pdfContent + ) + + # Store file data + interfaceDbComponent.createFileData(fileItem.id, pdfContent) + + self.pdfFileId = fileItem.id + print(f"✅ Uploaded PDF file: {fileItem.fileName} (ID: {self.pdfFileId}, Size: {len(pdfContent)} bytes)") + + except Exception as e: + import traceback + print(f"⚠️ Warning: Failed to upload PDF file: {str(e)}") + print(f" Traceback: {traceback.format_exc()}") + print(" Test will continue without PDF attachment") + + def createTestPrompt(self, format: str) -> str: + """Create a test prompt for document generation in the specified format. 
+ + The prompt requests: + - Professional document structure with title, sections, tables, and images + - Extraction of content from attached PDF + - Structured data presentation appropriate for the format + """ + formatPrompts = { + "docx": ( + "Create a professional Word document about 'Fuel Station Receipt Analysis' with:\n" + "1) A main title\n" + "2) An executive summary paragraph\n" + "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n" + "4) A detailed analysis section with:\n" + " - Bullet points of key findings\n" + " - A table summarizing transaction details\n" + "5) A conclusion section with recommendations\n\n" + "Format as a professional DOCX document with proper headings and structure." + ), + "xlsx": ( + "Create an Excel spreadsheet analyzing the fuel station receipt from the attached PDF (B2025-02c.pdf).\n" + "Include:\n" + "1) A summary sheet with key metrics\n" + "2) A detailed data sheet with:\n" + " - Transaction details in rows\n" + " - Columns for: Date, Item, Quantity, Price, Total\n" + " - Proper formatting and headers\n" + "3) A calculations sheet with:\n" + " - VAT calculations\n" + " - Net and gross totals\n\n" + "Format as a professional XLSX spreadsheet with formulas and formatting." + ), + "pptx": ( + "Create a PowerPoint presentation about 'Fuel Station Receipt Analysis' with:\n" + "1) Title slide with main title\n" + "2) Overview slide explaining the receipt analysis\n" + "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n" + "4) Analysis slides with:\n" + " - Bullet points of key findings\n" + " - Visual representation of data\n" + "5) Conclusion slide with recommendations\n\n" + "Format as a professional PPTX presentation with consistent styling." 
+ ), + "pdf": ( + "Create a professional PDF document about 'Fuel Station Receipt Analysis' with:\n" + "1) A main title\n" + "2) An introduction paragraph explaining the receipt analysis\n" + "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n" + "4) A section analyzing the receipt data with:\n" + " - Bullet points of key findings\n" + " - A table summarizing transaction details\n" + "5) A conclusion paragraph with recommendations\n\n" + "Format as a professional PDF document suitable for printing." + ) + } + + return formatPrompts.get(format.lower(), formatPrompts["docx"]) + + async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]: + """Generate a document in the specified format using workflow.""" + print("\n" + "="*80) + print(f"GENERATING DOCUMENT IN {format.upper()} FORMAT") + print("="*80) + + prompt = self.createTestPrompt(format) + print(f"Prompt: {prompt[:200]}...") + + # Create user input request with PDF file attachment + listFileId = [] + if self.pdfFileId: + listFileId = [self.pdfFileId] + print(f"Attaching PDF file (ID: {self.pdfFileId})") + else: + print("⚠️ No PDF file attached (file upload may have failed)") + + # Create user input request + userInput = UserInputRequest( + prompt=prompt, + listFileId=listFileId, + userLanguage="en" + ) + + # Start workflow + print(f"\nStarting workflow for {format.upper()} generation...") + workflow = await chatStart( + currentUser=self.testUser, + userInput=userInput, + workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC, + workflowId=None + ) + + if not workflow: + return { + "success": False, + "error": "Failed to start workflow" + } + + self.workflow = workflow + print(f"Workflow started: {workflow.id}") + + # Wait for workflow completion (no timeout - wait indefinitely) + print(f"Waiting for workflow completion...") + completed = await self.waitForWorkflowCompletion(timeout=None) + + if not completed: + return { + "success": False, + "error": "Workflow did not 
complete", + "workflowId": workflow.id, + "status": workflow.status if workflow else "unknown" + } + + # Analyze results + results = self.analyzeWorkflowResults() + + # Extract documents for this format + documents = results.get("documents", []) + formatDocuments = [d for d in documents if d.get("fileName", "").endswith(f".{format.lower()}")] + + return { + "success": True, + "format": format, + "workflowId": workflow.id, + "status": results.get("status"), + "documentCount": len(formatDocuments), + "documents": formatDocuments, + "results": results + } + + async def waitForWorkflowCompletion(self, timeout: Optional[int] = None, checkInterval: int = 2) -> bool: + """Wait for workflow to complete.""" + if not self.workflow: + return False + + startTime = time.time() + lastStatus = None + + interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser) + + if timeout is None: + print("Waiting indefinitely (no timeout)") + + while True: + # Check timeout only if specified + if timeout is not None and time.time() - startTime > timeout: + print(f"\n⏱️ Timeout after {timeout} seconds") + return False + + # Get current workflow status + try: + currentWorkflow = interfaceDbChat.getWorkflow(self.workflow.id) + if not currentWorkflow: + print("\n❌ Workflow not found") + return False + + currentStatus = currentWorkflow.status + elapsed = int(time.time() - startTime) + + # Print status if it changed + if currentStatus != lastStatus: + print(f"Workflow status: {currentStatus} (elapsed: {elapsed}s)") + lastStatus = currentStatus + + # Check if workflow is complete + if currentStatus in ["completed", "stopped", "failed"]: + self.workflow = currentWorkflow + statusIcon = "✅" if currentStatus == "completed" else "❌" + print(f"\n{statusIcon} Workflow finished with status: {currentStatus} (elapsed: {elapsed}s)") + return currentStatus == "completed" + + # Wait before next check + await asyncio.sleep(checkInterval) + + except Exception as e: + print(f"\n⚠️ Error checking workflow 
status: {str(e)}") + await asyncio.sleep(checkInterval) + + def analyzeWorkflowResults(self) -> Dict[str, Any]: + """Analyze workflow results and extract information.""" + if not self.workflow: + return {"error": "No workflow to analyze"} + + interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser) + workflow = interfaceDbChat.getWorkflow(self.workflow.id) + + if not workflow: + return {"error": "Workflow not found"} + + # Get unified chat data + chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None) + + # Count messages + messages = chatData.get("messages", []) + userMessages = [m for m in messages if m.get("role") == "user"] + assistantMessages = [m for m in messages if m.get("role") == "assistant"] + + # Count documents + documents = chatData.get("documents", []) + + # Get logs + logs = chatData.get("logs", []) + + results = { + "workflowId": workflow.id, + "status": workflow.status, + "workflowMode": str(workflow.workflowMode) if hasattr(workflow, 'workflowMode') else None, + "currentRound": workflow.currentRound, + "totalTasks": workflow.totalTasks, + "totalActions": workflow.totalActions, + "messageCount": len(messages), + "userMessageCount": len(userMessages), + "assistantMessageCount": len(assistantMessages), + "documentCount": len(documents), + "logCount": len(logs), + "documents": documents, + "logs": logs + } + + print(f"\nWorkflow Results:") + print(f" Status: {results['status']}") + print(f" Tasks: {results['totalTasks']}") + print(f" Actions: {results['totalActions']}") + print(f" Messages: {results['messageCount']}") + print(f" Documents: {results['documentCount']}") + + # Print document details + if documents: + print(f"\nGenerated Documents:") + for doc in documents: + fileName = doc.get("fileName", "unknown") + fileSize = doc.get("fileSize", 0) + mimeType = doc.get("mimeType", "unknown") + documentType = doc.get("documentType", "N/A") + print(f" - {fileName} ({fileSize} bytes, {mimeType}, type: {documentType})") + + return 
results + + def verifyDocumentFormat(self, document: Dict[str, Any], expectedFormat: str) -> Dict[str, Any]: + """Verify that a document matches the expected format and contains expected metadata.""" + fileName = document.get("fileName", "") + mimeType = document.get("mimeType", "") + fileSize = document.get("fileSize", 0) + documentType = document.get("documentType") + metadata = document.get("metadata") + + # Expected MIME types + expectedMimeTypes = { + "pdf": ["application/pdf"], + "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"], + "xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"], + "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] + } + + # Expected file extensions + expectedExtensions = { + "pdf": [".pdf"], + "docx": [".docx"], + "xlsx": [".xlsx"], + "pptx": [".pptx"] + } + + formatLower = expectedFormat.lower() + expectedMimes = expectedMimeTypes.get(formatLower, []) + expectedExts = expectedExtensions.get(formatLower, []) + + # Check file extension + hasCorrectExtension = any(fileName.lower().endswith(ext) for ext in expectedExts) + + # Check MIME type + hasCorrectMimeType = any(mimeType.lower() == mime.lower() for mime in expectedMimes) + + # Check file size (should be > 0) + hasValidSize = fileSize > 0 + + # Check document type (should be present) + hasDocumentType = documentType is not None + + # Check metadata (should be present) + hasMetadata = metadata is not None and isinstance(metadata, dict) + + verification = { + "format": expectedFormat, + "fileName": fileName, + "mimeType": mimeType, + "fileSize": fileSize, + "documentType": documentType, + "hasMetadata": hasMetadata, + "hasCorrectExtension": hasCorrectExtension, + "hasCorrectMimeType": hasCorrectMimeType, + "hasValidSize": hasValidSize, + "hasDocumentType": hasDocumentType, + "isValid": hasCorrectExtension and hasValidSize and hasCorrectMimeType, + "isComplete": hasCorrectExtension and 
hasValidSize and hasCorrectMimeType and hasDocumentType and hasMetadata + } + + return verification + + async def testAllFormats(self) -> Dict[str, Any]: + """Test document generation in DOCX, XLSX, PPTX, and PDF formats.""" + print("\n" + "="*80) + print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, AND PDF FORMATS") + print("="*80) + + formats = ["docx", "xlsx", "pptx", "pdf"] + results = {} + + for format in formats: + try: + print(f"\n{'='*80}") + print(f"Testing {format.upper()} format...") + print(f"{'='*80}") + + result = await self.generateDocumentInFormat(format) + results[format] = result + + if result.get("success"): + documents = result.get("documents", []) + if documents: + # Verify first document + verification = self.verifyDocumentFormat(documents[0], format) + result["verification"] = verification + + print(f"\n✅ {format.upper()} generation successful!") + print(f" Documents: {len(documents)}") + print(f" Verification: {'✅ PASS' if verification['isValid'] else '❌ FAIL'}") + print(f" Complete (with metadata): {'✅ YES' if verification['isComplete'] else '❌ NO'}") + if verification.get("fileName"): + print(f" File: {verification['fileName']}") + print(f" Size: {verification['fileSize']} bytes") + print(f" MIME: {verification['mimeType']}") + print(f" Document Type: {verification.get('documentType', 'N/A')}") + print(f" Has Metadata: {'✅' if verification.get('hasMetadata') else '❌'}") + else: + print(f"\n⚠️ {format.upper()} generation completed but no documents found") + else: + error = result.get("error", "Unknown error") + print(f"\n❌ {format.upper()} generation failed: {error}") + + # Small delay between tests + await asyncio.sleep(2) + + except Exception as e: + import traceback + print(f"\n❌ Error testing {format.upper()}: {str(e)}") + print(traceback.format_exc()) + results[format] = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + + return results + + async def runTest(self): + """Run the complete test.""" 
+ print("\n" + "="*80) + print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF") + print("="*80) + + try: + # Initialize + await self.initialize() + + # Test all formats + formatResults = await self.testAllFormats() + + # Summary + print("\n" + "="*80) + print("TEST SUMMARY") + print("="*80) + + # Format tests summary + print("\nFormat Tests:") + successCount = 0 + failCount = 0 + completeCount = 0 # Documents with metadata + + for format, result in formatResults.items(): + if result.get("success"): + successCount += 1 + verification = result.get("verification", {}) + isValid = verification.get("isValid", False) + isComplete = verification.get("isComplete", False) + if isComplete: + completeCount += 1 + statusIcon = "✅" if isValid else "⚠️" + completeIcon = "✅" if isComplete else "❌" + docCount = result.get("documentCount", 0) + print(f"{statusIcon} {format.upper():6s}: {'PASS' if isValid else 'FAIL'} - {docCount} document(s) - Metadata: {completeIcon}") + else: + failCount += 1 + error = result.get("error", "Unknown error") + print(f"❌ {format.upper():6s}: FAIL - {error}") + + print(f"\nFormat Tests: {successCount} passed, {failCount} failed out of {len(formatResults)} formats") + print(f"Complete Documents (with metadata): {completeCount} out of {successCount} successful generations") + + self.testResults = { + "success": failCount == 0, + "formatTests": { + "successCount": successCount, + "failCount": failCount, + "completeCount": completeCount, + "totalFormats": len(formatResults), + "results": formatResults + }, + "totalSuccess": successCount, + "totalFail": failCount + } + + return self.testResults + + except Exception as e: + import traceback + print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}") + print(f"Traceback:\n{traceback.format_exc()}") + self.testResults = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + return self.testResults + + +async def main(): + """Run document generation formats 
test 10.""" + tester = DocumentGenerationFormatsTester10() + results = await tester.runTest() + + # Print final results as JSON for easy parsing + print("\n" + "="*80) + print("FINAL RESULTS (JSON)") + print("="*80) + print(json.dumps(results, indent=2, default=str)) + + +if __name__ == "__main__": + asyncio.run(main()) + From a540729533a598f3d6cc309c4d7ee17dfa465dfa Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 29 Dec 2025 00:06:34 +0100 Subject: [PATCH 16/21] fixed rendering styles --- .../services/serviceAi/subStructureFilling.py | 38 +- .../renderers/rendererDocx.py | 119 ++++- .../renderers/rendererHtml.py | 1 - .../renderers/rendererPptx.py | 478 ++++++++++++------ .../test10_document_generation_formats.py | 25 +- 5 files changed, 456 insertions(+), 205 deletions(-) diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index af1e51f6..fd4d8bcd 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -215,10 +215,16 @@ class StructureFiller: useAiCall = section.get("useAiCall", False) # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden - # Aber: Wenn generationHint vorhanden ist, kann AI auch ohne ContentParts generieren (z.B. Executive Summary) + # Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist + # (z.B. 
wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde) if len(contentPartIds) == 0 and not generationHint: useAiCall = False logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") + elif len(contentPartIds) == 0 and generationHint and not useAiCall: + # Override: If there's a generationHint but no content parts, we should use AI + # This handles cases where structure generation set useAiCall=false incorrectly + useAiCall = True + logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)") elements = [] @@ -658,14 +664,28 @@ class StructureFiller: logger.error(f"Error generating section {sectionId}: {str(e)}") # NICHT raise - Section wird mit Fehlermeldung gerendert else: - # Füge extrahierten Text direkt hinzu (kein AI-Call) - logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly") - elements.append({ - "type": "extracted_text", - "content": part.data, - "source": part.metadata.get("documentId"), - "extractionPrompt": part.metadata.get("extractionPrompt") - }) + # Füge extrahierten Content direkt hinzu (kein AI-Call) + # CRITICAL: Check part typeGroup to determine correct element type + if part.typeGroup == "image": + # Image content should be added as image element, not extracted_text + logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call - adding as image element") + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": part.metadata.get("caption", "") + } + }) + else: + # Text content - add as extracted_text element + logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text 
directly") + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) section["elements"] = elements diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index 43c85c47..337811a4 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -335,14 +335,26 @@ class RendererDocx(BaseRenderer): elif section_type == "heading": self._renderJsonHeading(doc, element, styles) elif section_type == "paragraph": - self._renderJsonParagraph(doc, element, styles) + # CRITICAL: Check if this is actually an image element before rendering as paragraph + # Image elements might not have type set, but have base64Data in content + content = element.get("content", {}) + if isinstance(content, dict) and content.get("base64Data"): + # This is actually an image, render it as such + self._renderJsonImage(doc, element, styles) + else: + self._renderJsonParagraph(doc, element, styles) elif section_type == "code_block": self._renderJsonCodeBlock(doc, element, styles) elif section_type == "image": self._renderJsonImage(doc, element, styles) else: - # Fallback to paragraph for unknown types - self._renderJsonParagraph(doc, element, styles) + # Fallback to paragraph for unknown types, but check for image data first + content = element.get("content", {}) + if isinstance(content, dict) and content.get("base64Data"): + # This is actually an image, render it as such + self._renderJsonImage(doc, element, styles) + else: + self._renderJsonParagraph(doc, element, styles) except Exception as e: self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") @@ -517,13 +529,22 @@ class RendererDocx(BaseRenderer): if not isinstance(content, dict): return items = content.get("items", []) - 
bullet_style = styles["bullet_list"] + bullet_style = styles.get("bullet_list", {}) for item in items: if isinstance(item, str): para = doc.add_paragraph(item, style='List Bullet') elif isinstance(item, dict) and "text" in item: para = doc.add_paragraph(item["text"], style='List Bullet') + + # Apply bullet list styling from style set + if bullet_style and para.runs: + for run in para.runs: + if "font_size" in bullet_style: + run.font.size = Pt(bullet_style["font_size"]) + if "color" in bullet_style: + color_hex = bullet_style["color"].lstrip('#') + run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") @@ -540,7 +561,13 @@ class RendererDocx(BaseRenderer): if text: level = max(1, min(6, level)) - doc.add_heading(text, level=level) + # Use custom heading style if available, otherwise use built-in + style_name = f"Heading {level}" if level <= 2 else "Heading 1" + try: + para = doc.add_paragraph(text, style=style_name) + except KeyError: + # Fallback to built-in heading if custom style doesn't exist + doc.add_heading(text, level=level) except Exception as e: self.logger.warning(f"Error rendering heading: {str(e)}") @@ -570,6 +597,25 @@ class RendererDocx(BaseRenderer): if text: para = doc.add_paragraph(text) + # Apply paragraph styling from style set + paragraph_style = styles.get("paragraph", {}) + if paragraph_style: + for run in para.runs: + if "font_size" in paragraph_style: + run.font.size = Pt(paragraph_style["font_size"]) + if "bold" in paragraph_style: + run.font.bold = paragraph_style["bold"] + if "color" in paragraph_style: + color_hex = paragraph_style["color"].lstrip('#') + run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + if "align" in paragraph_style: + align = paragraph_style["align"] + if align == "center": + para.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif align 
== "right": + para.alignment = WD_ALIGN_PARAGRAPH.RIGHT + else: + para.alignment = WD_ALIGN_PARAGRAPH.LEFT except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") @@ -583,16 +629,21 @@ class RendererDocx(BaseRenderer): return code = content.get("code", "") language = content.get("language", "") + code_style = styles.get("code_block", {}) if code: if language: lang_para = doc.add_paragraph(f"Code ({language}):") - lang_para.runs[0].bold = True + if lang_para.runs: + lang_para.runs[0].bold = True code_para = doc.add_paragraph(code) for run in code_para.runs: - run.font.name = 'Courier New' - run.font.size = Pt(10) + run.font.name = code_style.get("font", "Courier New") + run.font.size = Pt(code_style.get("font_size", 9)) + if "color" in code_style: + color_hex = code_style["color"].lstrip('#') + run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) except Exception as e: self.logger.warning(f"Error rendering code block: {str(e)}") @@ -602,24 +653,38 @@ class RendererDocx(BaseRenderer): try: # Extract from nested content structure content = image_data.get("content", {}) - if not isinstance(content, dict): - return - base64_data = content.get("base64Data", "") - alt_text = content.get("altText", "Image") + base64_data = "" + alt_text = "Image" - if base64_data: - try: - image_bytes = base64.b64decode(base64_data) - doc.add_picture(io.BytesIO(image_bytes), width=Inches(4)) - - if alt_text: - caption_para = doc.add_paragraph(f"Figure: {alt_text}") - caption_para.runs[0].italic = True - except Exception as embedError: - # Image decoding or embedding failed - raise Exception(f"Failed to decode or embed image: {str(embedError)}") - else: + if isinstance(content, dict): + base64_data = content.get("base64Data", "") + alt_text = content.get("altText", "Image") + elif isinstance(content, str): + # Content might be base64 string directly (shouldn't happen, but handle it) + self.logger.warning("Image 
content is a string, not a dict. This should not happen.") + return + + # If base64Data not found in content, try direct element fields (fallback) + if not base64_data: + base64_data = image_data.get("base64Data", "") + if not alt_text or alt_text == "Image": + alt_text = image_data.get("altText", "Image") + + # CRITICAL: Ensure we don't render base64 data as text + # If base64_data looks like it might be rendered elsewhere, skip it + if not base64_data: raise Exception("No image data provided (base64Data is empty)") + + try: + image_bytes = base64.b64decode(base64_data) + doc.add_picture(io.BytesIO(image_bytes), width=Inches(4)) + + if alt_text and alt_text != "Image": + caption_para = doc.add_paragraph(f"Figure: {alt_text}") + caption_para.runs[0].italic = True + except Exception as embedError: + # Image decoding or embedding failed + raise Exception(f"Failed to decode or embed image: {str(embedError)}") except Exception as e: self.logger.error(f"Error embedding image in DOCX: {str(e)}") @@ -792,7 +857,11 @@ class RendererDocx(BaseRenderer): if "heading2" in styleSet: self._createStyle(doc, "Heading 2", styleSet["heading2"], WD_STYLE_TYPE.PARAGRAPH) - # Note: List Bullet and List Number are built-in Word styles, no need to create + # Create Paragraph style + if "paragraph" in styleSet: + self._createStyle(doc, "Custom Paragraph", styleSet["paragraph"], WD_STYLE_TYPE.PARAGRAPH) + + # Note: List Bullet and List Number are built-in Word styles, but we apply custom styling to runs except Exception as e: self.logger.warning(f"Could not set up document styles: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 4d7dafe0..dda2c09f 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -712,7 +712,6 @@ class RendererHtml(BaseRenderer): # Bestimme MIME-Type und Extension mimeType = 
element.get("mimeType", "") or (content.get("mimeType", "") if isinstance(content, dict) else "") - if not mimeType or mimeType == "unknown": if not mimeType or mimeType == "unknown": # Versuche MIME-Type aus base64 zu erkennen if base64Data.startswith("/9j/"): diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 850a59a4..f824aa62 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -106,11 +106,85 @@ class RendererPptx(BaseRenderer): if hasImages: self._addImagesToSlide(slide, slide_data.get("images", []), styles) - # Set content with AI-generated styling (if not image-only slide) - if slide_content or not hasImages: + # Render sections with proper PowerPoint objects (tables, lists, etc.) + slide_sections = slide_data.get("sections", []) + if slide_sections: + # Use content placeholder for structured content content_shape = slide.placeholders[1] + text_frame = content_shape.text_frame + text_frame.clear() - # Format content text with AI styles + # Track vertical position for multiple content types + current_y = Inches(1.5) # Start below title + + for section in slide_sections: + section_type = section.get("content_type", "paragraph") + elements = section.get("elements", []) + + # Handle sections without elements (e.g., headings that create slides) + if not elements: + continue + + for element in elements: + if not isinstance(element, dict): + continue + + # Check element type first, fall back to section type + element_type = element.get("type", "") + if not element_type: + element_type = section_type + + if element_type == "table": + # Render as actual PowerPoint table + self._addTableToSlide(slide, element, styles, current_y) + current_y += Inches(2) # Space for table + elif element_type == "bullet_list" or element_type == "list": + # Render as actual PowerPoint bullet list + 
self._addBulletListToSlide(slide, element, styles, text_frame) + elif element_type == "heading": + # Render as heading in text frame + self._addHeadingToSlide(slide, element, styles, text_frame) + elif element_type == "paragraph": + # Render as paragraph in text frame + self._addParagraphToSlide(slide, element, styles, text_frame) + elif element_type == "code_block" or element_type == "code": + # Render as formatted code block + self._addCodeBlockToSlide(slide, element, styles, text_frame) + elif element_type == "extracted_text": + # Render extracted text as paragraph with styling + content = element.get("content", "") + source = element.get("source", "") + if content: + paragraph_style = styles.get("paragraph", {}) + p = text_frame.add_paragraph() + p.text = content + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = paragraph_style.get("bold", False) + p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + if source: + p.add_run(f" (Source: {source})").font.italic = True + elif element_type == "reference": + # Render reference + label = element.get("label", "Reference") + p = text_frame.add_paragraph() + p.text = f"[Reference: {label}]" + p.font.italic = True + else: + # Fallback: try to render as paragraph + content = element.get("content", "") + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + + if text: + self._addParagraphToSlide(slide, element, styles, text_frame) + + # Fallback: if no sections but has content text, render as before + elif slide_content and not hasImages: + content_shape = slide.placeholders[1] text_frame = content_shape.text_frame text_frame.clear() @@ -126,38 +200,12 @@ class RendererPptx(BaseRenderer): p.text = paragraph.strip() - # Apply AI-generated styling based on content type - if paragraph.startswith('#'): - # Header - p.text = paragraph.lstrip('#').strip() - heading_style = 
styles.get("heading", {}) - p.font.size = Pt(heading_style.get("font_size", 32)) - p.font.bold = heading_style.get("bold", True) - heading_color = self._getSafeColor(heading_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*heading_color) - elif paragraph.startswith('##'): - # Subheader - p.text = paragraph.lstrip('#').strip() - subheading_style = styles.get("subheading", {}) - p.font.size = Pt(subheading_style.get("font_size", 24)) - p.font.bold = subheading_style.get("bold", True) - subheading_color = self._getSafeColor(subheading_style.get("color", (79, 79, 79))) - p.font.color.rgb = RGBColor(*subheading_color) - elif paragraph.startswith('*') and paragraph.endswith('*'): - # Bold text - p.text = paragraph.strip('*') - paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = True - paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*paragraph_color) - else: - # Regular text - paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = paragraph_style.get("bold", False) - paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*paragraph_color) + # Apply AI-generated styling + paragraph_style = styles.get("paragraph", {}) + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = paragraph_style.get("bold", False) + paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) + p.font.color.rgb = RGBColor(*paragraph_color) # Apply alignment align = paragraph_style.get("align", "left") @@ -396,8 +444,7 @@ class RendererPptx(BaseRenderer): if userPrompt and aiService: self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) - # Convert colors to PPTX format after 
getting styles - enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet) + # Colors already converted in _getAiStylesWithPptxColors return self._validateStylesReadability(enhancedStyleSet) else: # Use default styles only @@ -481,104 +528,19 @@ Return ONLY this JSON with your changes: JSON ONLY. NO OTHER TEXT.""" async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: - """Get AI styles with proper PowerPoint color conversion.""" + """Get AI styles with proper PowerPoint color conversion. Uses base _getAiStyles for debug file writing.""" if not aiService: return default_styles try: - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + # Use base template method which handles debug file writing + enhanced_styles = await self._getAiStyles(aiService, style_template, default_styles) - request_options = AiCallOptions() - request_options.operationType = OperationTypeEnum.DATA_GENERATE - - request = AiCallRequest(prompt=style_template, context="", options=request_options) - - # Check if AI service is properly configured - if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects: - self.logger.warning("AI service not properly configured, using defaults") - return default_styles - - response = await aiService.callAi(request) - - # Check if response is valid - if not response: - self.logger.warning("AI service returned no response, using defaults") - return default_styles - - # json and re are already imported at module level - - # Clean and parse JSON - result = response.content.strip() if response and response.content else "" - - # Check if result is empty - if not result: - self.logger.warning("AI styling returned empty response, using defaults") - return default_styles - - # Log the raw response for debugging - self.logger.debug(f"AI styling raw response: {result[:200]}...") - - # Extract JSON from various formats - json_match = 
re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) - if json_match: - result = json_match.group(1).strip() - elif result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - - # Try to extract JSON from explanatory text - json_patterns = [ - r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object - r'\{.*?"title".*?\}', # JSON with title field - r'\{.*?"font_size".*?\}', # JSON with font_size field - ] - - for pattern in json_patterns: - json_match = re.search(pattern, result, re.DOTALL) - if json_match: - result = json_match.group(0) - break - - # Additional cleanup - remove any leading/trailing whitespace and newlines - result = result.strip() - - # Check if result is still empty after cleanup - if not result: - self.logger.warning("AI styling returned empty content after cleanup, using defaults") - return default_styles - - # Try to parse JSON - try: - styles = json.loads(result) - self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}") - except json.JSONDecodeError as json_error: - self.logger.warning(f"AI styling returned invalid JSON: {json_error}") - self.logger.warning(f"Raw content that failed to parse: {result[:100]}...") - # Try to extract just the JSON part if it's embedded in text - json_start = result.find('{') - json_end = result.rfind('}') - if json_start != -1 and json_end != -1 and json_end > json_start: - json_part = result[json_start:json_end+1] - try: - styles = json.loads(json_part) - self.logger.info("Successfully extracted JSON from explanatory text") - self.logger.debug(f"Extracted AI styles: {list(styles.keys())}") - except json.JSONDecodeError: - self.logger.warning("Could not extract valid JSON from response, using defaults") - return default_styles - else: - return default_styles - - # Convert colors to PowerPoint RGB format - styles = 
self._convertColorsFormat(styles) - - return styles + # Convert colors to PPTX format (RGB tuples) + return self._convertColorsFormat(enhanced_styles) except Exception as e: - self.logger.warning(f"AI styling failed: {str(e)}, using defaults") + self.logger.warning(f"AI style enhancement failed: {str(e)}, using defaults") return default_styles def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: @@ -962,13 +924,10 @@ JSON ONLY. NO OTHER TEXT.""" return 1 # Default to title and content layout def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: - """Create slides from sections based on content density and user intent.""" + """Create slides from sections: each heading creates a new slide, content accumulates until next heading.""" try: slides = [] - content_per_slide = styles.get("content_per_slide", "concise") - - # Group sections by type and create slides - current_slide_content = [] + current_slide_sections = [] # Store sections (not formatted text) for proper rendering current_slide_title = "Content Overview" for section in sections: @@ -981,13 +940,13 @@ JSON ONLY. NO OTHER TEXT.""" if section_type == "heading": # If we have accumulated content, create a slide - if current_slide_content: + if current_slide_sections: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content), + "sections": current_slide_sections.copy(), # Store sections for proper rendering "images": [] }) - current_slide_content = [] + current_slide_sections = [] # Start new slide with heading as title heading_found = False @@ -1012,13 +971,13 @@ JSON ONLY. 
NO OTHER TEXT.""" current_slide_title = section.get("id", "Untitled Section") elif section_type == "image": # Create separate slide for image - if current_slide_content: + if current_slide_sections: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content), + "sections": current_slide_sections.copy(), "images": [] }) - current_slide_content = [] + current_slide_sections = [] # Extract image data imageData = [] @@ -1045,20 +1004,18 @@ JSON ONLY. NO OTHER TEXT.""" slides.append({ "title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"), - "content": "", + "sections": [], "images": imageData }) else: - # Add content to current slide - formatted_content = self._formatSectionContent(section) - if formatted_content: - current_slide_content.append(formatted_content) + # Add section to current slide (will be rendered properly) + current_slide_sections.append(section) # Add final slide if there's content - if current_slide_content: + if current_slide_sections: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content), + "sections": current_slide_sections.copy(), "images": [] }) @@ -1204,24 +1161,217 @@ JSON ONLY. NO OTHER TEXT.""" except Exception as e: logger.error(f"Error embedding images in PPTX slide: {str(e)}") - # Add error message text box to slide - try: - from pptx.util import Inches, Pt - from pptx.enum.text import PP_ALIGN - errorMsg = f"[Error: Could not embed image(s). 
{str(e)}]" - errorBox = slide.shapes.add_textbox( - Inches(0.5), - Inches(2), - slideWidth - Inches(1), - Inches(0.5) - ) - errorFrame = errorBox.text_frame - errorFrame.text = errorMsg - errorFrame.paragraphs[0].font.size = Pt(12) - errorFrame.paragraphs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color - errorFrame.paragraphs[0].alignment = PP_ALIGN.LEFT - except Exception as errorBoxError: - logger.error(f"Could not add error message to slide: {str(errorBoxError)}") + + def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float) -> None: + """Add a PowerPoint table to slide.""" + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return + + headers = content.get("headers", []) + rows = content.get("rows", []) + + if not headers: + return + + # Calculate table dimensions + num_cols = len(headers) + num_rows = len(rows) + 1 # +1 for header row + left = Inches(0.5) + width = slide.presentation.slide_width - Inches(1) + row_height = Inches(0.4) + + # Create table + table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, row_height * num_rows) + table = table_shape.table + + # Set column widths + col_width = width / num_cols + for col_idx in range(num_cols): + table.columns[col_idx].width = col_width + + # Add headers with styling + header_style = styles.get("table_header", {}) + header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121))) + header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255))) + header_font_size = header_style.get("font_size", 18) + + for col_idx, header in enumerate(headers): + cell = table.cell(0, col_idx) + cell.text = str(header) + cell.fill.solid() + cell.fill.fore_color.rgb = RGBColor(*header_bg_color) + cell.text_frame.paragraphs[0].font.bold = 
header_style.get("bold", True) + cell.text_frame.paragraphs[0].font.size = Pt(header_font_size) + cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*header_text_color) + + align = header_style.get("align", "center") + if align == "left": + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + elif align == "right": + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT + else: + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER + + # Add data rows with styling + cell_style = styles.get("table_cell", {}) + cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255))) + cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47))) + cell_font_size = cell_style.get("font_size", 16) + + for row_idx, row_data in enumerate(rows, 1): + for col_idx, cell_data in enumerate(row_data[:num_cols]): + cell = table.cell(row_idx, col_idx) + cell.text = str(cell_data) + cell.fill.solid() + cell.fill.fore_color.rgb = RGBColor(*cell_bg_color) + cell.text_frame.paragraphs[0].font.size = Pt(cell_font_size) + cell.text_frame.paragraphs[0].font.bold = cell_style.get("bold", False) + cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*cell_text_color) + + align = cell_style.get("align", "left") + if align == "center": + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER + elif align == "right": + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT + else: + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + + except Exception as e: + logger.warning(f"Error adding table to slide: {str(e)}") + + def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + """Add bullet list to slide text frame.""" + try: + from pptx.util import Pt + from pptx.dml.color import RGBColor + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return + + items = content.get("items", []) + if not items: + return + + 
list_style = styles.get("bullet_list", {}) + for item in items: + p = text_frame.add_paragraph() + if isinstance(item, dict): + p.text = item.get("text", "") + else: + p.text = str(item) + + p.level = 0 + p.font.size = Pt(list_style.get("font_size", 18)) + p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47)))) + + except Exception as e: + logger.warning(f"Error adding bullet list to slide: {str(e)}") + + def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + """Add heading to slide text frame.""" + try: + from pptx.util import Pt + from pptx.dml.color import RGBColor + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return + + text = content.get("text", "") + level = content.get("level", 1) + + if text: + p = text_frame.add_paragraph() + p.text = text + p.level = min(level - 1, 2) # PowerPoint supports 0-2 levels + + heading_style = styles.get("heading", {}) + p.font.size = Pt(heading_style.get("font_size", 32)) + p.font.bold = heading_style.get("bold", True) + p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (47, 47, 47)))) + + except Exception as e: + logger.warning(f"Error adding heading to slide: {str(e)}") + + def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + """Add paragraph to slide text frame.""" + try: + from pptx.util import Pt + from pptx.dml.color import RGBColor + from pptx.enum.text import PP_ALIGN + + # Extract from nested content structure + content = element.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + + if text: + p = text_frame.add_paragraph() + p.text = text + + paragraph_style = styles.get("paragraph", {}) + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = 
paragraph_style.get("bold", False) + p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + + align = paragraph_style.get("align", "left") + if align == "center": + p.alignment = PP_ALIGN.CENTER + elif align == "right": + p.alignment = PP_ALIGN.RIGHT + else: + p.alignment = PP_ALIGN.LEFT + + except Exception as e: + logger.warning(f"Error adding paragraph to slide: {str(e)}") + + def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + """Add code block to slide text frame.""" + try: + from pptx.util import Pt + from pptx.dml.color import RGBColor + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return + + code = content.get("code", "") + language = content.get("language", "") + + if code: + code_style = styles.get("code_block", {}) + code_font = code_style.get("font", "Courier New") + code_font_size = code_style.get("font_size", 9) + code_color = self._getSafeColor(code_style.get("color", (47, 47, 47))) + + p = text_frame.add_paragraph() + if language: + p.text = f"Code ({language}):" + p.font.bold = True + p = text_frame.add_paragraph() + + p.text = code + p.font.name = code_font + p.font.size = Pt(code_font_size) + p.font.color.rgb = RGBColor(*code_color) + + except Exception as e: + logger.warning(f"Error adding code block to slide: {str(e)}") def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" diff --git a/tests/functional/test10_document_generation_formats.py b/tests/functional/test10_document_generation_formats.py index 941034ba..05532313 100644 --- a/tests/functional/test10_document_generation_formats.py +++ b/tests/functional/test10_document_generation_formats.py @@ -153,6 +153,17 @@ class DocumentGenerationFormatsTester10: " - A table summarizing transaction details\n" "5) A conclusion paragraph with recommendations\n\n" "Format as a professional PDF 
document suitable for printing." + ), + "html": ( + "Create a professional HTML document about 'Fuel Station Receipt Analysis' with:\n" + "1) A main title\n" + "2) An introduction paragraph explaining the receipt analysis\n" + "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n" + "4) A section analyzing the receipt data with:\n" + " - Bullet points of key findings\n" + " - A table summarizing transaction details\n" + "5) A conclusion paragraph with recommendations\n\n" + "Format as a professional HTML document with proper styling, responsive design, and embedded CSS." ) } @@ -350,7 +361,8 @@ class DocumentGenerationFormatsTester10: "pdf": ["application/pdf"], "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"], "xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"], - "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] + "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"], + "html": ["text/html", "application/xhtml+xml"] } # Expected file extensions @@ -358,7 +370,8 @@ class DocumentGenerationFormatsTester10: "pdf": [".pdf"], "docx": [".docx"], "xlsx": [".xlsx"], - "pptx": [".pptx"] + "pptx": [".pptx"], + "html": [".html", ".htm"] } formatLower = expectedFormat.lower() @@ -398,12 +411,12 @@ class DocumentGenerationFormatsTester10: return verification async def testAllFormats(self) -> Dict[str, Any]: - """Test document generation in DOCX, XLSX, PPTX, and PDF formats.""" + """Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats.""" print("\n" + "="*80) - print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, AND PDF FORMATS") + print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS") print("="*80) - formats = ["docx", "xlsx", "pptx", "pdf"] + formats = ["docx", "xlsx", "pptx", "pdf", "html"] results = {} for format in formats: @@ -456,7 +469,7 @@ class 
DocumentGenerationFormatsTester10: async def runTest(self): """Run the complete test.""" print("\n" + "="*80) - print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF") + print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML") print("="*80) try: From 29c27d4f405c0bbbaaff356ccd49206c496db0de Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 29 Dec 2025 00:11:58 +0100 Subject: [PATCH 17/21] prioritized openai model against anthropic for cost efficiency --- modules/aicore/aicorePluginAnthropic.py | 2 +- modules/aicore/aicorePluginOpenai.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py index 5232d5a3..2a619056 100644 --- a/modules/aicore/aicorePluginAnthropic.py +++ b/modules/aicore/aicorePluginAnthropic.py @@ -67,7 +67,7 @@ class AiAnthropic(BaseConnectorAi): processingMode=ProcessingModeEnum.DETAILED, operationTypes=createOperationTypeRatings( (OperationTypeEnum.PLAN, 9), - (OperationTypeEnum.DATA_ANALYSE, 10), + (OperationTypeEnum.DATA_ANALYSE, 9), (OperationTypeEnum.DATA_GENERATE, 9), (OperationTypeEnum.DATA_EXTRACT, 8) ), diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index 026be18b..f42bdb63 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -67,8 +67,8 @@ class AiOpenai(BaseConnectorAi): processingMode=ProcessingModeEnum.ADVANCED, operationTypes=createOperationTypeRatings( (OperationTypeEnum.PLAN, 8), - (OperationTypeEnum.DATA_ANALYSE, 9), - (OperationTypeEnum.DATA_GENERATE, 9), + (OperationTypeEnum.DATA_ANALYSE, 10), + (OperationTypeEnum.DATA_GENERATE, 10), (OperationTypeEnum.DATA_EXTRACT, 7) ), version="gpt-4o", From 25d2908d48ea0e285ac41b08c31bada609da4e41 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 29 Dec 2025 01:40:30 +0100 Subject: [PATCH 18/21] fixes in validation and rendering --- modules/aicore/aicorePluginOpenai.py | 6
+- .../services/serviceAi/subStructureFilling.py | 23 ++- .../serviceAi/subStructureGeneration.py | 19 +- .../renderers/rendererBaseTemplate.py | 32 ++- .../renderers/rendererDocx.py | 35 +++- .../renderers/rendererHtml.py | 18 +- .../renderers/rendererPdf.py | 17 +- .../renderers/rendererPptx.py | 186 ++++++++++++------ .../renderers/rendererXlsx.py | 21 +- .../processing/adaptive/contentValidator.py | 176 ++++++++++++++--- 10 files changed, 416 insertions(+), 117 deletions(-) diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index f42bdb63..b2e256a4 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -59,14 +59,14 @@ class AiOpenai(BaseConnectorAi): contextLength=128000, costPer1kTokensInput=0.03, costPer1kTokensOutput=0.06, - speedRating=7, # Good speed for complex tasks - qualityRating=9, # High quality + speedRating=8, # Good speed for complex tasks + qualityRating=10, # High quality # capabilities removed (not used in business logic) functionCall=self.callAiBasic, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.ADVANCED, operationTypes=createOperationTypeRatings( - (OperationTypeEnum.PLAN, 8), + (OperationTypeEnum.PLAN, 9), (OperationTypeEnum.DATA_ANALYSE, 10), (OperationTypeEnum.DATA_GENERATE, 10), (OperationTypeEnum.DATA_EXTRACT, 7) diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index fd4d8bcd..e0cdfc53 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -818,6 +818,11 @@ GENERATION HINT: {generationHint} NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title. 
+IMPORTANT - SECTION INDEPENDENCE: +- Each section is independent and self-contained +- One section does NOT have information about another section +- Each section must provide its own context and be understandable alone + AVAILABLE CONTENT PARTS: {contentPartsIndex} @@ -827,6 +832,7 @@ useAiCall RULES: - useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed - useAiCall: false if Format is "object" or "reference" (direct insertion) - useAiCall: false if Format is "extracted" AND simple "include full text" instruction +- useAiCall: true if NO ContentPartIds provided (content must be generated from scratch); Sections without ContentParts MUST have a clear, detailed generationHint explaining what content to generate RETURN JSON: {{ @@ -850,6 +856,7 @@ EXAMPLES (all content types): - code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}} - image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "useAiCall": false, "elements": []}} - reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}} +- NO CONTENT PARTS (generate from scratch): {{"id": "s8", "content_type": "paragraph", "contentPartIds": [], "generationHint": "Write a detailed professional paragraph explaining [specific topic or purpose]. Include [key points to cover]. Address [important aspects]. Conclude with [summary or recommendations].", "useAiCall": true, "elements": []}} CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ @@ -985,13 +992,19 @@ CRITICAL: Return ONLY valid JSON. 
Do not include any explanatory text outside th ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} +## IMPORTANT - SECTION INDEPENDENCE: +- This section is independent and self-contained +- You do NOT have information about other sections' content +- Provide all necessary context within this section +- Context above is for logical flow only, NOT for content dependencies + ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) 3. For table content_type: Create a single table with headers and rows from all ContentParts 4. For bullet_list content_type: Create a single list with items from all ContentParts 5. Format appropriately based on content_type ({contentType}) -6. Ensure the generated content fits logically between previous and following sections +6. Ensure the generated content is self-contained and understandable independently 7. Return ONLY a JSON object with an "elements" array 8. Each element should match the content_type: {contentType} @@ -1026,12 +1039,18 @@ CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} +## IMPORTANT - SECTION INDEPENDENCE: +- This section is independent and self-contained +- You do NOT have information about other sections' content +- Provide all necessary context within this section +- Context above is for logical flow only, NOT for content dependencies + ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. Use the available content parts to populate this section 3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data 4. 
For extracted text: Format appropriately based on content_type ({contentType}) -5. Ensure the generated content fits logically between previous and following sections +5. Ensure the generated content is self-contained and understandable independently 6. Return ONLY a JSON object with an "elements" array 7. Each element should match the content_type: {contentType} diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py index b8db20a1..a11fba62 100644 --- a/modules/services/serviceAi/subStructureGeneration.py +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -145,25 +145,32 @@ class StructureGenerator: if not contentPartsIndex: contentPartsIndex = "\n(No content parts available)" - prompt = f"""USER REQUEST: + prompt = f"""USER REQUEST (for context): +``` {userPrompt} +``` AVAILABLE CONTENT PARTS: {contentPartsIndex} -TASK: Generiere Chapter-Struktur für die zu generierenden Dokumente. +TASK: Generate Chapter Structure for the documents to be generated. -Für jedes Chapter: +IMPORTANT - CHAPTER INDEPENDENCE: +- Each chapter is independent and self-contained +- One chapter does NOT have information about another chapter +- Each chapter must provide its own context and be understandable alone + +For each chapter: - chapter id - level (1, 2, 3, etc.) 
- title -- contentPartIds: [Liste von ContentPart-IDs] +- contentPartIds: [List of ContentPart IDs] - contentPartInstructions: {{ "partId": {{ - "instruction": "Wie Content strukturiert werden soll" + "instruction": "How content should be structured" }} }} -- generationHint: Beschreibung des Inhalts +- generationHint: Description of the content (must be self-contained with all necessary context) OUTPUT FORMAT: {outputFormat} diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index ee16c5a4..e582ddff 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -450,16 +450,34 @@ class BaseRenderer(ABC): code, language = self._extractCodeBlockData(sectionData) return {"content_type": "code_block", "code": code, "language": language} elif sectionType == "image": - base64Data, altText = self._extractImageData(sectionData) + # Extract image data - preserve nested content structure + if isinstance(sectionData, list) and sectionData: + # Get first element from elements array + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + elif isinstance(sectionData, dict): + element = sectionData + else: + return {"content_type": "paragraph", "text": "[Image: Invalid data]"} + + # Extract from nested content structure (standard JSON format) + content = element.get("content", {}) + if not isinstance(content, dict): + return {"content_type": "paragraph", "text": "[Image: Invalid content]"} + + base64Data = content.get("base64Data", "") + altText = content.get("altText", "Image") + caption = content.get("caption", "") + # Validate image data if self._validateImageData(base64Data, altText): + # Return nested structure matching standard JSON format return { - "content_type": "image", - "base64Data": base64Data, - "altText": altText, - "width": sectionData.get("width") if 
isinstance(sectionData, dict) else None, - "height": sectionData.get("height") if isinstance(sectionData, dict) else None, - "caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else "" + "content_type": "image", + "content": { + "base64Data": base64Data, + "altText": altText, + "caption": caption + } } else: # Return placeholder if image data is invalid diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index 337811a4..c7363918 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -677,7 +677,40 @@ class RendererDocx(BaseRenderer): try: image_bytes = base64.b64decode(base64_data) - doc.add_picture(io.BytesIO(image_bytes), width=Inches(4)) + image_stream = io.BytesIO(image_bytes) + + # Get image dimensions to calculate proper size + try: + from PIL import Image as PILImage + pil_image = PILImage.open(image_stream) + img_width_px, img_height_px = pil_image.size + + # DOCX page width is typically 8.5 inches, usable width ~6.5 inches with margins + # Standard margins: 1 inch left/right, so usable width = 6.5 inches + max_width_inches = 6.5 + max_height_inches = 9.0 # Leave room for text above/below + + # Calculate scale factor to fit within page dimensions + # Convert pixels to inches (assuming 96 DPI for modern displays, but images may vary) + # Use conservative estimate: 1 inch = 96 pixels + img_width_inches = img_width_px / 96.0 + img_height_inches = img_height_px / 96.0 + + # Calculate scale to fit + width_scale = max_width_inches / img_width_inches if img_width_inches > max_width_inches else 1.0 + height_scale = max_height_inches / img_height_inches if img_height_inches > max_height_inches else 1.0 + scale = min(width_scale, height_scale, 1.0) # Don't scale up, only down + + final_width = img_width_inches * scale + final_height = img_height_inches * scale + + # Reset 
stream for docx + image_stream.seek(0) + doc.add_picture(image_stream, width=Inches(final_width)) + except Exception: + # Fallback: use conservative default size if PIL fails + image_stream.seek(0) + doc.add_picture(image_stream, width=Inches(6.0)) if alt_text and alt_text != "Image": caption_para = doc.add_paragraph(f"Figure: {alt_text}") diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index dda2c09f..04e7e543 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -417,8 +417,15 @@ class RendererHtml(BaseRenderer): source_text = f' (Source: {source})' if source else '' htmlParts.append(f'

{content}{source_text}

') elif isinstance(element, dict): - # Regular paragraph element - text = element.get("text", element.get("content", "")) + # Regular paragraph element - extract from nested content structure (standard JSON format) + content = element.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + if text: htmlParts.append(f'

{text}

') elif isinstance(element, str): @@ -629,10 +636,11 @@ class RendererHtml(BaseRenderer): """Render a JSON image to HTML with placeholder for later replacement. Expects nested content structure.""" try: import html - # Extract from nested content structure + # Extract from nested content structure (standard JSON format) content = imageData.get("content", {}) if not isinstance(content, dict): return "" + base64Data = content.get("base64Data", "") altText = content.get("altText", "Image") caption = content.get("caption", "") @@ -645,7 +653,9 @@ class RendererHtml(BaseRenderer): # Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris # Include a marker so we can find and replace it imageMarker = f"" - imgTag = f'{altTextEscaped}' + # Add max-width and max-height to ensure image fits within page dimensions + # Typical page width is ~800-1200px, height varies but we limit to 600px for readability + imgTag = f'{altTextEscaped}' if captionEscaped: return f'{imageMarker}
{imgTag}
{captionEscaped}
' diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index a6583a33..f1c3f7fa 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -839,11 +839,18 @@ class RendererPdf(BaseRenderer): availableWidth = 430.0 # Slightly smaller than frame width for safety availableHeight = 730.0 # Slightly smaller than frame height for safety - # Convert original image size from pixels to points (assuming 72 DPI) - # If image DPI is different, PIL will provide correct size - # For safety, use a conservative conversion - imgWidthPoints = originalWidth * (inch / 72) # Convert to inches, then to points - imgHeightPoints = originalHeight * (inch / 72) + # Convert original image size from pixels to points + # PIL provides size in pixels, need to convert to points + # Standard conversion: 1 inch = 72 points, typical screen DPI = 96 pixels/inch + # So: pixels * (72/96) = points, or pixels * 0.75 = points + # But for images, we should use the image's actual DPI if available + dpi = pilImage.info.get('dpi', (96, 96))[0] # Default to 96 DPI if not specified + if dpi <= 0: + dpi = 96 # Fallback to 96 DPI + + # Convert pixels to points: 1 point = 1/72 inch, so pixels * (72/dpi) = points + imgWidthPoints = originalWidth * (72.0 / dpi) + imgHeightPoints = originalHeight * (72.0 / dpi) # Scale to fit within available page dimensions while maintaining aspect ratio widthScale = availableWidth / imgWidthPoints if imgWidthPoints > 0 else 1.0 diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index f824aa62..2fc93892 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -74,19 +74,19 @@ class RendererPptx(BaseRenderer): self._currentPresentation = prs for i, 
slide_data in enumerate(slidesData): - logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars") - # Debug: Show slide content preview + slide_sections = slide_data.get("sections", []) + slide_images = list(slide_data.get("images", [])) # Make copy so we can append slide_content = slide_data.get('content', '') - if slide_content: - logger.info(f" Content preview: '{slide_content[:100]}...'") + hasSections = slide_sections and len(slide_sections) > 0 + + logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars") + + # Determine layout: first slide (i==0) uses title slide layout, others use title+content + if i == 0: + slideLayoutIndex = 0 # Title slide layout else: - logger.warning(f" ⚠️ Slide {i+1} has NO content!") + slideLayoutIndex = 1 # Title and content layout - # Check if slide has images - hasImages = slide_data.get("images") and len(slide_data.get("images", [])) > 0 - - # Create slide with appropriate layout based on content - slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles) slide_layout = prs.slide_layouts[slideLayoutIndex] slide = prs.slides.add_slide(slide_layout) @@ -94,25 +94,33 @@ class RendererPptx(BaseRenderer): title_shape = slide.shapes.title title_shape.text = slide_data.get("title", "Slide") - # Apply title styling + # Apply title styling - LEFT ALIGNED by default title_style = styles.get("title", {}) if title_shape.text_frame.paragraphs[0].font: title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) - - # Handle images first (if present) - if hasImages: - self._addImagesToSlide(slide, slide_data.get("images", 
[]), styles) + # Set left alignment for title + title_shape.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT # Render sections with proper PowerPoint objects (tables, lists, etc.) - slide_sections = slide_data.get("sections", []) - if slide_sections: - # Use content placeholder for structured content - content_shape = slide.placeholders[1] - text_frame = content_shape.text_frame - text_frame.clear() + if hasSections: + # Use content placeholder for structured content (only if layout has placeholder[1]) + try: + content_shape = slide.placeholders[1] + text_frame = content_shape.text_frame + text_frame.clear() + except (AttributeError, IndexError): + # Layout might not have placeholder[1], create textbox instead + from pptx.util import Inches + left = Inches(0.5) + top = Inches(1.5) + width = prs.slide_width - Inches(1) + height = prs.slide_height - top - Inches(0.5) + textbox = slide.shapes.add_textbox(left, top, width, height) + text_frame = textbox.text_frame + text_frame.word_wrap = True # Track vertical position for multiple content types current_y = Inches(1.5) # Start below title @@ -121,6 +129,22 @@ class RendererPptx(BaseRenderer): section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Check if section has image content_type + if section_type == "image": + # Extract images from this section + for element in elements: + if isinstance(element, dict) and element.get("type") == "image": + content = element.get("content", {}) + if isinstance(content, dict): + base64Data = content.get("base64Data") + if base64Data: + slide_images.append({ + "base64Data": base64Data, + "altText": content.get("altText", "Image"), + "caption": content.get("caption", "") + }) + continue # Skip rendering image sections as text + # Handle sections without elements (e.g., headings that create slides) if not elements: continue @@ -134,53 +158,80 @@ class RendererPptx(BaseRenderer): if not element_type: element_type = section_type + # Skip 
image elements - they're handled separately + if element_type == "image": + content = element.get("content", {}) + if isinstance(content, dict): + base64Data = content.get("base64Data") + if base64Data: + slide_images.append({ + "base64Data": base64Data, + "altText": content.get("altText", "Image"), + "caption": content.get("caption", "") + }) + continue + if element_type == "table": # Render as actual PowerPoint table self._addTableToSlide(slide, element, styles, current_y) current_y += Inches(2) # Space for table elif element_type == "bullet_list" or element_type == "list": # Render as actual PowerPoint bullet list - self._addBulletListToSlide(slide, element, styles, text_frame) + if text_frame: + self._addBulletListToSlide(slide, element, styles, text_frame) elif element_type == "heading": # Render as heading in text frame - self._addHeadingToSlide(slide, element, styles, text_frame) + if text_frame: + self._addHeadingToSlide(slide, element, styles, text_frame) elif element_type == "paragraph": # Render as paragraph in text frame - self._addParagraphToSlide(slide, element, styles, text_frame) + if text_frame: + self._addParagraphToSlide(slide, element, styles, text_frame) elif element_type == "code_block" or element_type == "code": # Render as formatted code block - self._addCodeBlockToSlide(slide, element, styles, text_frame) + if text_frame: + self._addCodeBlockToSlide(slide, element, styles, text_frame) elif element_type == "extracted_text": # Render extracted text as paragraph with styling - content = element.get("content", "") - source = element.get("source", "") - if content: - paragraph_style = styles.get("paragraph", {}) - p = text_frame.add_paragraph() - p.text = content - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = paragraph_style.get("bold", False) - p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) - if source: - p.add_run(f" (Source: {source})").font.italic = True + if text_frame: 
+ content = element.get("content", "") + source = element.get("source", "") + if content: + paragraph_style = styles.get("paragraph", {}) + p = text_frame.add_paragraph() + p.text = content + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = paragraph_style.get("bold", False) + p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + p.alignment = PP_ALIGN.LEFT # Left align by default + if source: + p.add_run(f" (Source: {source})").font.italic = True elif element_type == "reference": # Render reference - label = element.get("label", "Reference") - p = text_frame.add_paragraph() - p.text = f"[Reference: {label}]" - p.font.italic = True + if text_frame: + label = element.get("label", "Reference") + p = text_frame.add_paragraph() + p.text = f"[Reference: {label}]" + p.font.italic = True + p.alignment = PP_ALIGN.LEFT else: # Fallback: try to render as paragraph - content = element.get("content", "") - if isinstance(content, dict): - text = content.get("text", "") - elif isinstance(content, str): - text = content - else: - text = "" - - if text: - self._addParagraphToSlide(slide, element, styles, text_frame) + if text_frame: + content = element.get("content", "") + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + + if text: + self._addParagraphToSlide(slide, element, styles, text_frame) + + # Handle images after processing sections (images may have been extracted from sections) + hasImages = len(slide_images) > 0 + if hasImages: + self._addImagesToSlide(slide, slide_images, styles) # Fallback: if no sections but has content text, render as before elif slide_content and not hasImages: @@ -1097,13 +1148,18 @@ JSON ONLY. 
NO OTHER TEXT.""" pilImage = PILImage.open(imageStream) imgWidth, imgHeight = pilImage.size - # Scale to fit available space (max 80% of slide) - maxWidth = availableWidth * 0.8 - maxHeight = availableHeight * 0.8 + # Scale to fit available space (max 90% of slide for better visibility) + # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96) + # Conversion: pixels * (72/96) = points + imgWidthPoints = imgWidth * (72.0 / 96.0) + imgHeightPoints = imgHeight * (72.0 / 96.0) - scale = min(maxWidth / imgWidth, maxHeight / imgHeight, 1.0) - finalWidth = imgWidth * scale - finalHeight = imgHeight * scale + maxWidth = availableWidth * 0.9 + maxHeight = availableHeight * 0.9 + + scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0) + finalWidth = imgWidthPoints * scale + finalHeight = imgHeightPoints * scale # Center image left = (slideWidth - finalWidth) / 2 @@ -1184,7 +1240,12 @@ JSON ONLY. NO OTHER TEXT.""" num_cols = len(headers) num_rows = len(rows) + 1 # +1 for header row left = Inches(0.5) - width = slide.presentation.slide_width - Inches(1) + # Get presentation from stored reference or slide + if hasattr(self, '_currentPresentation'): + prs = self._currentPresentation + else: + prs = slide.presentation + width = prs.slide_width - Inches(1) row_height = Inches(0.4) # Create table @@ -1251,6 +1312,7 @@ JSON ONLY. NO OTHER TEXT.""" try: from pptx.util import Pt from pptx.dml.color import RGBColor + from pptx.enum.text import PP_ALIGN # Extract from nested content structure content = element.get("content", {}) @@ -1272,6 +1334,16 @@ JSON ONLY. 
NO OTHER TEXT.""" p.level = 0 p.font.size = Pt(list_style.get("font_size", 18)) p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47)))) + p.alignment = PP_ALIGN.LEFT # Left align bullet lists + p.space_before = Pt(6) + # Enable bullet points - set bullet type to enable bullets + try: + from pptx.enum.text import MSO_AUTO_NUMBER + p.paragraph_format.bullet.type = MSO_AUTO_NUMBER.BULLET + except (ImportError, AttributeError): + # Fallback: bullets are usually enabled by default when level is set + # Just ensure level is set (already done above) + pass except Exception as e: logger.warning(f"Error adding bullet list to slide: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index 3ff49788..1051e7bf 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -1057,12 +1057,21 @@ class RendererXlsx(BaseRenderer): # Create openpyxl Image img = OpenpyxlImage(imageStream) - # Set image size (max width 6 inches, maintain aspect ratio) - maxWidth = 400 # pixels (approximately 6 inches at 72 DPI) - if img.width > maxWidth: - scale = maxWidth / img.width - img.width = maxWidth - img.height = int(img.height * scale) + # Calculate max width based on Excel column width + # Excel default column width is ~64 pixels (8.43 characters at default font) + # Use multiple columns for image width (typically 8-10 columns = ~512-640 pixels) + # Standard Excel sheet width is ~1024 pixels (14.5 inches at 72 DPI) + # Use 80% of sheet width to leave margins + maxWidth = 800 # pixels (approximately 11 inches at 72 DPI, fits within page) + maxHeight = 600 # pixels (approximately 8.3 inches at 72 DPI) + + # Scale image to fit within page dimensions while maintaining aspect ratio + width_scale = maxWidth / img.width if img.width > maxWidth else 1.0 + height_scale = maxHeight / img.height 
if img.height > maxHeight else 1.0 + scale = min(width_scale, height_scale, 1.0) # Don't scale up, only down + + img.width = int(img.width * scale) + img.height = int(img.height * scale) # Anchor image to cell (A column, current row) img.anchor = f'A{startRow}' diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 1eb453ee..36673ed0 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -81,30 +81,61 @@ class ContentValidator: if section.get("content_type") == "table": if elements and isinstance(elements, list) and len(elements) > 0: tableElement = elements[0] - sectionSummary["caption"] = tableElement.get("caption") - headers = tableElement.get("headers", []) - rows = tableElement.get("rows", []) - sectionSummary["columnCount"] = len(headers) - sectionSummary["rowCount"] = len(rows) - sectionSummary["headers"] = headers # Include headers for context + content = tableElement.get("content", {}) + if isinstance(content, dict): + headers = content.get("headers", []) + rows = content.get("rows", []) + else: + headers = tableElement.get("headers", []) + rows = tableElement.get("rows", []) + if headers: + sectionSummary["columnCount"] = len(headers) + sectionSummary["headers"] = headers # Include headers for context + if rows: + sectionSummary["rowCount"] = len(rows) + sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None) - # For lists: extract item count - elif section.get("content_type") == "list": + # For lists and bullet_lists: extract item count + elif section.get("content_type") in ["list", "bullet_list"]: if elements and isinstance(elements, list) and len(elements) > 0: listElement = elements[0] - items = listElement.get("items", []) - sectionSummary["itemCount"] = len(items) + content = listElement.get("content", {}) + if 
isinstance(content, dict): + items = content.get("items", []) + else: + items = listElement.get("items", []) + if items: + sectionSummary["itemCount"] = len(items) - # For paragraphs/headings: extract text preview + # For paragraphs/headings: extract text statistics (no preview for security) elif section.get("content_type") in ["paragraph", "heading"]: if elements and isinstance(elements, list) and len(elements) > 0: textElement = elements[0] - text = textElement.get("text", "") + content = textElement.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + else: + text = textElement.get("text", "") if text: - sectionSummary["textPreview"] = text[:100] + ("..." if len(text) > 100 else "") - # Also check for textPreview directly in section (for web crawl results) - if section.get("textPreview"): - sectionSummary["textPreview"] = section.get("textPreview") + sectionSummary["textLength"] = len(text) + sectionSummary["wordCount"] = len(text.split()) + # Also check for text length if available directly in section + if section.get("textLength"): + sectionSummary["textLength"] = section.get("textLength") + + # For code blocks: extract code statistics (no preview for security) + elif section.get("content_type") == "code_block": + if elements and isinstance(elements, list) and len(elements) > 0: + codeElement = elements[0] + content = codeElement.get("content", {}) + if isinstance(content, dict): + code = content.get("code", "") + language = content.get("language", "") + if code: + sectionSummary["codeLength"] = len(code) + sectionSummary["codeLineCount"] = code.count('\n') + 1 + if language: + sectionSummary["language"] = language # Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu contentPartIds = section.get("contentPartIds", []) @@ -120,8 +151,30 @@ class ContentValidator: # Include any additional fields from section (generic approach) # This ensures all action-specific fields are preserved + # BUT 
exclude type-specific KPIs that don't belong to this content_type + contentType = section.get("content_type", "") + # Define KPIs that are ONLY valid for specific types + typeExclusiveKpis = { + "table": ["columnCount", "rowCount", "headers"], # Only for tables + "bullet_list": ["itemCount"], # Only for bullet_list + "list": ["itemCount"] # Only for list + } + excludedKpis = [] + for kpiType, kpiFields in typeExclusiveKpis.items(): + if kpiType != contentType: + excludedKpis.extend(kpiFields) + for key, value in section.items(): - if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately + if key not in sectionSummary and key not in ["elements"] and key not in excludedKpis: + # Don't copy type-specific KPIs if they're 0/empty and we didn't extract them ourselves + # This prevents copying columnCount: 0, rowCount: 0, headers: [] from structure generation phase + if key in ["columnCount", "rowCount", "headers", "itemCount"]: + # Skip if it's 0/empty - we'll only include KPIs we extracted from elements + if isinstance(value, int) and value == 0: + continue + if isinstance(value, list) and len(value) == 0: + continue + # Include simple types (str, int, float, bool, list of primitives) if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10): sectionSummary[key] = value @@ -146,12 +199,60 @@ class ContentValidator: if section.get("content_type") == "table": if elements and isinstance(elements, list) and len(elements) > 0: tableElement = elements[0] - sectionSummary["caption"] = tableElement.get("caption") - headers = tableElement.get("headers", []) - rows = tableElement.get("rows", []) - sectionSummary["columnCount"] = len(headers) - sectionSummary["rowCount"] = len(rows) - sectionSummary["headers"] = headers + content = tableElement.get("content", {}) + if isinstance(content, dict): + headers = content.get("headers", []) + rows = content.get("rows", []) + else: + headers = 
tableElement.get("headers", []) + rows = tableElement.get("rows", []) + if headers: + sectionSummary["columnCount"] = len(headers) + sectionSummary["headers"] = headers + if rows: + sectionSummary["rowCount"] = len(rows) + sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None) + + # For lists and bullet_lists: extract item count + elif section.get("content_type") in ["list", "bullet_list"]: + if elements and isinstance(elements, list) and len(elements) > 0: + listElement = elements[0] + content = listElement.get("content", {}) + if isinstance(content, dict): + items = content.get("items", []) + else: + items = listElement.get("items", []) + if items: + sectionSummary["itemCount"] = len(items) + + # For paragraphs/headings: extract text statistics (no preview for security) + elif section.get("content_type") in ["paragraph", "heading"]: + if elements and isinstance(elements, list) and len(elements) > 0: + textElement = elements[0] + content = textElement.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + else: + text = textElement.get("text", "") + if text: + sectionSummary["textLength"] = len(text) + sectionSummary["wordCount"] = len(text.split()) + if section.get("textLength"): + sectionSummary["textLength"] = section.get("textLength") + + # For code blocks: extract code statistics (no preview for security) + elif section.get("content_type") == "code_block": + if elements and isinstance(elements, list) and len(elements) > 0: + codeElement = elements[0] + content = codeElement.get("content", {}) + if isinstance(content, dict): + code = content.get("code", "") + language = content.get("language", "") + if code: + sectionSummary["codeLength"] = len(code) + sectionSummary["codeLineCount"] = code.count('\n') + 1 + if language: + sectionSummary["language"] = language # Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu 
contentPartIds = section.get("contentPartIds", []) @@ -166,8 +267,30 @@ class ContentValidator: sectionSummary["note"] = "ContentParts referenced but metadata not available" # Include any additional fields from section (generic approach) + # BUT exclude type-specific KPIs that don't belong to this content_type + contentType = section.get("content_type", "") + # Define KPIs that are ONLY valid for specific types + typeExclusiveKpis = { + "table": ["columnCount", "rowCount", "headers"], # Only for tables + "bullet_list": ["itemCount"], # Only for bullet_list + "list": ["itemCount"] # Only for list + } + excludedKpis = [] + for kpiType, kpiFields in typeExclusiveKpis.items(): + if kpiType != contentType: + excludedKpis.extend(kpiFields) + for key, value in section.items(): - if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately + if key not in sectionSummary and key not in ["elements"] and key not in excludedKpis: + # Don't copy type-specific KPIs if they're 0/empty and we didn't extract them ourselves + # This prevents copying columnCount: 0, rowCount: 0, headers: [] from structure generation phase + if key in ["columnCount", "rowCount", "headers", "itemCount"]: + # Skip if it's 0/empty - we'll only include KPIs we extracted from elements + if isinstance(value, int) and value == 0: + continue + if isinstance(value, list) and len(value) == 0: + continue + # Include simple types (str, int, float, bool, list of primitives) if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10): sectionSummary[key] = value @@ -533,10 +656,11 @@ CRITICAL: Validate ONLY metadata/structure. Documents may be binary (PDF, DOCX, VALIDATION RULES: 1. METADATA ONLY: Use jsonStructure (sections, contentPartIds, content_type, statistics) and contentPreview (dataType, contentLength, looksLikeRenderedContent) for validation. These are METADATA indicators, NOT actual content. 2. 
FORMAT VALIDATION: Check mimeType/format metadata only. Do NOT inspect content to determine format. Format mismatch = wrong_format gap. -3. CONTENT EXISTENCE: Use contentPreview.looksLikeRenderedContent=true to confirm content exists. Use jsonStructure.content_type to confirm data types exist (e.g., "image" section = image exists). Do NOT validate content quality, accuracy, or completeness of actual data values. -4. STRUCTURE VALIDATION: Use jsonStructure.sections, statistics (counts, rowCount, columnCount) as evidence. Trust structure metadata over format claims. +3. CONTENT EXISTENCE: Use contentPreview.looksLikeRenderedContent=true to confirm content exists. Use jsonStructure.content_type to confirm data types exist (e.g., "image" section = image exists, "bullet_list" section = bullet list exists, "table" section = table exists). If a section with a content_type exists, the content has been delivered. Do NOT assume content was AI-generated vs extracted - if the section exists, it was delivered. +4. STRUCTURE VALIDATION: Use jsonStructure.sections, statistics (counts, rowCount, columnCount, itemCount) as evidence. Trust structure metadata over format claims. Only check KPIs if they are present (missing KPIs mean elements not yet populated, not that content is missing). 5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done"). 6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria. +7. NO ASSUMPTIONS: Do NOT assume content was AI-generated vs extracted. If a section exists with content_type, the content was delivered. Only validate what is present in the metadata. 
VALIDATION STEPS: - Check ACTION HISTORY for process-oriented criteria From bcbaf41f4fd4b9128b18ee7edc9886e892cd6470 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 29 Dec 2025 02:09:33 +0100 Subject: [PATCH 19/21] fixed generation to renderer --- .../serviceAi/subStructureGeneration.py | 17 ++ .../renderers/rendererPptx.py | 81 +++++-- .../renderers/rendererXlsx.py | 198 +++++++++++++----- 3 files changed, 222 insertions(+), 74 deletions(-) diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py index a11fba62..84e659a4 100644 --- a/modules/services/serviceAi/subStructureGeneration.py +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -160,6 +160,13 @@ IMPORTANT - CHAPTER INDEPENDENCE: - One chapter does NOT have information about another chapter - Each chapter must provide its own context and be understandable alone +CRITICAL - CHAPTERS WITHOUT CONTENT PARTS: +- If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch +- Include: what to generate, what information to include, purpose, specific details +- Without content parts, AI relies ENTIRELY on generationHint +- GOOD: "Create [specific content] with [details]. Include [information]. Purpose: [explanation]." +- BAD: "Create title" or "Add section" (too vague) + For each chapter: - chapter id - level (1, 2, 3, etc.) 
@@ -171,6 +178,7 @@ For each chapter: }} }} - generationHint: Description of the content (must be self-contained with all necessary context) + * If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch OUTPUT FORMAT: {outputFormat} @@ -197,6 +205,15 @@ RETURN JSON: }}, "generationHint": "Create introduction section", "sections": [] + }}, + {{ + "id": "chapter_2", + "level": 1, + "title": "Main Title", + "contentPartIds": [], + "contentPartInstructions": {{}}, + "generationHint": "Create [specific content description] with [formatting details]. Include [required information]. Purpose: [explanation of what this chapter provides].", + "sections": [] }} ] }}] diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 2fc93892..9e6f41c9 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -78,12 +78,24 @@ class RendererPptx(BaseRenderer): slide_images = list(slide_data.get("images", [])) # Make copy so we can append slide_content = slide_data.get('content', '') hasSections = slide_sections and len(slide_sections) > 0 + hasImages = len(slide_images) > 0 logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars") - # Determine layout: first slide (i==0) uses title slide layout, others use title+content + # Determine layout: first slide (i==0) uses title slide layout + # For image-only slides, use blank layout to avoid placeholder interference + # Otherwise use title+content layout if i == 0: slideLayoutIndex = 0 # Title slide layout + elif hasImages and not hasSections and not slide_content: + # Image-only slide: use blank layout (typically index 6, fallback to 5 if not available) + try: + slideLayoutIndex = 6 # Blank layout + # 
Verify layout exists, fallback if not + if slideLayoutIndex >= len(prs.slide_layouts): + slideLayoutIndex = 5 # Alternative blank layout + except (AttributeError, IndexError): + slideLayoutIndex = 1 # Fallback to title+content else: slideLayoutIndex = 1 # Title and content layout @@ -91,18 +103,32 @@ class RendererPptx(BaseRenderer): slide = prs.slides.add_slide(slide_layout) # Set title with AI-generated styling - title_shape = slide.shapes.title - title_shape.text = slide_data.get("title", "Slide") - - # Apply title styling - LEFT ALIGNED by default - title_style = styles.get("title", {}) - if title_shape.text_frame.paragraphs[0].font: - title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) - title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) + # For blank layouts, add title as textbox since there's no title placeholder + try: + title_shape = slide.shapes.title + title_shape.text = slide_data.get("title", "Slide") + + # Apply title styling - LEFT ALIGNED by default + title_style = styles.get("title", {}) + if title_shape.text_frame.paragraphs[0].font: + title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) + title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) + title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) + title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) + # Set left alignment for title + title_shape.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + except AttributeError: + # Blank layout has no title placeholder - add title as textbox + from pptx.util import Inches + titleBox = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), prs.slide_width - Inches(1), Inches(0.8)) + titleFrame = titleBox.text_frame + titleFrame.text = slide_data.get("title", "Slide") + title_style = styles.get("title", {}) + titleFrame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) + 
titleFrame.paragraphs[0].font.bold = title_style.get("bold", True) title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) - title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) - # Set left alignment for title - title_shape.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color) + titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT # Render sections with proper PowerPoint objects (tables, lists, etc.) if hasSections: @@ -229,6 +255,7 @@ class RendererPptx(BaseRenderer): self._addParagraphToSlide(slide, element, styles, text_frame) # Handle images after processing sections (images may have been extracted from sections) + # Update hasImages in case images were added during section processing hasImages = len(slide_images) > 0 if hasImages: self._addImagesToSlide(slide, slide_images, styles) @@ -1138,9 +1165,20 @@ JSON ONLY. NO OTHER TEXT.""" # Single image: center it img = images[0] base64Data = img.get("base64Data") - if base64Data: - imageBytes = base64.b64decode(base64Data) - imageStream = io.BytesIO(imageBytes) + # Validate base64Data is present and not empty + if base64Data and isinstance(base64Data, str) and len(base64Data.strip()) > 0: + try: + imageBytes = base64.b64decode(base64Data) + if len(imageBytes) == 0: + logger.error("Decoded image bytes are empty") + return + imageStream = io.BytesIO(imageBytes) + except Exception as decode_error: + logger.error(f"Failed to decode base64 image data: {str(decode_error)}") + return + else: + logger.error(f"Invalid base64Data: present={bool(base64Data)}, type={type(base64Data)}, length={len(base64Data) if base64Data else 0}") + return # Get image dimensions try: @@ -1175,7 +1213,16 @@ JSON ONLY. 
NO OTHER TEXT.""" imageStream.seek(0) # Add image to slide - slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) + try: + slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) + except Exception as add_error: + # If add_picture fails, try with explicit format + imageStream.seek(0) + # Ensure we have valid image data + if len(imageBytes) > 0: + slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) + else: + raise Exception(f"Empty image data: {add_error}") # Add caption if available caption = img.get("caption") or img.get("altText") @@ -1217,6 +1264,8 @@ JSON ONLY. NO OTHER TEXT.""" except Exception as e: logger.error(f"Error embedding images in PPTX slide: {str(e)}") + import traceback + logger.error(f"Traceback: {traceback.format_exc()}") def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float) -> None: """Add a PowerPoint table to slide.""" diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index 1051e7bf..c1992f94 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -113,14 +113,37 @@ class RendererXlsx(BaseRenderer): analysisSheet = wb.create_sheet("Analysis", 2) # Add content to sheets - self._populateSummarySheet(summarySheet, title) + self._populateSummarySheet(summarySheet, title, wb) self._populateDataSheet(dataSheet, content) self._populateAnalysisSheet(analysisSheet, content) - # Save to buffer + # Ensure workbook has at least one sheet (Excel requirement) + if len(wb.worksheets) == 0: + wb.create_sheet("Sheet1") + + # Save to buffer with error handling buffer = io.BytesIO() - wb.save(buffer) - buffer.seek(0) + try: + wb.save(buffer) + buffer.seek(0) + except Exception as save_error: + self.logger.error(f"Error saving Excel workbook: 
{str(save_error)}") + # Try to fix common issues and retry + try: + # Remove any invalid sheet names or empty sheets + for sheet in list(wb.worksheets): + if not sheet.title or len(sheet.title.strip()) == 0: + wb.remove(sheet) + # Ensure at least one sheet exists + if len(wb.worksheets) == 0: + wb.create_sheet("Sheet1") + # Retry save + buffer = io.BytesIO() + wb.save(buffer) + buffer.seek(0) + except Exception as retry_error: + self.logger.error(f"Retry save also failed: {str(retry_error)}") + raise Exception(f"Failed to save Excel workbook: {str(save_error)}") # Convert to base64 excelBytes = buffer.getvalue() @@ -132,7 +155,7 @@ class RendererXlsx(BaseRenderer): self.logger.error(f"Error generating Excel: {str(e)}") raise - def _populateSummarySheet(self, sheet, title: str): + def _populateSummarySheet(self, sheet, title: str, wb: Workbook = None): """Populate the summary sheet.""" try: # Title @@ -150,7 +173,11 @@ class RendererXlsx(BaseRenderer): sheet['A6'] = "Key Metrics:" sheet['A6'].font = Font(bold=True) sheet['A7'] = "Total Items:" - sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet + # Only add formula if Data sheet exists (check workbook sheets) + if wb and "Data" in [s.title for s in wb.worksheets]: + sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet + else: + sheet['B7'] = "N/A" # Data sheet not available # Auto-adjust column widths sheet.column_dimensions['A'].width = 20 @@ -167,7 +194,7 @@ class RendererXlsx(BaseRenderer): for col, header in enumerate(headers, 1): cell = sheet.cell(row=1, column=col, value=header) cell.font = Font(bold=True) - cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid") + cell.fill = PatternFill(start_color="FFCCCCCC", end_color="FFCCCCCC", fill_type="solid") # Process content lines = content.split('\n') @@ -271,10 +298,33 @@ class RendererXlsx(BaseRenderer): # Populate sheets with content self._populateExcelSheets(sheets, jsonContent, styles) 
- # Save to buffer + # Ensure workbook has at least one sheet (Excel requirement) + if len(wb.worksheets) == 0: + wb.create_sheet("Sheet1") + + # Save to buffer with error handling buffer = io.BytesIO() - wb.save(buffer) - buffer.seek(0) + try: + wb.save(buffer) + buffer.seek(0) + except Exception as save_error: + self.logger.error(f"Error saving Excel workbook: {str(save_error)}") + # Try to fix common issues and retry + try: + # Remove any invalid sheet names or empty sheets + for sheet in list(wb.worksheets): + if not sheet.title or len(sheet.title.strip()) == 0: + wb.remove(sheet) + # Ensure at least one sheet exists + if len(wb.worksheets) == 0: + wb.create_sheet("Sheet1") + # Retry save + buffer = io.BytesIO() + wb.save(buffer) + buffer.seek(0) + except Exception as retry_error: + self.logger.error(f"Retry save also failed: {str(retry_error)}") + raise Exception(f"Failed to save Excel workbook: {str(save_error)}") # Convert to base64 excelBytes = buffer.getvalue() @@ -348,30 +398,46 @@ class RendererXlsx(BaseRenderer): # Fix table header contrast if "table_header" in styles: header = styles["table_header"] - bgColor = header.get("background", "#FFFFFF") - textColor = header.get("text_color", "#000000") + bgColor = header.get("background", "FFFFFFFF") + textColor = header.get("text_color", "FF000000") + + # Normalize colors (remove # if present, ensure aRGB format) + bgColor = self._normalizeColor(bgColor) + textColor = self._normalizeColor(textColor) # If both are white or both are dark, fix it - if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF": - header["background"] = "#FF4F4F4F" - header["text_color"] = "#FFFFFFFF" - elif bgColor.upper() == "#000000" and textColor.upper() == "#000000": - header["background"] = "#FF4F4F4F" - header["text_color"] = "#FFFFFFFF" + if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF": + header["background"] = "FF4F4F4F" + header["text_color"] = "FFFFFFFF" + elif bgColor.upper() == "FF000000" 
and textColor.upper() == "FF000000": + header["background"] = "FF4F4F4F" + header["text_color"] = "FFFFFFFF" + else: + # Ensure colors are in correct format + header["background"] = bgColor + header["text_color"] = textColor # Fix table cell contrast if "table_cell" in styles: cell = styles["table_cell"] - bgColor = cell.get("background", "#FFFFFF") - textColor = cell.get("text_color", "#000000") + bgColor = cell.get("background", "FFFFFFFF") + textColor = cell.get("text_color", "FF000000") + + # Normalize colors (remove # if present, ensure aRGB format) + bgColor = self._normalizeColor(bgColor) + textColor = self._normalizeColor(textColor) # If both are white or both are dark, fix it - if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF": - cell["background"] = "#FFFFFFFF" - cell["text_color"] = "#FF2F2F2F" - elif bgColor.upper() == "#000000" and textColor.upper() == "#000000": - cell["background"] = "#FFFFFFFF" - cell["text_color"] = "#FF2F2F2F" + if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF": + cell["background"] = "FFFFFFFF" + cell["text_color"] = "FF2F2F2F" + elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000": + cell["background"] = "FFFFFFFF" + cell["text_color"] = "FF2F2F2F" + else: + # Ensure colors are in correct format + cell["background"] = bgColor + cell["text_color"] = textColor return styles @@ -379,16 +445,39 @@ class RendererXlsx(BaseRenderer): self.logger.warning(f"Style validation failed: {str(e)}") return self._getDefaultStyleSet() + def _normalizeColor(self, colorValue: str) -> str: + """Normalize color to aRGB format without # prefix.""" + if not isinstance(colorValue, str): + return "FF000000" + + # Remove # prefix if present + if colorValue.startswith('#'): + colorValue = colorValue[1:] + + # Convert to uppercase for consistency + colorValue = colorValue.upper() + + # Ensure aRGB format (8 characters) + if len(colorValue) == 6: + # Convert RRGGBB to AARRGGBB (add FF alpha channel) + 
return f"FF{colorValue}" + elif len(colorValue) == 8: + # Already aRGB format + return colorValue + else: + # Unexpected format, return default black + return "FF000000" + def _getDefaultStyleSet(self) -> Dict[str, Any]: """Default Excel style set - used when no style instructions present.""" return { - "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "left"}, - "heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"}, - "table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"}, - "table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"}, - "bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2}, - "paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"}, - "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"} + "title": {"font_size": 16, "color": "FF1F4E79", "bold": True, "align": "left"}, + "heading": {"font_size": 14, "color": "FF2F2F2F", "bold": True, "align": "left"}, + "table_header": {"background": "FF4F4F4F", "text_color": "FFFFFFFF", "bold": True, "align": "center"}, + "table_cell": {"background": "FFFFFFFF", "text_color": "FF2F2F2F", "bold": False, "align": "left"}, + "bullet_list": {"font_size": 11, "color": "FF2F2F2F", "indent": 2}, + "paragraph": {"font_size": 11, "color": "FF2F2F2F", "bold": False, "align": "left"}, + "code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"} } async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]: @@ -450,37 +539,26 @@ class RendererXlsx(BaseRenderer): """Get a safe aRGB color value for Excel (without # prefix).""" if not isinstance(colorValue, str): return default - - # Remove # prefix if present - if colorValue.startswith('#'): - colorValue = colorValue[1:] - - if len(colorValue) 
== 6: - # Convert RRGGBB to AARRGGBB - return f"FF{colorValue}" - elif len(colorValue) == 8: - # Already aRGB format - return colorValue - else: - # Unexpected format, return default + # Use the normalize function for consistency + try: + normalized = self._normalizeColor(colorValue) + return normalized + except Exception: return default def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: - """Convert hex colors to aRGB format for Excel compatibility.""" + """Convert hex colors to aRGB format for Excel compatibility (without # prefix).""" try: self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER") for styleName, styleConfig in styles.items(): if isinstance(styleConfig, dict): for prop, value in styleConfig.items(): - if isinstance(value, str) and value.startswith('#') and len(value) == 7: - # Convert #RRGGBB to #AARRGGBB (add FF alpha channel) - styles[styleName][prop] = f"FF{value[1:]}" - elif isinstance(value, str) and value.startswith('#') and len(value) == 9: - pass # Already aRGB format - elif isinstance(value, str) and value.startswith('#'): - pass # Unexpected format, keep as is + if isinstance(value, str): + # Normalize color to aRGB format without # prefix + styles[styleName][prop] = self._normalizeColor(value) return styles except Exception as e: + self.logger.warning(f"Color conversion failed: {str(e)}") return styles def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: @@ -835,13 +913,13 @@ class RendererXlsx(BaseRenderer): self.logger.warning(f"Could not add section to sheet: {str(e)}") return startRow + 1 - def _sanitizeCellValue(self, value: Any) -> str: - """Sanitize cell value: remove markdown, convert to string, handle None.""" + def _sanitizeCellValue(self, value: Any) -> Any: + """Sanitize cell value: remove markdown, convert to string, handle None, limit length.""" if value is None: return "" if isinstance(value, dict): # 
Extract value from dict if present - return str(value.get("value", "")) + value = value.get("value", "") if isinstance(value, (int, float)): return value # Keep numbers as-is # Convert to string and remove markdown formatting @@ -852,7 +930,11 @@ class RendererXlsx(BaseRenderer): text = text.replace("*", "") # Remove other markdown text = text.replace("__", "").replace("_", "") - return text.strip() + text = text.strip() + # Excel cell value limit is 32,767 characters - truncate if necessary + if len(text) > 32767: + text = text[:32764] + "..." + return text def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a table element to Excel sheet with proper formatting and borders.""" From bc2dd6687d338e3448947f2a9fba90caf123d97f Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 29 Dec 2025 02:22:01 +0100 Subject: [PATCH 20/21] adapted all renderers from flat to nested element.content.(...) structure --- .../renderers/rendererBaseTemplate.py | 213 ------------------ .../renderers/rendererHtml.py | 86 +++---- .../renderers/rendererMarkdown.py | 46 ++-- .../renderers/rendererText.py | 54 +++-- 4 files changed, 90 insertions(+), 309 deletions(-) diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index e582ddff..efe53eaa 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -198,161 +198,6 @@ class BaseRenderer(ABC): return section.get("id", "unknown") return "unknown" - def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: - """Extract table headers and rows from section data. 
Expects nested content structure.""" - # Normalize when elements array was passed in - if isinstance(sectionData, list): - if sectionData and isinstance(sectionData[0], dict): - sectionData = sectionData[0] - else: - return [], [] - # Ensure sectionData is a dict - if not isinstance(sectionData, dict): - return [], [] - # Extract from nested content structure - content = sectionData.get("content", {}) - if not isinstance(content, dict): - return [], [] - headers = content.get("headers", []) - rows = content.get("rows", []) - return headers, rows - - def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]: - """Extract bullet list items from section data. Expects nested content structure.""" - # Normalize when elements array was passed in - if isinstance(sectionData, list): - if sectionData and isinstance(sectionData[0], dict): - sectionData = sectionData[0] - else: - return [] - # Ensure sectionData is a dict - if not isinstance(sectionData, dict): - return [] - # Extract from nested content structure - content = sectionData.get("content", {}) - if not isinstance(content, dict): - return [] - items = content.get("items", []) - result = [] - for item in items: - if isinstance(item, str): - result.append(item) - elif isinstance(item, dict) and "text" in item: - result.append(item["text"]) - return result - - def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]: - """Extract heading level and text from section data. 
Expects nested content structure.""" - # Normalize when elements array was passed in - if isinstance(sectionData, list): - if sectionData and isinstance(sectionData[0], dict): - sectionData = sectionData[0] - else: - return 1, "" - # Ensure sectionData is a dict - if not isinstance(sectionData, dict): - return 1, "" - # Extract from nested content structure - content = sectionData.get("content", {}) - if not isinstance(content, dict): - return 1, "" - level = content.get("level", 1) - text = content.get("text", "") - return level, text - - def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str: - """Extract paragraph text from section data. Expects nested content structure.""" - if isinstance(sectionData, list): - # Join multiple paragraph elements if provided as a list - texts = [] - for el in sectionData: - if isinstance(el, dict): - content = el.get("content", {}) - if isinstance(content, dict): - text = content.get("text", "") - elif isinstance(content, str): - text = content - else: - text = "" - if text: - texts.append(text) - elif isinstance(el, str): - texts.append(el) - return "\n".join(texts) - # Extract from nested content structure - if not isinstance(sectionData, dict): - return "" - content = sectionData.get("content", {}) - if isinstance(content, dict): - return content.get("text", "") - elif isinstance(content, str): - return content - return "" - - def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: - """Extract code and language from section data. 
Expects nested content structure.""" - # Normalize when elements array was passed in - if isinstance(sectionData, list): - if sectionData and isinstance(sectionData[0], dict): - sectionData = sectionData[0] - else: - return "", "" - # Ensure sectionData is a dict - if not isinstance(sectionData, dict): - return "", "" - # Extract from nested content structure - content = sectionData.get("content", {}) - if not isinstance(content, dict): - return "", "" - code = content.get("code", "") - language = content.get("language", "") - return code, language - - def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: - """Extract base64 data and alt text from section data. Expects nested content structure.""" - # Normalize when elements array was passed in - if isinstance(sectionData, list): - if sectionData and isinstance(sectionData[0], dict): - sectionData = sectionData[0] - else: - return "", "Image" - # Ensure sectionData is a dict - if not isinstance(sectionData, dict): - return "", "Image" - # Extract from nested content structure - content = sectionData.get("content", {}) - if not isinstance(content, dict): - return "", "Image" - base64Data = content.get("base64Data", "") - altText = content.get("altText", "Image") - return base64Data, altText - - def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any: - """ - Render an image section. This is a base implementation that should be overridden - by format-specific renderers. 
- - Args: - section: Image section data - styles: Optional styling information - - Returns: - Format-specific image representation - """ - sectionData = self._getSectionData(section) - base64Data, altText = self._extractImageData(sectionData) - - # Base implementation returns a simple dict - # Format-specific renderers should override this method - return { - "content_type": "image", - "base64Data": base64Data, - "altText": altText, - "width": sectionData.get("width", None), - "height": sectionData.get("height", None), - "caption": sectionData.get("caption", "") - } - def _validateImageData(self, base64Data: str, altText: str) -> bool: """Validate image data.""" if not base64Data: @@ -429,64 +274,6 @@ class BaseRenderer(ABC): """Check if a section type is valid.""" return sectionType in self._getSupportedSectionTypes() - def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]: - """Process a section and return structured data based on its type.""" - sectionType = self._getSectionType(section) - sectionData = self._getSectionData(section) - - if sectionType == "table": - headers, rows = self._extractTableData(sectionData) - return {"content_type": "table", "headers": headers, "rows": rows} - elif sectionType == "bullet_list": - items = self._extractBulletListItems(sectionData) - return {"content_type": "bullet_list", "items": items} - elif sectionType == "heading": - level, text = self._extractHeadingData(sectionData) - return {"content_type": "heading", "level": level, "text": text} - elif sectionType == "paragraph": - text = self._extractParagraphText(sectionData) - return {"content_type": "paragraph", "text": text} - elif sectionType == "code_block": - code, language = self._extractCodeBlockData(sectionData) - return {"content_type": "code_block", "code": code, "language": language} - elif sectionType == "image": - # Extract image data - preserve nested content structure - if isinstance(sectionData, list) and sectionData: - # Get first element 
from elements array - element = sectionData[0] if isinstance(sectionData[0], dict) else {} - elif isinstance(sectionData, dict): - element = sectionData - else: - return {"content_type": "paragraph", "text": "[Image: Invalid data]"} - - # Extract from nested content structure (standard JSON format) - content = element.get("content", {}) - if not isinstance(content, dict): - return {"content_type": "paragraph", "text": "[Image: Invalid content]"} - - base64Data = content.get("base64Data", "") - altText = content.get("altText", "Image") - caption = content.get("caption", "") - - # Validate image data - if self._validateImageData(base64Data, altText): - # Return nested structure matching standard JSON format - return { - "content_type": "image", - "content": { - "base64Data": base64Data, - "altText": altText, - "caption": caption - } - } - else: - # Return placeholder if image data is invalid - return {"content_type": "paragraph", "text": f"[Image: {altText}]"} - else: - # Fallback to paragraph - text = self._extractParagraphText(sectionData) - return {"content_type": "paragraph", "text": text} - def _formatTimestamp(self, timestamp: str = None) -> str: """Format timestamp for display.""" if timestamp: diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 04e7e543..47fecffa 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -366,39 +366,23 @@ class RendererHtml(BaseRenderer): # Process elements according to section's content_type, not just element types if sectionType == "table": - # Process the section data to extract table structure - processedData = self._processSectionByType(section) - return self._renderJsonTable(processedData, styles) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if 
isinstance(sectionData[0], dict) else {} + return self._renderJsonTable(element, styles) + return "" elif sectionType == "bullet_list": - # Process the section data to extract bullet list structure - processedData = self._processSectionByType(section) - return self._renderJsonBulletList(processedData, styles) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonBulletList(element, styles) + return "" elif sectionType == "heading": - # Extract text from elements for heading rendering - if isinstance(sectionData, list): - # Extract text from heading elements - headingText = "" - for element in sectionData: - if isinstance(element, dict): - element_type = element.get("type", "") - if element_type == "heading": - headingText = element.get("content", element.get("text", "")) - break - elif element_type == "extracted_text": - # Use extracted text as heading if no heading element found - content = element.get("content", "") - if content and not headingText: - # Extract first line or title from extracted text - headingText = content.split('\n')[0].strip() - # Remove markdown formatting - headingText = headingText.replace('#', '').replace('**', '').strip() - break - elif "text" in element: - headingText = element.get("text", "") - break - if headingText: - return self._renderJsonHeading({"text": headingText, "level": 2}, styles) - return self._renderJsonHeading(sectionData, styles) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonHeading(element, styles) + return "" elif sectionType == "paragraph": # Process paragraph elements, including extracted_text if isinstance(sectionData, list): @@ -435,13 +419,17 @@ class RendererHtml(BaseRenderer): return '\n'.join(htmlParts) return 
self._renderJsonParagraph(sectionData, styles) elif sectionType == "code_block": - # Process the section data to extract code block structure - processedData = self._processSectionByType(section) - return self._renderJsonCodeBlock(processedData, styles) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonCodeBlock(element, styles) + return "" elif sectionType == "image": - # Process the section data to extract image structure - processedData = self._processSectionByType(section) - return self._renderJsonImage(processedData, styles) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonImage(element, styles) + return "" else: # Fallback: Check for special element types first if isinstance(sectionData, list): @@ -472,7 +460,7 @@ class RendererHtml(BaseRenderer): def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON table to HTML using AI-generated styles.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{headers, rows} content = tableData.get("content", {}) if not isinstance(content, dict): return "" @@ -507,9 +495,9 @@ class RendererHtml(BaseRenderer): return "" def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a JSON bullet list to HTML using AI-generated styles. 
Expects nested content structure.""" + """Render a JSON bullet list to HTML using AI-generated styles.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{items} content = listData.get("content", {}) if not isinstance(content, dict): return "" @@ -535,19 +523,7 @@ class RendererHtml(BaseRenderer): def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON heading to HTML using AI-generated styles.""" try: - # Normalize inputs - headingData is typically a list of elements from _getSectionData - if isinstance(headingData, list): - # Extract first element from elements array - if headingData and len(headingData) > 0: - headingData = headingData[0] if isinstance(headingData[0], dict) else {} - else: - return "" - elif isinstance(headingData, str): - headingData = {"text": headingData, "level": 2} - elif not isinstance(headingData, dict): - return "" - - # Extract from nested content structure + # Extract from nested content structure: element.content.{text, level} content = headingData.get("content", {}) if not isinstance(content, dict): return "" @@ -611,9 +587,9 @@ class RendererHtml(BaseRenderer): return "" def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a JSON code block to HTML using AI-generated styles. 
Expects nested content structure.""" + """Render a JSON code block to HTML using AI-generated styles.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{code, language} content = codeData.get("content", {}) if not isinstance(content, dict): return "" diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index d491c8c2..4b372bb2 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -144,25 +144,37 @@ class RendererMarkdown(BaseRenderer): return '\n\n'.join(markdownParts) if sectionType == "table": - # Process the section data to extract table structure - processedData = self._processSectionByType(section) - return self._renderJsonTable(processedData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonTable(element) + return "" elif sectionType == "bullet_list": - # Process the section data to extract bullet list structure - processedData = self._processSectionByType(section) - return self._renderJsonBulletList(processedData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonBulletList(element) + return "" elif sectionType == "heading": - return self._renderJsonHeading(sectionData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonHeading(element) + return "" elif sectionType == "paragraph": return self._renderJsonParagraph(sectionData) elif sectionType == "code_block": - # 
Process the section data to extract code block structure - processedData = self._processSectionByType(section) - return self._renderJsonCodeBlock(processedData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonCodeBlock(element) + return "" elif sectionType == "image": - # Process the section data to extract image structure - processedData = self._processSectionByType(section) - return self._renderJsonImage(processedData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonImage(element) + return "" else: # Fallback to paragraph for unknown types return self._renderJsonParagraph(sectionData) @@ -174,7 +186,7 @@ class RendererMarkdown(BaseRenderer): def _renderJsonTable(self, tableData: Dict[str, Any]) -> str: """Render a JSON table to markdown.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{headers, rows} content = tableData.get("content", {}) if not isinstance(content, dict): return "" @@ -208,7 +220,7 @@ class RendererMarkdown(BaseRenderer): def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str: """Render a JSON bullet list to markdown.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{items} content = listData.get("content", {}) if not isinstance(content, dict): return "" @@ -233,7 +245,7 @@ class RendererMarkdown(BaseRenderer): def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str: """Render a JSON heading to markdown.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{text, level} content = headingData.get("content", {}) if not isinstance(content, dict): return "" 
@@ -292,7 +304,7 @@ class RendererMarkdown(BaseRenderer): def _renderJsonImage(self, imageData: Dict[str, Any]) -> str: """Render a JSON image to markdown.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{base64Data, altText, caption} content = imageData.get("content", {}) if not isinstance(content, dict): return "" diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index 340e55e4..116d73f4 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -167,35 +167,41 @@ class RendererText(BaseRenderer): return '\n\n'.join(textParts) if sectionType == "table": - # Process the section data to extract table structure - processedData = self._processSectionByType(section) - return self._renderJsonTable(processedData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonTable(element) + return "" elif sectionType == "bullet_list": - # Process the section data to extract bullet list structure - processedData = self._processSectionByType(section) - return self._renderJsonBulletList(processedData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonBulletList(element) + return "" elif sectionType == "heading": - # Render each heading element in the elements array - # sectionData is already the elements array from _getSectionData - renderedElements = [] - for element in sectionData: - renderedElements.append(self._renderJsonHeading(element)) - return "\n".join(renderedElements) + # Work directly with elements like other renderers + if isinstance(sectionData, 
list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonHeading(element) + return "" elif sectionType == "paragraph": # Render each paragraph element in the elements array - # sectionData is already the elements array from _getSectionData renderedElements = [] for element in sectionData: renderedElements.append(self._renderJsonParagraph(element)) return "\n".join(renderedElements) elif sectionType == "code_block": - # Process the section data to extract code block structure - processedData = self._processSectionByType(section) - return self._renderJsonCodeBlock(processedData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonCodeBlock(element) + return "" elif sectionType == "image": - # Process the section data to extract image structure - processedData = self._processSectionByType(section) - return self._renderJsonImage(processedData) + # Work directly with elements like other renderers + if isinstance(sectionData, list) and sectionData: + element = sectionData[0] if isinstance(sectionData[0], dict) else {} + return self._renderJsonImage(element) + return "" else: # Fallback to paragraph for unknown types - render each element # sectionData is already the elements array from _getSectionData @@ -211,7 +217,7 @@ class RendererText(BaseRenderer): def _renderJsonTable(self, tableData: Dict[str, Any]) -> str: """Render a JSON table to text.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{headers, rows} content = tableData.get("content", {}) if not isinstance(content, dict): return "" @@ -245,7 +251,7 @@ class RendererText(BaseRenderer): def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str: """Render a JSON bullet list to text.""" try: - # Extract from nested content structure + # 
Extract from nested content structure: element.content.{items} content = listData.get("content", {}) if not isinstance(content, dict): return "" @@ -270,7 +276,7 @@ class RendererText(BaseRenderer): def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str: """Render a JSON heading to text.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{text, level} content = headingData.get("content", {}) if not isinstance(content, dict): return "" @@ -312,7 +318,7 @@ class RendererText(BaseRenderer): def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str: """Render a JSON code block to text.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{code, language} content = codeData.get("content", {}) if not isinstance(content, dict): return "" @@ -334,7 +340,7 @@ class RendererText(BaseRenderer): def _renderJsonImage(self, imageData: Dict[str, Any]) -> str: """Render a JSON image to text.""" try: - # Extract from nested content structure + # Extract from nested content structure: element.content.{base64Data, altText, caption} content = imageData.get("content", {}) if isinstance(content, dict): altText = content.get("altText", "Image") From 911bcffcd71e5c33f4858a91e3a206f311f35d63 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 29 Dec 2025 22:21:17 +0100 Subject: [PATCH 21/21] fixed renderers and progress tracking for generation part --- .../services/serviceAi/subStructureFilling.py | 911 +++++++---- .../serviceAi/subStructureGeneration.py | 21 +- .../renderers/rendererPptx.py | 1436 ++++++++++++----- .../renderers/rendererXlsx.py | 286 +++- .../test10_document_generation_formats.py | 8 +- 5 files changed, 1905 insertions(+), 757 deletions(-) diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index e0cdfc53..7089103c 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ 
b/modules/services/serviceAi/subStructureFilling.py @@ -114,15 +114,28 @@ class StructureFiller: Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content). Sections enthalten: content_type, contentPartIds, generationHint, useAiCall """ + # Count total chapters for progress tracking + totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", [])) + chapterIndex = 0 + for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): + chapterIndex += 1 chapterId = chapter.get("id", "unknown") chapterLevel = chapter.get("level", 1) - chapterTitle = chapter.get("title", "") + chapterTitle = chapter.get("title", "Untitled Chapter") generationHint = chapter.get("generationHint", "") contentPartIds = chapter.get("contentPartIds", []) contentPartInstructions = chapter.get("contentPartInstructions", {}) + # Update progress for chapter structure generation + progress = chapterIndex / totalChapters if totalChapters > 0 else 1.0 + self.services.chat.progressLogUpdate( + parentOperationId, + progress, + f"Generating sections for Chapter {chapterIndex}/{totalChapters}: {chapterTitle}" + ) + chapterPrompt = self._buildChapterSectionsStructurePrompt( chapterId=chapterId, chapterLevel=chapterLevel, @@ -194,133 +207,384 @@ class StructureFiller: """ Phase 5D.2: Füllt Sections mit ContentParts. 
""" - # Sammle alle Sections für sequenzielle Verarbeitung - sections_to_process = [] - all_sections_list = [] # Für Kontext-Informationen + # Sammle alle Sections für Kontext-Informationen (für alle Sections) + all_sections_list = [] for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): for section in chapter.get("sections", []): all_sections_list.append(section) - sections_to_process.append((doc, chapter, section)) - # Sequenzielle Section-Generierung + # Berechne Gesamtanzahl Chapters für Progress-Tracking + totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", [])) fillOperationId = parentOperationId - for sectionIndex, (doc, chapter, section) in enumerate(sections_to_process): - sectionId = section.get("id") - contentPartIds = section.get("contentPartIds", []) - contentFormats = section.get("contentFormats", {}) - # Check both camelCase and snake_case for generationHint - generationHint = section.get("generationHint") or section.get("generation_hint") - contentType = section.get("content_type", "paragraph") - useAiCall = section.get("useAiCall", False) + + # Helper function to calculate overall progress + def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections): + """Calculate overall progress: 0.0 to 1.0""" + if totalChapters == 0: + return 1.0 - # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden - # Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist - # (z.B. 
wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde) - if len(contentPartIds) == 0 and not generationHint: - useAiCall = False - logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") - elif len(contentPartIds) == 0 and generationHint and not useAiCall: - # Override: If there's a generationHint but no content parts, we should use AI - # This handles cases where structure generation set useAiCall=false incorrectly - useAiCall = True - logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)") + # Progress from completed chapters (0 to chapterIndex-1) + completedChaptersProgress = chapterIndex / totalChapters - elements = [] + # Progress from current chapter (sectionIndex / totalSections) + currentChapterProgress = (sectionIndex / totalSections) / totalChapters if totalSections > 0 else 0 - # Prüfe ob Aggregation nötig ist - needsAggregation = self._needsAggregation( - contentType=contentType, - contentPartCount=len(contentPartIds) - ) - - logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") - - if needsAggregation and useAiCall: - # Aggregation: Alle Parts zusammen verarbeiten - sectionParts = [ - self._findContentPartById(pid, contentParts) - for pid in contentPartIds - ] - sectionParts = [p for p in sectionParts if p is not None] + return min(1.0, completedChaptersProgress + currentChapterProgress) + + # Process chapters sequentially with chapter-level progress + chapterIndex = 0 + for doc in chapterStructure.get("documents", []): + for chapter in doc.get("chapters", []): + chapterIndex += 1 + chapterId = chapter.get("id", "unknown") + chapterTitle = chapter.get("title", "Untitled Chapter") + sections = chapter.get("sections", []) + totalSections = len(sections) + + # Start chapter operation 
+ chapterOperationId = f"{fillOperationId}_chapter_{chapterId}" + self.services.chat.progressLogStart( + chapterOperationId, + "Chapter Generation", + f"Chapter {chapterIndex}/{totalChapters}", + chapterTitle, + parentOperationId=fillOperationId + ) + + # Process sections within chapter + for sectionIndex, section in enumerate(sections): + sectionId = section.get("id") + sectionTitle = section.get("title", sectionId) + contentPartIds = section.get("contentPartIds", []) + contentFormats = section.get("contentFormats", {}) + # Check both camelCase and snake_case for generationHint + generationHint = section.get("generationHint") or section.get("generation_hint") + contentType = section.get("content_type", "paragraph") + useAiCall = section.get("useAiCall", False) - if sectionParts: - # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) - extractedParts = [ - p for p in sectionParts - if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" - ] - nonExtractedParts = [ - p for p in sectionParts - if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + # Update overall progress at start of section + overallProgress = calculateOverallProgress(chapterIndex - 1, totalChapters, sectionIndex, totalSections) + self.services.chat.progressLogUpdate( + fillOperationId, + overallProgress, + f"Chapter {chapterIndex}/{totalChapters}, Section {sectionIndex + 1}/{totalSections}: {sectionTitle}" + ) + + # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden + # Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist + # (z.B. 
wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde) + if len(contentPartIds) == 0 and not generationHint: + useAiCall = False + logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") + elif len(contentPartIds) == 0 and generationHint and not useAiCall: + # Override: If there's a generationHint but no content parts, we should use AI + # This handles cases where structure generation set useAiCall=false incorrectly + useAiCall = True + logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)") + + elements = [] + + # Prüfe ob Aggregation nötig ist + needsAggregation = self._needsAggregation( + contentType=contentType, + contentPartCount=len(contentPartIds) + ) + + logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") + + if needsAggregation and useAiCall: + # Aggregation: Alle Parts zusammen verarbeiten + sectionParts = [ + self._findContentPartById(pid, contentParts) + for pid in contentPartIds ] + sectionParts = [p for p in sectionParts if p is not None] - # Verarbeite non-extracted Parts separat (reference, object) - for part in nonExtractedParts: - contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) + if sectionParts: + # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) + extractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" + ] + nonExtractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + ] - if contentFormat == "reference": + # Verarbeite non-extracted Parts separat (reference, object) + for part in nonExtractedParts: + contentFormat = contentFormats.get(part.id, 
part.metadata.get("contentFormat")) + + if contentFormat == "reference": + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + elif contentFormat == "object": + # Nested content structure for objects + if part.typeGroup == "image": + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": part.metadata.get("caption", "") + } + }) + else: + elements.append({ + "type": part.typeGroup, + "content": { + "data": part.data, + "mimeType": part.mimeType, + "label": part.metadata.get("usageHint", part.label) + } + }) + + # Aggregiere extracted Parts mit AI + if extractedParts: + logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI") + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=extractedParts, # ALLE PARTS für Aggregation! 
+ userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=True + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation (Aggregation)", + f"Section {sectionIndex + 1}/{totalSections}", + f"{sectionTitle} ({len(extractedParts)} parts)", + parentOperationId=chapterOperationId + ) + + try: + # Update: Building prompt + self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") + + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"{chapterId}_section_{sectionId}_prompt" + ) + logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt (aggregation)") + + # Update: Calling AI + self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") + + # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) 
+ # Use IMAGE_GENERATE for image content type + operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE + + # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) + if operationType == OperationTypeEnum.IMAGE_GENERATE: + maxPromptLength = 4000 + if len(generationPrompt) > maxPromptLength: + logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") + # Keep the beginning (task, metadata, generation hint) and truncate from end + generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline + + # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks + contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else extractedParts + request = AiCallRequest( + prompt=generationPrompt, + contentParts=contentPartsForCall, # Empty for IMAGE_GENERATE, all parts for others + options=AiCallOptions( + operationType=operationType, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + # Update: Processing response + self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"{chapterId}_section_{sectionId}_response" + ) + logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response (aggregation)") + + # Update: Validating content + self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") + + # Handle IMAGE_GENERATE differently - returns image data directly + if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: + import base64 + base64Data = "" + + # Convert image data to base64 string if needed + if isinstance(aiResponse.content, bytes): + base64Data = 
base64.b64encode(aiResponse.content).decode('utf-8') + elif isinstance(aiResponse.content, str): + # Check if it's already a JSON structure + try: + # Try to parse as JSON first + jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) + # If it's already a proper JSON structure with image element, use it + if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": + elements.append(jsonContent) + logger.debug("AI returned proper JSON image structure") + continue + elif isinstance(jsonContent, list) and len(jsonContent) > 0: + # Check if first element is an image + if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": + elements.extend(jsonContent) + logger.debug("AI returned proper JSON image structure in list") + continue + except (json.JSONDecodeError, ValueError, AttributeError): + # Not JSON, treat as base64 string or data URI + pass + + # Already base64 string or data URI + if aiResponse.content.startswith("data:image/"): + # Extract base64 from data URI + base64Data = aiResponse.content.split(",", 1)[1] + else: + # Check if it looks like base64 (alphanumeric + / + =) + content_stripped = aiResponse.content.strip() + if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): + # Looks like base64, use it + base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") + else: + base64Data = aiResponse.content + else: + base64Data = "" + + # Always create proper JSON structure for images + if base64Data: + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") + else: + logger.warning(f"IMAGE_GENERATE returned empty or invalid content for section {sectionId}") + 
elements.append({ + "type": "error", + "message": f"Image generation returned empty or invalid content", + "sectionId": sectionId + }) + else: + # Parse JSON response for other content types + try: + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + elif isinstance(generatedElements, dict) and generatedElements.get("type"): + # Single element in dict format + elements.append(generatedElements) + except (json.JSONDecodeError, ValueError) as json_error: + logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") + # Try to extract any image data that might be in the response + if contentType == "image": + # Check if response content might be base64 image data + content_str = str(aiResponse.content) + if len(content_str) > 100: + elements.append({ + "type": "error", + "message": f"Failed to parse image generation response: {str(json_error)}", + "sectionId": sectionId + }) + else: + elements.append({ + "type": "error", + "message": f"Failed to parse JSON response: {str(json_error)}", + "sectionId": sectionId + }) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + # Update chapter progress after section completion + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed" + ) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) 
+ self.services.chat.progressLogFinish(sectionOperationId, False) elements.append({ - "type": "reference", - "documentReference": part.metadata.get("documentReference"), - "label": part.metadata.get("usageHint", part.label) + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId }) - elif contentFormat == "object": - # Nested content structure for objects - if part.typeGroup == "image": - elements.append({ - "type": "image", - "content": { - "base64Data": part.data, - "altText": part.metadata.get("usageHint", part.label), - "caption": part.metadata.get("caption", "") - } - }) - else: - elements.append({ - "type": part.typeGroup, - "content": { - "data": part.data, - "mimeType": part.mimeType, - "label": part.metadata.get("usageHint", part.label) - } - }) - - # Aggregiere extracted Parts mit AI - if extractedParts: - logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI") + logger.error(f"Error generating section {sectionId}: {str(e)}") + # Still update chapter progress even on error + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" + ) + # NICHT raise - Section wird mit Fehlermeldung gerendert + + else: + # Einzelverarbeitung: Jeder Part einzeln ODER Generation ohne ContentParts + # Handle case where no content parts but generationHint exists (e.g., Executive Summary) + if len(contentPartIds) == 0 and useAiCall and generationHint: + # Generate content from scratch using only generationHint + logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only") generationPrompt = self._buildSectionGenerationPrompt( section=section, - contentParts=extractedParts, # ALLE PARTS für Aggregation! 
+ contentParts=[], # NO PARTS userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, sectionIndex=sectionIndex, - isAggregation=True + isAggregation=False ) # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" - # Starte ChatLog mit Parent-Referenz + # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) self.services.chat.progressLogStart( sectionOperationId, - "Section Generation (Aggregation)", - "Section", - f"Generating section {sectionId} with {len(extractedParts)} parts", - parentOperationId=fillOperationId + "Section Generation", + f"Section {sectionIndex + 1}/{totalSections}", + f"{sectionTitle} (from generationHint)", + parentOperationId=chapterOperationId ) try: + # Update: Building prompt + self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") + # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, - f"section_content_{sectionId}_prompt" + f"{chapterId}_section_{sectionId}_prompt" ) - logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt (aggregation)") + logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt") - # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) 
- # Use IMAGE_GENERATE for image content type + # Update: Calling AI + self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") + + # Verwende callAi ohne ContentParts operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) @@ -331,11 +595,9 @@ class StructureFiller: # Keep the beginning (task, metadata, generation hint) and truncate from end generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline - # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks - contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else extractedParts request = AiCallRequest( prompt=generationPrompt, - contentParts=contentPartsForCall, # Empty for IMAGE_GENERATE, all parts for others + contentParts=[], # NO PARTS options=AiCallOptions( operationType=operationType, priority=PriorityEnum.BALANCED, @@ -344,49 +606,103 @@ class StructureFiller: ) aiResponse = await self.aiService.callAi(request) + # Update: Processing response + self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") + # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, - f"section_content_{sectionId}_response" + f"{chapterId}_section_{sectionId}_response" ) - logger.debug(f"Logged section response: section_content_{sectionId}_response (aggregation)") + logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response") + + # Update: Validating content + self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 + base64Data = "" + # Convert image data to base64 string if needed if 
isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): + # Check if it's already a JSON structure + try: + jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) + if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": + elements.append(jsonContent) + logger.debug("AI returned proper JSON image structure") + continue + elif isinstance(jsonContent, list) and len(jsonContent) > 0: + if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": + elements.extend(jsonContent) + logger.debug("AI returned proper JSON image structure in list") + continue + except (json.JSONDecodeError, ValueError, AttributeError): + pass + # Already base64 string or data URI if aiResponse.content.startswith("data:image/"): - # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: - base64Data = aiResponse.content + content_stripped = aiResponse.content.strip() + if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): + base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") + else: + base64Data = aiResponse.content else: base64Data = "" - elements.append({ - "type": "image", - "content": { - "base64Data": base64Data, - "altText": generationHint or "Generated image", - "caption": "" - } - }) + # Always create proper JSON structure for images + if base64Data: + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") + else: + logger.warning(f"IMAGE_GENERATE returned empty content for section {sectionId}") + elements.append({ + "type": "error", + "message": f"Image generation 
returned empty content", + "sectionId": sectionId + }) else: # Parse JSON response for other content types - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) + try: + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + elif isinstance(generatedElements, dict) and generatedElements.get("type"): + elements.append(generatedElements) + except (json.JSONDecodeError, ValueError) as json_error: + logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") + elements.append({ + "type": "error", + "message": f"Failed to parse JSON response: {str(json_error)}", + "sectionId": sectionId + }) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) + + # Update chapter progress after section completion + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed" + ) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) 
@@ -397,119 +713,13 @@ class StructureFiller: "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") - # NICHT raise - Section wird mit Fehlermeldung gerendert - - else: - # Einzelverarbeitung: Jeder Part einzeln ODER Generation ohne ContentParts - # Handle case where no content parts but generationHint exists (e.g., Executive Summary) - if len(contentPartIds) == 0 and useAiCall and generationHint: - # Generate content from scratch using only generationHint - logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only") - generationPrompt = self._buildSectionGenerationPrompt( - section=section, - contentParts=[], # NO PARTS - userPrompt=userPrompt, - generationHint=generationHint, - allSections=all_sections_list, - sectionIndex=sectionIndex, - isAggregation=False - ) - - # Erstelle Operation-ID für Section-Generierung - sectionOperationId = f"{fillOperationId}_section_{sectionId}" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - sectionOperationId, - "Section Generation", - "Section", - f"Generating section {sectionId} from generationHint", - parentOperationId=fillOperationId - ) - - try: - # Debug: Log Prompt - self.services.utils.writeDebugFile( - generationPrompt, - f"section_content_{sectionId}_prompt" - ) - logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") - - # Verwende callAi ohne ContentParts - operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE - - # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) - if operationType == OperationTypeEnum.IMAGE_GENERATE: - maxPromptLength = 4000 - if len(generationPrompt) > maxPromptLength: - logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") - # Keep the beginning (task, metadata, generation hint) and truncate from end - generationPrompt = 
generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline - - request = AiCallRequest( - prompt=generationPrompt, - contentParts=[], # NO PARTS - options=AiCallOptions( - operationType=operationType, - priority=PriorityEnum.BALANCED, - processingMode=ProcessingModeEnum.DETAILED + # Still update chapter progress even on error + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" ) - ) - aiResponse = await self.aiService.callAi(request) - - # Debug: Log Response - self.services.utils.writeDebugFile( - aiResponse.content, - f"section_content_{sectionId}_response" - ) - logger.debug(f"Logged section response: section_content_{sectionId}_response") - - # Handle IMAGE_GENERATE differently - returns image data directly - if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: - import base64 - # Convert image data to base64 string if needed - if isinstance(aiResponse.content, bytes): - base64Data = base64.b64encode(aiResponse.content).decode('utf-8') - elif isinstance(aiResponse.content, str): - # Already base64 string or data URI - if aiResponse.content.startswith("data:image/"): - # Extract base64 from data URI - base64Data = aiResponse.content.split(",", 1)[1] - else: - base64Data = aiResponse.content - else: - base64Data = "" - - elements.append({ - "type": "image", - "content": { - "base64Data": base64Data, - "altText": generationHint or "Generated image", - "caption": "" - } - }) - else: - # Parse JSON response for other content types - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) - - 
# ChatLog abschließen - self.services.chat.progressLogFinish(sectionOperationId, True) - - except Exception as e: - # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) - self.services.chat.progressLogFinish(sectionOperationId, False) - elements.append({ - "type": "error", - "message": f"Error generating section {sectionId}: {str(e)}", - "sectionId": sectionId - }) - logger.error(f"Error generating section {sectionId}: {str(e)}") # Einzelverarbeitung: Jeder Part einzeln for partId in contentPartIds: @@ -567,22 +777,28 @@ class StructureFiller: # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" - # Starte ChatLog mit Parent-Referenz + # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) self.services.chat.progressLogStart( sectionOperationId, "Section Generation", - "Section", - f"Generating section {sectionId}", - parentOperationId=fillOperationId + f"Section {sectionIndex + 1}/{totalSections}", + f"{sectionTitle} (single part)", + parentOperationId=chapterOperationId ) try: + # Update: Building prompt + self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") + # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, - f"section_content_{sectionId}_prompt" + f"{chapterId}_section_{sectionId}_prompt" ) - logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") + logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt") + + # Update: Calling AI + self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") # Verwende callAi für ContentParts-Unterstützung # Use IMAGE_GENERATE for image content type @@ -609,49 +825,103 @@ class StructureFiller: ) aiResponse = await self.aiService.callAi(request) + # Update: Processing response + self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") + # Debug: Log Response 
self.services.utils.writeDebugFile( aiResponse.content, - f"section_content_{sectionId}_response" + f"{chapterId}_section_{sectionId}_response" ) - logger.debug(f"Logged section response: section_content_{sectionId}_response") + logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response") + + # Update: Validating content + self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 + base64Data = "" + # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): + # Check if it's already a JSON structure + try: + jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) + if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": + elements.append(jsonContent) + logger.debug("AI returned proper JSON image structure") + continue + elif isinstance(jsonContent, list) and len(jsonContent) > 0: + if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": + elements.extend(jsonContent) + logger.debug("AI returned proper JSON image structure in list") + continue + except (json.JSONDecodeError, ValueError, AttributeError): + pass + # Already base64 string or data URI if aiResponse.content.startswith("data:image/"): - # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: - base64Data = aiResponse.content + content_stripped = aiResponse.content.strip() + if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): + base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") + else: + base64Data = 
aiResponse.content else: base64Data = "" - elements.append({ - "type": "image", - "content": { - "base64Data": base64Data, - "altText": generationHint or "Generated image", - "caption": "" - } - }) + # Always create proper JSON structure for images + if base64Data: + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") + else: + logger.warning(f"IMAGE_GENERATE returned empty content for section {sectionId}") + elements.append({ + "type": "error", + "message": f"Image generation returned empty content", + "sectionId": sectionId + }) else: # Parse JSON response for other content types - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) + try: + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + elif isinstance(generatedElements, dict) and generatedElements.get("type"): + elements.append(generatedElements) + except (json.JSONDecodeError, ValueError) as json_error: + logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") + elements.append({ + "type": "error", + "message": f"Failed to parse JSON response: {str(json_error)}", + "sectionId": sectionId + }) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) + + # Update chapter progress after section completion + chapterProgress = (sectionIndex + 1) / 
totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed" + ) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) @@ -662,6 +932,13 @@ class StructureFiller: "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") + # Still update chapter progress even on error + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" + ) # NICHT raise - Section wird mit Fehlermeldung gerendert else: # Füge extrahierten Content direkt hinzu (kein AI-Call) @@ -687,7 +964,35 @@ class StructureFiller: "extractionPrompt": part.metadata.get("extractionPrompt") }) - section["elements"] = elements + # Assign elements to section (for all processing paths) + section["elements"] = elements + + # Update chapter progress after section completion (for all sections, including non-AI) + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed" + ) + + # Update overall progress after section completion + overallProgress = calculateOverallProgress(chapterIndex - 1, totalChapters, sectionIndex + 1, totalSections) + self.services.chat.progressLogUpdate( + fillOperationId, + overallProgress, + f"Chapter {chapterIndex}/{totalChapters}, Section {sectionIndex + 1}/{totalSections} completed" + ) + + # Finish chapter operation after all sections processed + self.services.chat.progressLogFinish(chapterOperationId, True) + + # Update overall progress after chapter completion + overallProgress = chapterIndex / totalChapters if totalChapters > 0 else 1.0 + 
self.services.chat.progressLogUpdate( + fillOperationId, + overallProgress, + f"Chapter {chapterIndex}/{totalChapters} completed: {chapterTitle}" + ) return chapterStructure @@ -744,7 +1049,10 @@ class StructureFiller: ) -> Dict[str, Any]: """ Flattening: Konvertiert Chapters zu finaler Section-Struktur. - Jedes Chapter wird zu einer Heading-Section + dessen Sections. + Jedes Chapter wird zu einer Heading-Section (Level 1) + dessen Sections. + + IMPORTANT: Chapters are the main structure elements (heading level 1). + All section headings with level < 2 are adjusted to level 2. """ result = { "metadata": chapterStructure.get("metadata", {}), @@ -760,7 +1068,7 @@ class StructureFiller: } for chapter in doc.get("chapters", []): - # 1. Vordefinierte Heading-Section für Chapter-Title + # 1. Vordefinierte Heading-Section für Chapter-Title (ALWAYS Level 1) heading_section = { "id": f"{chapter['id']}_heading", "content_type": "heading", @@ -768,19 +1076,42 @@ class StructureFiller: "type": "heading", "content": { "text": chapter.get("title", ""), - "level": chapter.get("level", 1) + "level": 1 # Chapters are always level 1 } }] } flattened_doc["sections"].append(heading_section) - # 2. Generierte Sections - flattened_doc["sections"].extend(chapter.get("sections", [])) + # 2. Generierte Sections - adjust heading levels + for section in chapter.get("sections", []): + adjusted_section = self._adjustSectionHeadingLevels(section) + flattened_doc["sections"].append(adjusted_section) result["documents"].append(flattened_doc) return result + def _adjustSectionHeadingLevels(self, section: Dict[str, Any]) -> Dict[str, Any]: + """ + Adjust heading levels in sections: sections with type heading and level < 2 are changed to level 2. + Only chapter headings have level 1. 
+ """ + adjusted_section = copy.deepcopy(section) + + # Check if this is a heading section + if adjusted_section.get("content_type") == "heading": + elements = adjusted_section.get("elements", []) + for element in elements: + if isinstance(element, dict) and element.get("type") == "heading": + content = element.get("content", {}) + if isinstance(content, dict): + level = content.get("level", 1) + # If level < 2, change to level 2 (only chapters have level 1) + if level < 2: + content["level"] = 2 + + return adjusted_section + def _buildChapterSectionsStructurePrompt( self, chapterId: str, @@ -975,6 +1306,9 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th contentStructureExample = self._getContentStructureExample(contentType) + # Special handling for image content type with IMAGE_GENERATE + isImageGeneration = contentType == "image" and len(validParts) == 0 + if isAggregation: prompt = f"""# TASK: Generate Section Content (Aggregation) @@ -982,22 +1316,10 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th - Section ID: {sectionId} - Content Type: {contentType} - Generation Hint: {generationHint} -{contextText} - -## USER REQUEST (for context) -``` -{userPrompt} -``` ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} -## IMPORTANT - SECTION INDEPENDENCE: -- This section is independent and self-contained -- You do NOT have information about other sections' content -- Provide all necessary context within this section -- Context above is for logical flow only, NOT for content dependencies - ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) @@ -1007,6 +1329,10 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th 6. 
Ensure the generated content is self-contained and understandable independently 7. Return ONLY a JSON object with an "elements" array 8. Each element should match the content_type: {contentType} +9. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer. +10. For paragraphs: Return plain text only, no HTML tags like
, ,

, or style attributes +11. For headings: Return plain text only, no HTML tags or styling +12. For images: Do NOT include base64 data in JSON - images are handled separately ## OUTPUT FORMAT Return a JSON object with this structure: @@ -1020,7 +1346,16 @@ Return a JSON object with this structure: ] }} -CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +CRITICAL: +- "content" MUST always be an object (never a string) +- For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup +- Return ONLY valid JSON. Do not include any explanatory text outside the JSON. + +## CONTEXT (for reference only) +{contextText if contextText else ""} +``` +{userPrompt} +``` """ else: prompt = f"""# TASK: Generate Section Content @@ -1029,30 +1364,21 @@ CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid - Section ID: {sectionId} - Content Type: {contentType} - Generation Hint: {generationHint} -{contextText} - -## USER REQUEST (for context) -``` -{userPrompt} -``` ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} -## IMPORTANT - SECTION INDEPENDENCE: -- This section is independent and self-contained -- You do NOT have information about other sections' content -- Provide all necessary context within this section -- Context above is for logical flow only, NOT for content dependencies - ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. Use the available content parts to populate this section -3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data -4. For extracted text: Format appropriately based on content_type ({contentType}) -5. Ensure the generated content is self-contained and understandable independently -6. Return ONLY a JSON object with an "elements" array -7. 
Each element should match the content_type: {contentType} +3. For extracted text: Format appropriately based on content_type ({contentType}) +4. Ensure the generated content is self-contained and understandable independently +5. Return ONLY a JSON object with an "elements" array +6. Each element should match the content_type: {contentType} +7. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer. +8. For paragraphs: Return plain text only, no HTML tags like

, ,

, or style attributes +9. For headings: Return plain text only, no HTML tags or styling +10. For images: If you need to reference an image, describe it in altText. Do NOT include base64 data - images are handled separately ## OUTPUT FORMAT Return a JSON object with this structure: @@ -1066,7 +1392,16 @@ Return a JSON object with this structure: ] }} -CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +CRITICAL: +- "content" MUST always be an object (never a string) +- For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup +- Return ONLY valid JSON. Do not include any explanatory text outside the JSON + +## CONTEXT (for reference only) +{contextText if contextText else ""} +``` +{userPrompt} +``` """ return prompt diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py index 84e659a4..d3b46e0e 100644 --- a/modules/services/serviceAi/subStructureGeneration.py +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -160,18 +160,30 @@ IMPORTANT - CHAPTER INDEPENDENCE: - One chapter does NOT have information about another chapter - Each chapter must provide its own context and be understandable alone +CRITICAL - CONTENT ASSIGNMENT TO CHAPTERS: +- You MUST assign available ContentParts to chapters using contentPartIds +- Based on the user request, determine which content should be used in which chapter +- If the user request mentions specific content, assign the corresponding ContentPart to the appropriate chapter +- Chapters WITHOUT contentPartIds can only generate generic content, NOT document-specific analysis +- To include document content analysis, chapters MUST have contentPartIds assigned +- Review the user request carefully to match ContentParts to chapters based on context and purpose + CRITICAL - CHAPTERS WITHOUT CONTENT PARTS: - If contentPartIds is EMPTY, 
generationHint MUST be VERY DETAILED with all context needed to generate content from scratch - Include: what to generate, what information to include, purpose, specific details -- Without content parts, AI relies ENTIRELY on generationHint -- GOOD: "Create [specific content] with [details]. Include [information]. Purpose: [explanation]." -- BAD: "Create title" or "Add section" (too vague) +- Without content parts, AI relies ENTIRELY on generationHint and CANNOT analyze document content + +IMPORTANT - FORMATTING: +- Formatting (fonts, colors, layouts, styles) is handled AUTOMATICALLY by the renderer +- Do NOT specify formatting details in generationHint unless it's content-specific (e.g., "pie chart with 3 segments") +- Focus on CONTENT and STRUCTURE, not visual formatting +- The renderer will apply appropriate styling based on the output format ({outputFormat}) For each chapter: - chapter id - level (1, 2, 3, etc.) - title -- contentPartIds: [List of ContentPart IDs] +- contentPartIds: [List of ContentPart IDs] - ASSIGN content based on user request and chapter purpose - contentPartInstructions: {{ "partId": {{ "instruction": "How content should be structured" @@ -179,6 +191,7 @@ For each chapter: }} - generationHint: Description of the content (must be self-contained with all necessary context) * If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch + * Focus on content and structure, NOT formatting details OUTPUT FORMAT: {outputFormat} diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 9e6f41c9..5525ae89 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -82,205 +82,119 @@ class RendererPptx(BaseRenderer): logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: 
{len(slide_images)}, content: {len(slide_content)} chars") - # Determine layout: first slide (i==0) uses title slide layout - # For image-only slides, use blank layout to avoid placeholder interference - # Otherwise use title+content layout - if i == 0: - slideLayoutIndex = 0 # Title slide layout - elif hasImages and not hasSections and not slide_content: - # Image-only slide: use blank layout (typically index 6, fallback to 5 if not available) - try: - slideLayoutIndex = 6 # Blank layout - # Verify layout exists, fallback if not - if slideLayoutIndex >= len(prs.slide_layouts): - slideLayoutIndex = 5 # Alternative blank layout - except (AttributeError, IndexError): - slideLayoutIndex = 1 # Fallback to title+content - else: - slideLayoutIndex = 1 # Title and content layout + # Use blank layout for all slides to avoid placeholder interference + # Find blank layout (typically index 6, fallback to 5) + slideLayoutIndex = None + for idx in [6, 5]: + if idx < len(prs.slide_layouts): + try: + layout = prs.slide_layouts[idx] + # Check if it's a blank layout (no placeholders) + if len(layout.placeholders) == 0: + slideLayoutIndex = idx + break + except (AttributeError, IndexError): + continue + + # If no blank layout found, use layout with fewest placeholders + if slideLayoutIndex is None: + min_placeholders = float('inf') + for idx in range(len(prs.slide_layouts)): + try: + layout = prs.slide_layouts[idx] + placeholder_count = len(layout.placeholders) if hasattr(layout, 'placeholders') else 0 + if placeholder_count < min_placeholders: + min_placeholders = placeholder_count + slideLayoutIndex = idx + except: + continue + + # Fallback to first layout if still None + if slideLayoutIndex is None: + slideLayoutIndex = 0 slide_layout = prs.slide_layouts[slideLayoutIndex] slide = prs.slides.add_slide(slide_layout) - # Set title with AI-generated styling - # For blank layouts, add title as textbox since there's no title placeholder + # Clear placeholder text instead of removing 
placeholders (safer approach) + # This avoids corrupting the PPTX file structure try: - title_shape = slide.shapes.title - title_shape.text = slide_data.get("title", "Slide") - - # Apply title styling - LEFT ALIGNED by default - title_style = styles.get("title", {}) - if title_shape.text_frame.paragraphs[0].font: - title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) - title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) - title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) - title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) - # Set left alignment for title - title_shape.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT - except AttributeError: - # Blank layout has no title placeholder - add title as textbox - from pptx.util import Inches - titleBox = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), prs.slide_width - Inches(1), Inches(0.8)) - titleFrame = titleBox.text_frame - titleFrame.text = slide_data.get("title", "Slide") - title_style = styles.get("title", {}) - titleFrame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) - titleFrame.paragraphs[0].font.bold = title_style.get("bold", True) - title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) - titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color) - titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT + for shape in slide.shapes: + if hasattr(shape, 'is_placeholder') and shape.is_placeholder: + try: + if hasattr(shape, 'text_frame'): + shape.text_frame.clear() + # Set text to empty string to remove "Click to add text" + if len(shape.text_frame.paragraphs) > 0: + shape.text_frame.paragraphs[0].text = "" + except: + pass + except Exception as placeholder_error: + logger.warning(f"Could not clear placeholders: {str(placeholder_error)}") + + # Add title as textbox (smaller size for slides) + from pptx.util import Inches + titleBox = 
slide.shapes.add_textbox(Inches(0.5), Inches(0.2), prs.slide_width - Inches(1), Inches(0.6)) + titleFrame = titleBox.text_frame + titleFrame.text = slide_data.get("title", "Slide") + title_style = styles.get("title", {}) + # Smaller title size for slides (default 32 instead of 44) + title_font_size = title_style.get("font_size", 32) + # Reduce further for slides (max 32pt, min 10pt for readability) + title_font_size = max(10, min(title_font_size, 32)) + titleFrame.paragraphs[0].font.size = Pt(title_font_size) + titleFrame.paragraphs[0].font.bold = title_style.get("bold", True) + title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) + titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color) + titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT + titleFrame.word_wrap = True # Render sections with proper PowerPoint objects (tables, lists, etc.) + # Organize content into frames for better layout if hasSections: - # Use content placeholder for structured content (only if layout has placeholder[1]) - try: - content_shape = slide.placeholders[1] - text_frame = content_shape.text_frame - text_frame.clear() - except (AttributeError, IndexError): - # Layout might not have placeholder[1], create textbox instead - from pptx.util import Inches - left = Inches(0.5) - top = Inches(1.5) - width = prs.slide_width - Inches(1) - height = prs.slide_height - top - Inches(0.5) - textbox = slide.shapes.add_textbox(left, top, width, height) - text_frame = textbox.text_frame - text_frame.word_wrap = True - - # Track vertical position for multiple content types - current_y = Inches(1.5) # Start below title - - for section in slide_sections: - section_type = section.get("content_type", "paragraph") - elements = section.get("elements", []) - - # Check if section has image content_type - if section_type == "image": - # Extract images from this section - for element in elements: - if isinstance(element, dict) and element.get("type") == "image": - content = 
element.get("content", {}) - if isinstance(content, dict): - base64Data = content.get("base64Data") - if base64Data: - slide_images.append({ - "base64Data": base64Data, - "altText": content.get("altText", "Image"), - "caption": content.get("caption", "") - }) - continue # Skip rendering image sections as text - - # Handle sections without elements (e.g., headings that create slides) - if not elements: - continue - - for element in elements: - if not isinstance(element, dict): - continue - - # Check element type first, fall back to section type - element_type = element.get("type", "") - if not element_type: - element_type = section_type - - # Skip image elements - they're handled separately - if element_type == "image": - content = element.get("content", {}) - if isinstance(content, dict): - base64Data = content.get("base64Data") - if base64Data: - slide_images.append({ - "base64Data": base64Data, - "altText": content.get("altText", "Image"), - "caption": content.get("caption", "") - }) - continue - - if element_type == "table": - # Render as actual PowerPoint table - self._addTableToSlide(slide, element, styles, current_y) - current_y += Inches(2) # Space for table - elif element_type == "bullet_list" or element_type == "list": - # Render as actual PowerPoint bullet list - if text_frame: - self._addBulletListToSlide(slide, element, styles, text_frame) - elif element_type == "heading": - # Render as heading in text frame - if text_frame: - self._addHeadingToSlide(slide, element, styles, text_frame) - elif element_type == "paragraph": - # Render as paragraph in text frame - if text_frame: - self._addParagraphToSlide(slide, element, styles, text_frame) - elif element_type == "code_block" or element_type == "code": - # Render as formatted code block - if text_frame: - self._addCodeBlockToSlide(slide, element, styles, text_frame) - elif element_type == "extracted_text": - # Render extracted text as paragraph with styling - if text_frame: - content = 
element.get("content", "") - source = element.get("source", "") - if content: - paragraph_style = styles.get("paragraph", {}) - p = text_frame.add_paragraph() - p.text = content - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = paragraph_style.get("bold", False) - p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) - p.alignment = PP_ALIGN.LEFT # Left align by default - if source: - p.add_run(f" (Source: {source})").font.italic = True - elif element_type == "reference": - # Render reference - if text_frame: - label = element.get("label", "Reference") - p = text_frame.add_paragraph() - p.text = f"[Reference: {label}]" - p.font.italic = True - p.alignment = PP_ALIGN.LEFT - else: - # Fallback: try to render as paragraph - if text_frame: - content = element.get("content", "") - if isinstance(content, dict): - text = content.get("text", "") - elif isinstance(content, str): - text = content - else: - text = "" - - if text: - self._addParagraphToSlide(slide, element, styles, text_frame) + # Organize sections into content groups for frame-based layout + # Images are handled within the frame rendering method + self._renderSlideContentWithFrames(slide, slide_sections, slide_images, styles, prs) - # Handle images after processing sections (images may have been extracted from sections) - # Update hasImages in case images were added during section processing - hasImages = len(slide_images) > 0 - if hasImages: - self._addImagesToSlide(slide, slide_images, styles) - - # Fallback: if no sections but has content text, render as before + # Fallback: if no sections but has content text, render in textbox elif slide_content and not hasImages: - content_shape = slide.placeholders[1] - text_frame = content_shape.text_frame - text_frame.clear() + # Create textbox for content (no placeholders in blank layout) + from pptx.util import Inches + title_height_used = Inches(1.0) # Title height for blank slides + content_left = 
Inches(0.5) + content_top = title_height_used + Inches(0.3) + content_width = prs.slide_width - Inches(1) + content_height = prs.slide_height - content_top - Inches(0.5) + content_textbox = slide.shapes.add_textbox(content_left, content_top, content_width, content_height) + text_frame = content_textbox.text_frame + text_frame.word_wrap = True + text_frame.auto_size = None # Split content into paragraphs paragraphs = slide_content.split('\n\n') - for paraIdx, paragraph in enumerate(paragraphs): + for paragraph in paragraphs: if paragraph.strip(): - if paraIdx == 0: - p = text_frame.paragraphs[0] - else: - p = text_frame.add_paragraph() - + p = text_frame.add_paragraph() p.text = paragraph.strip() - # Apply AI-generated styling + # Apply AI-generated styling with adaptive sizing paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) + base_font_size = paragraph_style.get("font_size", 18) + # Calculate adaptive font size based on content length + try: + total_chars = len(slide_content) + chars_per_line = max(1, int(content_width / Pt(10))) + lines_needed = total_chars / chars_per_line + available_lines = max(1, int(content_height / Pt(14))) + font_multiplier = 1.0 + if available_lines > 0 and lines_needed > available_lines: + font_multiplier = max(0.6, min(1.0, (available_lines / lines_needed) * 1.1)) + calculated_size = max(6, int(base_font_size * font_multiplier)) # Minimum 6pt + except (ZeroDivisionError, ValueError, TypeError): + calculated_size = max(6, base_font_size) # Fallback to base size with minimum + + p.font.size = Pt(calculated_size) p.font.bold = paragraph_style.get("bold", False) paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) p.font.color.rgb = RGBColor(*paragraph_color) @@ -567,11 +481,11 @@ class RendererPptx(BaseRenderer): def _getDefaultStyleSet(self) -> Dict[str, Any]: """Default PowerPoint style set - used when no style instructions present.""" return { - "title": 
{"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"}, - "heading": {"font_size": 36, "color": "#2C5F2D", "bold": True, "align": "left"}, - "subheading": {"font_size": 28, "color": "#4A90E2", "bold": True, "align": "left"}, - "paragraph": {"font_size": 20, "color": "#2F2F2F", "bold": False, "align": "left"}, - "bullet_list": {"font_size": 20, "color": "#2F2F2F", "indent": 20}, + "title": {"font_size": 32, "color": "#1B365D", "bold": True, "align": "left"}, + "heading": {"font_size": 24, "color": "#1B365D", "bold": True, "align": "left"}, + "subheading": {"font_size": 20, "color": "#4A90E2", "bold": True, "align": "left"}, + "paragraph": {"font_size": 14, "color": "#2F2F2F", "bold": False, "align": "left"}, + "bullet_list": {"font_size": 14, "color": "#2F2F2F", "indent": 20}, "table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"}, "table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"}, "slide_size": "16:9", @@ -724,11 +638,15 @@ JSON ONLY. NO OTHER TEXT.""" # Get section title from data or use default section_title = "Untitled Section" if section.get("content_type") == "heading": - # Extract text from elements array + # Extract text from elements array - use nested content structure for element in section.get("elements", []): - if isinstance(element, dict) and "text" in element: - section_title = element.get("text", "Untitled Section") - break + if isinstance(element, dict): + content = element.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + if text: + section_title = text + break elif section.get("title"): section_title = section.get("title") @@ -738,7 +656,10 @@ JSON ONLY. 
NO OTHER TEXT.""" # Check for three content formats from Phase 5D in elements content_parts = [] for element in elements: - element_type = element.get("type", "") if isinstance(element, dict) else "" + if not isinstance(element, dict): + continue + + element_type = element.get("type", "") # Support three content formats from Phase 5D if element_type == "reference": @@ -782,25 +703,47 @@ JSON ONLY. NO OTHER TEXT.""" }) return { - "title": section_title or (elements[0].get("altText", "Image") if elements else "Image"), + "title": section_title or (elements[0].get("content", {}).get("altText", "Image") if elements and isinstance(elements[0], dict) else "Image"), "content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present "images": images } - # Build slide content based on section type + # Build slide content based on section type - iterate over elements and format each if not content_parts: # Only if we didn't process reference/extracted_text above - if content_type == "table": - content_parts.append(self._formatTableForSlide(elements)) - elif content_type == "list": - content_parts.append(self._formatListForSlide(elements)) - elif content_type == "heading": - content_parts.append(self._formatHeadingForSlide(elements)) - elif content_type == "paragraph": - content_parts.append(self._formatParagraphForSlide(elements)) - elif content_type == "code": - content_parts.append(self._formatCodeForSlide(elements)) - else: - content_parts.append(self._formatParagraphForSlide(elements)) + for element in elements: + if not isinstance(element, dict): + continue + + element_type = element.get("type", "") + # Use element type if available, otherwise fall back to section content_type + if not element_type: + element_type = content_type + + if element_type == "table": + formatted = self._formatTableForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "bullet_list" or element_type == "list": + 
formatted = self._formatListForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "heading": + formatted = self._formatHeadingForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "paragraph": + formatted = self._formatParagraphForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "code_block" or element_type == "code": + formatted = self._formatCodeForSlide(element) + if formatted: + content_parts.append(formatted) + else: + # Fallback to paragraph formatting + formatted = self._formatParagraphForSlide(element) + if formatted: + content_parts.append(formatted) # Combine content parts slide_content = "\n\n".join(filter(None, content_parts)) @@ -1002,7 +945,7 @@ JSON ONLY. NO OTHER TEXT.""" return 1 # Default to title and content layout def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: - """Create slides from sections: each heading creates a new slide, content accumulates until next heading.""" + """Create slides from sections: each heading level 1 (chapter) creates a new slide, content accumulates until next level 1 heading.""" try: slides = [] current_slide_sections = [] # Store sections (not formatted text) for proper rendering @@ -1017,74 +960,43 @@ JSON ONLY. 
NO OTHER TEXT.""" continue if section_type == "heading": - # If we have accumulated content, create a slide - if current_slide_sections: - slides.append({ - "title": current_slide_title, - "sections": current_slide_sections.copy(), # Store sections for proper rendering - "images": [] - }) - current_slide_sections = [] - - # Start new slide with heading as title - heading_found = False + # Extract heading level + level = 1 # Default + heading_text = "" for element in elements: if isinstance(element, dict): # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): heading_text = content.get("text", "") + level = content.get("level", 1) elif isinstance(content, str): heading_text = content - else: - heading_text = "" - - if heading_text: - current_slide_title = heading_text - heading_found = True - break + level = 1 - # If no heading text found but this is a heading section, use section ID or default - if not heading_found: - current_slide_title = section.get("id", "Untitled Section") + # Only level 1 headings (chapters) create new slides + if level == 1: + # If we have accumulated content, create a slide + if current_slide_sections: + slides.append({ + "title": current_slide_title, + "sections": current_slide_sections.copy(), # Store sections for proper rendering + "images": [] + }) + current_slide_sections = [] + + # Start new slide with heading as title + if heading_text: + current_slide_title = heading_text + else: + # If no heading text found but this is a heading section, use section ID or default + current_slide_title = section.get("id", "Untitled Section") + else: + # Level 2+ headings are added as sections to current slide + current_slide_sections.append(section) elif section_type == "image": - # Create separate slide for image - if current_slide_sections: - slides.append({ - "title": current_slide_title, - "sections": current_slide_sections.copy(), - "images": [] - }) - current_slide_sections = [] - - # 
Extract image data - imageData = [] - for element in elements: - if isinstance(element, dict): - # Extract from nested content structure - content = element.get("content", {}) - if isinstance(content, dict): - base64Data = content.get("base64Data") - altText = content.get("altText", "Image") - caption = content.get("caption", "") - else: - # Fallback to direct element fields - base64Data = element.get("base64Data") - altText = element.get("altText", "Image") - caption = element.get("caption", "") - - if base64Data: - imageData.append({ - "base64Data": base64Data, - "altText": altText, - "caption": caption - }) - - slides.append({ - "title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"), - "sections": [], - "images": imageData - }) + # Images are added to current slide (will be organized in frames) + current_slide_sections.append(section) else: # Add section to current slide (will be rendered properly) current_slide_sections.append(section) @@ -1113,21 +1025,42 @@ JSON ONLY. 
NO OTHER TEXT.""" if content_type == "image": return "" - # Process each element in the section + # Process each element in the section - use element type, not section type content_parts = [] for element in elements: - if content_type == "table": - content_parts.append(self._formatTableForSlide(element)) - elif content_type == "bullet_list" or content_type == "list": - content_parts.append(self._formatListForSlide(element)) - elif content_type == "heading": - content_parts.append(self._formatHeadingForSlide(element)) - elif content_type == "paragraph": - content_parts.append(self._formatParagraphForSlide(element)) - elif content_type == "code_block" or content_type == "code": - content_parts.append(self._formatCodeForSlide(element)) + if not isinstance(element, dict): + continue + + element_type = element.get("type", "") + # Use element type if available, otherwise fall back to section content_type + if not element_type: + element_type = content_type + + if element_type == "table": + formatted = self._formatTableForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "bullet_list" or element_type == "list": + formatted = self._formatListForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "heading": + formatted = self._formatHeadingForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "paragraph": + formatted = self._formatParagraphForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "code_block" or element_type == "code": + formatted = self._formatCodeForSlide(element) + if formatted: + content_parts.append(formatted) else: - content_parts.append(self._formatParagraphForSlide(element)) + # Fallback to paragraph formatting + formatted = self._formatParagraphForSlide(element) + if formatted: + content_parts.append(formatted) return "\n\n".join(filter(None, content_parts)) @@ -1166,80 +1099,80 @@ JSON ONLY. 
NO OTHER TEXT.""" img = images[0] base64Data = img.get("base64Data") # Validate base64Data is present and not empty - if base64Data and isinstance(base64Data, str) and len(base64Data.strip()) > 0: - try: - imageBytes = base64.b64decode(base64Data) - if len(imageBytes) == 0: - logger.error("Decoded image bytes are empty") - return - imageStream = io.BytesIO(imageBytes) - except Exception as decode_error: - logger.error(f"Failed to decode base64 image data: {str(decode_error)}") - return - else: + if not base64Data or not isinstance(base64Data, str) or len(base64Data.strip()) == 0: logger.error(f"Invalid base64Data: present={bool(base64Data)}, type={type(base64Data)}, length={len(base64Data) if base64Data else 0}") return + + try: + imageBytes = base64.b64decode(base64Data) + if len(imageBytes) == 0: + logger.error("Decoded image bytes are empty") + return + imageStream = io.BytesIO(imageBytes) + except Exception as decode_error: + logger.error(f"Failed to decode base64 image data: {str(decode_error)}") + return + + # Get image dimensions + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgWidth, imgHeight = pilImage.size - # Get image dimensions - try: - from PIL import Image as PILImage - pilImage = PILImage.open(imageStream) - imgWidth, imgHeight = pilImage.size - - # Scale to fit available space (max 90% of slide for better visibility) - # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96) - # Conversion: pixels * (72/96) = points - imgWidthPoints = imgWidth * (72.0 / 96.0) - imgHeightPoints = imgHeight * (72.0 / 96.0) - - maxWidth = availableWidth * 0.9 - maxHeight = availableHeight * 0.9 - - scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0) - finalWidth = imgWidthPoints * scale - finalHeight = imgHeightPoints * scale - - # Center image - left = (slideWidth - finalWidth) / 2 - top = titleHeight + (availableHeight - finalHeight) / 2 - - imageStream.seek(0) - except 
Exception: - # Fallback: use default size - finalWidth = Inches(6) - finalHeight = Inches(4.5) - left = (slideWidth - finalWidth) / 2 - top = titleHeight + Inches(1) - imageStream.seek(0) + # Scale to fit available space (max 90% of slide for better visibility) + # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96) + # Conversion: pixels * (72/96) = points + imgWidthPoints = imgWidth * (72.0 / 96.0) + imgHeightPoints = imgHeight * (72.0 / 96.0) - # Add image to slide - try: + maxWidth = availableWidth * 0.9 + maxHeight = availableHeight * 0.9 + + scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0) + finalWidth = imgWidthPoints * scale + finalHeight = imgHeightPoints * scale + + # Center image + left = (slideWidth - finalWidth) / 2 + top = titleHeight + (availableHeight - finalHeight) / 2 + + imageStream.seek(0) + except Exception: + # Fallback: use default size + finalWidth = Inches(6) + finalHeight = Inches(4.5) + left = (slideWidth - finalWidth) / 2 + top = titleHeight + Inches(1) + imageStream.seek(0) + + # Add image to slide + try: + slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) + except Exception as add_error: + # If add_picture fails, try with explicit format + imageStream.seek(0) + # Ensure we have valid image data + if len(imageBytes) > 0: slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) - except Exception as add_error: - # If add_picture fails, try with explicit format - imageStream.seek(0) - # Ensure we have valid image data - if len(imageBytes) > 0: - slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) - else: - raise Exception(f"Empty image data: {add_error}") - - # Add caption if available - caption = img.get("caption") or img.get("altText") - if caption and caption != "Image": - # Add text box below image - captionTop = top + finalHeight + Inches(0.2) - captionBox = 
slide.shapes.add_textbox( - Inches(1), - captionTop, - slideWidth - Inches(2), - Inches(0.5) - ) - captionFrame = captionBox.text_frame - captionFrame.text = caption - captionFrame.paragraphs[0].font.size = Pt(12) - captionFrame.paragraphs[0].font.italic = True - captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER + else: + raise Exception(f"Empty image data: {add_error}") + + # Add caption if available + caption = img.get("caption") or img.get("altText") + if caption and caption != "Image": + # Add text box below image + captionTop = top + finalHeight + Inches(0.2) + captionBox = slide.shapes.add_textbox( + Inches(1), + captionTop, + slideWidth - Inches(2), + Inches(0.5) + ) + captionFrame = captionBox.text_frame + captionFrame.text = caption + captionFrame.paragraphs[0].font.size = Pt(12) + captionFrame.paragraphs[0].font.italic = True + captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER else: # Multiple images: arrange in grid cols = 2 if len(images) <= 4 else 3 @@ -1267,7 +1200,7 @@ JSON ONLY. NO OTHER TEXT.""" import traceback logger.error(f"Traceback: {traceback.format_exc()}") - def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float) -> None: + def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float, max_width: float = None) -> None: """Add a PowerPoint table to slide.""" try: from pptx.util import Inches, Pt @@ -1286,25 +1219,27 @@ JSON ONLY. 
NO OTHER TEXT.""" return # Calculate table dimensions - num_cols = len(headers) - num_rows = len(rows) + 1 # +1 for header row + num_cols = int(len(headers)) # Ensure integer + num_rows = int(len(rows) + 1) # +1 for header row, ensure integer left = Inches(0.5) # Get presentation from stored reference or slide if hasattr(self, '_currentPresentation'): prs = self._currentPresentation else: prs = slide.presentation - width = prs.slide_width - Inches(1) + width = max_width if max_width is not None else (prs.slide_width - Inches(1)) row_height = Inches(0.4) - # Create table - table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, row_height * num_rows) + # Create table - ensure all parameters are proper types + table_height = row_height * num_rows + table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, table_height) table = table_shape.table - # Set column widths - col_width = width / num_cols + # Set column widths - width is in EMU, divide evenly + # python-pptx expects EMU values (914400 EMU = 1 inch) + col_width_emu = int(width) // num_cols # Ensure integer division for EMU for col_idx in range(num_cols): - table.columns[col_idx].width = col_width + table.columns[col_idx].width = col_width_emu # Add headers with styling header_style = styles.get("table_header", {}) @@ -1314,20 +1249,33 @@ JSON ONLY. 
NO OTHER TEXT.""" for col_idx, header in enumerate(headers): cell = table.cell(0, col_idx) - cell.text = str(header) + # Clear existing text and set new text + cell.text_frame.clear() + cell.text = str(header) if header else "" + + # Ensure paragraph exists + if len(cell.text_frame.paragraphs) == 0: + cell.text_frame.add_paragraph() + + # Apply styling cell.fill.solid() cell.fill.fore_color.rgb = RGBColor(*header_bg_color) - cell.text_frame.paragraphs[0].font.bold = header_style.get("bold", True) - cell.text_frame.paragraphs[0].font.size = Pt(header_font_size) - cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*header_text_color) + para = cell.text_frame.paragraphs[0] + para.font.bold = header_style.get("bold", True) + para.font.size = Pt(header_font_size) + para.font.color.rgb = RGBColor(*header_text_color) align = header_style.get("align", "center") if align == "left": - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + para.alignment = PP_ALIGN.LEFT elif align == "right": - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT + para.alignment = PP_ALIGN.RIGHT else: - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER + para.alignment = PP_ALIGN.CENTER + + # Ensure text is set on paragraph + if not para.text: + para.text = str(header) if header else "" # Add data rows with styling cell_style = styles.get("table_cell", {}) @@ -1338,25 +1286,38 @@ JSON ONLY. 
NO OTHER TEXT.""" for row_idx, row_data in enumerate(rows, 1): for col_idx, cell_data in enumerate(row_data[:num_cols]): cell = table.cell(row_idx, col_idx) - cell.text = str(cell_data) + # Clear existing text and set new text + cell.text_frame.clear() + cell.text = str(cell_data) if cell_data is not None else "" + + # Ensure paragraph exists + if len(cell.text_frame.paragraphs) == 0: + cell.text_frame.add_paragraph() + + # Apply styling cell.fill.solid() cell.fill.fore_color.rgb = RGBColor(*cell_bg_color) - cell.text_frame.paragraphs[0].font.size = Pt(cell_font_size) - cell.text_frame.paragraphs[0].font.bold = cell_style.get("bold", False) - cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*cell_text_color) + para = cell.text_frame.paragraphs[0] + para.font.size = Pt(cell_font_size) + para.font.bold = cell_style.get("bold", False) + para.font.color.rgb = RGBColor(*cell_text_color) align = cell_style.get("align", "left") if align == "center": - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER + para.alignment = PP_ALIGN.CENTER elif align == "right": - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT + para.alignment = PP_ALIGN.RIGHT else: - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + para.alignment = PP_ALIGN.LEFT + + # Ensure text is set on paragraph + if not para.text: + para.text = str(cell_data) if cell_data is not None else "" except Exception as e: logger.warning(f"Error adding table to slide: {str(e)}") - def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add bullet list to slide text frame.""" try: from pptx.util import Pt @@ -1373,31 +1334,91 @@ JSON ONLY. 
NO OTHER TEXT.""" return list_style = styles.get("bullet_list", {}) - for item in items: - p = text_frame.add_paragraph() - if isinstance(item, dict): - p.text = item.get("text", "") - else: - p.text = str(item) - - p.level = 0 - p.font.size = Pt(list_style.get("font_size", 18)) - p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47)))) - p.alignment = PP_ALIGN.LEFT # Left align bullet lists - p.space_before = Pt(6) - # Enable bullet points - set bullet type to enable bullets + base_font_size = list_style.get("font_size", 14) + calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability + + logger.debug(f"Rendering bullet list with {len(items)} items") + + for idx, item in enumerate(items): try: - from pptx.enum.text import MSO_AUTO_NUMBER - p.paragraph_format.bullet.type = MSO_AUTO_NUMBER.BULLET - except (ImportError, AttributeError): - # Fallback: bullets are usually enabled by default when level is set - # Just ensure level is set (already done above) - pass + # Get text content first + if isinstance(item, dict): + item_text = item.get("text", "") + else: + item_text = str(item) + + # Skip empty items + if not item_text or len(item_text.strip()) == 0: + logger.debug(f"Skipping empty bullet item {idx}") + continue + + # Create new paragraph for each bullet item + p = text_frame.add_paragraph() + + # Set level to 1 for bullet points BEFORE setting text + # In python-pptx, setting level > 0 should automatically enable bullets + p.level = 1 + + # Set text content + p.text = item_text + + # Apply formatting first + p.font.size = Pt(calculated_size) + p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47)))) + p.alignment = PP_ALIGN.LEFT # Left align bullet lists + p.space_before = Pt(2) # Small spacing before + p.space_after = Pt(2) # Small spacing after + + # In python-pptx, setting level > 0 should enable bullets automatically + # However, some versions may not 
support paragraph_format, so we'll use manual bullets as fallback + # Always add manual bullet character to ensure visibility + if not (p.text.startswith('•') or p.text.startswith('-') or p.text.startswith('*') or p.text.startswith('◦')): + p.text = '• ' + p.text + logger.debug(f"Added manual bullet character to item {idx}") + + # Set proper indentation for multiline bullets (hanging indent) + # For multiline bullets: bullet at left margin, text indented, wrapped lines align with text + try: + # Try accessing paragraph_format - it may not exist in all python-pptx versions + if hasattr(p, 'paragraph_format'): + pf = p.paragraph_format + # Left indent: indents the entire paragraph (bullet + text) + pf.left_indent = Pt(18) + # First line indent: negative value creates hanging indent + # This brings the bullet back to the left while keeping text indented + pf.first_line_indent = Pt(-18) # Negative to create hanging indent + logger.debug(f"Set hanging indent for bullet item {idx}") + else: + # Try via _element if paragraph_format not available + try: + from pptx.util import Pt as PtUtil + pPr = p._element.get_or_add_pPr() + # Set left margin (indents entire paragraph) + pPr.left_margin = PtUtil(18) + # Set first line indent (negative for hanging indent) + pPr.first_line_indent = PtUtil(-18) + logger.debug(f"Set hanging indent via XML for bullet item {idx}") + except Exception as xml_error: + logger.debug(f"Could not set hanging indent via XML: {str(xml_error)}") + # Indentation is optional, continue without it + pass + except Exception as indent_error: + logger.debug(f"Could not set indent for item {idx}: {str(indent_error)}") + # Continue without indent - bullets will still show, but multiline won't be properly indented + + logger.debug(f"Successfully added bullet item {idx}: '{item_text[:50]}...'") + + except Exception as item_error: + logger.error(f"Error adding bullet item {idx}: {str(item_error)}", exc_info=True) + # Continue with next item even if one fails + 
continue + + logger.debug(f"Completed rendering bullet list, added {len(text_frame.paragraphs)} paragraphs") except Exception as e: logger.warning(f"Error adding bullet list to slide: {str(e)}") - def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add heading to slide text frame.""" try: from pptx.util import Pt @@ -1414,17 +1435,32 @@ JSON ONLY. NO OTHER TEXT.""" if text: p = text_frame.add_paragraph() p.text = text - p.level = min(level - 1, 2) # PowerPoint supports 0-2 levels + # Headings should be level 0 (no indentation) regardless of heading level + p.level = 0 heading_style = styles.get("heading", {}) - p.font.size = Pt(heading_style.get("font_size", 32)) + # Different font sizes for different heading levels + if level == 1: + base_font_size = heading_style.get("font_size", 28) # Largest for H1 + elif level == 2: + base_font_size = heading_style.get("font_size", 22) # Medium for H2 + elif level == 3: + base_font_size = heading_style.get("font_size", 18) # Smaller for H3 + else: + base_font_size = heading_style.get("font_size", 16) # Default for H4+ + + calculated_size = max(12, int(base_font_size * font_size_multiplier)) # Minimum 12pt for headings + p.font.size = Pt(calculated_size) p.font.bold = heading_style.get("bold", True) - p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (47, 47, 47)))) + p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (31, 78, 121)))) + # Add spacing before and after headings + p.space_before = Pt(12 if level == 1 else 8) # More space before H1 + p.space_after = Pt(6) # Space after heading except Exception as e: logger.warning(f"Error adding heading to slide: {str(e)}") - def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + def 
_addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add paragraph to slide text frame.""" try: from pptx.util import Pt @@ -1443,12 +1479,28 @@ JSON ONLY. NO OTHER TEXT.""" if text: p = text_frame.add_paragraph() p.text = text + # Explicitly set level to 0 for regular paragraphs (not bullets) + p.level = 0 + + # Ensure no bullet formatting + try: + if hasattr(p, 'paragraph_format'): + p.paragraph_format.bullet.type = None + except (AttributeError, TypeError): + pass paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) + base_font_size = paragraph_style.get("font_size", 14) # Smaller default for better readability + calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability + p.font.size = Pt(calculated_size) p.font.bold = paragraph_style.get("bold", False) p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + # Add proper spacing + p.space_before = Pt(6) # Space before paragraph + p.space_after = Pt(6) # Space after paragraph + p.line_spacing = 1.2 # Line spacing for readability + align = paragraph_style.get("align", "left") if align == "center": p.alignment = PP_ALIGN.CENTER @@ -1460,7 +1512,7 @@ JSON ONLY. NO OTHER TEXT.""" except Exception as e: logger.warning(f"Error adding paragraph to slide: {str(e)}") - def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add code block to slide text frame.""" try: from pptx.util import Pt @@ -1477,13 +1529,15 @@ JSON ONLY. 
NO OTHER TEXT.""" if code: code_style = styles.get("code_block", {}) code_font = code_style.get("font", "Courier New") - code_font_size = code_style.get("font_size", 9) + base_code_font_size = code_style.get("font_size", 9) + code_font_size = max(6, int(base_code_font_size * font_size_multiplier)) # Minimum 6pt for code code_color = self._getSafeColor(code_style.get("color", (47, 47, 47))) p = text_frame.add_paragraph() if language: p.text = f"Code ({language}):" p.font.bold = True + p.font.size = Pt(code_font_size) p = text_frame.add_paragraph() p.text = code @@ -1498,3 +1552,593 @@ JSON ONLY. NO OTHER TEXT.""" """Format current timestamp for presentation generation.""" # datetime and UTC are already imported at module level return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") + + def _renderSlideContentWithFrames(self, slide, slide_sections: List[Dict[str, Any]], slide_images: List[Dict[str, Any]], styles: Dict[str, Any], prs) -> None: + """ + Organize slide content into frames for better layout. + Groups content by type (images, bullet lists, paragraphs, tables) and renders each in appropriately sized frames. 
+ """ + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor + + # Extract images from sections first + images_to_render = list(slide_images) if slide_images else [] + text_sections = [] + table_sections = [] + + for section in slide_sections: + section_type = section.get("content_type", "paragraph") + elements = section.get("elements", []) + + if not elements: + # Skip empty sections + continue + + # Extract images from all sections + section_has_images = False + for element in elements: + if isinstance(element, dict) and element.get("type") == "image": + content = element.get("content", {}) + base64Data = None + + # Handle different content formats + if isinstance(content, dict): + base64Data = content.get("base64Data") + altText = content.get("altText", "Image") + caption = content.get("caption", "") + elif isinstance(content, str): + # If content is a string, it might be base64 data directly + # Check if it looks like base64 + if len(content) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in content[:100]): + base64Data = content + altText = "Image" + caption = "" + else: + # Not base64, skip + continue + else: + # Try to get base64Data directly from element + base64Data = element.get("base64Data") + altText = element.get("altText", "Image") + caption = element.get("caption", "") + + if base64Data: + images_to_render.append({ + "base64Data": base64Data, + "altText": altText, + "caption": caption + }) + section_has_images = True + + # Skip image-only sections (they're already added to images_to_render) + if section_type == "image" and section_has_images: + continue + + # Categorize sections (excluding image elements) + has_table = False + non_image_elements = [] + + for element in elements: + if isinstance(element, dict): + element_type = element.get("type", "") + # Skip image elements when categorizing + if element_type == "image": + continue + if 
element_type == "table" or section_type == "table": + has_table = True + non_image_elements.append(element) + + # Only add sections that have non-image content + if non_image_elements: + if has_table: + # Create a copy of section without image elements for table rendering + table_section = { + **section, + "elements": non_image_elements + } + table_sections.append(table_section) + else: + # Create a copy of section without image elements for text rendering + text_section = { + **section, + "elements": non_image_elements + } + text_sections.append(text_section) + + # Calculate layout dimensions + title_height = Inches(1.5) + available_height = prs.slide_height - title_height - Inches(0.5) # Title + margin + available_width = prs.slide_width - Inches(1) # Margins + margin = Inches(0.5) + + current_y = title_height + Inches(0.3) + + # Determine layout strategy based on content types + has_images = len(images_to_render) > 0 + has_tables = len(table_sections) > 0 + has_text = len(text_sections) > 0 + + # Layout 1: Images + Text (horizontal split for landscape) + if has_images and has_text and not has_tables: + # Horizontal split: images on left, text on right (landscape format) + img_width = available_width * 0.48 + text_width = available_width * 0.48 + img_left = margin + text_left = margin + img_width + Inches(0.2) + + # Render images in left column (full height) + if images_to_render: + img_height = available_height - Inches(0.2) + self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) + + # Render text in right column (full height, adaptive font size) + if text_sections: + text_height = available_height - Inches(0.2) + self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) + + # Layout 2: Tables + Text (horizontal split for landscape) + elif has_tables and has_text: + # Horizontal split: tables on left, text on right (landscape format) + 
table_width = available_width * 0.48 + text_width = available_width * 0.48 + table_left = margin + text_left = margin + table_width + Inches(0.2) + + # Render tables in left column (full height) + table_y = current_y + for table_section in table_sections: + elements = table_section.get("elements", []) + for element in elements: + if isinstance(element, dict) and element.get("type") == "table": + try: + self._addTableToSlide(slide, element, styles, table_y, max_width=table_width) + # Calculate actual table height + content = element.get("content", {}) + if isinstance(content, dict): + rows = content.get("rows", []) + num_rows = len(rows) + 1 # +1 for header + actual_height = Inches(0.4) * num_rows + table_y += actual_height + Inches(0.15) + else: + table_y += Inches(2) + except Exception as table_error: + logger.error(f"Error rendering table: {str(table_error)}") + # Continue with next table + break + + # Render text in right column (full height, adaptive font size) + if text_sections: + text_height = available_height - Inches(0.2) + self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) + + # Layout 3: Images + Tables + Text (horizontal split for landscape) + elif has_images and has_tables and has_text: + # Horizontal split: Images (left), Tables (middle), Text (right) + img_width = available_width * 0.31 + table_width = available_width * 0.31 + text_width = available_width * 0.31 + img_left = margin + table_left = margin + img_width + Inches(0.15) + text_left = margin + img_width + table_width + Inches(0.3) + + # Render images in left column (full height) + if images_to_render: + img_height = available_height - Inches(0.2) + self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) + + # Render tables in middle column (full height) + table_y = current_y + for table_section in table_sections: + elements = table_section.get("elements", []) + 
for element in elements: + if isinstance(element, dict) and element.get("type") == "table": + try: + self._addTableToSlide(slide, element, styles, table_y, max_width=table_width) + content = element.get("content", {}) + if isinstance(content, dict): + rows = content.get("rows", []) + num_rows = len(rows) + 1 + actual_height = Inches(0.4) * num_rows + table_y += actual_height + Inches(0.15) + else: + table_y += Inches(2) + except Exception as table_error: + logger.error(f"Error rendering table: {str(table_error)}") + break + + # Render text in right column (full height, adaptive font size) + if text_sections: + text_height = available_height - Inches(0.2) + self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) + + # Layout 4: Images only + elif has_images and not has_text and not has_tables: + img_width = available_width * 0.8 + img_height = available_height * 0.8 + img_left = (available_width - img_width) / 2 + margin + self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) + + # Layout 5: Text only (default, adaptive font size) + elif has_text and not has_images and not has_tables: + text_height = available_height - Inches(0.2) + self._renderTextSectionsInFrame(slide, text_sections, styles, margin, current_y, available_width, text_height, adaptiveFontSize=True) + + # Layout 6: Tables only + elif has_tables and not has_images and not has_text: + table_height = available_height / max(len(table_sections), 1) + table_width = available_width + for table_section in table_sections: + elements = table_section.get("elements", []) + for element in elements: + if isinstance(element, dict) and element.get("type") == "table": + try: + self._addTableToSlide(slide, element, styles, current_y, max_width=table_width) + # Calculate actual table height + content = element.get("content", {}) + if isinstance(content, dict): + rows = content.get("rows", []) 
+ num_rows = len(rows) + 1 # +1 for header + actual_height = min(Inches(0.4) * num_rows, table_height) + current_y += actual_height + Inches(0.2) + else: + current_y += table_height + Inches(0.2) + except Exception as table_error: + logger.error(f"Error rendering table: {str(table_error)}") + # Continue with next table + break + + except Exception as e: + logger.error(f"Error rendering slide content with frames: {str(e)}") + # Fallback to simple rendering + try: + content_shape = slide.placeholders[1] + text_frame = content_shape.text_frame + text_frame.clear() + except (AttributeError, IndexError): + from pptx.util import Inches + left = Inches(0.5) + top = Inches(1.5) + width = prs.slide_width - Inches(1) + height = prs.slide_height - top - Inches(0.5) + textbox = slide.shapes.add_textbox(left, top, width, height) + text_frame = textbox.text_frame + text_frame.word_wrap = True + + # Simple fallback rendering + for section in slide_sections: + self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier=1.0) + + def _renderTextSectionsInFrame(self, slide, text_sections: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float, adaptiveFontSize: bool = False) -> None: + """Render text sections (paragraphs, lists, headings) in a text frame.""" + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor + + # Calculate total text length for adaptive font sizing + total_text_length = 0 + if adaptiveFontSize: + for section in text_sections: + elements = section.get("elements", []) + for element in elements: + if isinstance(element, dict): + element_type = element.get("type", "") + if element_type in ["paragraph", "bullet_list", "list", "heading"]: + content = element.get("content", "") + if isinstance(content, dict): + if "text" in content: + total_text_length += len(str(content["text"])) + elif "items" in content: + for item in 
content.get("items", []): + total_text_length += len(str(item)) + elif isinstance(content, str): + total_text_length += len(content) + + # Calculate adaptive font size multiplier based on text length and frame size + font_size_multiplier = 1.0 + if adaptiveFontSize and total_text_length > 0: + try: + # More accurate calculation: estimate characters per line based on average character width + # Average character width is approximately 0.6 * font_size in points + # For 14pt font, average char width ≈ 8.4pt + avg_char_width_pt = 8.4 # Approximate for 14pt font + chars_per_line = max(1, int(float(width) / avg_char_width_pt)) + + # Estimate lines needed + lines_needed = total_text_length / max(chars_per_line, 1) + + # Available lines based on height (line height ≈ 1.2 * font_size) + line_height_pt = 16.8 # Approximate for 14pt font with 1.2 spacing + available_lines = max(1, int(float(height) / line_height_pt)) + + if available_lines > 0 and lines_needed > available_lines: + # More aggressive scaling for long texts + # Calculate exact scale needed, then add 10% buffer + scale_needed = available_lines / lines_needed + font_size_multiplier = scale_needed * 0.9 # 10% buffer + # Allow scaling down to 50% for very long texts (minimum readable) + font_size_multiplier = max(0.5, min(1.0, font_size_multiplier)) + elif lines_needed <= available_lines * 0.7: + # If text is much shorter than available space, can use slightly larger font + font_size_multiplier = min(1.1, (available_lines / lines_needed) * 0.8) + except (ZeroDivisionError, ValueError, TypeError) as calc_error: + logger.debug(f"Font size calculation error: {str(calc_error)}") + # Fallback to default if calculation fails + font_size_multiplier = 1.0 + + textbox = slide.shapes.add_textbox(left, top, width, height) + text_frame = textbox.text_frame + text_frame.word_wrap = True + text_frame.auto_size = None # Disable auto-size for fixed frame + # Ensure text frame can display bullets + text_frame.margin_left = Pt(0) + 
text_frame.margin_right = Pt(0) + text_frame.margin_top = Pt(0) + text_frame.margin_bottom = Pt(0) + + # Pass font size multiplier to rendering methods + for section in text_sections: + self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier) + + except Exception as e: + logger.warning(f"Error rendering text sections in frame: {str(e)}") + + def _renderSectionToTextFrame(self, slide, section: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: + """Render a single section to a text frame.""" + try: + from pptx.util import Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor + + section_type = section.get("content_type", "paragraph") + elements = section.get("elements", []) + + if not elements: + return + + for element in elements: + if not isinstance(element, dict): + continue + + element_type = element.get("type", "") + if not element_type: + element_type = section_type + + # Skip images - handled separately + if element_type == "image": + continue + + if element_type == "bullet_list" or element_type == "list": + self._addBulletListToSlide(slide, element, styles, text_frame, font_size_multiplier) + elif element_type == "heading": + self._addHeadingToSlide(slide, element, styles, text_frame, font_size_multiplier) + elif element_type == "paragraph": + self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier) + elif element_type == "code_block" or element_type == "code": + self._addCodeBlockToSlide(slide, element, styles, text_frame, font_size_multiplier) + elif element_type == "extracted_text": + content = element.get("content", "") + source = element.get("source", "") + if content: + paragraph_style = styles.get("paragraph", {}) + p = text_frame.add_paragraph() + p.text = content + base_font_size = paragraph_style.get("font_size", 18) + p.font.size = Pt(int(base_font_size * font_size_multiplier)) + p.font.bold = paragraph_style.get("bold", 
False) + p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + p.alignment = PP_ALIGN.LEFT + if source: + p.add_run(f" (Source: {source})").font.italic = True + elif element_type == "reference": + label = element.get("label", "Reference") + p = text_frame.add_paragraph() + p.text = f"[Reference: {label}]" + p.font.italic = True + p.alignment = PP_ALIGN.LEFT + else: + # Fallback to paragraph + content = element.get("content", "") + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + + if text: + self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier=1.0) + + except Exception as e: + logger.warning(f"Error rendering section to text frame: {str(e)}") + + def _addImagesToSlideInFrame(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float) -> None: + """Add images to slide within a specific frame area.""" + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + import base64 + import io + + if not images: + logger.debug("No images to render in frame") + return + + logger.info(f"Rendering {len(images)} image(s) in frame at ({left}, {top}), size ({width}, {height})") + + # Calculate image dimensions within frame + if len(images) == 1: + # Single image: fit to frame + img = images[0] + base64Data = img.get("base64Data") + + if not base64Data: + logger.warning("Image has no base64Data") + return + + # Clean base64 data (remove data URI prefix if present) + if isinstance(base64Data, str): + if base64Data.startswith("data:image/"): + # Extract base64 from data URI + base64Data = base64Data.split(",", 1)[1] + # Remove any whitespace + base64Data = base64Data.strip() + + try: + # Decode base64 + imageBytes = base64.b64decode(base64Data, validate=True) + if len(imageBytes) == 0: + logger.error("Decoded image bytes are empty") + return + + 
imageStream = io.BytesIO(imageBytes) + + # Get image dimensions using PIL + imgWidth, imgHeight = None, None + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgWidth, imgHeight = pilImage.size + imageStream.seek(0) # Reset stream for PowerPoint + + # Validate image dimensions - ensure they're reasonable + if imgWidth <= 1 or imgHeight <= 1: + logger.warning(f"Image has invalid dimensions: {imgWidth}x{imgHeight}, using default size") + imgWidth, imgHeight = 800, 600 + imageStream.seek(0) + elif imgWidth < 100 or imgHeight < 100: + logger.warning(f"Image dimensions very small: {imgWidth}x{imgHeight}, may appear tiny") + except ImportError: + logger.warning("PIL not available, using default image size") + imgWidth, imgHeight = 800, 600 # Default dimensions + except Exception as pil_error: + logger.warning(f"Error getting image dimensions with PIL: {str(pil_error)}, using default size") + imgWidth, imgHeight = 800, 600 + imageStream.seek(0) + + # Ensure we have valid dimensions + if not imgWidth or not imgHeight or imgWidth <= 1 or imgHeight <= 1: + logger.warning("Invalid image dimensions, using default 800x600") + imgWidth, imgHeight = 800, 600 + + # Scale to fit frame while maintaining aspect ratio + # width and height parameters are already in Inches (from pptx.util.Inches) + # Convert PIL pixel dimensions to Inches (assuming 96 DPI for PIL images) + imgWidthInches = Inches(imgWidth / 96.0) + imgHeightInches = Inches(imgHeight / 96.0) + + # Calculate scale to fit within frame + # Inches objects support division, result is a float + try: + scale_width = width / imgWidthInches if imgWidthInches > 0 else 1.0 + scale_height = height / imgHeightInches if imgHeightInches > 0 else 1.0 + scale = min(scale_width, scale_height, 1.0) # Don't scale up, only down + + finalWidth = imgWidthInches * scale + finalHeight = imgHeightInches * scale + + # Ensure minimum size (at least 1 inch) to prevent tiny rendering + minSize = Inches(1) + if 
finalWidth < minSize or finalHeight < minSize: + # Use minimum size while maintaining aspect ratio + min_scale = max(minSize / imgWidthInches if imgWidthInches > 0 else 1.0, + minSize / imgHeightInches if imgHeightInches > 0 else 1.0) + finalWidth = max(minSize, imgWidthInches * min_scale) + finalHeight = max(minSize, imgHeightInches * min_scale) + + # Ensure we don't exceed frame bounds + if finalWidth > width: + finalWidth = width + finalHeight = imgHeightInches * (width / imgWidthInches) if imgWidthInches > 0 else finalHeight + if finalHeight > height: + finalHeight = height + finalWidth = imgWidthInches * (height / imgHeightInches) if imgHeightInches > 0 else finalWidth + except (ZeroDivisionError, TypeError, AttributeError) as calc_error: + logger.warning(f"Error calculating image size: {str(calc_error)}, using frame size") + finalWidth = width * 0.9 # Use 90% of frame width + finalHeight = height * 0.9 # Use 90% of frame height + + # Center in frame + frame_left = left + (width - finalWidth) / 2 + frame_top = top + (height - finalHeight) / 2 + + # Add image to slide + imageStream.seek(0) + slide.shapes.add_picture(imageStream, frame_left, frame_top, width=finalWidth, height=finalHeight) + logger.info(f"Successfully added image to slide at ({frame_left}, {frame_top}), size ({finalWidth}, {finalHeight})") + + # Add caption if available + caption = img.get("caption") or img.get("altText") + if caption and caption != "Image": + captionTop = frame_top + finalHeight + Inches(0.1) + captionBox = slide.shapes.add_textbox(left, captionTop, width, Inches(0.4)) + captionFrame = captionBox.text_frame + captionFrame.text = caption + captionFrame.paragraphs[0].font.size = Pt(10) + captionFrame.paragraphs[0].font.italic = True + captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER + except base64.binascii.Error as b64_error: + logger.error(f"Invalid base64 data: {str(b64_error)}") + except Exception as img_error: + logger.error(f"Error adding image to frame: 
{str(img_error)}", exc_info=True) + else: + # Multiple images: grid layout + cols = 2 if len(images) <= 4 else 3 + rows = (len(images) + cols - 1) // cols + imgWidth = (width - Inches(0.2) * (cols - 1)) / cols + imgHeight = (height - Inches(0.2) * (rows - 1)) / rows + + for idx, img in enumerate(images): + base64Data = img.get("base64Data") + if not base64Data: + logger.warning(f"Image {idx} has no base64Data") + continue + + # Clean base64 data + if isinstance(base64Data, str): + if base64Data.startswith("data:image/"): + base64Data = base64Data.split(",", 1)[1] + base64Data = base64Data.strip().replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") + + row = idx // cols + col = idx % cols + img_left = left + col * (imgWidth + Inches(0.2)) + img_top = top + row * (imgHeight + Inches(0.2)) + + try: + imageBytes = base64.b64decode(base64Data, validate=True) + if len(imageBytes) == 0: + logger.error(f"Decoded image {idx} bytes are empty") + continue + + imageStream = io.BytesIO(imageBytes) + + # Try to get dimensions for better scaling + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgW, imgH = pilImage.size + # Scale to fit grid cell while maintaining aspect ratio + scale = min(imgWidth / (imgW * (72.0 / 96.0)), imgHeight / (imgH * (72.0 / 96.0)), 1.0) + finalW = (imgW * (72.0 / 96.0)) * scale + finalH = (imgH * (72.0 / 96.0)) * scale + # Center in grid cell + cell_left = img_left + (imgWidth - finalW) / 2 + cell_top = img_top + (imgHeight - finalH) / 2 + imageStream.seek(0) + slide.shapes.add_picture(imageStream, cell_left, cell_top, width=finalW, height=finalH) + except (ImportError, Exception): + # Fallback: use grid cell size directly + imageStream.seek(0) + slide.shapes.add_picture(imageStream, img_left, img_top, width=imgWidth, height=imgHeight) + + logger.info(f"Successfully added image {idx+1}/{len(images)} to slide grid") + except base64.binascii.Error as b64_error: + logger.error(f"Invalid base64 data 
for image {idx}: {str(b64_error)}") + except Exception as img_error: + logger.error(f"Error adding image {idx} to frame: {str(img_error)}", exc_info=True) + + except Exception as e: + logger.error(f"Error adding images to slide frame: {str(e)}", exc_info=True) diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index c1992f94..24c620d2 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -535,6 +535,45 @@ class RendererXlsx(BaseRenderer): self.logger.warning(f"AI styling failed: {str(e)}, using defaults") return defaultStyles + def _getSafeAlignment(self, alignValue: Any) -> str: + """Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'.""" + if not alignValue: + return "left" + + alignStr = str(alignValue).lower().strip() + + # Map common alignment values to openpyxl values + alignmentMap = { + "left": "left", + "right": "right", + "center": "center", + "centre": "center", + "general": "general", + "distributed": "distributed", + "fill": "fill", + "justify": "justify", + "centercontinuous": "centerContinuous", + "center-continuous": "centerContinuous", + "start": "left", + "end": "right", + "middle": "center" + } + + # Check direct mapping + if alignStr in alignmentMap: + return alignmentMap[alignStr] + + # Check if it contains alignment keywords + if "left" in alignStr or "start" in alignStr: + return "left" + elif "right" in alignStr or "end" in alignStr: + return "right" + elif "center" in alignStr or "centre" in alignStr or "middle" in alignStr: + return "center" + + # Default to left if unknown + return "left" + def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str: """Get a safe aRGB color value for Excel (without # prefix).""" if not isinstance(colorValue, str): @@ -603,30 
+642,34 @@ class RendererXlsx(BaseRenderer): return sanitized[:31] def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]: - """Generate sheet names: each heading section creates a new tab.""" + """Generate sheet names: each heading level 1 (chapter) creates a new tab.""" sections = self._extractSections(jsonContent) # If no sections, create a single sheet if not sections: return ["Content"] - # Simple logic: each heading section creates a new tab + # Only heading level 1 (chapters) create new tabs sheetNames = [] for section in sections: if section.get("content_type") == "heading": - # Extract heading text from elements + # Extract heading text and level from elements elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: headingElement = elements[0] content = headingElement.get("content", {}) if isinstance(content, dict): headingText = content.get("text", "") + level = content.get("level", 1) elif isinstance(content, str): headingText = content + level = 1 else: headingText = "" + level = 1 - if headingText: + # Only level 1 headings (chapters) create tabs + if headingText and level == 1: sanitized_name = self._sanitizeSheetName(headingText) # Ensure unique sheet names if sanitized_name not in sheetNames: @@ -639,7 +682,7 @@ class RendererXlsx(BaseRenderer): counter += 1 sheetNames.append(f"{base_name} ({counter})"[:31]) - # If no headings found, use document title + # If no level 1 headings found, use document title if not sheetNames: documentTitle = jsonContent.get("metadata", {}).get("title", "Document") sheetNames.append(self._sanitizeSheetName(documentTitle)) @@ -647,7 +690,7 @@ class RendererXlsx(BaseRenderer): return sheetNames def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None: - """Populate Excel sheets: each heading creates a new tab, all following content goes in that tab.""" + """Populate Excel sheets: each 
heading level 1 (chapter) creates a new tab, all following content goes in that tab.""" try: # Get the actual sheet names that were created (keys are lowercase) sheetNames = list(sheets.keys()) @@ -657,7 +700,7 @@ class RendererXlsx(BaseRenderer): sections = self._extractSections(jsonContent) - # Simple logic: iterate through sections, each heading creates a new tab + # Only heading level 1 (chapters) create new tabs currentSheetIndex = 0 currentSheet = None currentRow = 1 @@ -665,17 +708,28 @@ class RendererXlsx(BaseRenderer): for section in sections: contentType = section.get("content_type", "paragraph") - # Heading section: switch to next sheet + # Heading section: check if it's level 1 (chapter) to switch to next sheet if contentType == "heading": - if currentSheetIndex < len(sheetNames): - sheetName = sheetNames[currentSheetIndex] - currentSheet = sheets[sheetName] # sheets dict uses lowercase keys - currentSheetIndex += 1 - currentRow = 1 # Start at row 1 for new sheet - else: - # More headings than sheets - use last sheet - if sheetNames: - currentSheet = sheets[sheetNames[-1]] + # Extract level from heading element + elements = section.get("elements", []) + level = 1 # Default + if elements and isinstance(elements, list) and len(elements) > 0: + headingElement = elements[0] + content = headingElement.get("content", {}) + if isinstance(content, dict): + level = content.get("level", 1) + + # Only level 1 headings (chapters) create new tabs + if level == 1: + if currentSheetIndex < len(sheetNames): + sheetName = sheetNames[currentSheetIndex] + currentSheet = sheets[sheetName] # sheets dict uses lowercase keys + currentSheetIndex += 1 + currentRow = 1 # Start at row 1 for new sheet + else: + # More headings than sheets - use last sheet + if sheetNames: + currentSheet = sheets[sheetNames[-1]] # Render content in current sheet (or first sheet if no headings yet) if currentSheet is None and sheetNames: @@ -695,7 +749,7 @@ class RendererXlsx(BaseRenderer): 
sheet['A1'] = sheetTitle title_style = styles.get("title", {}) sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79"))) - sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left")) + sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left"))) # Get table data from elements (canonical JSON format) elements = section.get("elements", []) @@ -707,8 +761,13 @@ class RendererXlsx(BaseRenderer): headers = [] rows = [] else: - headers = content.get("headers", []) - rows = content.get("rows", []) + headers = content.get("headers") or [] + rows = content.get("rows") or [] + # Ensure headers and rows are lists + if not isinstance(headers, list): + headers = [] + if not isinstance(rows, list): + rows = [] else: headers = [] rows = [] @@ -770,11 +829,11 @@ class RendererXlsx(BaseRenderer): try: safe_color = self._getSafeColor(title_style["color"]) sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color) - sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) + sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"])) except Exception as font_error: # Try with a safe color sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000") - sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) + sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"])) # Generation info sheet['A3'] = "Generated:" @@ -892,6 +951,8 @@ class RendererXlsx(BaseRenderer): startRow = self._addHeadingToExcel(sheet, element, styles, startRow) elif element_type == "image": startRow = self._addImageToExcel(sheet, element, styles, startRow) + elif element_type == "code_block" or element_type == "code": + startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow) else: # Fallback: if element_type not set, use section_type 
if section_type == "table": @@ -904,6 +965,8 @@ class RendererXlsx(BaseRenderer): startRow = self._addHeadingToExcel(sheet, element, styles, startRow) elif section_type == "image": startRow = self._addImageToExcel(sheet, element, styles, startRow) + elif section_type == "code_block" or section_type == "code": + startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow) else: startRow = self._addParagraphToExcel(sheet, element, styles, startRow) @@ -943,9 +1006,16 @@ class RendererXlsx(BaseRenderer): content = element.get("content", {}) if not isinstance(content, dict): return startRow + headers = content.get("headers", []) rows = content.get("rows", []) + # Ensure headers and rows are lists + if not isinstance(headers, list): + headers = [] + if not isinstance(rows, list): + rows = [] + if not headers and not rows: return startRow @@ -965,60 +1035,95 @@ class RendererXlsx(BaseRenderer): sanitized_header = self._sanitizeCellValue(header) cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) - # Font styling - cell.font = Font( - bold=header_style.get("bold", True), - color=self._getSafeColor(header_style.get("text_color", "FF000000")) - ) - - # Background color - if header_style.get("background"): - cell.fill = PatternFill( - start_color=self._getSafeColor(header_style["background"]), - end_color=self._getSafeColor(header_style["background"]), - fill_type="solid" + # Apply styling with fallbacks - don't let styling errors prevent data rendering + try: + # Font styling + cell.font = Font( + bold=header_style.get("bold", True), + color=self._getSafeColor(header_style.get("text_color", "FF000000")) ) + except Exception: + # Fallback to default font if styling fails + try: + cell.font = Font(bold=True, color=self._getSafeColor("FF000000")) + except Exception: + pass # Continue even if font fails - # Alignment - cell.alignment = Alignment( - horizontal=header_style.get("align", "left"), - vertical="center" - ) + try: + # Background color + if 
header_style.get("background"): + cell.fill = PatternFill( + start_color=self._getSafeColor(header_style["background"]), + end_color=self._getSafeColor(header_style["background"]), + fill_type="solid" + ) + except Exception: + pass # Continue without background color if it fails - # Border - cell.border = thin_border + try: + # Alignment + cell.alignment = Alignment( + horizontal=self._getSafeAlignment(header_style.get("align", "left")), + vertical="center" + ) + except Exception: + # Fallback to default alignment if it fails + try: + cell.alignment = Alignment(horizontal="left", vertical="center") + except Exception: + pass # Continue even if alignment fails + + try: + # Border + cell.border = thin_border + except Exception: + pass # Continue without border if it fails startRow += 1 # Add rows with formatting cell_style = styles.get("table_cell", {}) for row_data in rows: - # Handle different row formats - if isinstance(row_data, list): - cell_values = row_data - elif isinstance(row_data, dict) and "cells" in row_data: - cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] - else: - continue - - for col, cell_value in enumerate(cell_values, 1): - sanitized_value = self._sanitizeCellValue(cell_value) - cell = sheet.cell(row=startRow, column=col, value=sanitized_value) + # Handle different row formats + if isinstance(row_data, list): + cell_values = row_data + elif isinstance(row_data, dict) and "cells" in row_data: + cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] + else: + continue - # Font styling - if cell_style.get("text_color"): - cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) + for col, cell_value in enumerate(cell_values, 1): + sanitized_value = self._sanitizeCellValue(cell_value) + cell = sheet.cell(row=startRow, column=col, value=sanitized_value) + + # Apply styling with fallbacks - don't let styling errors prevent data rendering + try: + # Font styling + if 
cell_style.get("text_color"): + cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) + except Exception: + pass # Continue without font color if it fails + + try: + # Alignment + cell.alignment = Alignment( + horizontal=self._getSafeAlignment(cell_style.get("align", "left")), + vertical="center" + ) + except Exception: + # Fallback to default alignment if it fails + try: + cell.alignment = Alignment(horizontal="left", vertical="center") + except Exception: + pass # Continue even if alignment fails + + try: + # Border + cell.border = thin_border + except Exception: + pass # Continue without border if it fails - # Alignment - cell.alignment = Alignment( - horizontal=cell_style.get("align", "left"), - vertical="center" - ) - - # Border - cell.border = thin_border - - startRow += 1 + startRow += 1 # Auto-adjust column widths for col in range(1, len(headers) + 1): @@ -1038,7 +1143,10 @@ class RendererXlsx(BaseRenderer): content = element.get("content", {}) if not isinstance(content, dict): return startRow - list_items = content.get("items", []) + list_items = content.get("items") or [] + # Ensure list_items is a list + if not isinstance(list_items, list): + list_items = [] list_style = styles.get("bullet_list", {}) for item in list_items: @@ -1199,6 +1307,52 @@ class RendererXlsx(BaseRenderer): errorCell = sheet.cell(row=startRow, column=1, value=errorMsg) errorCell.font = Font(color="FFFF0000", italic=True) # Red color return startRow + 1 + + def _addCodeBlockToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: + """Add a code block element to Excel sheet. 
Expects nested content structure.""" + try: + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return startRow + code = content.get("code", "") + language = content.get("language", "") + + if code: + code_style = styles.get("code_block", {}) + + # Add language label if present + if language: + langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):") + langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000"))) + startRow += 1 + + # Split code into lines and add each line + code_lines = code.split('\n') + for line in code_lines: + codeCell = sheet.cell(row=startRow, column=1, value=line) + codeCell.font = Font( + name=code_style.get("font", "Courier New"), + size=code_style.get("font_size", 10), + color=self._getSafeColor(code_style.get("color", "FF2F2F2F")) + ) + # Set background color if specified + if code_style.get("background"): + codeCell.fill = PatternFill( + start_color=self._getSafeColor(code_style["background"]), + end_color=self._getSafeColor(code_style["background"]), + fill_type="solid" + ) + startRow += 1 + + # Add spacing after code block + startRow += 1 + + return startRow + + except Exception as e: + self.logger.warning(f"Could not add code block to Excel: {str(e)}") + return startRow + 1 def _formatTimestamp(self) -> str: """Format current timestamp for document generation.""" diff --git a/tests/functional/test10_document_generation_formats.py b/tests/functional/test10_document_generation_formats.py index 05532313..8d963643 100644 --- a/tests/functional/test10_document_generation_formats.py +++ b/tests/functional/test10_document_generation_formats.py @@ -413,10 +413,12 @@ class DocumentGenerationFormatsTester10: async def testAllFormats(self) -> Dict[str, Any]: """Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats.""" print("\n" + "="*80) - print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML 
FORMATS") + print("TESTING DOCUMENT GENERATION IN HTML FORMAT") print("="*80) - formats = ["docx", "xlsx", "pptx", "pdf", "html"] + # Only test HTML format + formats = ["html"] + # formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats results = {} for format in formats: @@ -469,7 +471,7 @@ class DocumentGenerationFormatsTester10: async def runTest(self): """Run the complete test.""" print("\n" + "="*80) - print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML") + print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY") print("="*80) try: