From 1cbc6699705dd877797198d5b6522cdfacdd49ff Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Fri, 3 Oct 2025 22:40:41 +0200
Subject: [PATCH] fixed react planning
---
modules/interfaces/interfaceDbChatObjects.py | 121 ++++++-
modules/services/serviceAi/mainServiceAi.py | 2 +-
.../services/serviceExtraction/subPipeline.py | 2 +-
.../mainServiceGeneration.py | 2 +-
.../renderers/csv_renderer.py | 4 +-
.../renderers/docx_renderer.py | 4 +-
.../renderers/excel_renderer.py | 4 +-
.../renderers/html_renderer.py | 4 +-
.../renderers/json_renderer.py | 4 +-
.../renderers/markdown_renderer.py | 4 +-
.../renderers/pdf_renderer.py | 4 +-
.../renderers/text_renderer.py | 4 +-
.../serviceWorkflow/mainServiceWorkflow.py | 2 +-
modules/workflows/methods/methodAi.py | 92 ++++--
modules/workflows/methods/methodDocument.py | 173 ++--------
modules/workflows/methods/methodOutlook.py | 41 ++-
modules/workflows/methods/methodSharepoint.py | 38 ++-
modules/workflows/processing/handlingTasks.py | 292 ++++++++++++++---
modules/workflows/processing/promptFactory.py | 4 +-
.../processing/promptFactoryPlaceholders.py | 303 +++++++-----------
.../ai_result_r0t0a0.txt | 32 ++
.../method_ai_20251003-200841/raw_result.txt | 32 ++
.../method_ai_20251003-200841/summary.txt | 2 +
.../ai_result_r0t0a0.txt | 3 +
.../method_ai_20251003-200852/raw_result.txt | 11 +
.../method_ai_20251003-200852/summary.txt | 2 +
.../ai_result_r0t0a0.txt | 3 +
.../method_ai_20251003-200904/raw_result.txt | 11 +
.../method_ai_20251003-200904/summary.txt | 2 +
.../obj/m20251003-220751_1_0_0/message.json | 19 ++
.../m20251003-220751_1_0_0/message_text.txt | 1 +
.../obj/m20251003-220757_1_1_0/message.json | 19 ++
.../m20251003-220757_1_1_0/message_text.txt | 3 +
.../obj/m20251003-220841_1_1_1/message.json | 19 ++
.../m20251003-220841_1_1_1/message_text.txt | 4 +
.../document_001_metadata.json | 12 +
.../obj/m20251003-220843_1_1_1/message.json | 19 ++
.../m20251003-220843_1_1_1/message_text.txt | 1 +
.../obj/m20251003-220843_1_2_0/message.json | 19 ++
.../m20251003-220843_1_2_0/message_text.txt | 3 +
.../obj/m20251003-220853_1_2_1/message.json | 19 ++
.../m20251003-220853_1_2_1/message_text.txt | 4 +
.../document_001_metadata.json | 12 +
.../obj/m20251003-220854_1_2_1/message.json | 19 ++
.../m20251003-220854_1_2_1/message_text.txt | 1 +
.../obj/m20251003-220904_1_2_2/message.json | 19 ++
.../m20251003-220904_1_2_2/message_text.txt | 4 +
.../document_001_metadata.json | 12 +
.../obj/m20251003-220906_1_2_2/message.json | 19 ++
.../m20251003-220906_1_2_2/message_text.txt | 1 +
.../obj/m20251003-220907_1_0_0/message.json | 19 ++
.../m20251003-220907_1_0_0/message_text.txt | 4 +
52 files changed, 1012 insertions(+), 442 deletions(-)
create mode 100644 test-chat/extraction/method_ai_20251003-200841/ai_result_r0t0a0.txt
create mode 100644 test-chat/extraction/method_ai_20251003-200841/raw_result.txt
create mode 100644 test-chat/extraction/method_ai_20251003-200841/summary.txt
create mode 100644 test-chat/extraction/method_ai_20251003-200852/ai_result_r0t0a0.txt
create mode 100644 test-chat/extraction/method_ai_20251003-200852/raw_result.txt
create mode 100644 test-chat/extraction/method_ai_20251003-200852/summary.txt
create mode 100644 test-chat/extraction/method_ai_20251003-200904/ai_result_r0t0a0.txt
create mode 100644 test-chat/extraction/method_ai_20251003-200904/raw_result.txt
create mode 100644 test-chat/extraction/method_ai_20251003-200904/summary.txt
create mode 100644 test-chat/obj/m20251003-220751_1_0_0/message.json
create mode 100644 test-chat/obj/m20251003-220751_1_0_0/message_text.txt
create mode 100644 test-chat/obj/m20251003-220757_1_1_0/message.json
create mode 100644 test-chat/obj/m20251003-220757_1_1_0/message_text.txt
create mode 100644 test-chat/obj/m20251003-220841_1_1_1/message.json
create mode 100644 test-chat/obj/m20251003-220841_1_1_1/message_text.txt
create mode 100644 test-chat/obj/m20251003-220841_1_1_1/round1_task1_action1_results/document_001_metadata.json
create mode 100644 test-chat/obj/m20251003-220843_1_1_1/message.json
create mode 100644 test-chat/obj/m20251003-220843_1_1_1/message_text.txt
create mode 100644 test-chat/obj/m20251003-220843_1_2_0/message.json
create mode 100644 test-chat/obj/m20251003-220843_1_2_0/message_text.txt
create mode 100644 test-chat/obj/m20251003-220853_1_2_1/message.json
create mode 100644 test-chat/obj/m20251003-220853_1_2_1/message_text.txt
create mode 100644 test-chat/obj/m20251003-220853_1_2_1/round1_task2_action1_results/document_001_metadata.json
create mode 100644 test-chat/obj/m20251003-220854_1_2_1/message.json
create mode 100644 test-chat/obj/m20251003-220854_1_2_1/message_text.txt
create mode 100644 test-chat/obj/m20251003-220904_1_2_2/message.json
create mode 100644 test-chat/obj/m20251003-220904_1_2_2/message_text.txt
create mode 100644 test-chat/obj/m20251003-220904_1_2_2/round1_task2_action2_results/document_001_metadata.json
create mode 100644 test-chat/obj/m20251003-220906_1_2_2/message.json
create mode 100644 test-chat/obj/m20251003-220906_1_2_2/message_text.txt
create mode 100644 test-chat/obj/m20251003-220907_1_0_0/message.json
create mode 100644 test-chat/obj/m20251003-220907_1_0_0/message_text.txt
diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py
index 699f6bca..88af1d81 100644
--- a/modules/interfaces/interfaceDbChatObjects.py
+++ b/modules/interfaces/interfaceDbChatObjects.py
@@ -549,7 +549,7 @@ class ChatObjects:
created_documents.append(created_doc)
# Convert to ChatMessage model
- return ChatMessage(
+ chat_message = ChatMessage(
id=createdMessage["id"],
workflowId=createdMessage["workflowId"],
parentMessageId=createdMessage.get("parentMessageId"),
@@ -570,6 +570,11 @@ class ChatObjects:
actionMethod=createdMessage.get("actionMethod"),
actionName=createdMessage.get("actionName")
)
+
+ # Debug: Store message and documents for debugging TODO REMOVE
+ self._storeDebugMessageAndDocuments(chat_message)
+
+ return chat_message
except Exception as e:
logger.error(f"Error creating workflow message: {str(e)}")
@@ -1045,6 +1050,120 @@ class ChatObjects:
return {"items": items}
+ def _storeDebugMessageAndDocuments(self, message: ChatMessage) -> None:
+ """
+ Store message and documents for debugging purposes in fileshare.
+ Structure: gateway/test-chat/obj/m_round_task_action_timestamp/documentlist_label/documents
+
+ Args:
+ message: ChatMessage object to store
+ """
+ try:
+ import os
+ import json
+ from datetime import datetime
+
+ # Create base debug directory
+ debug_root = "./test-chat/obj"
+ os.makedirs(debug_root, exist_ok=True)
+
+ # Generate timestamp
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+
+ # Create message folder name: m_round_task_action_timestamp
+ # Use actual values from message, not defaults
+ round_str = str(message.roundNumber) if message.roundNumber is not None else "0"
+ task_str = str(message.taskNumber) if message.taskNumber is not None else "0"
+ action_str = str(message.actionNumber) if message.actionNumber is not None else "0"
+ message_folder = f"m{timestamp}_{round_str}_{task_str}_{action_str}"
+
+ message_path = os.path.join(debug_root, message_folder)
+ os.makedirs(message_path, exist_ok=True)
+
+ # Store message data - use dict() instead of model_dump() for compatibility
+ message_file = os.path.join(message_path, "message.json")
+ with open(message_file, "w", encoding="utf-8") as f:
+ # Convert message to dict manually to avoid model_dump() issues
+ message_dict = {
+ "id": message.id,
+ "workflowId": message.workflowId,
+ "parentMessageId": message.parentMessageId,
+ "message": message.message,
+ "role": message.role,
+ "status": message.status,
+ "sequenceNr": message.sequenceNr,
+ "publishedAt": message.publishedAt,
+ "roundNumber": message.roundNumber,
+ "taskNumber": message.taskNumber,
+ "actionNumber": message.actionNumber,
+ "documentsLabel": message.documentsLabel,
+ "actionId": message.actionId,
+ "actionMethod": message.actionMethod,
+ "actionName": message.actionName,
+ "success": message.success,
+ "documents": []
+ }
+ json.dump(message_dict, f, indent=2, ensure_ascii=False, default=str)
+
+ # Store message content as text
+ if message.message:
+ message_text_file = os.path.join(message_path, "message_text.txt")
+ with open(message_text_file, "w", encoding="utf-8") as f:
+ f.write(str(message.message))
+
+ # Store documents if provided
+ if message.documents and len(message.documents) > 0:
+ logger.info(f"Debug: Processing {len(message.documents)} documents")
+
+ # Group documents by documentsLabel
+ documents_by_label = {}
+ for doc in message.documents:
+ label = message.documentsLabel or 'default'
+ if label not in documents_by_label:
+ documents_by_label[label] = []
+ documents_by_label[label].append(doc)
+
+ # Create subfolder for each document label
+ for label, docs in documents_by_label.items():
+ # Sanitize label for filesystem
+ safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
+ safe_label = safe_label.replace(' ', '_')
+ if not safe_label:
+ safe_label = "default"
+
+ label_folder = os.path.join(message_path, safe_label)
+ os.makedirs(label_folder, exist_ok=True)
+ logger.info(f"Debug: Created document folder: {label_folder}")
+
+ # Store each document
+ for i, doc in enumerate(docs):
+ # Create document metadata file
+ doc_meta = {
+ "id": doc.id,
+ "messageId": doc.messageId,
+ "fileId": doc.fileId,
+ "fileName": doc.fileName,
+ "fileSize": doc.fileSize,
+ "mimeType": doc.mimeType,
+ "roundNumber": doc.roundNumber,
+ "taskNumber": doc.taskNumber,
+ "actionNumber": doc.actionNumber,
+ "actionId": doc.actionId
+ }
+
+ doc_meta_file = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
+ with open(doc_meta_file, "w", encoding="utf-8") as f:
+ json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)
+
+ logger.info(f"Debug: Stored document metadata for {doc.fileName}")
+
+ logger.info(f"Debug: Stored message and documents in {message_path}")
+
+ except Exception as e:
+ logger.error(f"Debug: Failed to store message and documents: {e}")
+ import traceback
+ logger.error(f"Debug: Traceback: {traceback.format_exc()}")
+
def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects':
"""
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index c0e200d4..72049a60 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -535,7 +535,7 @@ class AiService:
# Prepare debug directory TODO TO REMOVE
import os
from datetime import datetime
- debug_root = "../local/testing_extraction"
+ debug_root = "./test-chat/extraction"
ts = datetime.now().strftime("%Y%m%d-%H%M%S")
debug_dir = os.path.join(debug_root, f"per_chunk_{ts}")
try:
diff --git a/modules/services/serviceExtraction/subPipeline.py b/modules/services/serviceExtraction/subPipeline.py
index d74cb974..1c1bfc85 100644
--- a/modules/services/serviceExtraction/subPipeline.py
+++ b/modules/services/serviceExtraction/subPipeline.py
@@ -94,7 +94,7 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
# DEBUG: dump parts and chunks to files under @testing_extraction/ TODO TO REMOVE
try:
- base_dir = "../local/testing_extraction"
+ base_dir = "./test-chat/extraction"
doc_dir = os.path.join(base_dir, f"extraction_{fileName}")
os.makedirs(doc_dir, exist_ok=True)
# Write a summary file
diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py
index e1168b3c..7a7b6baf 100644
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@@ -313,7 +313,7 @@ class GenerationService:
try:
import os
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
- debug_root = "../local/testing_extraction"
+ debug_root = "./test-chat/extraction"
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
os.makedirs(debug_dir, exist_ok=True)
with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
diff --git a/modules/services/serviceGeneration/renderers/csv_renderer.py b/modules/services/serviceGeneration/renderers/csv_renderer.py
index 8e2344ac..42248922 100644
--- a/modules/services/serviceGeneration/renderers/csv_renderer.py
+++ b/modules/services/serviceGeneration/renderers/csv_renderer.py
@@ -60,7 +60,9 @@ OUTPUT POLICY:
- Include all necessary information
- Valid CSV that can be imported
-Generate the complete CSV report:
+CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
+
+Generate the complete CSV report using the actual data from the source documents:
"""
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
diff --git a/modules/services/serviceGeneration/renderers/docx_renderer.py b/modules/services/serviceGeneration/renderers/docx_renderer.py
index e2ea7f3f..75663b4d 100644
--- a/modules/services/serviceGeneration/renderers/docx_renderer.py
+++ b/modules/services/serviceGeneration/renderers/docx_renderer.py
@@ -87,7 +87,9 @@ OUTPUT POLICY:
- Professional document format
- Include all necessary information
-Generate the complete DOCX report content:
+CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
+
+Generate the complete DOCX report content using the actual data from the source documents:
"""
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
diff --git a/modules/services/serviceGeneration/renderers/excel_renderer.py b/modules/services/serviceGeneration/renderers/excel_renderer.py
index e22a5b5f..e9ec80cf 100644
--- a/modules/services/serviceGeneration/renderers/excel_renderer.py
+++ b/modules/services/serviceGeneration/renderers/excel_renderer.py
@@ -97,7 +97,9 @@ OUTPUT POLICY:
- Professional spreadsheet format
- Include all necessary information
-Generate the complete Excel report data:
+CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
+
+Generate the complete Excel report data using the actual data from the source documents:
"""
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
diff --git a/modules/services/serviceGeneration/renderers/html_renderer.py b/modules/services/serviceGeneration/renderers/html_renderer.py
index 5d3c886a..81fab683 100644
--- a/modules/services/serviceGeneration/renderers/html_renderer.py
+++ b/modules/services/serviceGeneration/renderers/html_renderer.py
@@ -57,7 +57,9 @@ OUTPUT POLICY:
- Include all necessary CSS inline
- Make it look professional and polished
-Generate the complete HTML report:
+CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
+
+Generate the complete HTML report using the actual data from the source documents:
"""
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
diff --git a/modules/services/serviceGeneration/renderers/json_renderer.py b/modules/services/serviceGeneration/renderers/json_renderer.py
index e6f3921b..5d7f5f2e 100644
--- a/modules/services/serviceGeneration/renderers/json_renderer.py
+++ b/modules/services/serviceGeneration/renderers/json_renderer.py
@@ -65,7 +65,9 @@ OUTPUT POLICY:
- Include all necessary information
- Valid JSON that can be parsed
-Generate the complete JSON report:
+CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
+
+Generate the complete JSON report using the actual data from the source documents:
"""
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
diff --git a/modules/services/serviceGeneration/renderers/markdown_renderer.py b/modules/services/serviceGeneration/renderers/markdown_renderer.py
index 655d2e3f..c5dd29c4 100644
--- a/modules/services/serviceGeneration/renderers/markdown_renderer.py
+++ b/modules/services/serviceGeneration/renderers/markdown_renderer.py
@@ -59,7 +59,9 @@ OUTPUT POLICY:
- Professional appearance with good structure
- Include all necessary information
-Generate the complete Markdown report:
+CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
+
+Generate the complete Markdown report using the actual data from the source documents:
"""
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
diff --git a/modules/services/serviceGeneration/renderers/pdf_renderer.py b/modules/services/serviceGeneration/renderers/pdf_renderer.py
index efe56920..1948f034 100644
--- a/modules/services/serviceGeneration/renderers/pdf_renderer.py
+++ b/modules/services/serviceGeneration/renderers/pdf_renderer.py
@@ -78,7 +78,9 @@ OUTPUT POLICY:
- Professional document format
- Include all necessary information
-Generate the complete PDF report content:
+CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
+
+Generate the complete PDF report content using the actual data from the source documents:
"""
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
diff --git a/modules/services/serviceGeneration/renderers/text_renderer.py b/modules/services/serviceGeneration/renderers/text_renderer.py
index 0f4c5dc0..e7b9baae 100644
--- a/modules/services/serviceGeneration/renderers/text_renderer.py
+++ b/modules/services/serviceGeneration/renderers/text_renderer.py
@@ -88,7 +88,9 @@ OUTPUT POLICY:
- Preserve code structure when appropriate
- Include all necessary information
-Generate the complete text report:
+CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
+
+Generate the complete text report using the actual data from the source documents:
"""
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
diff --git a/modules/services/serviceWorkflow/mainServiceWorkflow.py b/modules/services/serviceWorkflow/mainServiceWorkflow.py
index d6dc96c1..5795a084 100644
--- a/modules/services/serviceWorkflow/mainServiceWorkflow.py
+++ b/modules/services/serviceWorkflow/mainServiceWorkflow.py
@@ -534,4 +534,4 @@ class WorkflowService:
return self.interfaceDbChat.createLog(logData)
except Exception as e:
logger.error(f"Error creating log: {str(e)}")
- raise
\ No newline at end of file
+ raise
diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py
index 371b1bbc..25a3498e 100644
--- a/modules/workflows/methods/methodAi.py
+++ b/modules/workflows/methods/methodAi.py
@@ -30,12 +30,20 @@ class MethodAi(MethodBase):
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
"""
- Perform a generic AI call with optional document references, producing plain text output
+ AI text processing and analysis - returns plain text only, NO document generation
+
+ USE FOR: Text analysis, data processing, content generation, research, Q&A, brainstorming, summarization, translation, code generation
+ DO NOT USE FOR: Creating formatted documents (Word, PDF, Excel), document generation, file creation
+
+ INPUT REQUIREMENTS: Requires aiPrompt parameter (the question or task for AI)
+ OUTPUT FORMAT: Plain text only (.txt, .json, .md, .csv, .xml) - NO binary files
+ DEPENDENCIES: None - can work standalone
+ WORKFLOW POSITION: Use for analysis, research, or text processing tasks
Parameters:
aiPrompt (str): The AI prompt for processing
documentList (list, optional): List of document references to include in context
- expectedDocumentFormat (str, optional): Preferred output extension (string or dict). Note: This action only returns plain text content.
+ resultType (str, optional): Output format type - use 'txt', 'json', 'md', 'csv', or 'xml' (defaults to 'txt')
processingMode (str, optional): Processing mode - use 'basic', 'advanced', or 'detailed' (defaults to 'basic')
includeMetadata (bool, optional): Whether to include metadata (default: True)
operationType (str, optional): Operation type - use 'general', 'generate_plan', 'analyse_content', 'generate_content', 'web_research', 'image_analysis', or 'image_generation'
@@ -49,7 +57,7 @@ class MethodAi(MethodBase):
documentList = parameters.get("documentList", [])
if isinstance(documentList, str):
documentList = [documentList]
- expectedDocumentFormat = parameters.get("expectedDocumentFormat", "")
+ resultType = parameters.get("resultType", "txt")
processingMode = parameters.get("processingMode", "basic")
includeMetadata = parameters.get("includeMetadata", True)
operationType = parameters.get("operationType", "general")
@@ -63,19 +71,24 @@ class MethodAi(MethodBase):
error="AI prompt is required"
)
- # Determine output format first (needed for context building)
- output_extension = ".txt" # Default
- output_mime_type = "text/plain" # Default
+ # Validate and determine output format
+ valid_result_types = ["txt", "json", "md", "csv", "xml"]
+ if resultType not in valid_result_types:
+ return ActionResult.isFailure(
+ error=f"Invalid resultType '{resultType}'. Must be one of: {', '.join(valid_result_types)}"
+ )
- if expectedDocumentFormat:
- if isinstance(expectedDocumentFormat, dict):
- output_extension = expectedDocumentFormat.get("extension", ".txt")
- output_mime_type = expectedDocumentFormat.get("mimeType", "text/plain")
- else:
- # If it's a string, treat it as the extension
- output_extension = expectedDocumentFormat
- output_mime_type = "text/plain"
- logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+ # Map resultType to file extension and MIME type
+ format_mapping = {
+ "txt": (".txt", "text/plain"),
+ "json": (".json", "application/json"),
+ "md": (".md", "text/markdown"),
+ "csv": (".csv", "text/csv"),
+ "xml": (".xml", "application/xml")
+ }
+
+ output_extension, output_mime_type = format_mapping[resultType]
+ logger.info(f"Using result type: {resultType} -> {output_extension} ({output_mime_type})")
# Get ChatDocuments for AI service - let AI service handle all document processing
chatDocuments = []
@@ -96,32 +109,49 @@ class MethodAi(MethodBase):
# Note: customInstructions parameter was removed as it's not defined in the method signature
# Add format guidance to prompt
- if expectedDocumentFormat:
- enhanced_prompt += f"\n\nPlease try to deliver the result in {output_extension.upper()} format. If you cannot deliver in that specific format, please use an appropriate alternative format and include a comment explaining the format used."
+ if resultType != "txt":
+ enhanced_prompt += f"\n\nPlease deliver the result in {resultType.upper()} format. Ensure the output follows the proper {resultType.upper()} syntax and structure."
# Call AI service - it will handle all document processing internally
logger.info(f"Executing AI call with mode: {processingMode}, prompt length: {len(enhanced_prompt)}")
if chatDocuments:
logger.info(f"Including {len(chatDocuments)} documents for AI processing")
- # Add JSON format instruction for structured response
- json_instruction = """
-
+ # Add format-specific instruction for structured response
+ if resultType == "json":
+ format_instruction = """
+
Please return your response in the following JSON format:
{{
"documents": [
{{
"data": "your actual content here",
- "mimeType": "appropriate/mime-type",
- "comment": "optional comment about format or content"
+ "mimeType": "application/json",
+ "comment": "optional comment about content"
}}
]
}}
-If you need to return multiple documents, add more objects to the documents array. The data field should contain the actual content, mimeType should be appropriate for the content format, and comment is optional.
+The data field should contain valid JSON content.
+"""
+ else:
+ format_instruction = f"""
+
+Please return your response in the following JSON format:
+{{
+ "documents": [
+ {{
+ "data": "your actual content here in {resultType.upper()} format",
+ "mimeType": "{output_mime_type}",
+ "comment": "optional comment about content"
+ }}
+ ]
+}}
+
+The data field should contain the content in {resultType.upper()} format.
"""
- call_prompt = enhanced_prompt + json_instruction
+ call_prompt = enhanced_prompt + format_instruction
output_format = output_extension.replace('.', '') or 'txt'
@@ -150,7 +180,7 @@ If you need to return multiple documents, add more objects to the documents arra
try:
import os
from datetime import datetime
- debug_root = "../local/testing_extraction"
+ debug_root = "./test-chat/extraction"
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_dir = os.path.join(debug_root, f"method_ai_{ts}")
os.makedirs(debug_dir, exist_ok=True)
@@ -243,7 +273,7 @@ If you need to return multiple documents, add more objects to the documents arra
# Reuse the same debug_dir if created above; otherwise create a new one
import os
from datetime import datetime
- debug_root = "../local/testing_extraction"
+ debug_root = "./test-chat/extraction"
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_dir = os.path.join(debug_root, f"method_ai_{ts}")
os.makedirs(debug_dir, exist_ok=True)
@@ -274,7 +304,15 @@ If you need to return multiple documents, add more objects to the documents arra
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
"""
- Perform comprehensive web research using the full workflow.
+ Comprehensive web research and information gathering from the internet
+
+ USE FOR: Finding current information, researching topics, gathering external data, fact-checking, market research
+ DO NOT USE FOR: Processing local documents, creating formatted reports, email operations
+
+ INPUT REQUIREMENTS: Requires user_prompt parameter (the research question or topic)
+ OUTPUT FORMAT: JSON with research results, sources, and analysis
+ DEPENDENCIES: Requires internet connection and web search capabilities
+ WORKFLOW POSITION: Use when external information is needed, before document processing
Parameters:
user_prompt (str): The user input or question to investigate
diff --git a/modules/workflows/methods/methodDocument.py b/modules/workflows/methods/methodDocument.py
index c19c2cd5..df3962d1 100644
--- a/modules/workflows/methods/methodDocument.py
+++ b/modules/workflows/methods/methodDocument.py
@@ -31,7 +31,15 @@ class MethodDocument(MethodBase):
@action
async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
"""
- Extract content from any document using AI prompt.
+ Extract and analyze content from existing documents using AI
+
+ USE FOR: Analyzing documents, extracting specific information, summarizing content, finding patterns, data extraction
+ DO NOT USE FOR: Creating new documents, generating reports, web research, email operations
+
+ INPUT REQUIREMENTS: Requires documentList (existing documents) and prompt (what to extract)
+ OUTPUT FORMAT: Plain text extracted content (.txt files)
+ DEPENDENCIES: Requires existing documents in documentList parameter
+ WORKFLOW POSITION: Use after documents are available, before generating reports
Parameters:
documentList (list): Document list reference(s) - List of document references to extract content from
@@ -183,9 +191,17 @@ class MethodDocument(MethodBase):
@action
- async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
+ async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
"""
- Generate report from multiple documents using AI.
+ Generate formatted documents and reports from source documents - creates actual files (Word, PDF, Excel, etc.)
+
+ USE FOR: Creating formatted documents, reports, presentations, spreadsheets, structured outputs, professional documents
+ DO NOT USE FOR: Simple text analysis, Q&A, web research, email operations
+
+ INPUT REQUIREMENTS: Requires documentList (source documents) and prompt (what kind of report to generate)
+ OUTPUT FORMAT: Formatted documents (.html, .pdf, .docx, .txt, .md, .json, .csv, .xlsx)
+ DEPENDENCIES: Requires existing documents in documentList parameter
+ WORKFLOW POSITION: Use after document analysis, as final output generation step
Parameters:
documentList (list): Document list reference(s) - List of document references to include in report
@@ -384,155 +400,4 @@ class MethodDocument(MethodBase):
# Return minimal fallback content
return f"Error extracting content: {str(e)}"
- async def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool, prompt: str) -> str:
- """
- Generate a comprehensive HTML report using AI from all input documents.
- """
- try:
- # Filter out empty documents and collect content
- validDocuments = []
- allContent = []
-
- for doc in chatDocuments:
- content = ""
- logger.info(f"Processing document: type={type(doc)}")
-
- # Use new extraction service for each document
- try:
- # Build extraction options for report generation from AI planner parameters
- extraction_options = {
- "prompt": prompt,
- "operationType": operationType,
- "processDocumentsIndividually": processDocumentsIndividually,
- "chunkAllowed": chunkAllowed,
- "mergeStrategy": mergeStrategy
- }
-
- # Add optional parameters if provided by AI planner
- if not includeMetadata:
- extraction_options["includeMetadata"] = False
-
- # Extract content using new service
- extracted_list = self.services.extraction.extractContent(
- documents=[doc],
- options=extraction_options
- )
-
- ec = extracted_list[0] if extracted_list else None
- if ec and hasattr(ec, 'parts'):
- for part in ec.parts:
- try:
- if part.typeGroup in ("text", "table", "structure") and part.data:
- content += part.data + " "
- except Exception:
- continue
- if content.strip():
- logger.info(f" Retrieved content from file: {len(content)} characters")
- else:
- logger.info(f" No readable text content found (binary file)")
- else:
- logger.info(f" No content extracted (binary file)")
- except Exception as e:
- logger.info(f" Could not extract content (binary file): {str(e)}")
-
- # Skip empty documents
- if content and content.strip():
- validDocuments.append(doc)
- allContent.append(f"Document: {doc.fileName}\n{content}\n")
- logger.info(f" Added document to valid documents list")
- else:
- logger.info(f" Skipping document with no readable text content")
-
- if not validDocuments:
- # No readable content; return a minimal valid HTML document
- timestamp = int(self.services.utils.getUtcTimestamp())
- return f"{title}{title}
Keine auswertbaren Inhalte gefunden.
Generated: {timestamp}
"
-
- # Create AI prompt for comprehensive report generation using user's prompt
- combinedContent = "\n\n".join(allContent)
- aiPrompt = f"""
-{prompt}
-
-Report Title: {title}
-
-OUTPUT POLICY:
-- Return ONLY a complete, raw HTML document.
-- Start with:
-- Must include: , (with and ), and .
-- The response must be valid, self-contained HTML suitable for saving as .html.
-
-Structure:
-- Title and short subtitle
-- Executive summary
-- Sections with clear headings
-- Use tables for structured data when helpful
-- Key findings and recommendations
-- Generation date and number of documents
-
-Quality and design requirements:
-- Use clear, professional, and accessible styling in a