diff --git a/app.py b/app.py index 30def90e..4f9ddc52 100644 --- a/app.py +++ b/app.py @@ -131,6 +131,11 @@ def initLogging(): import unicodedata # Remove emoji characters specifically record.msg = ''.join(char for char in record.msg if unicodedata.category(char) != 'So' or not (0x1F600 <= ord(char) <= 0x1F64F or 0x1F300 <= ord(char) <= 0x1F5FF or 0x1F680 <= ord(char) <= 0x1F6FF or 0x1F1E0 <= ord(char) <= 0x1F1FF or 0x2600 <= ord(char) <= 0x26FF or 0x2700 <= ord(char) <= 0x27BF)) + # Additionally strip characters not representable in Windows cp1252 (e.g., arrows) + try: + record.msg.encode('cp1252', errors='strict') + except UnicodeEncodeError: + record.msg = record.msg.encode('cp1252', errors='ignore').decode('cp1252', errors='ignore') return True # Configure handlers based on config diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py index 4cd13f8a..52163432 100644 --- a/modules/services/serviceAi/subCoreAi.py +++ b/modules/services/serviceAi/subCoreAi.py @@ -128,7 +128,7 @@ class SubCoreAi: else: full_prompt = prompt - if documentProcessor: + if documentProcessor and documents: result = await documentProcessor.callAiText(full_prompt, documents, options) else: # Fallback to direct AI call if no document processor available diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py index e9e087d2..f86be535 100644 --- a/modules/services/serviceAi/subDocumentProcessing.py +++ b/modules/services/serviceAi/subDocumentProcessing.py @@ -104,6 +104,20 @@ class SubDocumentProcessing: # FIXED: Merge with preserved chunk relationships mergedContent = self._mergeChunkResults(chunkResults, options) + # Save merged extraction content to debug file - only if debug enabled + try: + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_merged.txt"), "w", encoding="utf-8") as f: + f.write(mergedContent or "") + except Exception: + pass + return mergedContent except Exception as e: @@ -162,6 +176,21 @@ class SubDocumentProcessing: # Merge with JSON mode mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options) + # Save merged JSON extraction content to debug file - only if debug enabled + try: + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + import os + import json as _json + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_merged.json"), "w", encoding="utf-8") as f: + f.write(_json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2)) + except Exception: + pass + return mergedJsonDocument except Exception as e: @@ -356,6 +385,19 @@ class SubDocumentProcessing: ) self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE") + # Save image extraction response to debug file - only if debug enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_image_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION IMAGE RESPONSE:\n{ai_result if ai_result else 'No response'}\n") + except Exception: + pass # Check if result is empty or None if not ai_result or not ai_result.strip(): diff --git a/modules/services/serviceWorkflow/mainServiceWorkflow.py b/modules/services/serviceWorkflow/mainServiceWorkflow.py index 1bb5607c..628eb1d2 100644 --- a/modules/services/serviceWorkflow/mainServiceWorkflow.py +++ b/modules/services/serviceWorkflow/mainServiceWorkflow.py @@ -613,12 +613,12 @@ class WorkflowService: # Get document reference list using the exact same logic as old system document_list = self._getDocumentReferenceList(workflow) - # Build technical context string for AI action planning (exact copy of old system) - context = "AVAILABLE DOCUMENTS:\n\n" + # Build index string for AI action planning + context = "" - # Process chat exchanges (current round) - exact copy of old system + # Process current round exchanges first if document_list["chat"]: - context += "CURRENT ROUND DOCUMENTS:\n" + context += "\nCurrent round documents:\n" for exchange in document_list["chat"]: # Generate docList reference for the exchange (using message ID and label) # Find the message that corresponds to this exchange @@ -644,9 +644,9 @@ class WorkflowService: context += f" - docItem:{doc_ref}\n" context += "\n" - # Process history exchanges (previous rounds) - exact copy of old system + # Process previous rounds after if document_list["history"]: - context += "WORKFLOW HISTORY DOCUMENTS:\n" + context += "\nPast rounds documents:\n" for exchange in document_list["history"]: # Generate docList reference for the exchange (using message ID and label) # Find the message that corresponds to this exchange @@ -673,7 +673,7 @@ class WorkflowService: context += "\n" if not document_list["chat"] and not document_list["history"]: - context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n" + context += "\nNO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n" return context @@ -701,39 +701,23 @@ class WorkflowService: for message in reversed(workflow.messages): is_first = message.status == "first" if hasattr(message, 'status') else False - # Build a DocumentExchange if message has documents + # Build a DocumentExchange if message has documents and an explicit documentsLabel doc_exchange = None if message.documents: - if message.actionId and message.documentsLabel: - # Validate that we use the same label as in the message + existing_label = getattr(message, 'documentsLabel', None) + if existing_label: + # Validate and use the message's actual documentsLabel validated_label = self._validateDocumentLabelConsistency(message) - - # Use the message's actual documentsLabel doc_refs = [] for doc in message.documents: doc_ref = self._getDocumentReferenceFromChatDocument(doc, message) doc_refs.append(doc_ref) - doc_exchange = { 'documentsLabel': validated_label, 'documents': doc_refs } - else: - # Generate new labels for documents without explicit labels - doc_refs = [] - for doc in message.documents: - doc_ref = self._getDocumentReferenceFromChatDocument(doc, message) - doc_refs.append(doc_ref) - - if doc_refs: - # Create a label based on message context - context_prefix = self._generateWorkflowContextPrefix(message) - context_label = f"{context_prefix}_context" - - doc_exchange = { - 'documentsLabel': context_label, - 'documents': doc_refs - } + # IMPORTANT: Never synthesize new labels here. If a message lacks + # a documentsLabel, we skip adding an exchange for it. # Append to appropriate container based on boundary if doc_exchange: diff --git a/modules/workflows/methods/methodDocument.py b/modules/workflows/methods/_EXCLUDED_methodDocument.py similarity index 99% rename from modules/workflows/methods/methodDocument.py rename to modules/workflows/methods/_EXCLUDED_methodDocument.py index 89aff4b4..d31631ce 100644 --- a/modules/workflows/methods/methodDocument.py +++ b/modules/workflows/methods/_EXCLUDED_methodDocument.py @@ -153,9 +153,6 @@ class MethodDocument(MethodBase): error=str(e) ) - - - @action async def generate(self, parameters: Dict[str, Any]) -> ActionResult: """ diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py index 2b4cb677..eba17bcb 100644 --- a/modules/workflows/methods/methodAi.py +++ b/modules/workflows/methods/methodAi.py @@ -31,14 +31,14 @@ class MethodAi(MethodBase): async def process(self, parameters: Dict[str, Any]) -> ActionResult: """ GENERAL: - - Purpose: AI-based analysis and content generation with optional document context. - - Input requirements: aiPrompt (required); optional documentList, resultType, processingMode, includeMetadata, operationType, priority, maxCost, maxProcessingTime, requiredTags. - - Output format: Single or multiple documents in requested format. + - Purpose: Process a user prompt with optional unlimited input documents to produce one or many output documents of the SAME format. + - Input requirements: aiPrompt (required); optional documentList. + - Output format: Exactly one file format to select. For multiple output file formats to do different calls. Parameters: - aiPrompt (str, required): Instruction for the AI. - documentList (list, optional): Document reference(s) for context. - - resultType (str, optional): Output extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png). Default: txt. + - resultType (str, optional): Output file extension - only one extension allowed (e.g. txt, json, md, csv, xml, html, pdf, docx, xlsx, png, ...). Default: txt. - processingMode (str, optional): basic | advanced | detailed. Default: basic. - includeMetadata (bool, optional): Include metadata when available. Default: True. - operationType (str, optional): general | generate_plan | analyse_content | generate_content | web_research | image_analysis | image_generation. Default: general. diff --git a/modules/workflows/processing/shared/placeholderFactory.py b/modules/workflows/processing/shared/placeholderFactory.py index 90aa8ba8..225edbe7 100644 --- a/modules/workflows/processing/shared/placeholderFactory.py +++ b/modules/workflows/processing/shared/placeholderFactory.py @@ -373,7 +373,8 @@ def extractAvailableDocumentsSummary(service: Any, context: Any) -> str: try: documents = service.workflow.getAvailableDocuments(context.workflow) if documents and documents != "No documents available": - doc_count = documents.count("docList:") + documents.count("docItem:") + # Count only actual documents, not list labels + doc_count = documents.count("docItem:") return f"{doc_count} documents available from previous tasks" return "No documents available" except Exception as e: diff --git a/modules/workflows/processing/shared/promptGenerationActionsReact.py b/modules/workflows/processing/shared/promptGenerationActionsReact.py index fc5ca265..10b7f9f6 100644 --- a/modules/workflows/processing/shared/promptGenerationActionsReact.py +++ b/modules/workflows/processing/shared/promptGenerationActionsReact.py @@ -32,7 +32,7 @@ def generateReactPlanSelectionPrompt(services, context: Any) -> PromptBundle: PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False), ] - template = """Select exactly one action to advance the task. + template = """Select exactly one next action to advance the task incrementally. OBJECTIVE: {{KEY:USER_PROMPT}} @@ -52,7 +52,11 @@ AVAILABLE_DOCUMENTS_INDEX: AVAILABLE_CONNECTIONS_INDEX: {{KEY:AVAILABLE_CONNECTIONS_INDEX}} -REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text): +REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text). The chosen action MUST: +- be the next logical incremental step toward fulfilling the objective +- not attempt to complete the entire objective in one step +- if producing files, target exactly one output format for this step +- reference ONLY existing document IDs/labels from AVAILABLE_DOCUMENTS_INDEX {{ "action": "method.action_name", "actionObjective": "...", @@ -76,6 +80,7 @@ RULES: - DO NOT create new references - Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX 6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX +7. Plan incrementally: if the overall intent needs multiple output formats (e.g., CSV and HTML), choose one format in this step and leave the other(s) for subsequent steps """ return PromptBundle(prompt=template, placeholders=placeholders) diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py index 88ef4023..5a3098b1 100644 --- a/modules/workflows/workflowManager.py +++ b/modules/workflows/workflowManager.py @@ -329,8 +329,6 @@ class WorkflowManager: if not message.documents: message.documents = [] message.documents.extend(created_docs) - # Ensure label is user_context for discoverability - message.documentsLabel = context_label self.services.workflow.updateMessage(message.id, { "documents": [d.to_dict() for d in message.documents], "documentsLabel": context_label