From aa86caa229ebd5a6e35152bf861005a5abeb06b1 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sun, 12 Oct 2025 14:59:51 +0200 Subject: [PATCH] file verbose trace with config var APP_DEBUG_CHAT_WORKFLOW_ENABLED installed --- modules/interfaces/interfaceDbChatObjects.py | 6 +- modules/services/serviceAi/mainServiceAi.py | 66 +++++++++++-------- .../services/serviceExtraction/subPipeline.py | 65 +++++++++--------- .../mainServiceGeneration.py | 22 ++++--- .../serviceGeneration/subPromptBuilder.py | 42 ++++++------ 5 files changed, 110 insertions(+), 91 deletions(-) diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py index 42c96158..ff18a9a9 100644 --- a/modules/interfaces/interfaceDbChatObjects.py +++ b/modules/interfaces/interfaceDbChatObjects.py @@ -571,8 +571,10 @@ class ChatObjects: actionName=createdMessage.get("actionName") ) - # Debug: Store message and documents for debugging TODO REMOVE - self._storeDebugMessageAndDocuments(chat_message) + # Debug: Store message and documents for debugging - only if debug enabled + debug_enabled = APP_CONFIG.get("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + self._storeDebugMessageAndDocuments(chat_message) return chat_message diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 031f2ea4..f3be97b2 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -786,19 +786,21 @@ class AiService: # Log extraction response self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") - # Save full extraction prompt and response to debug file - try: - import os - from datetime import datetime, UTC - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - debug_root = "./test-chat/ai" - os.makedirs(debug_root, exist_ok=True) - with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: - f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n") - f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n") - f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") - except Exception: - pass + # Save full extraction prompt and response to debug file - only if debug enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n") + f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n") + f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") + except Exception: + pass # If generating JSON, validate the response if generate_json: @@ -878,17 +880,19 @@ class AiService: # Log extraction response length self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") - # Save extraction response to debug file (without verbose prompt) - try: - import os - from datetime import datetime, UTC - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - debug_root = "./test-chat/ai" - os.makedirs(debug_root, exist_ok=True) - with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: - f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") - except Exception: - pass + # Save extraction response to debug file (without verbose prompt) - only if debug enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") + except Exception: + pass # If generating JSON, validate the response if generate_json: @@ -1325,7 +1329,6 @@ class AiService: call_type = self._determineCallType(documents, options.operationType) options.callType = call_type - # Log the prompt being sent to AI for debugging (before routing) TODO TO REMOVE try: # Build the full prompt that will be sent to AI if placeholders: @@ -1349,7 +1352,7 @@ class AiService: # Handle document generation with specific output format if outputFormat: result = await self._callAiWithDocumentGeneration(prompt, documents, options, outputFormat, title) - # Log AI response for debugging TODO TO REMOVE + # Log AI response for debugging try: if isinstance(result, dict) and 'content' in result: self._writeAiResponseDebug( @@ -1365,7 +1368,7 @@ class AiService: if call_type == "planning": result = await self._callAiPlanning(prompt, placeholders_dict, placeholders_meta, options) - # Log AI response for debugging TODO TO REMOVE + # Log AI response for debugging try: self._writeAiResponseDebug( label='ai_planning', @@ -1390,7 +1393,7 @@ class AiService: full_prompt = prompt result = await self._callAiText(full_prompt, documents, options) - # Log AI response for debugging (additional logging for text calls) TODO TO REMOVE + # Log AI response for debugging (additional logging for text calls) try: self._writeAiResponseDebug( label='ai_text_main', @@ -1697,8 +1700,13 @@ class AiService: pass def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None: - """Persist raw AI response parts for debugging under test-chat/ai.""" + """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled.""" try: + # Check if debug logging is enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if not debug_enabled: + return + import os from datetime import datetime, UTC # Base dir: gateway/test-chat/ai (go up 4 levels from this file) diff --git a/modules/services/serviceExtraction/subPipeline.py b/modules/services/serviceExtraction/subPipeline.py index 382bd74d..9b18ea88 100644 --- a/modules/services/serviceExtraction/subPipeline.py +++ b/modules/services/serviceExtraction/subPipeline.py @@ -3,6 +3,7 @@ import logging import os from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart +from modules.shared.configuration import APP_CONFIG from .subUtils import makeId from .subRegistry import ExtractorRegistry, ChunkerRegistry from .merging.mergerText import TextMerger @@ -100,38 +101,40 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})") logger.debug(f"runExtraction - Final parts: {len(parts)} (chunks: {len(chunk_parts)})") - # DEBUG: dump parts and chunks to files TODO TO REMOVE + # DEBUG: dump parts and chunks to files - only if debug enabled try: - base_dir = "./test-chat/ai" - os.makedirs(base_dir, exist_ok=True) - - # Generate timestamp for consistent naming - from datetime import datetime, UTC - ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3] - - # Write a summary file - summary_lines: List[str] = [f"fileName: {fileName}", f"mimeType: {mimeType}", f"totalParts: {len(parts)}"] - text_index = 0 - for idx, part in enumerate(parts): - is_texty = part.typeGroup in ("text", "table", "structure") - size = int(part.metadata.get("size", 0) or 0) - is_chunk = bool(part.metadata.get("chunk", False)) - summary_lines.append( - f"part[{idx}]: typeGroup={part.typeGroup}, label={part.label}, size={size}, chunk={is_chunk}" - ) - if is_texty and getattr(part, "data", None): - text_index += 1 - fname = f"{ts}_extract_{fileName}_part_{idx:03d}_{'chunk' if is_chunk else 'full'}_{text_index:03d}.txt" - fpath = os.path.join(base_dir, fname) - with open(fpath, "w", encoding="utf-8") as f: - f.write(f"# typeGroup: {part.typeGroup}\n# label: {part.label}\n# chunk: {is_chunk}\n# size: {size}\n\n") - f.write(str(part.data)) - - # Write summary file - summary_fname = f"{ts}_extract_{fileName}_summary.txt" - summary_fpath = os.path.join(base_dir, summary_fname) - with open(summary_fpath, "w", encoding="utf-8") as f: - f.write("\n".join(summary_lines)) + debug_enabled = APP_CONFIG.get("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + base_dir = "./test-chat/ai" + os.makedirs(base_dir, exist_ok=True) + + # Generate timestamp for consistent naming + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3] + + # Write a summary file + summary_lines: List[str] = [f"fileName: {fileName}", f"mimeType: {mimeType}", f"totalParts: {len(parts)}"] + text_index = 0 + for idx, part in enumerate(parts): + is_texty = part.typeGroup in ("text", "table", "structure") + size = int(part.metadata.get("size", 0) or 0) + is_chunk = bool(part.metadata.get("chunk", False)) + summary_lines.append( + f"part[{idx}]: typeGroup={part.typeGroup}, label={part.label}, size={size}, chunk={is_chunk}" + ) + if is_texty and getattr(part, "data", None): + text_index += 1 + fname = f"{ts}_extract_{fileName}_part_{idx:03d}_{'chunk' if is_chunk else 'full'}_{text_index:03d}.txt" + fpath = os.path.join(base_dir, fname) + with open(fpath, "w", encoding="utf-8") as f: + f.write(f"# typeGroup: {part.typeGroup}\n# label: {part.label}\n# chunk: {is_chunk}\n# size: {size}\n\n") + f.write(str(part.data)) + + # Write summary file + summary_fname = f"{ts}_extract_{fileName}_summary.txt" + summary_fpath = os.path.join(base_dir, summary_fname) + with open(summary_fpath, "w", encoding="utf-8") as f: + f.write("\n".join(summary_lines)) except Exception as _e: logger.debug(f"Debug dump skipped: {_e}") diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 0380455e..4c76c95e 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -318,17 +318,19 @@ class GenerationService: if "sections" not in extractedContent: raise ValueError("extractedContent must contain 'sections' field") - # DEBUG: Log renderer input metadata only (no verbose JSON) TODO REMOVE + # DEBUG: Log renderer input metadata only (no verbose JSON) - only if debug enabled try: - import os - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - debug_root = "./test-chat/ai" - debug_dir = os.path.join(debug_root, f"render_input_{ts}") - os.makedirs(debug_dir, exist_ok=True) - with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f: - f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n") - f.write(f"content_size: {len(str(extractedContent))} characters\n") - f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n") + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + import os + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + debug_dir = os.path.join(debug_root, f"render_input_{ts}") + os.makedirs(debug_dir, exist_ok=True) + with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f: + f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n") + f.write(f"content_size: {len(str(extractedContent))} characters\n") + f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n") except Exception: pass diff --git a/modules/services/serviceGeneration/subPromptBuilder.py b/modules/services/serviceGeneration/subPromptBuilder.py index 00b9be5c..dd2a6717 100644 --- a/modules/services/serviceGeneration/subPromptBuilder.py +++ b/modules/services/serviceGeneration/subPromptBuilder.py @@ -114,16 +114,18 @@ Return only the JSON structure with actual data from the documents. Do not inclu # Debug output services.utils.debugLogToFile(f"EXTRACTION INTENT: Processed", "PROMPT_BUILDER") - # Save full extraction prompt to debug file + # Save full extraction prompt to debug file - only if debug enabled try: - import os - from datetime import datetime, UTC - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - debug_root = "./test-chat/ai" - os.makedirs(debug_root, exist_ok=True) - with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f: - f.write(f"EXTRACTION PROMPT:\n{finalPrompt}\n\n") - f.write(f"EXTRACTION INTENT:\n{extractionIntent}\n") + debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION PROMPT:\n{finalPrompt}\n\n") + f.write(f"EXTRACTION INTENT:\n{extractionIntent}\n") except Exception: pass @@ -194,17 +196,19 @@ Return only the generation prompt, starting with "Generate a {outputFormat} docu # Debug output services.utils.debugLogToFile(f"GENERATION PROMPT: Generated successfully", "PROMPT_BUILDER") - # Save full generation prompt and AI response to debug file + # Save full generation prompt and AI response to debug file - only if debug enabled try: - import os - from datetime import datetime, UTC - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - debug_root = "./test-chat/ai" - os.makedirs(debug_root, exist_ok=True) - with open(os.path.join(debug_root, f"{ts}_generation_prompt.txt"), "w", encoding="utf-8") as f: - f.write(f"GENERATION PROMPT REQUEST:\n{generationPromptRequest}\n\n") - f.write(f"GENERATION PROMPT AI RESPONSE:\n{response.content if response else 'No response'}\n\n") - f.write(f"GENERATION PROMPT FINAL:\n{result if result else 'None'}\n") + debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_generation_prompt.txt"), "w", encoding="utf-8") as f: + f.write(f"GENERATION PROMPT REQUEST:\n{generationPromptRequest}\n\n") + f.write(f"GENERATION PROMPT AI RESPONSE:\n{response.content if response else 'No response'}\n\n") + f.write(f"GENERATION PROMPT FINAL:\n{result if result else 'None'}\n") except Exception: pass