From a602d13e16790ab2c60e2706661858cd75cc2f22 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 15 Jul 2025 23:58:24 +0200
Subject: [PATCH] MVP2 READY - 95% works
---
modules/methods/methodDocument.py | 116 +++++++++++++++++++++++++++++-
1 file changed, 114 insertions(+), 2 deletions(-)
diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py
index 585d952c..0146215b 100644
--- a/modules/methods/methodDocument.py
+++ b/modules/methods/methodDocument.py
@@ -149,6 +149,23 @@ class MethodDocument(MethodBase):
# Fallback: convert to string representation
text_content = str(extracted_content)
+ # Skip empty or whitespace-only content
+ if not text_content or text_content.strip() == "":
+ logger.info(f"Skipping document {chatDocument.filename} - extraction result is empty or whitespace only")
+ continue
+
+ # Skip minimal content that is essentially empty (like "{}", "[]", etc.)
+ stripped_content = text_content.strip()
+ minimal_content_patterns = ['{}', '[]', '""', "''", 'null', 'undefined']
+ if stripped_content in minimal_content_patterns:
+ logger.info(f"Skipping document {chatDocument.filename} - extraction result is minimal content: '{stripped_content}'")
+ continue
+
+ # Skip any remaining content of two characters or fewer (whitespace-only content was already skipped above)
+ if len(stripped_content) <= 2:
+ logger.info(f"Skipping document {chatDocument.filename} - extraction result is too short: '{stripped_content}' ({len(stripped_content)} chars)")
+ continue
+
# Create output filename based on original filename
original_filename = chatDocument.filename
base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
@@ -172,6 +189,14 @@ class MethodDocument(MethodBase):
"mimeType": output_mime_type
})
+ # Check if we have any valid output documents
+ if not output_documents:
+ return self._createResult(
+ success=False,
+ data={},
+ error="No valid content could be extracted from any documents (all results were empty or whitespace only)"
+ )
+
return self._createResult(
success=True,
data={
@@ -252,7 +277,88 @@ class MethodDocument(MethodBase):
def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
"""
- Generate a simple HTML report from chat documents.
+ Generate an AI-written HTML report from the non-empty input documents, falling back to a basic HTML report if AI generation fails.
+ """
+ try:
+ # Filter out empty documents and collect content
+ validDocuments = []
+ allContent = []
+
+ for doc in chatDocuments:
+ content = ""
+ if hasattr(doc, 'content') and doc.content:
+ content = doc.content.strip()
+ elif hasattr(doc, 'data') and doc.data:
+ content = doc.data.strip()
+
+ # Skip empty documents
+ if content:
+ validDocuments.append(doc)
+ allContent.append(f"Document: {doc.filename}\n{content}\n")
+
+ if not validDocuments:
+ # If no valid documents, create a simple report
+ html = ["" + title + ""]
+ html.append(f"{title}
")
+ html.append(f"Generated: {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}
")
+ html.append("No content available in the provided documents.
")
+ html.append("")
+ return '\n'.join(html)
+
+ # Create AI prompt for comprehensive report generation
+ combinedContent = "\n\n".join(allContent)
+ aiPrompt = f"""
+ Create a comprehensive, well-structured HTML report based on the following documents and content.
+
+ Report Title: {title}
+
+ Requirements:
+ 1. Create a professional, well-formatted HTML report
+ 2. Include an executive summary at the beginning
+ 3. Organize information logically with clear sections
+ 4. Highlight key findings and insights
+ 5. Include relevant data, statistics, and conclusions
+ 6. Use proper HTML formatting with headers, lists, and styling
+ 7. Make it readable and professional
+
+ Document Content:
+ {combinedContent}
+
+ Generate a complete HTML report that integrates all the information into a cohesive, professional document.
+ """
+
+ # Call AI to generate the report
+ logger.info(f"Generating AI report for {len(validDocuments)} documents")
+ aiReport = self.service.callAiTextBasic(aiPrompt, combinedContent)
+
+ # If the AI returns an empty or whitespace-only response, fall back to basic HTML
+ if not aiReport or aiReport.strip() == "":
+ logger.warning("AI report generation failed, using fallback HTML")
+ return self._generateFallbackHtmlReport(validDocuments, title, includeMetadata)
+
+ # Clean up the AI response and ensure it's valid HTML
+ if not aiReport.strip().startswith('" + title + ""]
+ html.append(f"{title}
")
+ html.append(f"Generated: {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}
")
+ html.append(f"Total Documents Analyzed: {len(validDocuments)}
")
+ html.append("
")
+ html.append(aiReport)
+ html.append("")
+ return '\n'.join(html)
+ else:
+ # AI returned complete HTML, use it directly
+ return aiReport
+
+ except Exception as e:
+ logger.error(f"Error generating AI report: {str(e)}")
+ # Fall back to basic HTML report
+ return self._generateFallbackHtmlReport(chatDocuments, title, includeMetadata)
+
+ def _generateFallbackHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
+ """
+ Generate a basic HTML report as fallback when AI generation fails.
"""
html = ["" + title + ""]
html.append(f"{title}
")
@@ -272,8 +378,14 @@ class MethodDocument(MethodBase):
html.append("")
# Add document content if available
+ content = ""
if hasattr(doc, 'content') and doc.content:
- html.append(f"{doc.content}
")
+ content = doc.content
+ elif hasattr(doc, 'data') and doc.data:
+ content = doc.data
+
+ if content:
+ html.append(f"{content}
")
else:
html.append("No content available
")