MVP2 READY - 95% works

This commit is contained in:
ValueOn AG 2025-07-15 23:58:24 +02:00
parent 559a216001
commit a602d13e16

View file

@ -149,6 +149,23 @@ class MethodDocument(MethodBase):
# Fallback: convert to string representation
text_content = str(extracted_content)
# Skip empty or whitespace-only content
if not text_content or text_content.strip() == "":
logger.info(f"Skipping document {chatDocument.filename} - extraction result is empty or whitespace only")
continue
# Skip minimal content that is essentially empty (like "{}", "[]", etc.)
stripped_content = text_content.strip()
minimal_content_patterns = ['{}', '[]', '""', "''", 'null', 'undefined']
if stripped_content in minimal_content_patterns:
logger.info(f"Skipping document {chatDocument.filename} - extraction result is minimal content: '{stripped_content}'")
continue
# Skip content that's just whitespace or very short meaningless content
if len(stripped_content) <= 2:
logger.info(f"Skipping document {chatDocument.filename} - extraction result is too short: '{stripped_content}' ({len(stripped_content)} chars)")
continue
# Create output filename based on original filename
original_filename = chatDocument.filename
base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
@ -172,6 +189,14 @@ class MethodDocument(MethodBase):
"mimeType": output_mime_type
})
# Check if we have any valid output documents
if not output_documents:
return self._createResult(
success=False,
data={},
error="No valid content could be extracted from any documents (all results were empty or whitespace only)"
)
return self._createResult(
success=True,
data={
@ -252,7 +277,88 @@ class MethodDocument(MethodBase):
def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
"""
Generate a simple HTML report from chat documents.
Generate a comprehensive HTML report using AI from all input documents.
"""
try:
# Filter out empty documents and collect content
validDocuments = []
allContent = []
for doc in chatDocuments:
content = ""
if hasattr(doc, 'content') and doc.content:
content = doc.content.strip()
elif hasattr(doc, 'data') and doc.data:
content = doc.data.strip()
# Skip empty documents
if content:
validDocuments.append(doc)
allContent.append(f"Document: {doc.filename}\n{content}\n")
if not validDocuments:
# If no valid documents, create a simple report
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
html.append(f"<h1>{title}</h1>")
html.append(f"<p><b>Generated:</b> {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}</p>")
html.append("<p><em>No content available in the provided documents.</em></p>")
html.append("</body></html>")
return '\n'.join(html)
# Create AI prompt for comprehensive report generation
combinedContent = "\n\n".join(allContent)
aiPrompt = f"""
Create a comprehensive, well-structured HTML report based on the following documents and content.
Report Title: {title}
Requirements:
1. Create a professional, well-formatted HTML report
2. Include an executive summary at the beginning
3. Organize information logically with clear sections
4. Highlight key findings and insights
5. Include relevant data, statistics, and conclusions
6. Use proper HTML formatting with headers, lists, and styling
7. Make it readable and professional
Document Content:
{combinedContent}
Generate a complete HTML report that integrates all the information into a cohesive, professional document.
"""
# Call AI to generate the report
logger.info(f"Generating AI report for {len(validDocuments)} documents")
aiReport = self.service.callAiTextBasic(aiPrompt, combinedContent)
# If AI call fails, fall back to basic HTML
if not aiReport or aiReport.strip() == "":
logger.warning("AI report generation failed, using fallback HTML")
return self._generateFallbackHtmlReport(validDocuments, title, includeMetadata)
# Clean up the AI response and ensure it's valid HTML
if not aiReport.strip().startswith('<html'):
# Wrap the AI content in proper HTML structure
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
html.append(f"<h1>{title}</h1>")
html.append(f"<p><b>Generated:</b> {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}</p>")
html.append(f"<p><b>Total Documents Analyzed:</b> {len(validDocuments)}</p>")
html.append("<hr>")
html.append(aiReport)
html.append("</body></html>")
return '\n'.join(html)
else:
# AI returned complete HTML, use it directly
return aiReport
except Exception as e:
logger.error(f"Error generating AI report: {str(e)}")
# Fall back to basic HTML report
return self._generateFallbackHtmlReport(chatDocuments, title, includeMetadata)
def _generateFallbackHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
"""
Generate a basic HTML report as fallback when AI generation fails.
"""
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
html.append(f"<h1>{title}</h1>")
@ -272,8 +378,14 @@ class MethodDocument(MethodBase):
html.append("</ul>")
# Add document content if available
content = ""
if hasattr(doc, 'content') and doc.content:
html.append(f"<div style='white-space:pre-wrap; border:1px solid #ccc; padding:0.5em; margin-bottom:1em; background-color:#f9f9f9;'>{doc.content}</div>")
content = doc.content
elif hasattr(doc, 'data') and doc.data:
content = doc.data
if content:
html.append(f"<div style='white-space:pre-wrap; border:1px solid #ccc; padding:0.5em; margin-bottom:1em; background-color:#f9f9f9;'>{content}</div>")
else:
html.append("<p><em>No content available</em></p>")