MVP2 READY - 95% works
This commit is contained in:
parent
559a216001
commit
a602d13e16
1 changed file with 114 additions and 2 deletions
|
|
@ -149,6 +149,23 @@ class MethodDocument(MethodBase):
|
||||||
# Fallback: convert to string representation
|
# Fallback: convert to string representation
|
||||||
text_content = str(extracted_content)
|
text_content = str(extracted_content)
|
||||||
|
|
||||||
|
# Skip empty or whitespace-only content
|
||||||
|
if not text_content or text_content.strip() == "":
|
||||||
|
logger.info(f"Skipping document {chatDocument.filename} - extraction result is empty or whitespace only")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip minimal content that is essentially empty (like "{}", "[]", etc.)
|
||||||
|
stripped_content = text_content.strip()
|
||||||
|
minimal_content_patterns = ['{}', '[]', '""', "''", 'null', 'undefined']
|
||||||
|
if stripped_content in minimal_content_patterns:
|
||||||
|
logger.info(f"Skipping document {chatDocument.filename} - extraction result is minimal content: '{stripped_content}'")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip content that's just whitespace or very short meaningless content
|
||||||
|
if len(stripped_content) <= 2:
|
||||||
|
logger.info(f"Skipping document {chatDocument.filename} - extraction result is too short: '{stripped_content}' ({len(stripped_content)} chars)")
|
||||||
|
continue
|
||||||
|
|
||||||
# Create output filename based on original filename
|
# Create output filename based on original filename
|
||||||
original_filename = chatDocument.filename
|
original_filename = chatDocument.filename
|
||||||
base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
|
base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
|
||||||
|
|
@ -172,6 +189,14 @@ class MethodDocument(MethodBase):
|
||||||
"mimeType": output_mime_type
|
"mimeType": output_mime_type
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Check if we have any valid output documents
|
||||||
|
if not output_documents:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="No valid content could be extracted from any documents (all results were empty or whitespace only)"
|
||||||
|
)
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
|
|
@ -252,7 +277,88 @@ class MethodDocument(MethodBase):
|
||||||
|
|
||||||
def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
|
def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
|
||||||
"""
|
"""
|
||||||
Generate a simple HTML report from chat documents.
|
Generate a comprehensive HTML report using AI from all input documents.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Filter out empty documents and collect content
|
||||||
|
validDocuments = []
|
||||||
|
allContent = []
|
||||||
|
|
||||||
|
for doc in chatDocuments:
|
||||||
|
content = ""
|
||||||
|
if hasattr(doc, 'content') and doc.content:
|
||||||
|
content = doc.content.strip()
|
||||||
|
elif hasattr(doc, 'data') and doc.data:
|
||||||
|
content = doc.data.strip()
|
||||||
|
|
||||||
|
# Skip empty documents
|
||||||
|
if content:
|
||||||
|
validDocuments.append(doc)
|
||||||
|
allContent.append(f"Document: {doc.filename}\n{content}\n")
|
||||||
|
|
||||||
|
if not validDocuments:
|
||||||
|
# If no valid documents, create a simple report
|
||||||
|
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
|
||||||
|
html.append(f"<h1>{title}</h1>")
|
||||||
|
html.append(f"<p><b>Generated:</b> {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}</p>")
|
||||||
|
html.append("<p><em>No content available in the provided documents.</em></p>")
|
||||||
|
html.append("</body></html>")
|
||||||
|
return '\n'.join(html)
|
||||||
|
|
||||||
|
# Create AI prompt for comprehensive report generation
|
||||||
|
combinedContent = "\n\n".join(allContent)
|
||||||
|
aiPrompt = f"""
|
||||||
|
Create a comprehensive, well-structured HTML report based on the following documents and content.
|
||||||
|
|
||||||
|
Report Title: {title}
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
1. Create a professional, well-formatted HTML report
|
||||||
|
2. Include an executive summary at the beginning
|
||||||
|
3. Organize information logically with clear sections
|
||||||
|
4. Highlight key findings and insights
|
||||||
|
5. Include relevant data, statistics, and conclusions
|
||||||
|
6. Use proper HTML formatting with headers, lists, and styling
|
||||||
|
7. Make it readable and professional
|
||||||
|
|
||||||
|
Document Content:
|
||||||
|
{combinedContent}
|
||||||
|
|
||||||
|
Generate a complete HTML report that integrates all the information into a cohesive, professional document.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Call AI to generate the report
|
||||||
|
logger.info(f"Generating AI report for {len(validDocuments)} documents")
|
||||||
|
aiReport = self.service.callAiTextBasic(aiPrompt, combinedContent)
|
||||||
|
|
||||||
|
# If AI call fails, fall back to basic HTML
|
||||||
|
if not aiReport or aiReport.strip() == "":
|
||||||
|
logger.warning("AI report generation failed, using fallback HTML")
|
||||||
|
return self._generateFallbackHtmlReport(validDocuments, title, includeMetadata)
|
||||||
|
|
||||||
|
# Clean up the AI response and ensure it's valid HTML
|
||||||
|
if not aiReport.strip().startswith('<html'):
|
||||||
|
# Wrap the AI content in proper HTML structure
|
||||||
|
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
|
||||||
|
html.append(f"<h1>{title}</h1>")
|
||||||
|
html.append(f"<p><b>Generated:</b> {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}</p>")
|
||||||
|
html.append(f"<p><b>Total Documents Analyzed:</b> {len(validDocuments)}</p>")
|
||||||
|
html.append("<hr>")
|
||||||
|
html.append(aiReport)
|
||||||
|
html.append("</body></html>")
|
||||||
|
return '\n'.join(html)
|
||||||
|
else:
|
||||||
|
# AI returned complete HTML, use it directly
|
||||||
|
return aiReport
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error generating AI report: {str(e)}")
|
||||||
|
# Fall back to basic HTML report
|
||||||
|
return self._generateFallbackHtmlReport(chatDocuments, title, includeMetadata)
|
||||||
|
|
||||||
|
def _generateFallbackHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
|
||||||
|
"""
|
||||||
|
Generate a basic HTML report as fallback when AI generation fails.
|
||||||
"""
|
"""
|
||||||
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
|
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
|
||||||
html.append(f"<h1>{title}</h1>")
|
html.append(f"<h1>{title}</h1>")
|
||||||
|
|
@ -272,8 +378,14 @@ class MethodDocument(MethodBase):
|
||||||
html.append("</ul>")
|
html.append("</ul>")
|
||||||
|
|
||||||
# Add document content if available
|
# Add document content if available
|
||||||
|
content = ""
|
||||||
if hasattr(doc, 'content') and doc.content:
|
if hasattr(doc, 'content') and doc.content:
|
||||||
html.append(f"<div style='white-space:pre-wrap; border:1px solid #ccc; padding:0.5em; margin-bottom:1em; background-color:#f9f9f9;'>{doc.content}</div>")
|
content = doc.content
|
||||||
|
elif hasattr(doc, 'data') and doc.data:
|
||||||
|
content = doc.data
|
||||||
|
|
||||||
|
if content:
|
||||||
|
html.append(f"<div style='white-space:pre-wrap; border:1px solid #ccc; padding:0.5em; margin-bottom:1em; background-color:#f9f9f9;'>{content}</div>")
|
||||||
else:
|
else:
|
||||||
html.append("<p><em>No content available</em></p>")
|
html.append("<p><em>No content available</em></p>")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue