integration testing of adapted ai workflow with fixes
This commit is contained in:
parent
aff37fd2e2
commit
b401be703f
18 changed files with 648 additions and 293 deletions
|
|
@ -400,6 +400,10 @@ class ActionDocument(BaseModel):
|
|||
None,
|
||||
description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)"
|
||||
)
|
||||
validationMetadata: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)"
|
||||
)
|
||||
|
||||
|
||||
registerModelLabels(
|
||||
|
|
|
|||
|
|
@ -262,11 +262,17 @@ class AiObjects:
|
|||
logger.info(f"✅ Image content part processed successfully with model: {model.name}")
|
||||
|
||||
# Convert to AiCallResponse format
|
||||
# Note: AiModelResponse doesn't have priceUsd, and processingTime can be None
|
||||
# Calculate processing time if not provided (fallback to 0.0)
|
||||
processingTime = getattr(modelResponse, 'processingTime', None)
|
||||
if processingTime is None:
|
||||
processingTime = 0.0
|
||||
|
||||
return AiCallResponse(
|
||||
content=modelResponse.content,
|
||||
modelName=model.name,
|
||||
priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0,
|
||||
processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0,
|
||||
priceUsd=0.0, # Price will be calculated elsewhere if needed
|
||||
processingTime=processingTime,
|
||||
bytesSent=0, # Will be calculated elsewhere
|
||||
bytesReceived=0, # Will be calculated elsewhere
|
||||
errorCount=0
|
||||
|
|
|
|||
|
|
@ -944,32 +944,17 @@ If no trackable items can be identified, return: {{"kpis": []}}
|
|||
)
|
||||
|
||||
try:
|
||||
# Default outputFormat to "txt" if not specified (unified path - all formats handled the same way)
|
||||
if not outputFormat:
|
||||
outputFormat = "txt"
|
||||
|
||||
# Extraction is now separate - contentParts must be extracted before calling
|
||||
# Require operationType to be set before calling
|
||||
opType = getattr(options, "operationType", None)
|
||||
if not opType:
|
||||
# If outputFormat is specified, default to DATA_GENERATE
|
||||
if outputFormat:
|
||||
options.operationType = OperationTypeEnum.DATA_GENERATE
|
||||
opType = OperationTypeEnum.DATA_GENERATE
|
||||
else:
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
|
||||
analyzedOptions = await self._analyzePromptAndCreateOptions(prompt)
|
||||
if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType:
|
||||
options.operationType = analyzedOptions.operationType
|
||||
# Merge other analyzed options
|
||||
if hasattr(analyzedOptions, "priority"):
|
||||
options.priority = analyzedOptions.priority
|
||||
if hasattr(analyzedOptions, "processingMode"):
|
||||
options.processingMode = analyzedOptions.processingMode
|
||||
if hasattr(analyzedOptions, "compressPrompt"):
|
||||
options.compressPrompt = analyzedOptions.compressPrompt
|
||||
if hasattr(analyzedOptions, "compressContext"):
|
||||
options.compressContext = analyzedOptions.compressContext
|
||||
else:
|
||||
# Default to DATA_ANALYSE if analysis fails
|
||||
options.operationType = OperationTypeEnum.DATA_ANALYSE
|
||||
opType = options.operationType
|
||||
# outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE
|
||||
options.operationType = OperationTypeEnum.DATA_GENERATE
|
||||
opType = OperationTypeEnum.DATA_GENERATE
|
||||
|
||||
# Handle IMAGE_GENERATE operations
|
||||
if opType == OperationTypeEnum.IMAGE_GENERATE:
|
||||
|
|
@ -1052,171 +1037,232 @@ If no trackable items can be identified, return: {{"kpis": []}}
|
|||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise ValueError(errorMsg)
|
||||
|
||||
# Handle document generation (outputFormat specified)
|
||||
if outputFormat:
|
||||
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt
|
||||
options.compressPrompt = False
|
||||
options.compressContext = False
|
||||
# Handle document generation (outputFormat always set, defaults to "txt")
|
||||
# Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way
|
||||
# outputFormat is always set now (defaults to "txt" if not specified)
|
||||
|
||||
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt
|
||||
options.compressPrompt = False
|
||||
options.compressContext = False
|
||||
|
||||
# Process contentParts for generation prompt (if provided)
|
||||
# Use generic _callWithContentParts() which handles all content types (images, text, etc.)
|
||||
# This automatically processes images with vision models and merges all results
|
||||
if contentParts:
|
||||
# Filter out binary/other parts that shouldn't be processed
|
||||
processableParts = []
|
||||
skippedParts = []
|
||||
for p in contentParts:
|
||||
if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
|
||||
processableParts.append(p)
|
||||
else:
|
||||
skippedParts.append(p)
|
||||
|
||||
# Convert contentParts to text for generation prompt (if provided)
|
||||
if contentParts:
|
||||
# Convert contentParts to text for generation prompt
|
||||
content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
|
||||
if skippedParts:
|
||||
logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")
|
||||
|
||||
if processableParts:
|
||||
# Count images for progress update
|
||||
imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
|
||||
if imageCount > 0:
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")
|
||||
|
||||
# Build proper extraction prompt using buildExtractionPrompt
|
||||
# This creates a focused extraction prompt, not the user's generation prompt
|
||||
from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
|
||||
|
||||
# Determine renderer for format-specific guidelines
|
||||
renderer = None
|
||||
if outputFormat:
|
||||
try:
|
||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||
generationService = GenerationService(self.services)
|
||||
renderer = generationService.getRendererForFormat(outputFormat)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get renderer for format {outputFormat}: {e}")
|
||||
|
||||
extractionPrompt = await buildExtractionPrompt(
|
||||
outputFormat=outputFormat or "txt",
|
||||
userPrompt=prompt, # User's prompt as context for what to extract
|
||||
title=title or "Document",
|
||||
aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None,
|
||||
services=self.services,
|
||||
renderer=renderer
|
||||
)
|
||||
|
||||
logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")
|
||||
|
||||
# Use DATA_EXTRACT operation type for extraction
|
||||
extractionOptions = AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction
|
||||
compressPrompt=options.compressPrompt,
|
||||
compressContext=options.compressContext
|
||||
)
|
||||
|
||||
extractionRequest = AiCallRequest(
|
||||
prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt
|
||||
context="",
|
||||
options=extractionOptions,
|
||||
contentParts=processableParts
|
||||
)
|
||||
|
||||
# Write debug file for extraction prompt (all parts)
|
||||
self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
|
||||
|
||||
# Call generic content parts processor - handles images, text, chunking, merging
|
||||
extractionResponse = await self.aiObjects.call(extractionRequest)
|
||||
|
||||
# Write debug file for extraction response
|
||||
if extractionResponse.content:
|
||||
self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
|
||||
else:
|
||||
self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
|
||||
logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")
|
||||
|
||||
# Use extracted content directly for generation prompt
|
||||
if extractionResponse.errorCount == 0 and extractionResponse.content:
|
||||
# The extracted content is already merged and ready to use
|
||||
content_for_generation = extractionResponse.content
|
||||
logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation")
|
||||
else:
|
||||
# Extraction failed - use placeholders
|
||||
logger.warning(f"Content extraction failed, using placeholders")
|
||||
placeholderParts = []
|
||||
for p in processableParts:
|
||||
placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]")
|
||||
content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None
|
||||
else:
|
||||
content_for_generation = None
|
||||
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
|
||||
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
|
||||
|
||||
generation_prompt = await buildGenerationPrompt(
|
||||
outputFormat, prompt, title, content_for_generation, None
|
||||
logger.debug("No processable parts found in contentParts")
|
||||
else:
|
||||
content_for_generation = None
|
||||
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
|
||||
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
|
||||
|
||||
generation_prompt = await buildGenerationPrompt(
|
||||
outputFormat, prompt, title, content_for_generation, None
|
||||
)
|
||||
|
||||
promptArgs = {
|
||||
"outputFormat": outputFormat,
|
||||
"userPrompt": prompt,
|
||||
"title": title,
|
||||
"extracted_content": content_for_generation
|
||||
}
|
||||
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
|
||||
# Extract user prompt from promptArgs for task completion analysis
|
||||
userPrompt = None
|
||||
if promptArgs:
|
||||
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
|
||||
|
||||
generated_json = await self._callAiWithLooping(
|
||||
generation_prompt,
|
||||
options,
|
||||
"document_generation",
|
||||
buildGenerationPrompt,
|
||||
promptArgs,
|
||||
aiOperationId,
|
||||
userPrompt=userPrompt
|
||||
)
|
||||
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
|
||||
try:
|
||||
extracted_json = self.services.utils.jsonExtractString(generated_json)
|
||||
generated_data = json.loads(extracted_json)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse generated JSON: {str(e)}")
|
||||
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
|
||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise ValueError(f"Generated content is not valid JSON: {str(e)}")
|
||||
|
||||
# Extract title and filename from generated document structure
|
||||
extractedTitle = title
|
||||
extractedFilename = None
|
||||
if isinstance(generated_data, dict) and "documents" in generated_data:
|
||||
docs = generated_data["documents"]
|
||||
if isinstance(docs, list) and len(docs) > 0:
|
||||
firstDoc = docs[0]
|
||||
if isinstance(firstDoc, dict):
|
||||
if firstDoc.get("title"):
|
||||
extractedTitle = firstDoc["title"]
|
||||
if firstDoc.get("filename"):
|
||||
extractedFilename = firstDoc["filename"]
|
||||
|
||||
# Ensure metadata contains the extracted title
|
||||
if "metadata" not in generated_data:
|
||||
generated_data["metadata"] = {}
|
||||
if extractedTitle:
|
||||
generated_data["metadata"]["title"] = extractedTitle
|
||||
|
||||
# Create separate operation for content rendering
|
||||
renderOperationId = f"{aiOperationId}_render"
|
||||
renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
|
||||
self.services.chat.progressLogStart(
|
||||
renderOperationId,
|
||||
"Content Rendering",
|
||||
"Rendering",
|
||||
f"Format: {outputFormat}",
|
||||
parentId=renderParentLogId
|
||||
)
|
||||
|
||||
try:
|
||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||
generationService = GenerationService(self.services)
|
||||
self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
|
||||
rendered_content, mime_type = await generationService.renderReport(
|
||||
generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
|
||||
)
|
||||
self.services.chat.progressLogFinish(renderOperationId, True)
|
||||
|
||||
promptArgs = {
|
||||
"outputFormat": outputFormat,
|
||||
"userPrompt": prompt,
|
||||
"title": title,
|
||||
"extracted_content": content_for_generation
|
||||
}
|
||||
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
|
||||
# Extract user prompt from promptArgs for task completion analysis
|
||||
userPrompt = None
|
||||
if promptArgs:
|
||||
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
|
||||
|
||||
generated_json = await self._callAiWithLooping(
|
||||
generation_prompt,
|
||||
options,
|
||||
"document_generation",
|
||||
buildGenerationPrompt,
|
||||
promptArgs,
|
||||
aiOperationId,
|
||||
userPrompt=userPrompt
|
||||
)
|
||||
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
|
||||
try:
|
||||
extracted_json = self.services.utils.jsonExtractString(generated_json)
|
||||
generated_data = json.loads(extracted_json)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse generated JSON: {str(e)}")
|
||||
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
|
||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise ValueError(f"Generated content is not valid JSON: {str(e)}")
|
||||
|
||||
# Extract title and filename from generated document structure
|
||||
extractedTitle = title
|
||||
extractedFilename = None
|
||||
if isinstance(generated_data, dict) and "documents" in generated_data:
|
||||
docs = generated_data["documents"]
|
||||
if isinstance(docs, list) and len(docs) > 0:
|
||||
firstDoc = docs[0]
|
||||
if isinstance(firstDoc, dict):
|
||||
if firstDoc.get("title"):
|
||||
extractedTitle = firstDoc["title"]
|
||||
if firstDoc.get("filename"):
|
||||
extractedFilename = firstDoc["filename"]
|
||||
|
||||
# Ensure metadata contains the extracted title
|
||||
if "metadata" not in generated_data:
|
||||
generated_data["metadata"] = {}
|
||||
if extractedTitle:
|
||||
generated_data["metadata"]["title"] = extractedTitle
|
||||
|
||||
# Create separate operation for content rendering
|
||||
renderOperationId = f"{aiOperationId}_render"
|
||||
renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
|
||||
self.services.chat.progressLogStart(
|
||||
renderOperationId,
|
||||
"Content Rendering",
|
||||
"Rendering",
|
||||
f"Format: {outputFormat}",
|
||||
parentId=renderParentLogId
|
||||
)
|
||||
|
||||
try:
|
||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||
generationService = GenerationService(self.services)
|
||||
self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
|
||||
rendered_content, mime_type = await generationService.renderReport(
|
||||
generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
|
||||
)
|
||||
self.services.chat.progressLogFinish(renderOperationId, True)
|
||||
|
||||
# Determine document name
|
||||
if extractedFilename:
|
||||
documentName = extractedFilename
|
||||
elif extractedTitle and extractedTitle != "Generated Document":
|
||||
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
|
||||
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||
if sanitized:
|
||||
if not sanitized.lower().endswith(f".{outputFormat}"):
|
||||
documentName = f"{sanitized}.{outputFormat}"
|
||||
else:
|
||||
documentName = sanitized
|
||||
# Determine document name
|
||||
if extractedFilename:
|
||||
documentName = extractedFilename
|
||||
elif extractedTitle and extractedTitle != "Generated Document":
|
||||
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
|
||||
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||
if sanitized:
|
||||
if not sanitized.lower().endswith(f".{outputFormat}"):
|
||||
documentName = f"{sanitized}.{outputFormat}"
|
||||
else:
|
||||
documentName = f"generated.{outputFormat}"
|
||||
documentName = sanitized
|
||||
else:
|
||||
documentName = f"generated.{outputFormat}"
|
||||
|
||||
# Build document data
|
||||
docData = DocumentData(
|
||||
documentName=documentName,
|
||||
documentData=rendered_content,
|
||||
mimeType=mime_type,
|
||||
sourceJson=generated_data # Preserve source JSON for structure validation
|
||||
)
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
title=extractedTitle or title or "Generated Document",
|
||||
filename=extractedFilename,
|
||||
operationType=opType.value if opType else None
|
||||
)
|
||||
|
||||
self.services.utils.writeDebugFile(str(generated_data), "document_generation_response")
|
||||
self.services.chat.progressLogFinish(aiOperationId, True)
|
||||
|
||||
return AiResponse(
|
||||
content=json.dumps(generated_data),
|
||||
metadata=metadata,
|
||||
documents=[docData]
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error rendering document: {str(e)}")
|
||||
if renderOperationId:
|
||||
self.services.chat.progressLogFinish(renderOperationId, False)
|
||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise ValueError(f"Rendering failed: {str(e)}")
|
||||
|
||||
# Handle text processing (no outputFormat)
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
|
||||
|
||||
if contentParts:
|
||||
# Process contentParts through AI
|
||||
# Convert contentParts to text for prompt
|
||||
contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
|
||||
fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt
|
||||
result_content = await self._callAiWithLooping(
|
||||
fullPrompt, options, "text", None, None, aiOperationId
|
||||
else:
|
||||
documentName = f"generated.{outputFormat}"
|
||||
|
||||
# Build document data
|
||||
docData = DocumentData(
|
||||
documentName=documentName,
|
||||
documentData=rendered_content,
|
||||
mimeType=mime_type,
|
||||
sourceJson=generated_data # Preserve source JSON for structure validation
|
||||
)
|
||||
else:
|
||||
# Direct text call (no documents to process)
|
||||
result_content = await self._callAiWithLooping(
|
||||
prompt, options, "text", None, None, aiOperationId
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
title=extractedTitle or title or "Generated Document",
|
||||
filename=extractedFilename,
|
||||
operationType=opType.value if opType else None
|
||||
)
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
operationType=opType.value if opType else None
|
||||
)
|
||||
|
||||
self.services.chat.progressLogFinish(aiOperationId, True)
|
||||
|
||||
return AiResponse(
|
||||
content=result_content,
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
# Write JSON with proper formatting (not str() which can truncate)
|
||||
jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False)
|
||||
self.services.utils.writeDebugFile(jsonStr, "document_generation_response")
|
||||
self.services.chat.progressLogFinish(aiOperationId, True)
|
||||
|
||||
return AiResponse(
|
||||
content=json.dumps(generated_data),
|
||||
metadata=metadata,
|
||||
documents=[docData]
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error rendering document: {str(e)}")
|
||||
if renderOperationId:
|
||||
self.services.chat.progressLogFinish(renderOperationId, False)
|
||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise ValueError(f"Rendering failed: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in callAiContent: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -1236,8 +1236,12 @@ class JsonResponseHandler:
|
|||
# Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
|
||||
value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
|
||||
|
||||
# Handle None (path doesn't exist - incomplete JSON)
|
||||
if value is None:
|
||||
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
|
||||
logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}")
|
||||
# Count items/rows/elements based on type
|
||||
if isinstance(value, list):
|
||||
elif isinstance(value, list):
|
||||
updatedKpi["currentValue"] = len(value)
|
||||
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
|
||||
elif isinstance(value, (int, float)):
|
||||
|
|
@ -1296,8 +1300,12 @@ class JsonResponseHandler:
|
|||
# Extract value using path
|
||||
value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
|
||||
|
||||
# Handle None (path doesn't exist - incomplete JSON)
|
||||
if value is None:
|
||||
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
|
||||
logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}")
|
||||
# Count items/rows/elements based on type
|
||||
if isinstance(value, list):
|
||||
elif isinstance(value, list):
|
||||
updatedKpi["currentValue"] = len(value)
|
||||
logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
|
||||
elif isinstance(value, (int, float)):
|
||||
|
|
@ -1321,6 +1329,7 @@ class JsonResponseHandler:
|
|||
Extract value from object using dot-notation path with array indices.
|
||||
|
||||
Example: "sections[0].elements[0].items"
|
||||
Returns None if path doesn't exist (for incomplete JSON handling).
|
||||
"""
|
||||
parts = path.split('.')
|
||||
current = obj
|
||||
|
|
@ -1332,20 +1341,30 @@ class JsonResponseHandler:
|
|||
index = int(part[part.index('[') + 1:part.index(']')])
|
||||
|
||||
if key:
|
||||
current = current.get(key, [])
|
||||
if isinstance(current, list) and 0 <= index < len(current):
|
||||
current = current[index]
|
||||
if isinstance(current, dict):
|
||||
current = current.get(key)
|
||||
if current is None:
|
||||
return None # Key doesn't exist
|
||||
else:
|
||||
return None # Can't access key on non-dict
|
||||
|
||||
if isinstance(current, list):
|
||||
if 0 <= index < len(current):
|
||||
current = current[index]
|
||||
else:
|
||||
# Index out of range - return None for incomplete JSON
|
||||
return None
|
||||
else:
|
||||
raise KeyError(f"Invalid index {index} for {key}")
|
||||
# Not a list, can't index
|
||||
return None
|
||||
else:
|
||||
# Handle dict access
|
||||
if isinstance(current, dict):
|
||||
current = current.get(part)
|
||||
if current is None:
|
||||
return None # Key doesn't exist
|
||||
else:
|
||||
raise KeyError(f"Cannot access {part} on {type(current)}")
|
||||
|
||||
if current is None:
|
||||
raise KeyError(f"Path {path} returned None at {part}")
|
||||
return None # Can't access key on non-dict
|
||||
|
||||
return current
|
||||
|
||||
|
|
|
|||
|
|
@ -92,13 +92,16 @@ class ChatService:
|
|||
if docRef.startswith("docItem:"):
|
||||
# docItem:<id>:<filename> or docItem:<id> (filename is optional)
|
||||
# ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct)
|
||||
# Both formats are supported: docItem:<documentId> and docItem:<documentId>:<filename>
|
||||
parts = docRef.split(':')
|
||||
if len(parts) >= 2:
|
||||
docId = parts[1] # This should be the documentId (UUID)
|
||||
docFound = False
|
||||
|
||||
# ALWAYS try to match by documentId first (regardless of number of parts)
|
||||
# This handles: docItem:documentId and docItem:documentId:filename
|
||||
# This handles both formats:
|
||||
# - docItem:<documentId> (without filename - still works)
|
||||
# - docItem:<documentId>:<filename> (with filename - preferred)
|
||||
for message in workflow.messages:
|
||||
# Validate message belongs to this workflow
|
||||
msgWorkflowId = getattr(message, 'workflowId', None)
|
||||
|
|
|
|||
|
|
@ -138,6 +138,36 @@ class ExtractionService:
|
|||
f"extraction.process.{doc.mimeType}"
|
||||
)
|
||||
|
||||
# Write extraction results to debug file
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
import json
|
||||
# Create summary of extraction results for debug
|
||||
extractionSummary = {
|
||||
"documentName": doc.fileName,
|
||||
"documentMimeType": doc.mimeType,
|
||||
"partsCount": len(ec.parts),
|
||||
"parts": []
|
||||
}
|
||||
for part in ec.parts:
|
||||
partSummary = {
|
||||
"typeGroup": part.typeGroup,
|
||||
"mimeType": part.mimeType,
|
||||
"label": part.label,
|
||||
"dataLength": len(part.data) if part.data else 0,
|
||||
"metadata": part.metadata
|
||||
}
|
||||
# Include data preview for small parts (first 500 chars)
|
||||
if part.data and len(part.data) <= 500:
|
||||
partSummary["dataPreview"] = part.data[:500]
|
||||
elif part.data:
|
||||
partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
|
||||
extractionSummary["parts"].append(partSummary)
|
||||
|
||||
writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to write extraction debug file: {str(e)}")
|
||||
|
||||
results.append(ec)
|
||||
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -99,9 +99,16 @@ async def buildExtractionPrompt(
|
|||
# Parse extraction intent if AI service is available
|
||||
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
|
||||
|
||||
# Build base prompt
|
||||
# Build base prompt with clear user prompt markers
|
||||
sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt
|
||||
adaptive_prompt = f"""
|
||||
{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
|
||||
{'='*80}
|
||||
USER REQUEST / USER PROMPT:
|
||||
{'='*80}
|
||||
{sanitized_user_prompt}
|
||||
{'='*80}
|
||||
END OF USER REQUEST / USER PROMPT
|
||||
{'='*80}
|
||||
|
||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||
|
||||
|
|
|
|||
|
|
@ -479,25 +479,11 @@ class RendererXlsx(BaseRenderer):
|
|||
|
||||
sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
|
||||
else:
|
||||
# Single table or mixed content - create main sheet
|
||||
# Single table or mixed content - create only main sheet
|
||||
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
|
||||
sheetNames.append(documentTitle[:31]) # Excel sheet name limit
|
||||
|
||||
# Add additional sheets for other content types
|
||||
contentTypes = set()
|
||||
for section in sections:
|
||||
contentType = section.get("content_type", "paragraph")
|
||||
contentTypes.add(contentType)
|
||||
|
||||
if "table" in contentTypes and len(tableSections) == 1:
|
||||
sheetNames.append("Table Data")
|
||||
if "list" in contentTypes:
|
||||
sheetNames.append("Lists")
|
||||
if "paragraph" in contentTypes or "heading" in contentTypes:
|
||||
sheetNames.append("Text")
|
||||
|
||||
# Limit to 4 sheets maximum
|
||||
return sheetNames[:4]
|
||||
return sheetNames
|
||||
|
||||
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""Populate Excel sheets with content from JSON based on actual sheet names."""
|
||||
|
|
@ -527,14 +513,10 @@ class RendererXlsx(BaseRenderer):
|
|||
sheetTitle = caption
|
||||
self._populateTableSheet(sheet, section, styles, sheetTitle)
|
||||
else:
|
||||
# Single table or mixed content - use original logic
|
||||
# Single table or mixed content - populate only main sheet
|
||||
firstSheetName = sheetNames[0]
|
||||
self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
|
||||
|
||||
# If we have multiple sheets, distribute content by type
|
||||
if len(sheetNames) > 1:
|
||||
self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
|
||||
|
||||
|
|
|
|||
|
|
@ -72,7 +72,13 @@ async def buildGenerationPrompt(
|
|||
continuationText += "Start directly with the next element/section that should follow.\n\n"
|
||||
|
||||
# PROMPT FOR CONTINUATION
|
||||
generationPrompt = f"""User request: "{userPrompt}"
|
||||
generationPrompt = f"""{'='*80}
|
||||
USER REQUEST / USER PROMPT:
|
||||
{'='*80}
|
||||
{userPrompt}
|
||||
{'='*80}
|
||||
END OF USER REQUEST / USER PROMPT
|
||||
{'='*80}
|
||||
|
||||
⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.
|
||||
|
||||
|
|
@ -93,8 +99,57 @@ Continue generating the remaining content now.
|
|||
else:
|
||||
|
||||
# PROMPT FOR FIRST CALL
|
||||
# Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
|
||||
|
||||
if extracted_content:
|
||||
# If we have extracted content, put it FIRST and make it very clear it's the source data
|
||||
generationPrompt = f"""{'='*80}
|
||||
USER REQUEST / USER PROMPT:
|
||||
{'='*80}
|
||||
{userPrompt}
|
||||
{'='*80}
|
||||
END OF USER REQUEST / USER PROMPT
|
||||
{'='*80}
|
||||
|
||||
generationPrompt = f"""User request: "{userPrompt}"
|
||||
{'='*80}
|
||||
⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️
|
||||
{'='*80}
|
||||
The content below contains the ACTUAL DATA extracted from the source documents.
|
||||
You MUST use this data - DO NOT generate fake or example data.
|
||||
{'='*80}
|
||||
EXTRACTED CONTENT FROM DOCUMENTS:
|
||||
{'='*80}
|
||||
{extracted_content}
|
||||
{'='*80}
|
||||
END OF EXTRACTED CONTENT
|
||||
{'='*80}
|
||||
|
||||
Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
|
||||
The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
|
||||
You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
|
||||
|
||||
JSON structure template (structure only - use data from EXTRACTED CONTENT above):
|
||||
{jsonTemplate}
|
||||
|
||||
Instructions:
|
||||
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
|
||||
- Do NOT reuse example section IDs; create your own.
|
||||
- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
|
||||
- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
|
||||
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
|
||||
- Output JSON only; no markdown fences or extra text.
|
||||
|
||||
Generate your complete response using the extracted content data.
|
||||
"""
|
||||
else:
|
||||
# No extracted content - generate from scratch
|
||||
generationPrompt = f"""{'='*80}
|
||||
USER REQUEST / USER PROMPT:
|
||||
{'='*80}
|
||||
{userPrompt}
|
||||
{'='*80}
|
||||
END OF USER REQUEST / USER PROMPT
|
||||
{'='*80}
|
||||
|
||||
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
|
||||
|
||||
|
|
@ -111,12 +166,5 @@ Instructions:
|
|||
Generate your complete response.
|
||||
"""
|
||||
|
||||
# If we have extracted content, prepend it to the prompt
|
||||
if extracted_content:
|
||||
generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
|
||||
{extracted_content}
|
||||
|
||||
{generationPrompt}"""
|
||||
|
||||
return generationPrompt.strip()
|
||||
|
||||
|
|
|
|||
|
|
@ -102,12 +102,30 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
|||
Attempt to repair broken JSON using multiple strategies.
|
||||
Generic solution that works for any content type.
|
||||
Returns the best repair attempt or None if all fail.
|
||||
|
||||
IMPORTANT: This function tries to preserve ALL data by avoiding truncation.
|
||||
Only uses truncation as a last resort when structure closing fails.
|
||||
"""
|
||||
if not text:
|
||||
return None
|
||||
|
||||
# Strategy 1: Try to extract sections from the entire text first
|
||||
# Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
|
||||
# This preserves all data and should be tried first
|
||||
closedStr = closeJsonStructures(text)
|
||||
obj, err, _ = tryParseJson(closedStr)
|
||||
if err is None and isinstance(obj, dict):
|
||||
sections = extractSectionsFromDocument(obj)
|
||||
if sections:
|
||||
logger.info(f"Repaired JSON using structure closing (preserved all data, found {len(sections)} sections)")
|
||||
return obj
|
||||
else:
|
||||
# Structure closing worked but no sections found - still return it
|
||||
logger.info("Repaired JSON using structure closing (preserved all data, but no sections found)")
|
||||
return obj
|
||||
|
||||
# Strategy 2: Try to extract sections from the entire text using regex
|
||||
# This handles cases where the JSON structure is broken but content is intact
|
||||
# NOTE: _extractSectionsRegex may truncate, but we try it before progressive parsing
|
||||
extractedSections = _extractSectionsRegex(text)
|
||||
if extractedSections:
|
||||
logger.info(f"Extracted {len(extractedSections)} sections using regex")
|
||||
|
|
@ -120,7 +138,10 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
|||
"documents": [{"sections": extractedSections}]
|
||||
}
|
||||
|
||||
# Strategy 2: Progressive parsing - try to find longest valid prefix
|
||||
# Strategy 3: Progressive parsing - try to find longest valid prefix (TRUNCATES DATA)
|
||||
# WARNING: This strategy truncates the input and loses data after the truncation point
|
||||
# Only use as last resort when other strategies fail
|
||||
logger.warning("Structure closing and regex extraction failed, trying progressive parsing (WILL TRUNCATE DATA)")
|
||||
bestResult = None
|
||||
bestValidLength = 0
|
||||
|
||||
|
|
@ -133,13 +154,13 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
|||
if err is None and isinstance(obj, dict):
|
||||
bestResult = obj
|
||||
bestValidLength = i
|
||||
logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
|
||||
logger.debug(f"Progressive parsing success at length {i} (step: {stepSize}) - DATA TRUNCATED AT POSITION {i}")
|
||||
break
|
||||
if bestResult:
|
||||
break
|
||||
|
||||
if bestResult:
|
||||
logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
|
||||
logger.warning(f"Repaired JSON using progressive parsing (valid length: {bestValidLength}, DATA LOST AFTER THIS POINT)")
|
||||
|
||||
# Check if we have sections in the result
|
||||
sections = extractSectionsFromDocument(bestResult)
|
||||
|
|
@ -160,13 +181,6 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
|||
bestResult["documents"][0]["sections"].extend(extractedSections)
|
||||
return bestResult
|
||||
|
||||
# Strategy 3: Structure closing - close incomplete structures
|
||||
closedStr = closeJsonStructures(text)
|
||||
obj, err, _ = tryParseJson(closedStr)
|
||||
if err is None and isinstance(obj, dict):
|
||||
logger.info("Repaired JSON using structure closing")
|
||||
return obj
|
||||
|
||||
logger.warning("All repair strategies failed")
|
||||
return None
|
||||
|
||||
|
|
@ -174,18 +188,43 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
|||
def closeJsonStructures(text: str) -> str:
|
||||
"""
|
||||
Close incomplete JSON structures by adding missing closing brackets.
|
||||
Also handles unterminated strings by closing them.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
result = text
|
||||
|
||||
# Handle unterminated strings: find the last unclosed string
|
||||
# Look for patterns like: "value" or "value\n (unterminated)
|
||||
# Simple heuristic: if we end with an unterminated string (odd number of quotes at end)
|
||||
# Try to close it by finding the last opening quote and closing it
|
||||
if result.strip():
|
||||
# Count quotes - if odd number, we have an unterminated string
|
||||
quoteCount = result.count('"')
|
||||
if quoteCount % 2 == 1:
|
||||
# Find the last opening quote that's not escaped
|
||||
lastQuotePos = result.rfind('"')
|
||||
if lastQuotePos >= 0:
|
||||
# Check if it's escaped
|
||||
escapeCount = 0
|
||||
i = lastQuotePos - 1
|
||||
while i >= 0 and result[i] == '\\':
|
||||
escapeCount += 1
|
||||
i -= 1
|
||||
# If not escaped (even number of backslashes), close the string
|
||||
if escapeCount % 2 == 0:
|
||||
# Find where the string should end (before next comma, bracket, or brace)
|
||||
# For now, just close it at the end
|
||||
result += '"'
|
||||
|
||||
# Count open/close brackets and braces
|
||||
openBraces = text.count('{')
|
||||
closeBraces = text.count('}')
|
||||
openBrackets = text.count('[')
|
||||
closeBrackets = text.count(']')
|
||||
openBraces = result.count('{')
|
||||
closeBraces = result.count('}')
|
||||
openBrackets = result.count('[')
|
||||
closeBrackets = result.count(']')
|
||||
|
||||
# Close incomplete structures
|
||||
result = text
|
||||
for _ in range(openBraces - closeBraces):
|
||||
result += '}'
|
||||
for _ in range(openBrackets - closeBrackets):
|
||||
|
|
@ -202,11 +241,24 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
|
|||
|
||||
NOTE: This function is called FROM repairBrokenJson, so it must NOT call repairBrokenJson
|
||||
to avoid circular dependency. Instead, it implements its own repair strategies.
|
||||
|
||||
IMPORTANT: Tries to preserve data by using structure closing first before truncation.
|
||||
"""
|
||||
sections = []
|
||||
|
||||
# Strategy 1: Try progressive parsing to find longest valid JSON prefix
|
||||
# Find the longest valid JSON prefix that contains sections
|
||||
# Strategy 1: Try structure closing WITHOUT truncation first (preserves all data)
|
||||
closed_str = closeJsonStructures(text)
|
||||
obj, err, _ = tryParseJson(closed_str)
|
||||
if err is None and isinstance(obj, dict):
|
||||
extracted_sections = extractSectionsFromDocument(obj)
|
||||
if extracted_sections:
|
||||
logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using structure closing (preserved all data)")
|
||||
return extracted_sections
|
||||
|
||||
# Strategy 2: Try progressive parsing to find longest valid JSON prefix (TRUNCATES DATA)
|
||||
# WARNING: This truncates the input and loses data
|
||||
# Only use if structure closing failed
|
||||
logger.debug("_extractSectionsRegex: Structure closing failed, trying progressive parsing (WILL TRUNCATE)")
|
||||
best_result = None
|
||||
best_valid_length = 0
|
||||
for step_size in [1000, 500, 100, 50, 10]:
|
||||
|
|
@ -217,7 +269,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
|
|||
if err is None and isinstance(obj, dict):
|
||||
extracted_sections = extractSectionsFromDocument(obj)
|
||||
if extracted_sections:
|
||||
logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i}")
|
||||
logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i} (DATA TRUNCATED)")
|
||||
return extracted_sections
|
||||
# Store best result even if no sections found
|
||||
if not best_result:
|
||||
|
|
|
|||
|
|
@ -1183,11 +1183,13 @@ Max length: {maxLength} characters
|
|||
|
||||
Based on the context, decide which documents to attach.
|
||||
|
||||
CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
|
||||
|
||||
Return JSON:
|
||||
{{
|
||||
"subject": "subject line",
|
||||
"body": "email body (HTML allowed)",
|
||||
"attachments": ["doc_ref1", "doc_ref2"]
|
||||
"attachments": ["docItem:<documentId>:<filename>"]
|
||||
}}
|
||||
"""
|
||||
|
||||
|
|
@ -1237,6 +1239,9 @@ Return JSON:
|
|||
elif isinstance(ai_attachments, list):
|
||||
ai_attachments = [a for a in ai_attachments if isinstance(a, str)]
|
||||
|
||||
# Initialize normalized_ai_attachments
|
||||
normalized_ai_attachments = []
|
||||
|
||||
if ai_attachments:
|
||||
try:
|
||||
ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments
|
||||
|
|
@ -1250,16 +1255,20 @@ Return JSON:
|
|||
selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids]
|
||||
|
||||
if selected_docs:
|
||||
# Map selected ChatDocuments back to docItem references
|
||||
# Map selected ChatDocuments back to docItem references (with full filename)
|
||||
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs]
|
||||
# Normalize ai_attachments to full format for storage
|
||||
normalized_ai_attachments = documentList.copy()
|
||||
logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)")
|
||||
else:
|
||||
# No intersection; use all available documents
|
||||
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
|
||||
normalized_ai_attachments = documentList.copy()
|
||||
logger.warning("AI selected attachments not found in available documents, using all documents")
|
||||
else:
|
||||
# No AI selection; use all available documents
|
||||
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
|
||||
normalized_ai_attachments = documentList.copy()
|
||||
logger.warning("AI did not specify attachments, using all available documents")
|
||||
else:
|
||||
logger.info("No documents provided in documentList; skipping attachment processing")
|
||||
|
|
@ -1363,7 +1372,7 @@ Return JSON:
|
|||
"cc": cc,
|
||||
"bcc": bcc,
|
||||
"attachments": len(documentList),
|
||||
"aiSelectedAttachments": ai_attachments if ai_attachments else "all documents",
|
||||
"aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
|
||||
"aiGenerated": True,
|
||||
"context": context,
|
||||
"emailStyle": emailStyle,
|
||||
|
|
@ -1371,12 +1380,40 @@ Return JSON:
|
|||
"draftData": draft_data
|
||||
}
|
||||
|
||||
# Extract attachment filenames for validation metadata
|
||||
attachmentFilenames = []
|
||||
attachmentReferences = []
|
||||
if documentList:
|
||||
try:
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
|
||||
attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
|
||||
# Store normalized document references (with filenames) - use normalized_ai_attachments if available
|
||||
attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Create validation metadata for content validator
|
||||
validationMetadata = {
|
||||
"actionType": "outlook.composeAndDraftEmailWithContext",
|
||||
"emailRecipients": to,
|
||||
"emailCc": cc,
|
||||
"emailBcc": bcc,
|
||||
"emailSubject": subject,
|
||||
"emailAttachments": attachmentFilenames,
|
||||
"emailAttachmentReferences": attachmentReferences,
|
||||
"emailAttachmentCount": len(attachmentFilenames),
|
||||
"emailStyle": emailStyle,
|
||||
"hasAttachments": len(attachmentFilenames) > 0
|
||||
}
|
||||
|
||||
return ActionResult(
|
||||
success=True,
|
||||
documents=[ActionDocument(
|
||||
documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
|
||||
documentData=json.dumps(draftResultData, indent=2),
|
||||
mimeType="application/json"
|
||||
mimeType="application/json",
|
||||
validationMetadata=validationMetadata
|
||||
)]
|
||||
)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -1154,6 +1154,53 @@ class MethodSharepoint(MethodBase):
|
|||
resultData = json.loads(fileData)
|
||||
foundDocuments = resultData.get("foundDocuments", [])
|
||||
|
||||
# If no foundDocuments, check if it's a listDocuments result (has listResults)
|
||||
if not foundDocuments and "listResults" in resultData:
|
||||
logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format")
|
||||
listResults = resultData.get("listResults", [])
|
||||
foundDocuments = []
|
||||
siteIdFromList = None
|
||||
siteNameFromList = None
|
||||
|
||||
for listResult in listResults:
|
||||
siteResults = listResult.get("siteResults", [])
|
||||
for siteResult in siteResults:
|
||||
items = siteResult.get("items", [])
|
||||
# Extract site info from first item if available
|
||||
if items and not siteIdFromList:
|
||||
# Try to get site info from the siteResult structure
|
||||
# We need to discover sites to get the siteId
|
||||
siteNameFromList = items[0].get("siteName")
|
||||
|
||||
for item in items:
|
||||
# Convert listDocuments item format to foundDocuments format
|
||||
if item.get("type") == "file":
|
||||
foundDoc = {
|
||||
"id": item.get("id"),
|
||||
"name": item.get("name"),
|
||||
"type": "file",
|
||||
"siteName": item.get("siteName"),
|
||||
"siteId": None, # Will be determined from site discovery
|
||||
"webUrl": item.get("webUrl"),
|
||||
"fullPath": item.get("webUrl", ""),
|
||||
"parentPath": item.get("parentPath", "")
|
||||
}
|
||||
foundDocuments.append(foundDoc)
|
||||
|
||||
# Discover sites to get siteId if we have siteName
|
||||
if foundDocuments and siteNameFromList and not siteIdFromList:
|
||||
logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
|
||||
allSites = await self._discoverSharePointSites()
|
||||
matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
|
||||
if matchingSites:
|
||||
siteIdFromList = matchingSites[0].get("id")
|
||||
# Update all foundDocuments with siteId
|
||||
for doc in foundDocuments:
|
||||
doc["siteId"] = siteIdFromList
|
||||
logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
|
||||
|
||||
logger.info(f"Converted {len(foundDocuments)} files from listResults format")
|
||||
|
||||
if foundDocuments:
|
||||
# Extract SharePoint file IDs from foundDocuments
|
||||
sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
|
||||
|
|
@ -1167,6 +1214,15 @@ class MethodSharepoint(MethodBase):
|
|||
siteName = firstDoc.get("siteName")
|
||||
siteId = firstDoc.get("siteId")
|
||||
|
||||
# If siteId is missing (from listDocuments conversion), discover sites to find it
|
||||
if siteName and not siteId:
|
||||
logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
|
||||
allSites = await self._discoverSharePointSites()
|
||||
matchingSites = self._filterSitesByHint(allSites, siteName)
|
||||
if matchingSites:
|
||||
siteId = matchingSites[0].get("id")
|
||||
logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
|
||||
|
||||
if siteName and siteId:
|
||||
sites = [{
|
||||
"id": siteId,
|
||||
|
|
@ -1174,6 +1230,19 @@ class MethodSharepoint(MethodBase):
|
|||
"webUrl": firstDoc.get("webUrl", "")
|
||||
}]
|
||||
logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
|
||||
elif siteName:
|
||||
# Try to get site by name
|
||||
allSites = await self._discoverSharePointSites()
|
||||
matchingSites = self._filterSitesByHint(allSites, siteName)
|
||||
if matchingSites:
|
||||
sites = [{
|
||||
"id": matchingSites[0].get("id"),
|
||||
"displayName": siteName,
|
||||
"webUrl": matchingSites[0].get("webUrl", "")
|
||||
}]
|
||||
logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
|
||||
else:
|
||||
return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.")
|
||||
else:
|
||||
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -421,14 +421,6 @@ class ContentValidator:
|
|||
if actionName:
|
||||
# Convert action name to human-readable format
|
||||
actionDescription = actionName.replace("ai.", "").replace(".", " ").title()
|
||||
if "convert" in actionName.lower():
|
||||
actionDescription = "Document format conversion"
|
||||
elif "generate" in actionName.lower() or "create" in actionName.lower():
|
||||
actionDescription = "Document generation"
|
||||
elif "extract" in actionName.lower():
|
||||
actionDescription = "Content extraction"
|
||||
elif "process" in actionName.lower():
|
||||
actionDescription = "Content processing"
|
||||
actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
|
||||
|
||||
# Build action parameters context
|
||||
|
|
@ -441,6 +433,25 @@ class ContentValidator:
|
|||
paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
|
||||
actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
|
||||
|
||||
# Extract validation metadata from documents (action-specific context)
|
||||
validationMetadataContext = ""
|
||||
if documents:
|
||||
metadataList = []
|
||||
for doc in documents:
|
||||
metadata = getattr(doc, 'validationMetadata', None)
|
||||
if metadata and isinstance(metadata, dict):
|
||||
metadataList.append(metadata)
|
||||
|
||||
if metadataList:
|
||||
# Combine all metadata (usually just one document)
|
||||
combinedMetadata = {}
|
||||
for meta in metadataList:
|
||||
combinedMetadata.update(meta)
|
||||
|
||||
if combinedMetadata:
|
||||
metadataJson = json.dumps(combinedMetadata, ensure_ascii=False, indent=2)
|
||||
validationMetadataContext = f"\nACTION VALIDATION METADATA: {metadataJson}"
|
||||
|
||||
# Format success criteria for display with index numbers
|
||||
if successCriteria:
|
||||
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
|
||||
|
|
@ -452,7 +463,7 @@ class ContentValidator:
|
|||
=== TASK INFORMATION ===
|
||||
{objectiveLabel}: '{objectiveText}'
|
||||
EXPECTED DATA TYPE: {dataType}
|
||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}
|
||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}
|
||||
|
||||
=== VALIDATION INSTRUCTIONS ===
|
||||
|
||||
|
|
@ -466,6 +477,7 @@ VALIDATION RULES:
|
|||
5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
|
||||
|
||||
VALIDATION STEPS:
|
||||
- Check ACTION VALIDATION METADATA first (if present) - this contains action-specific context
|
||||
- Check structure summary for quantities, counts, statistics
|
||||
- Compare found values with required values from criteria
|
||||
- If structure unavailable, use metadata only (format, filename, size)
|
||||
|
|
|
|||
|
|
@ -169,6 +169,10 @@ class AutomationMode(BaseMode):
|
|||
Execute task using Automation mode - executes predefined actions directly.
|
||||
No AI planning or review phases - actions are executed sequentially as defined.
|
||||
"""
|
||||
# Get task index from workflow state for consistency
|
||||
if taskIndex is None:
|
||||
taskIndex = workflow.getTaskIndex()
|
||||
|
||||
logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
|
||||
|
||||
try:
|
||||
|
|
@ -178,7 +182,6 @@ class AutomationMode(BaseMode):
|
|||
# Update workflow before executing task
|
||||
if taskIndex is not None:
|
||||
self._updateWorkflowBeforeExecutingTask(taskIndex)
|
||||
self.services.chat.setWorkflowContext(taskNumber=taskIndex)
|
||||
|
||||
# Create task start message
|
||||
await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
|
||||
|
|
@ -241,7 +244,7 @@ class AutomationMode(BaseMode):
|
|||
|
||||
# Execute action
|
||||
result = await self.actionExecutor.executeSingleAction(
|
||||
action, workflow, taskStep, taskIndex, actionNumber, totalActions
|
||||
action, workflow, taskStep
|
||||
)
|
||||
actionResults.append(result)
|
||||
|
||||
|
|
|
|||
|
|
@ -561,6 +561,11 @@ class DynamicMode(BaseMode):
|
|||
|
||||
# Use connectionReference from selection (required)
|
||||
connectionRef = selection.get('connectionReference')
|
||||
|
||||
# If not found at top level, check in selection['parameters'] (guided action case)
|
||||
if not connectionRef and isinstance(selection, dict) and 'parameters' in selection:
|
||||
connectionRef = selection['parameters'].get('connectionReference')
|
||||
|
||||
if connectionRef:
|
||||
# Check if action actually has connectionReference parameter
|
||||
methodName, actionName = compoundActionName.split('.', 1)
|
||||
|
|
|
|||
|
|
@ -58,9 +58,10 @@ CONTEXT: {{KEY:OVERALL_TASK_CONTEXT}}
|
|||
OBJECTIVE: {{KEY:TASK_OBJECTIVE}}
|
||||
|
||||
=== AVAILABLE RESOURCES ===
|
||||
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
|
||||
AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
|
||||
{{KEY:AVAILABLE_DOCUMENTS_INDEX}}
|
||||
CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS_INDEX}}
|
||||
AVAILABLE_CONNECTIONS_INDEX:
|
||||
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
|
||||
|
||||
=== AVAILABLE ACTIONS ===
|
||||
{{KEY:AVAILABLE_METHODS}}
|
||||
|
|
@ -82,6 +83,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
|
|||
- Be the next logical incremental step (not complete entire objective in one step)
|
||||
- Target exactly one output format if producing files
|
||||
- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
|
||||
- ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
|
||||
- Learn from previous validation feedback and avoid repeated mistakes
|
||||
- Include intent analysis fields (dataType, expectedFormats, qualityRequirements, successCriteria)
|
||||
|
||||
|
|
@ -97,7 +99,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
|
|||
"successCriteria": ["specific criterion 1", "specific criterion 2"],
|
||||
"userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining what this action will do (1 sentence, first person, friendly tone)",
|
||||
"learnings": ["..."],
|
||||
"requiredInputDocuments": ["docList:..."],
|
||||
"requiredInputDocuments": ["docItem:<documentId>:<filename>", "docList:<label>"],
|
||||
"requiredConnection": "connection:..." | null,
|
||||
"parametersContext": "concise text that Stage 2 will use to set business parameters"
|
||||
}}
|
||||
|
|
@ -115,6 +117,9 @@ Analyze actionObjective to determine:
|
|||
3. parametersContext: short, sufficient for Stage 2
|
||||
4. Return ONLY JSON - no markdown, no explanations
|
||||
5. requiredInputDocuments: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
|
||||
- For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
|
||||
- For document lists: use docList:<label> format
|
||||
- Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
|
||||
6. requiredConnection: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX
|
||||
7. Plan incrementally: one output format per step
|
||||
8. Learn from validation feedback - avoid repeating mistakes
|
||||
|
|
@ -307,6 +312,7 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
|
|||
PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True),
|
||||
PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
|
||||
PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True),
|
||||
PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
|
||||
]
|
||||
|
||||
template = """TASK DECISION
|
||||
|
|
@ -321,7 +327,9 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
|
|||
|
||||
=== AVAILABLE RESOURCES ===
|
||||
ACTIONS: {{KEY:AVAILABLE_METHODS}}
|
||||
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
|
||||
AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
|
||||
AVAILABLE_CONNECTIONS_INDEX:
|
||||
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
|
||||
|
||||
{{KEY:REVIEW_CONTENT}}
|
||||
|
||||
|
|
@ -334,12 +342,20 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
|
|||
- Next action should ONLY generate the MISSING part, NOT repeat what's already delivered
|
||||
|
||||
=== OUTPUT FORMAT ===
|
||||
Return ONLY JSON (no markdown, no explanations). The decision MUST:
|
||||
- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
|
||||
- ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
|
||||
- Use ONLY exact labels from AVAILABLE_CONNECTIONS_INDEX (connection:...)
|
||||
- Provide concrete parameter values in nextActionParameters (not placeholders)
|
||||
- Match parameter names exactly as defined in AVAILABLE_METHODS
|
||||
|
||||
{{
|
||||
"status": "continue",
|
||||
"reason": "Brief reason explaining why continuing",
|
||||
"nextAction": "Selected_action_from_ACTIONS",
|
||||
"nextActionParameters": {{
|
||||
"documentList": ["docItem:reference_from_DOCUMENTS"],
|
||||
"documentList": ["docItem:<documentId>:<filename>", "docList:<label>"],
|
||||
"connectionReference": "connection:reference_from_AVAILABLE_CONNECTIONS_INDEX",
|
||||
"parameter1": "value1",
|
||||
"parameter2": "value2"
|
||||
}},
|
||||
|
|
@ -347,16 +363,21 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
|
|||
}}
|
||||
|
||||
=== RULES ===
|
||||
- If "continue": MUST provide nextAction and nextActionParameters
|
||||
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
|
||||
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
|
||||
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent)
|
||||
- nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
|
||||
- CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
|
||||
- Do NOT repeat failed actions - suggest DIFFERENT approach
|
||||
- If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
|
||||
- nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
|
||||
- If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
|
||||
1. Return ONLY JSON - no markdown, no explanations
|
||||
2. If "continue": MUST provide nextAction and nextActionParameters
|
||||
3. nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
|
||||
4. nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
|
||||
5. documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
|
||||
- For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
|
||||
- For document lists: use docList:<label> format
|
||||
- Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
|
||||
6. connectionReference: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX (required if action needs connection)
|
||||
7. nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
|
||||
8. CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
|
||||
9. Do NOT repeat failed actions - suggest DIFFERENT approach
|
||||
10. If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
|
||||
11. nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
|
||||
12. If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -428,7 +428,7 @@ class WorkflowProcessor:
|
|||
)
|
||||
|
||||
# Prepare AI call options for fast path (balanced, fast processing)
|
||||
from modules.datamodels.datamodelAi import AiCallOptions
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, AiCallRequest
|
||||
|
||||
options = AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||
|
|
@ -438,16 +438,19 @@ class WorkflowProcessor:
|
|||
maxProcessingTime=15 # Fast path should complete in 15s
|
||||
)
|
||||
|
||||
# Call AI (content call - no documents needed for fast path)
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
# Call AI directly (no document generation - just plain text response)
|
||||
# Use aiObjects.call() instead of callAiContent() to avoid document generation path
|
||||
aiRequest = AiCallRequest(
|
||||
prompt=fastPathPrompt,
|
||||
contentParts=None, # Fast path doesn't process documents
|
||||
context="",
|
||||
options=options,
|
||||
outputFormat=None # Text response, not document generation
|
||||
contentParts=None # Fast path doesn't process documents
|
||||
)
|
||||
|
||||
# Extract response content (AiResponse.content is a string)
|
||||
responseText = aiResponse.content if isinstance(aiResponse, str) else (aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse))
|
||||
aiCallResponse = await self.services.ai.aiObjects.call(aiRequest)
|
||||
|
||||
# Extract response content (AiCallResponse.content is a string)
|
||||
responseText = aiCallResponse.content if aiCallResponse.content else ""
|
||||
|
||||
# Create ActionResult with response
|
||||
# For fast path, we create a simple text document with the response
|
||||
|
|
|
|||
|
|
@ -162,30 +162,38 @@ class WorkflowManager:
|
|||
|
||||
self.workflowProcessor = WorkflowProcessor(self.services)
|
||||
|
||||
# Process user-uploaded documents from userInput for complexity detection
|
||||
# This is the correct way: use the input data directly, not workflow state
|
||||
documents = []
|
||||
if userInput.listFileId:
|
||||
try:
|
||||
documents = await self._processFileIds(userInput.listFileId, None)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
|
||||
# Get workflow mode to determine if complexity detection is needed
|
||||
workflowMode = getattr(self.services.workflow, 'workflowMode', None)
|
||||
skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
|
||||
|
||||
# Detect complexity (AI-based semantic understanding) using user input documents
|
||||
complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
|
||||
logger.info(f"Request complexity detected: {complexity}")
|
||||
if skipComplexityDetection:
|
||||
logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan")
|
||||
complexity = "moderate" # Default for automation workflows
|
||||
else:
|
||||
# Process user-uploaded documents from userInput for complexity detection
|
||||
# This is the correct way: use the input data directly, not workflow state
|
||||
documents = []
|
||||
if userInput.listFileId:
|
||||
try:
|
||||
documents = await self._processFileIds(userInput.listFileId, None)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
|
||||
|
||||
# Detect complexity (AI-based semantic understanding) using user input documents
|
||||
complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
|
||||
logger.info(f"Request complexity detected: {complexity}")
|
||||
|
||||
# Now send the first message (which will also process the documents again, but that's fine)
|
||||
await self._sendFirstMessage(userInput)
|
||||
|
||||
# Route to fast path for simple requests
|
||||
if complexity == "simple":
|
||||
# Route to fast path for simple requests (skip for automation mode)
|
||||
if not skipComplexityDetection and complexity == "simple":
|
||||
logger.info("Routing to fast path for simple request")
|
||||
await self._executeFastPath(userInput, documents)
|
||||
return # Fast path completes the workflow
|
||||
|
||||
# Route to full workflow for moderate/complex requests
|
||||
logger.info(f"Routing to full workflow for {complexity} request")
|
||||
# Route to full workflow for moderate/complex requests or automation mode
|
||||
logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else ""))
|
||||
taskPlan = await self._planTasks(userInput)
|
||||
await self._executeTasks(taskPlan)
|
||||
await self._processWorkflowResults()
|
||||
|
|
|
|||
Loading…
Reference in a new issue