testing flow
This commit is contained in:
parent
418bd8281a
commit
ebb15da91b
6 changed files with 195 additions and 20 deletions
|
|
@ -349,6 +349,17 @@ class AiObjects:
|
||||||
# Select model for text generation
|
# Select model for text generation
|
||||||
modelName = self._selectModel(prompt, context, options)
|
modelName = self._selectModel(prompt, context, options)
|
||||||
|
|
||||||
|
# Derive generation parameters
|
||||||
|
temperature = getattr(options, "temperature", None)
|
||||||
|
if temperature is None:
|
||||||
|
temperature = 0.2
|
||||||
|
maxTokens = getattr(options, "maxTokens", None)
|
||||||
|
# Provide a generous default to avoid truncation for long outputs
|
||||||
|
if maxTokens is None:
|
||||||
|
# If resultFormat suggests large outputs (e.g., html, json), allow more tokens
|
||||||
|
wants_large = str(getattr(options, "resultFormat", "")).lower() in ["html", "json", "md", "markdown"]
|
||||||
|
maxTokens = 8000 if wants_large else 2000
|
||||||
|
|
||||||
messages: List[Dict[str, Any]] = []
|
messages: List[Dict[str, Any]] = []
|
||||||
if context:
|
if context:
|
||||||
messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
|
messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
|
||||||
|
|
@ -360,11 +371,11 @@ class AiObjects:
|
||||||
# Call the appropriate function
|
# Call the appropriate function
|
||||||
if functionName == "callAiBasic":
|
if functionName == "callAiBasic":
|
||||||
if aiModels[modelName]["connector"] == "openai":
|
if aiModels[modelName]["connector"] == "openai":
|
||||||
content = await connector.callAiBasic(messages)
|
content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
|
||||||
elif aiModels[modelName]["connector"] == "perplexity":
|
elif aiModels[modelName]["connector"] == "perplexity":
|
||||||
content = await connector.callAiBasic(messages)
|
content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
|
||||||
else:
|
else:
|
||||||
response = await connector.callAiBasic(messages)
|
response = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
|
||||||
content = response["choices"][0]["message"]["content"]
|
content = response["choices"][0]["message"]["content"]
|
||||||
elif functionName == "callAiWithWebSearch":
|
elif functionName == "callAiWithWebSearch":
|
||||||
# Perplexity web search function
|
# Perplexity web search function
|
||||||
|
|
|
||||||
|
|
@ -532,6 +532,17 @@ class AiService:
|
||||||
if not isinstance(extractionResult, list):
|
if not isinstance(extractionResult, list):
|
||||||
return "[Error: No extraction results]"
|
return "[Error: No extraction results]"
|
||||||
|
|
||||||
|
# Prepare debug directory TODO TO REMOVE
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
debug_root = "../local/testing_extraction"
|
||||||
|
ts = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||||
|
debug_dir = os.path.join(debug_root, f"per_chunk_{ts}")
|
||||||
|
try:
|
||||||
|
os.makedirs(debug_dir, exist_ok=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Process each chunk with AI
|
# Process each chunk with AI
|
||||||
aiResults: List[str] = []
|
aiResults: List[str] = []
|
||||||
|
|
||||||
|
|
@ -568,6 +579,15 @@ class AiService:
|
||||||
logger.info(f"Chunk size: {len(part.data)} chars")
|
logger.info(f"Chunk size: {len(part.data)} chars")
|
||||||
logger.info(f"Chunk preview: {part.data[:200]}...")
|
logger.info(f"Chunk preview: {part.data[:200]}...")
|
||||||
|
|
||||||
|
# Dump input chunk
|
||||||
|
try:
|
||||||
|
idx = len(aiResults) + 1
|
||||||
|
fpath = os.path.join(debug_dir, f"chunk_{idx:03d}_input.txt")
|
||||||
|
with open(fpath, "w", encoding="utf-8") as f:
|
||||||
|
f.write(str(part.data))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Create AI call request for this chunk
|
# Create AI call request for this chunk
|
||||||
request = AiCallRequest(
|
request = AiCallRequest(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
|
|
@ -580,6 +600,14 @@ class AiService:
|
||||||
aiResults.append(response.content)
|
aiResults.append(response.content)
|
||||||
|
|
||||||
logger.info(f"Chunk {len(aiResults)} processed: {len(response.content)} chars response")
|
logger.info(f"Chunk {len(aiResults)} processed: {len(response.content)} chars response")
|
||||||
|
# Dump AI response
|
||||||
|
try:
|
||||||
|
idx = len(aiResults)
|
||||||
|
fpath = os.path.join(debug_dir, f"chunk_{idx:03d}_response.txt")
|
||||||
|
with open(fpath, "w", encoding="utf-8") as f:
|
||||||
|
f.write(str(response.content))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Error processing text chunk: {str(e)}")
|
logger.warning(f"Error processing text chunk: {str(e)}")
|
||||||
|
|
@ -601,12 +629,24 @@ class AiService:
|
||||||
mergeStrategy
|
mergeStrategy
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract text from merged content
|
# Extract only AI-generated text from merged content
|
||||||
resultText = ""
|
resultText = ""
|
||||||
for part in mergedContent.parts:
|
for part in mergedContent.parts:
|
||||||
if part.typeGroup in ("text", "table", "structure") and part.data:
|
if (
|
||||||
|
part.typeGroup in ("text", "table", "structure")
|
||||||
|
and part.data
|
||||||
|
and getattr(part, "metadata", {}).get("aiResult", False)
|
||||||
|
):
|
||||||
resultText += part.data + "\n\n"
|
resultText += part.data + "\n\n"
|
||||||
|
|
||||||
|
# Dump merged output
|
||||||
|
try:
|
||||||
|
fpath = os.path.join(debug_dir, "merged_output.txt")
|
||||||
|
with open(fpath, "w", encoding="utf-8") as f:
|
||||||
|
f.write(resultText.strip())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return resultText.strip()
|
return resultText.strip()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
|
from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
|
||||||
from .subUtils import makeId
|
from .subUtils import makeId
|
||||||
|
|
@ -91,6 +92,33 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
|
||||||
parts = non_chunk_parts + chunk_parts
|
parts = non_chunk_parts + chunk_parts
|
||||||
|
|
||||||
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
|
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
|
||||||
|
# DEBUG: dump parts and chunks to files under @testing_extraction/ TODO TO REMOVE
|
||||||
|
try:
|
||||||
|
base_dir = "../local/testing_extraction"
|
||||||
|
doc_dir = os.path.join(base_dir, f"extraction_{fileName}")
|
||||||
|
os.makedirs(doc_dir, exist_ok=True)
|
||||||
|
# Write a summary file
|
||||||
|
summary_lines: List[str] = [f"fileName: {fileName}", f"mimeType: {mimeType}", f"totalParts: {len(parts)}"]
|
||||||
|
text_index = 0
|
||||||
|
for idx, part in enumerate(parts):
|
||||||
|
is_texty = part.typeGroup in ("text", "table", "structure")
|
||||||
|
size = int(part.metadata.get("size", 0) or 0)
|
||||||
|
is_chunk = bool(part.metadata.get("chunk", False))
|
||||||
|
summary_lines.append(
|
||||||
|
f"part[{idx}]: typeGroup={part.typeGroup}, label={part.label}, size={size}, chunk={is_chunk}"
|
||||||
|
)
|
||||||
|
if is_texty and getattr(part, "data", None):
|
||||||
|
text_index += 1
|
||||||
|
fname = f"part_{idx:03d}_{'chunk' if is_chunk else 'full'}_{text_index:03d}.txt"
|
||||||
|
fpath = os.path.join(doc_dir, fname)
|
||||||
|
with open(fpath, "w", encoding="utf-8") as f:
|
||||||
|
f.write(f"# typeGroup: {part.typeGroup}\n# label: {part.label}\n# chunk: {is_chunk}\n# size: {size}\n\n")
|
||||||
|
f.write(str(part.data))
|
||||||
|
with open(os.path.join(doc_dir, "summary.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write("\n".join(summary_lines))
|
||||||
|
except Exception as _e:
|
||||||
|
logger.debug(f"Debug dump skipped: {_e}")
|
||||||
|
|
||||||
return ExtractedContent(id=makeId(), parts=parts)
|
return ExtractedContent(id=makeId(), parts=parts)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -105,12 +105,49 @@ class GenerationService:
|
||||||
|
|
||||||
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
||||||
|
|
||||||
|
# Normalize file extension based on mime type if missing or incorrect
|
||||||
|
try:
|
||||||
|
mime_to_ext = {
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
|
||||||
|
"application/pdf": ".pdf",
|
||||||
|
"text/html": ".html",
|
||||||
|
"text/markdown": ".md",
|
||||||
|
"text/plain": ".txt",
|
||||||
|
"application/json": ".json",
|
||||||
|
}
|
||||||
|
expected_ext = mime_to_ext.get(mime_type)
|
||||||
|
if expected_ext:
|
||||||
|
if not document_name.lower().endswith(expected_ext):
|
||||||
|
# Append/replace extension to match mime type
|
||||||
|
if "." in document_name:
|
||||||
|
document_name = document_name.rsplit(".", 1)[0] + expected_ext
|
||||||
|
else:
|
||||||
|
document_name = document_name + expected_ext
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
|
||||||
|
base64encoded = False
|
||||||
|
try:
|
||||||
|
binary_mime_types = {
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||||
|
"application/pdf",
|
||||||
|
}
|
||||||
|
if isinstance(document_data, str) and mime_type in binary_mime_types:
|
||||||
|
base64encoded = True
|
||||||
|
except Exception:
|
||||||
|
base64encoded = False
|
||||||
|
|
||||||
# Create document with file in one step using interfaces directly
|
# Create document with file in one step using interfaces directly
|
||||||
document = self._createDocument(
|
document = self._createDocument(
|
||||||
fileName=document_name,
|
fileName=document_name,
|
||||||
mimeType=mime_type,
|
mimeType=mime_type,
|
||||||
content=content,
|
content=content,
|
||||||
base64encoded=False,
|
base64encoded=base64encoded,
|
||||||
messageId=message_id
|
messageId=message_id
|
||||||
)
|
)
|
||||||
if document:
|
if document:
|
||||||
|
|
@ -272,6 +309,20 @@ class GenerationService:
|
||||||
tuple: (rendered_content, mime_type)
|
tuple: (rendered_content, mime_type)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
# DEBUG: dump renderer input to @testing_extraction to diagnose JSON+HTML mixtures TODO REMOVE
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
debug_root = "../local/testing_extraction"
|
||||||
|
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
|
||||||
|
os.makedirs(debug_dir, exist_ok=True)
|
||||||
|
with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(f"title: {title}\nformat: {output_format}\nlength: {len(extracted_content or '')}\nstarts_with_brace: {str(extracted_content.strip().startswith('{') if extracted_content else False)}\n")
|
||||||
|
with open(os.path.join(debug_dir, "extracted_content.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(extracted_content or "")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Get the appropriate renderer for the format
|
# Get the appropriate renderer for the format
|
||||||
renderer = self._getFormatRenderer(output_format)
|
renderer = self._getFormatRenderer(output_format)
|
||||||
if not renderer:
|
if not renderer:
|
||||||
|
|
@ -279,6 +330,13 @@ class GenerationService:
|
||||||
|
|
||||||
# Render the content
|
# Render the content
|
||||||
rendered_content, mime_type = await renderer.render(extracted_content, title)
|
rendered_content, mime_type = await renderer.render(extracted_content, title)
|
||||||
|
# DEBUG: dump rendered output
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
with open(os.path.join(debug_dir, "rendered_output.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(rendered_content or "")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
logger.info(f"Successfully rendered report to {output_format} format: {len(rendered_content)} characters")
|
logger.info(f"Successfully rendered report to {output_format} format: {len(rendered_content)} characters")
|
||||||
return rendered_content, mime_type
|
return rendered_content, mime_type
|
||||||
|
|
|
||||||
|
|
@ -30,12 +30,12 @@ class MethodAi(MethodBase):
|
||||||
@action
|
@action
|
||||||
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
Perform an AI call for any type of task with optional document references
|
Perform a generic AI call with optional document references, producing plain text output
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
aiPrompt (str): The AI prompt for processing
|
aiPrompt (str): The AI prompt for processing
|
||||||
documentList (list, optional): List of document references to include in context
|
documentList (list, optional): List of document references to include in context
|
||||||
expectedDocumentFormat (str, optional): Expected document output format with extension, mimeType, description
|
expectedDocumentFormat (str, optional): Preferred output extension (string or dict). Note: This action only returns plain text content.
|
||||||
processingMode (str, optional): Processing mode - use 'basic', 'advanced', or 'detailed' (defaults to 'basic')
|
processingMode (str, optional): Processing mode - use 'basic', 'advanced', or 'detailed' (defaults to 'basic')
|
||||||
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
||||||
operationType (str, optional): Operation type - use 'general', 'generate_plan', 'analyse_content', 'generate_content', 'web_research', 'image_analysis', or 'image_generation'
|
operationType (str, optional): Operation type - use 'general', 'generate_plan', 'analyse_content', 'generate_content', 'web_research', 'image_analysis', or 'image_generation'
|
||||||
|
|
@ -146,6 +146,19 @@ If you need to return multiple documents, add more objects to the documents arra
|
||||||
options=options
|
options=options
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# DEBUG dump: write raw AI result to @testing_extraction/ TODO Remove
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
debug_root = "../local/testing_extraction"
|
||||||
|
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
debug_dir = os.path.join(debug_root, f"method_ai_{ts}")
|
||||||
|
os.makedirs(debug_dir, exist_ok=True)
|
||||||
|
with open(os.path.join(debug_dir, "raw_result.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(str(result) if result is not None else "")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Parse JSON response from AI and create proper ActionDocument objects
|
# Parse JSON response from AI and create proper ActionDocument objects
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
@ -225,6 +238,28 @@ If you need to return multiple documents, add more objects to the documents arra
|
||||||
mimeType=output_mime_type
|
mimeType=output_mime_type
|
||||||
))
|
))
|
||||||
|
|
||||||
|
# DEBUG dump: write parsed documents to files in the same debug folder
|
||||||
|
try:
|
||||||
|
# Reuse the same debug_dir if created above; otherwise create a new one
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
debug_root = "../local/testing_extraction"
|
||||||
|
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
debug_dir = os.path.join(debug_root, f"method_ai_{ts}")
|
||||||
|
os.makedirs(debug_dir, exist_ok=True)
|
||||||
|
# Write a summary and individual documents
|
||||||
|
summary_lines: List[str] = [f"documents: {len(action_documents)}"]
|
||||||
|
for i, doc in enumerate(action_documents, 1):
|
||||||
|
summary_lines.append(f"doc[{i}]: name={doc.documentName}, mimeType={doc.mimeType}")
|
||||||
|
safe_name = doc.documentName or f"doc_{i:03d}.txt"
|
||||||
|
fpath = os.path.join(debug_dir, safe_name)
|
||||||
|
with open(fpath, "w", encoding="utf-8") as f:
|
||||||
|
f.write(str(doc.documentData) if doc.documentData is not None else "")
|
||||||
|
with open(os.path.join(debug_dir, "summary.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write("\n".join(summary_lines))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Return result in the standard ActionResult format with parsed documents
|
# Return result in the standard ActionResult format with parsed documents
|
||||||
return ActionResult.isSuccess(
|
return ActionResult.isSuccess(
|
||||||
documents=action_documents
|
documents=action_documents
|
||||||
|
|
|
||||||
|
|
@ -527,7 +527,7 @@ class HandlingTasks:
|
||||||
user_prompt = extractUserPrompt(context)
|
user_prompt = extractUserPrompt(context)
|
||||||
available_documents = extractAvailableDocuments(context)
|
available_documents = extractAvailableDocuments(context)
|
||||||
user_language = extractUserLanguage(self.services)
|
user_language = extractUserLanguage(self.services)
|
||||||
available_methods = extractAvailableMethods(self.service)
|
available_methods = extractAvailableMethods(self.services)
|
||||||
|
|
||||||
# Create placeholders dictionary
|
# Create placeholders dictionary
|
||||||
placeholders = {
|
placeholders = {
|
||||||
|
|
@ -574,7 +574,7 @@ class HandlingTasks:
|
||||||
# Extract content for placeholders
|
# Extract content for placeholders
|
||||||
user_prompt = extractUserPrompt(context)
|
user_prompt = extractUserPrompt(context)
|
||||||
available_documents = extractAvailableDocuments(context)
|
available_documents = extractAvailableDocuments(context)
|
||||||
user_language = extractUserLanguage(self.service)
|
user_language = extractUserLanguage(self.services)
|
||||||
|
|
||||||
# Get action signature
|
# Get action signature
|
||||||
method = action.get('method', '')
|
method = action.get('method', '')
|
||||||
|
|
@ -1480,16 +1480,19 @@ class HandlingTasks:
|
||||||
# Always use the action's execResultLabel for message creation to ensure proper document routing
|
# Always use the action's execResultLabel for message creation to ensure proper document routing
|
||||||
message_result_label = action.execResultLabel
|
message_result_label = action.execResultLabel
|
||||||
|
|
||||||
# Create message first to get messageId, then create documents with messageId
|
# Create documents first, then create the message with documents attached in one write
|
||||||
message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index, action_index, total_actions)
|
created_documents = self.services.generation.createDocumentsFromActionResult(result, action, workflow, None)
|
||||||
if message:
|
message = await self.createActionMessage(
|
||||||
# Now create documents with the messageId
|
action,
|
||||||
created_documents = self.services.generation.createDocumentsFromActionResult(result, action, workflow, message.id)
|
result,
|
||||||
# Update the message with the created documents
|
workflow,
|
||||||
if created_documents:
|
message_result_label,
|
||||||
message.documents = created_documents
|
created_documents,
|
||||||
# Update the message in the database
|
task_step,
|
||||||
self.services.interfaceDbChat.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]})
|
task_index,
|
||||||
|
action_index,
|
||||||
|
total_actions
|
||||||
|
)
|
||||||
|
|
||||||
# Log action results
|
# Log action results
|
||||||
logger.info(f"Action completed successfully")
|
logger.info(f"Action completed successfully")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue