streamlined extraction and generation prompts
This commit is contained in:
parent
ffdaf2a326
commit
be2934d54a
18 changed files with 1077 additions and 970 deletions
254
function_call_diagram.md
Normal file
254
function_call_diagram.md
Normal file
|
|
@ -0,0 +1,254 @@
|
|||
# Complete Function Call Diagram
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph AI_Service["AI Service Modules"]
|
||||
MA[mainServiceAi<br/>AiService]
|
||||
SC[subCoreAi<br/>SubCoreAi]
|
||||
SDG[subDocumentGeneration<br/>SubDocumentGeneration]
|
||||
SDP[subDocumentProcessing<br/>SubDocumentProcessing]
|
||||
SU[subSharedAiUtils<br/>Utilities]
|
||||
end
|
||||
|
||||
subgraph EXT_Service["Extraction Service Modules"]
|
||||
MSE[mainServiceExtraction<br/>ExtractionService]
|
||||
SPE[subPromptBuilderExtraction<br/>buildExtractionPrompt]
|
||||
SP[subPipeline<br/>runExtraction]
|
||||
end
|
||||
|
||||
subgraph GEN_Service["Generation Service Modules"]
|
||||
MSG[mainServiceGeneration<br/>GenerationService]
|
||||
SPG[subPromptBuilderGeneration<br/>buildGenerationPrompt]
|
||||
SJ[subJsonSchema<br/>Schemas]
|
||||
end
|
||||
|
||||
%% subCoreAi calls
|
||||
SC -->|_buildGenerationPrompt| SPG
|
||||
SC -->|callAiDocuments| SDP
|
||||
SC -->|sanitizePromptContent| SU
|
||||
|
||||
%% subDocumentGeneration calls
|
||||
SDG -->|processDocumentsWithContinuation| SDP
|
||||
SDG -->|buildGenerationPrompt| SPG
|
||||
SDG -->|renderReport| MSG
|
||||
SDG -->|sanitizePromptContent| SU
|
||||
|
||||
%% subDocumentProcessing calls
|
||||
SDP -->|extractContent 3x| MSE
|
||||
SDP -->|_applyMerging 3x| SP
|
||||
SDP -->|readImage| SC
|
||||
|
||||
%% mainServiceExtraction calls
|
||||
MSE -->|runExtraction| SP
|
||||
|
||||
%% subPromptBuilderExtraction calls
|
||||
SPE -->|get_document_subJsonSchema| SJ
|
||||
SPE -->|sanitizePromptContent| SU
|
||||
|
||||
%% mainServiceGeneration calls utilities
|
||||
MSG -->|utility functions| SU
|
||||
|
||||
%% subCoreAi detailed calls
|
||||
SC -.->|aiObjects.call| AI_Interface["AiObjects Interface"]
|
||||
SDP -.->|aiObjects.call| AI_Interface
|
||||
|
||||
%% Style
|
||||
classDef aiClass fill:#e1f5ff,stroke:#0066cc,stroke-width:2px
|
||||
classDef extClass fill:#fff5e1,stroke:#cc6600,stroke-width:2px
|
||||
classDef genClass fill:#e1ffe1,stroke:#006600,stroke-width:2px
|
||||
classDef utilClass fill:#f0f0f0,stroke:#666,stroke-width:2px
|
||||
classDef interfaceClass fill:#ffe1f5,stroke:#cc0066,stroke-width:2px
|
||||
|
||||
class MA,SC,SDG,SDP,SU aiClass
|
||||
class MSE,SPE,SP extClass
|
||||
class MSG,SPG,SJ genClass
|
||||
class AI_Interface interfaceClass
|
||||
```
|
||||
|
||||
## Detailed Call Map with Function Names
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
%% Nodes
|
||||
SC[subCoreAi]
|
||||
SDG[subDocumentGeneration]
|
||||
SDP[subDocumentProcessing]
|
||||
SU[subSharedAiUtils]
|
||||
SPE[subPromptBuilderExtraction]
|
||||
SPG[subPromptBuilderGeneration]
|
||||
MSE[mainServiceExtraction]
|
||||
MSG[mainServiceGeneration]
|
||||
SP[subPipeline]
|
||||
SJ[subJsonSchema]
|
||||
|
||||
%% subCoreAi function calls
|
||||
SC -->|"_buildGenerationPrompt()<br/>calls"| SPG
|
||||
SC -->|"callAiDocuments()<br/>calls callAiText()"| SDP
|
||||
SC -->|"sanitizePromptContent()"| SU
|
||||
|
||||
%% subDocumentGeneration function calls
|
||||
SDG -->|"_processDocumentsUnified()<br/>calls"| SDP
|
||||
SDG -->|"_processDocument()<br/>calls"| SPG
|
||||
SDG -->|"_processDocument()<br/>calls"| MSG
|
||||
SDG -->|"sanitizePromptContent()"| SU
|
||||
|
||||
%% subDocumentProcessing function calls
|
||||
SDP -->|"extractContent()"| MSE
|
||||
SDP -->|"_mergePartResults()<br/>_convertPartResultsToJson()<br/>_mergeChunkResultsJson()<br/>all call"| SP
|
||||
SDP -->|"_processChunksWithMapping()<br/>calls readImage()"| SC
|
||||
|
||||
%% Extraction service calls
|
||||
MSE -->|"extractContent()<br/>calls"| SP
|
||||
|
||||
%% Prompt builder calls
|
||||
SPE -->|"get_document_subJsonSchema()"| SJ
|
||||
SPE -->|"sanitizePromptContent()"| SU
|
||||
|
||||
%% Generation service calls
|
||||
MSG -->|"uses utility functions"| SU
|
||||
|
||||
classDef aiModule fill:#e1f5ff,stroke:#0066cc
|
||||
classDef extModule fill:#fff5e1,stroke:#cc6600
|
||||
classDef genModule fill:#e1ffe1,stroke:#006600
|
||||
|
||||
class SC,SDG,SDP,SU aiModule
|
||||
class MSE,SPE,SP extModule
|
||||
class MSG,SPG,SJ genModule
|
||||
```
|
||||
|
||||
## Call Flow by Module
|
||||
|
||||
### 1. subCoreAi (SubCoreAi Class)
|
||||
**Calls Out:**
|
||||
- `buildGenerationPrompt()` → subPromptBuilderGeneration (line 363-366)
|
||||
- `callAiText()` → subDocumentProcessing (line 453)
|
||||
- `renderReport()` → mainServiceGeneration (line 478-482)
|
||||
- `sanitizePromptContent()` → subSharedAiUtils (line 61, via services.ai)
|
||||
|
||||
**Called By:**
|
||||
- mainServiceAi (creates instance)
|
||||
- subDocumentProcessing._processChunksWithMapping (calls readImage at line 672-675)
|
||||
|
||||
---
|
||||
|
||||
### 2. subDocumentGeneration (SubDocumentGeneration Class)
|
||||
**Calls Out:**
|
||||
- `processDocumentsWithContinuation()` → subDocumentProcessing (line 110)
|
||||
- `buildGenerationPrompt()` → subPromptBuilderGeneration (line 330)
|
||||
- `renderReport()` → mainServiceGeneration (line 392)
|
||||
- `sanitizePromptContent()` → subSharedAiUtils (line 466)
|
||||
|
||||
**Called By:**
|
||||
- mainServiceAi (creates instance)
|
||||
|
||||
---
|
||||
|
||||
### 3. subDocumentProcessing (SubDocumentProcessing Class)
|
||||
**Calls Out:**
|
||||
- `extractContent()` → mainServiceExtraction (lines 78, 131, 220)
|
||||
- `_applyMerging()` → subPipeline (lines 1044, 1095, 1232, 1293, 1345)
|
||||
- `readImage()` → subCoreAi (line 672-675)
|
||||
- `sanitizePromptContent()` → subSharedAiUtils (via self.services.ai)
|
||||
|
||||
**Called By:**
|
||||
- mainServiceAi (creates instance)
|
||||
- subCoreAi.callAiDocuments (calls callAiText at line 453)
|
||||
- subDocumentGeneration._processDocumentsUnified (calls processDocumentsWithContinuation)
|
||||
|
||||
---
|
||||
|
||||
### 4. mainServiceExtraction (ExtractionService Class)
|
||||
**Calls Out:**
|
||||
- `runExtraction()` → subPipeline (line 61)
|
||||
- Uses ExtractorRegistry from subRegistry
|
||||
|
||||
**Called By:**
|
||||
- subDocumentProcessing.extractContent (3 times)
|
||||
|
||||
---
|
||||
|
||||
### 5. subPromptBuilderExtraction
|
||||
**Calls Out:**
|
||||
- `get_document_subJsonSchema()` → subJsonSchema (line 172)
|
||||
- `sanitizePromptContent()` → subSharedAiUtils (via services.ai)
|
||||
|
||||
**Called By:**
|
||||
- mainServiceGeneration (indirectly via getAdaptiveExtractionPrompt)
|
||||
|
||||
---
|
||||
|
||||
### 6. mainServiceGeneration (GenerationService Class)
|
||||
**Calls Out:**
|
||||
- `get_renderer()` → renderers.registry (line 501)
|
||||
- Utility functions from subDocumentUtility
|
||||
- Uses modelRegistry (external)
|
||||
|
||||
**Called By:**
|
||||
- subCoreAi.callAiDocuments (calls renderReport)
|
||||
- subDocumentGeneration._processDocument (calls renderReport)
|
||||
|
||||
---
|
||||
|
||||
### 7. subPromptBuilderGeneration
|
||||
**Calls Out:**
|
||||
- Returns prompt template string
|
||||
|
||||
**Called By:**
|
||||
- subCoreAi._buildGenerationPrompt (line 363-366)
|
||||
- subDocumentGeneration._processDocument (line 330)
|
||||
|
||||
---
|
||||
|
||||
### 8. subPipeline
|
||||
**Calls Out:**
|
||||
- Creates IntelligentTokenAwareMerger from subMerger (line 96)
|
||||
- Uses mergers from merging submodules
|
||||
|
||||
**Called By:**
|
||||
- mainServiceExtraction.extractContent (calls runExtraction)
|
||||
- subDocumentProcessing (calls _applyMerging 5 times)
|
||||
|
||||
---
|
||||
|
||||
### 9. subSharedAiUtils
|
||||
**Functions Provided:**
|
||||
- `buildPromptWithPlaceholders()`
|
||||
- `sanitizePromptContent()`
|
||||
- `extractTextFromContentParts()`
|
||||
- `reduceText()`
|
||||
- `determineCallType()`
|
||||
|
||||
**Called By:**
|
||||
- subCoreAi (imports and calls functions)
|
||||
- subDocumentGeneration (via services.ai.sanitizePromptContent)
|
||||
- subPromptBuilderExtraction (via services.ai.sanitizePromptContent)
|
||||
|
||||
---
|
||||
|
||||
### 10. subJsonSchema
|
||||
**Functions Provided:**
|
||||
- `get_document_subJsonSchema()`
|
||||
- `get_multi_document_subJsonSchema()`
|
||||
|
||||
**Called By:**
|
||||
- subPromptBuilderExtraction.buildExtractionPrompt (line 172)
|
||||
|
||||
---
|
||||
|
||||
## Circular Dependencies
|
||||
|
||||
**AI Service Loop:**
|
||||
1. subDocumentProcessing → subCoreAi.readImage() (for image processing)
|
||||
2. subDocumentProcessing → mainServiceExtraction (for extraction)
|
||||
3. mainServiceExtraction → subPipeline (for processing)
|
||||
4. subPipeline creates IntelligentTokenAwareMerger
|
||||
|
||||
**Flow:**
|
||||
```
|
||||
subDocumentProcessing.extractContent()
|
||||
→ mainServiceExtraction.extractContent()
|
||||
→ subPipeline.runExtraction()
|
||||
→ returns ContentExtracted
|
||||
→ processed by subDocumentProcessing
|
||||
→ calls subPipeline._applyMerging()
|
||||
```
|
||||
|
|
@ -127,7 +127,7 @@ class AiService:
|
|||
"""Planning AI call for task planning, action planning, action selection, etc."""
|
||||
await self._ensureAiObjectsInitialized()
|
||||
# Always use "json" for planning calls since they return JSON
|
||||
return await self.coreAi.callAiPlanning(prompt, placeholders, "json")
|
||||
return await self.coreAi.callAiPlanning(prompt, placeholders)
|
||||
|
||||
async def callAiDocuments(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -12,37 +12,26 @@ from modules.services.serviceAi.subSharedAiUtils import (
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Generic continuation instruction for all prompts with JSON responses
|
||||
# Used by _callAiWithLooping() to replace LOOP_INSTRUCTION placeholder
|
||||
LOOP_INSTRUCTION_TEXT = """
|
||||
MANDATORY RULE:
|
||||
Return ONLY raw JSON (no ```json blocks, no text before/after)
|
||||
|
||||
CONTINUATION REQUIREMENT:
|
||||
Your response must be a valid JSON object with a "continuation" field.
|
||||
|
||||
- If you can complete the FULL request: Set {"continuation": null}
|
||||
- If you MUST stop early (due to token limits): Set {"continuation": {"last_data_items": "brief summary of what was delivered for context", "next_instruction": "what to deliver next to complete the request"}}
|
||||
|
||||
The "continuation" field controls whether this AI call continues in a loop or stops.
|
||||
Refer to the json template below to see where to set the "continuation" information.
|
||||
"""
|
||||
|
||||
# Rebuild the model to resolve forward references
|
||||
AiCallRequest.model_rebuild()
|
||||
|
||||
|
||||
# Loop instruction texts for different formats
|
||||
LoopInstructionTexts = {
|
||||
"json": """
|
||||
CRITICAL LIMITS: <TOKEN_LIMIT> tokens total (reserve 20% for JSON structure)
|
||||
|
||||
MANDATORY RULES:
|
||||
1. STOP at approximately 80% of limit to ensure valid JSON completion
|
||||
2. Return ONLY raw JSON (no ```json blocks, no text before/after)
|
||||
|
||||
CONTINUATION REQUIREMENTS:
|
||||
Refer to the json object below where to set the "continuation" information:
|
||||
- If you can complete the full request: {"continuation": null}
|
||||
- If you must stop early: {
|
||||
"continuation": {
|
||||
"last_data_items": "delivered last data for context (copy them)",
|
||||
"next_instruction": "instruction for next data to deliver"
|
||||
}
|
||||
}
|
||||
|
||||
BE CONSERVATIVE: Stop generating content when you reach approximately 3200-3500 characters to ensure JSON completion.
|
||||
""",
|
||||
# Add more formats here as needed
|
||||
# "xml": "...",
|
||||
# "text": "...",
|
||||
}
|
||||
|
||||
|
||||
class SubCoreAi:
|
||||
"""Core AI operations including image analysis, text generation, and planning calls."""
|
||||
|
||||
|
|
@ -142,8 +131,7 @@ Respond with ONLY a JSON object in this exact format:
|
|||
self,
|
||||
prompt: str,
|
||||
options: AiCallOptions,
|
||||
debugPrefix: str = "ai_call",
|
||||
loopInstructionFormat: str = None
|
||||
debugPrefix: str = "ai_call"
|
||||
) -> str:
|
||||
"""
|
||||
Shared core function for AI calls with looping system.
|
||||
|
|
@ -154,7 +142,6 @@ Respond with ONLY a JSON object in this exact format:
|
|||
prompt: The prompt to send to AI
|
||||
options: AI call configuration options
|
||||
debugPrefix: Prefix for debug file names
|
||||
loopInstructionFormat: If provided, replaces LOOP_INSTRUCTION placeholder and includes in continuation prompts
|
||||
|
||||
Returns:
|
||||
Complete AI response after all iterations
|
||||
|
|
@ -162,18 +149,12 @@ Respond with ONLY a JSON object in this exact format:
|
|||
max_iterations = 100 # Prevent infinite loops
|
||||
iteration = 0
|
||||
accumulatedContent = []
|
||||
lastContinuationData = None
|
||||
|
||||
logger.debug(f"Starting AI call with looping (debug prefix: {debugPrefix}, loopInstructionFormat: {loopInstructionFormat is not None})")
|
||||
logger.debug(f"Starting AI call with looping (debug prefix: {debugPrefix})")
|
||||
|
||||
|
||||
# Determine loopInstruction based on loopInstructionFormat (before iterations)
|
||||
if not loopInstructionFormat:
|
||||
loopInstruction = ""
|
||||
elif loopInstructionFormat in LoopInstructionTexts:
|
||||
loopInstruction = LoopInstructionTexts[loopInstructionFormat]
|
||||
else:
|
||||
logger.error(f"Unsupported loopInstructionFormat for prompt: {loopInstructionFormat}")
|
||||
loopInstruction = ""
|
||||
# Use generic LOOP_INSTRUCTION_TEXT
|
||||
loopInstruction = LOOP_INSTRUCTION_TEXT if ("LOOP_INSTRUCTION" in prompt) else ""
|
||||
|
||||
|
||||
while iteration < max_iterations:
|
||||
|
|
@ -182,18 +163,25 @@ Respond with ONLY a JSON object in this exact format:
|
|||
|
||||
# Build iteration prompt
|
||||
if iteration == 1:
|
||||
# First iteration - replace LOOP_INSTRUCTION with standardized instruction
|
||||
if "LOOP_INSTRUCTION" in prompt:
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", loopInstruction)
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
elif loopInstruction and iteration > 1:
|
||||
continuationContent = self._buildContinuationContent(accumulatedContent, iteration)
|
||||
if "LOOP_INSTRUCTION" in prompt:
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationContent}\n\n{loopInstruction}")
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
# Subsequent iterations - include continuation data if available
|
||||
if lastContinuationData and isinstance(lastContinuationData, dict):
|
||||
continuationPrompt = self._buildContinuationPrompt(lastContinuationData, iteration)
|
||||
if "LOOP_INSTRUCTION" in prompt:
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationPrompt}\n\n{loopInstruction}")
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
else:
|
||||
# No continuation data - re-send original prompt
|
||||
if "LOOP_INSTRUCTION" in prompt:
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", loopInstruction)
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
|
||||
# Make AI call
|
||||
try:
|
||||
|
|
@ -234,33 +222,35 @@ Respond with ONLY a JSON object in this exact format:
|
|||
logger.warning(f"Iteration {iteration}: Empty response, stopping")
|
||||
break
|
||||
|
||||
# Check if this is a continuation response (only for supported formats)
|
||||
if loopInstructionFormat in LoopInstructionTexts:
|
||||
accumulatedContent.append(result)
|
||||
|
||||
# Check if this is a continuation response (only when LOOP_INSTRUCTION was used)
|
||||
if loopInstruction:
|
||||
try:
|
||||
# Extract JSON substring if wrapped (e.g., ```json ... ```)
|
||||
extracted = self.services.utils.jsonExtractString(result)
|
||||
# Try to parse as JSON to check for continuation attribute
|
||||
parsed_result = json.loads(extracted)
|
||||
if isinstance(parsed_result, dict) and parsed_result.get("continuation") is not None:
|
||||
# This is a continuation response
|
||||
accumulatedContent.append(result)
|
||||
logger.debug(f"Iteration {iteration}: Continuation detected in JSON, continuing...")
|
||||
continue
|
||||
else:
|
||||
# This is the final response (continuation is null or missing)
|
||||
accumulatedContent.append(result)
|
||||
logger.debug(f"Iteration {iteration}: Final response received")
|
||||
break
|
||||
|
||||
if isinstance(parsed_result, dict):
|
||||
continuation = parsed_result.get("continuation")
|
||||
|
||||
if continuation is None:
|
||||
# Final response - break loop
|
||||
logger.debug(f"Iteration {iteration}: Final response received (continuation: null)")
|
||||
break
|
||||
else:
|
||||
# Continuation detected - extract data for next iteration
|
||||
lastContinuationData = continuation if isinstance(continuation, dict) else None
|
||||
logger.debug(f"Iteration {iteration}: Continuation detected, continuing...")
|
||||
continue
|
||||
except json.JSONDecodeError:
|
||||
# Not JSON, treat as final response
|
||||
accumulatedContent.append(result)
|
||||
logger.warning(f"Iteration {iteration}: Non-JSON response received")
|
||||
logger.warning(f"Iteration {iteration}: Non-JSON response - treating as final")
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_error_non_json_response_iteration_{iteration}")
|
||||
break
|
||||
else:
|
||||
# This is the final response
|
||||
accumulatedContent.append(result)
|
||||
logger.debug(f"Iteration {iteration}: Final response received")
|
||||
# No loop instruction format - treat as final response
|
||||
logger.debug(f"Iteration {iteration}: Final response received (no loop format)")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -279,51 +269,26 @@ Respond with ONLY a JSON object in this exact format:
|
|||
logger.info(f"AI call completed: {len(accumulatedContent)} parts from {iteration} iterations")
|
||||
return final_result
|
||||
|
||||
def _buildContinuationContent(
|
||||
def _buildContinuationPrompt(
|
||||
self,
|
||||
accumulatedContent: List[str],
|
||||
continuationData: dict,
|
||||
iteration: int
|
||||
) -> str:
|
||||
"""
|
||||
Build continuation content for follow-up iterations.
|
||||
Build standardized continuation prompt from continuation data dict.
|
||||
This replaces the complex _buildContinuationContent method with a simpler approach.
|
||||
|
||||
Args:
|
||||
continuationData: Dictionary containing last_data_items and next_instruction
|
||||
iteration: Current iteration number
|
||||
|
||||
Returns:
|
||||
Formatted continuation prompt string
|
||||
"""
|
||||
# Extract continuation description from the last response
|
||||
continuation_description = ""
|
||||
if accumulatedContent:
|
||||
try:
|
||||
last_response = accumulatedContent[-1]
|
||||
# Use the same JSON extraction logic as the main loop
|
||||
extracted = self.services.utils.jsonExtractString(last_response)
|
||||
parsed_response = json.loads(extracted)
|
||||
if isinstance(parsed_response, dict):
|
||||
# Check for continuation at root level or in metadata
|
||||
continuation = parsed_response.get("continuation")
|
||||
if continuation is None and "metadata" in parsed_response:
|
||||
continuation = parsed_response["metadata"].get("continuation")
|
||||
|
||||
if continuation:
|
||||
continuation_description = continuation
|
||||
except (json.JSONDecodeError, KeyError, ValueError):
|
||||
pass
|
||||
last_data_items = continuationData.get("last_data_items", "")
|
||||
next_instruction = continuationData.get("next_instruction", "")
|
||||
|
||||
# Extract specific attributes from continuation object
|
||||
last_data_items = ""
|
||||
next_instruction = ""
|
||||
|
||||
if continuation_description:
|
||||
try:
|
||||
if isinstance(continuation_description, str):
|
||||
continuation_obj = json.loads(continuation_description)
|
||||
else:
|
||||
continuation_obj = continuation_description
|
||||
|
||||
if isinstance(continuation_obj, dict):
|
||||
last_data_items = continuation_obj.get("last_data_items", "")
|
||||
next_instruction = continuation_obj.get("next_instruction", "")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
continuation_content = f"""CONTINUATION REQUEST (Iteration {iteration}):
|
||||
continuation_prompt = f"""CONTINUATION REQUEST (Iteration {iteration}):
|
||||
You are continuing a previous response. DO NOT repeat any previous content.
|
||||
|
||||
{f"Already delivered data: {last_data_items}" if last_data_items else "No previous data specified"}
|
||||
|
|
@ -331,12 +296,10 @@ You are continuing a previous response. DO NOT repeat any previous content.
|
|||
{f"Your task to deliver: {next_instruction}" if next_instruction else "No specific task provided"}
|
||||
|
||||
CRITICAL REQUIREMENTS:
|
||||
- Start from the exact point specified in continuation instructions
|
||||
- DO NOT repeat any previous content
|
||||
- BE CONSERVATIVE: Stop at approximately 3200-3500 characters to ensure JSON completion
|
||||
- ALWAYS include continuation field - set to null if complete, or provide next instruction if incomplete
|
||||
"""
|
||||
return continuation_content
|
||||
- Start from the exact point specified above
|
||||
- DO NOT repeat any previous content"""
|
||||
|
||||
return continuation_prompt
|
||||
|
||||
def _mergeJsonContent(self, accumulatedContent: List[str]) -> str:
|
||||
"""
|
||||
|
|
@ -387,40 +350,12 @@ CRITICAL REQUIREMENTS:
|
|||
logger.error(f"Error merging JSON content: {str(e)}")
|
||||
return accumulatedContent[0] # Return first response on error
|
||||
|
||||
async def _buildGenerationPrompt(
|
||||
self,
|
||||
prompt: str,
|
||||
extracted_content: Optional[str],
|
||||
outputFormat: str,
|
||||
title: str
|
||||
) -> str:
|
||||
"""
|
||||
Build generation prompt for document generation.
|
||||
"""
|
||||
from modules.services.serviceGeneration.subPromptBuilder import buildGenerationPrompt
|
||||
|
||||
# Build the generation prompt using the existing system
|
||||
generation_prompt = await buildGenerationPrompt(
|
||||
outputFormat=outputFormat,
|
||||
userPrompt=prompt,
|
||||
title=title
|
||||
)
|
||||
|
||||
# If we have extracted content, prepend it to the prompt
|
||||
if extracted_content:
|
||||
generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
|
||||
{extracted_content}
|
||||
|
||||
{generation_prompt}"""
|
||||
|
||||
return generation_prompt
|
||||
|
||||
# Planning AI Call
|
||||
async def callAiPlanning(
|
||||
self,
|
||||
prompt: str,
|
||||
placeholders: Optional[List[PromptPlaceholder]] = None,
|
||||
loopInstructionFormat: Optional[str] = None
|
||||
placeholders: Optional[List[PromptPlaceholder]] = None
|
||||
) -> str:
|
||||
"""
|
||||
Planning AI call for task planning, action planning, action selection, etc.
|
||||
|
|
@ -429,7 +364,6 @@ CRITICAL REQUIREMENTS:
|
|||
Args:
|
||||
prompt: The planning prompt
|
||||
placeholders: Optional list of placeholder replacements
|
||||
loopInstructionFormat: Optional loop instruction format
|
||||
|
||||
Returns:
|
||||
Planning JSON response
|
||||
|
|
@ -452,7 +386,7 @@ CRITICAL REQUIREMENTS:
|
|||
full_prompt = prompt
|
||||
|
||||
# Use shared core function with planning-specific debug prefix
|
||||
return await self._callAiWithLooping(full_prompt, options, "plan", loopInstructionFormat=loopInstructionFormat)
|
||||
return await self._callAiWithLooping(full_prompt, options, "plan")
|
||||
|
||||
# Document Generation AI Call
|
||||
async def callAiDocuments(
|
||||
|
|
@ -461,8 +395,7 @@ CRITICAL REQUIREMENTS:
|
|||
documents: Optional[List[ChatDocument]] = None,
|
||||
options: Optional[AiCallOptions] = None,
|
||||
outputFormat: Optional[str] = None,
|
||||
title: Optional[str] = None,
|
||||
loopInstructionFormat: Optional[str] = None
|
||||
title: Optional[str] = None
|
||||
) -> Union[str, Dict[str, Any]]:
|
||||
"""
|
||||
Document generation AI call for all non-planning calls.
|
||||
|
|
@ -494,8 +427,10 @@ CRITICAL REQUIREMENTS:
|
|||
else:
|
||||
logger.debug("No documents provided - using direct generation")
|
||||
extracted_content = None
|
||||
generation_prompt = await self._buildGenerationPrompt(prompt, extracted_content, outputFormat, title)
|
||||
generated_json = await self._callAiWithLooping(generation_prompt, options, "document_generation", loopInstructionFormat=loopInstructionFormat)
|
||||
logger.debug(f"[DEBUG] title value: {title}, type: {type(title)}")
|
||||
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
|
||||
generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content)
|
||||
generated_json = await self._callAiWithLooping(generation_prompt, options, "document_generation")
|
||||
|
||||
# Parse the generated JSON (extract fenced/embedded JSON first)
|
||||
try:
|
||||
|
|
@ -552,7 +487,7 @@ CRITICAL REQUIREMENTS:
|
|||
result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options)
|
||||
else:
|
||||
# Use shared core function for direct text calls
|
||||
result = await self._callAiWithLooping(prompt, options, "text", loopInstructionFormat=None)
|
||||
result = await self._callAiWithLooping(prompt, options, "text")
|
||||
|
||||
return result
|
||||
|
||||
|
|
|
|||
|
|
@ -48,11 +48,7 @@ class SubDocumentGeneration:
|
|||
Dict with generated documents and metadata in unified structure
|
||||
"""
|
||||
try:
|
||||
# 1. Analyze prompt intent
|
||||
promptAnalysis = await self._analyzePromptIntent(prompt, self)
|
||||
logger.info(f"Prompt analysis result: {promptAnalysis}")
|
||||
|
||||
# 2. Get unified extraction prompt
|
||||
# 1. Get unified extraction prompt
|
||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||
generationService = GenerationService(self.services)
|
||||
|
||||
|
|
@ -60,17 +56,16 @@ class SubDocumentGeneration:
|
|||
outputFormat=outputFormat,
|
||||
userPrompt=prompt,
|
||||
title=title,
|
||||
promptAnalysis=promptAnalysis,
|
||||
aiService=self
|
||||
)
|
||||
|
||||
# 3. Process with unified pipeline (always multi-file approach)
|
||||
# 2. Process with unified pipeline (always multi-file approach)
|
||||
aiResponse = await self._processDocumentsUnified(
|
||||
documents, extractionPrompt, options
|
||||
)
|
||||
|
||||
# 4. Return unified result structure
|
||||
return await self._buildUnifiedResult(aiResponse, outputFormat, title, promptAnalysis)
|
||||
# 3. Return unified result structure
|
||||
return await self._buildUnifiedResult(aiResponse, outputFormat, title)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in unified document generation: {str(e)}")
|
||||
|
|
@ -263,9 +258,8 @@ class SubDocumentGeneration:
|
|||
self,
|
||||
aiResponse: Dict[str, Any],
|
||||
outputFormat: str,
|
||||
title: str,
|
||||
promptAnalysis: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
title: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Build unified result structure that always returns array-based format.
|
||||
Content is always a multi-document structure.
|
||||
|
|
@ -296,7 +290,6 @@ class SubDocumentGeneration:
|
|||
"is_multi_file": len(generatedDocuments) > 1,
|
||||
"format": outputFormat,
|
||||
"title": title,
|
||||
"split_strategy": promptAnalysis.get("strategy", "single"),
|
||||
"total_documents": len(generatedDocuments),
|
||||
"processed_documents": len(generatedDocuments)
|
||||
}
|
||||
|
|
@ -313,7 +306,7 @@ class SubDocumentGeneration:
|
|||
outputFormat: str,
|
||||
title: str,
|
||||
documentIndex: int
|
||||
) -> Dict[str, Any]:
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Process individual document with content enhancement and rendering.
|
||||
"""
|
||||
|
|
@ -326,12 +319,12 @@ class SubDocumentGeneration:
|
|||
enhancedContent = docData # Default to original
|
||||
if docData.get("sections"):
|
||||
try:
|
||||
# Get generation prompt
|
||||
generationPrompt = await generationService.getGenerationPrompt(
|
||||
# Get generation prompt directly
|
||||
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
|
||||
generationPrompt = await buildGenerationPrompt(
|
||||
outputFormat=outputFormat,
|
||||
userPrompt=title,
|
||||
title=docData.get("title", title),
|
||||
aiService=self
|
||||
title=docData.get("title", title)
|
||||
)
|
||||
|
||||
# Prepare the AI call
|
||||
|
|
@ -454,57 +447,6 @@ class SubDocumentGeneration:
|
|||
# Process documents with JSON merging
|
||||
return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
|
||||
|
||||
async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||
"""Use AI to analyze user prompt and determine processing requirements."""
|
||||
if not ai_service:
|
||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||
|
||||
try:
|
||||
analysis_prompt = f"""
|
||||
Analyze this user request and determine if it requires multiple file output or single file output.
|
||||
|
||||
User request: "{self.services.ai.sanitizePromptContent(prompt, 'userinput')}"
|
||||
|
||||
Respond with JSON only in this exact format:
|
||||
{{
|
||||
"is_multi_file": true/false,
|
||||
"strategy": "single|per_entity|by_section|by_criteria|custom",
|
||||
"criteria": "description of how to split content",
|
||||
"file_naming_pattern": "suggested pattern for filenames",
|
||||
"reasoning": "brief explanation of the analysis"
|
||||
}}
|
||||
|
||||
Consider:
|
||||
- Does the user want separate files for different entities (customers, products, etc.)?
|
||||
- Does the user want to split content into multiple documents?
|
||||
- What would be the most logical way to organize the content?
|
||||
- What language is the request in? (analyze in the original language)
|
||||
|
||||
Return only the JSON response.
|
||||
"""
|
||||
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationTypeEnum.DATA_GENERATE
|
||||
|
||||
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
||||
response = await ai_service.aiObjects.call(request)
|
||||
|
||||
if response and response.content:
|
||||
# Extract JSON from response
|
||||
result = response.content.strip()
|
||||
json_match = re.search(r'\{.*\}', result, re.DOTALL)
|
||||
if json_match:
|
||||
result = json_match.group(0)
|
||||
|
||||
analysis = json.loads(result)
|
||||
return analysis
|
||||
else:
|
||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file")
|
||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||
|
||||
async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -289,6 +289,11 @@ class SubDocumentProcessing:
|
|||
def _buildContinuationPrompt(self, base_prompt: str) -> str:
|
||||
"""
|
||||
Build a prompt that includes partial results continuation instructions.
|
||||
|
||||
NOTE: This uses a different continuation pattern than SubCoreAi:
|
||||
- SubCoreAi uses "continuation": null/dict for generic JSON responses
|
||||
- This uses "continue": true/false + "continuation_context" for document sections
|
||||
- Kept separate because it's tightly coupled to document processing needs
|
||||
"""
|
||||
continuation_instructions = """
|
||||
|
||||
|
|
|
|||
|
|
@ -29,9 +29,11 @@ def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, st
|
|||
|
||||
full_prompt = prompt
|
||||
for placeholder, content in placeholders.items():
|
||||
# Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
|
||||
full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
|
||||
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
|
||||
# Skip if content is None or empty
|
||||
if content is None:
|
||||
continue
|
||||
# Replace {{KEY:placeholder}}
|
||||
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
|
||||
|
||||
return full_prompt
|
||||
|
||||
|
|
|
|||
219
modules/services/serviceExtraction/subPromptBuilderExtraction.py
Normal file
219
modules/services/serviceExtraction/subPromptBuilderExtraction.py
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
"""
|
||||
Prompt builder for document extraction.
|
||||
This module builds prompts for extracting content from documents.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
||||
|
||||
# Type hint for renderer parameter
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from modules.services.serviceGeneration.renderers.rendererBaseTemplate import BaseRenderer
|
||||
_RendererLike = BaseRenderer
|
||||
else:
|
||||
_RendererLike = Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def buildExtractionPrompt(
|
||||
outputFormat: str,
|
||||
userPrompt: str,
|
||||
title: str,
|
||||
aiService=None,
|
||||
services=None,
|
||||
renderer: _RendererLike = None
|
||||
) -> str:
|
||||
"""
|
||||
Build unified extraction prompt for extracting content from documents.
|
||||
Always uses multi-file format (single doc = multi with n=1).
|
||||
|
||||
Args:
|
||||
outputFormat: Target output format
|
||||
userPrompt: User's prompt describing what to extract
|
||||
title: Document title
|
||||
aiService: Optional AI service for intent parsing
|
||||
services: Services instance
|
||||
renderer: Optional renderer for format-specific guidelines
|
||||
|
||||
Returns:
|
||||
Complete extraction prompt string
|
||||
"""
|
||||
|
||||
# Unified multi-file example (single doc = multi with n=1)
|
||||
json_example = {
|
||||
"metadata": {
|
||||
"title": "Multi-Document Example",
|
||||
"split_strategy": "by_section",
|
||||
"source_documents": ["doc_001"],
|
||||
"extraction_method": "ai_extraction"
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": "doc_section_1",
|
||||
"title": "Section 1 Title",
|
||||
"filename": "section_1.xlsx",
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_1",
|
||||
"content_type": "heading",
|
||||
"elements": [
|
||||
{
|
||||
"level": 1,
|
||||
"text": "1. SECTION TITLE"
|
||||
}
|
||||
],
|
||||
"order": 1
|
||||
},
|
||||
{
|
||||
"id": "section_2",
|
||||
"content_type": "paragraph",
|
||||
"elements": [
|
||||
{
|
||||
"text": "This is the actual content that should be extracted from the document."
|
||||
}
|
||||
],
|
||||
"order": 2
|
||||
},
|
||||
{
|
||||
"id": "section_3",
|
||||
"content_type": "table",
|
||||
"elements": [
|
||||
{
|
||||
"headers": ["Column 1", "Column 2"],
|
||||
"rows": [["Value 1", "Value 2"]]
|
||||
}
|
||||
],
|
||||
"order": 3
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
structure_instruction = "CRITICAL: You MUST return a JSON structure with a \"documents\" array. For single documents, create one document entry with all sections."
|
||||
|
||||
# Parse extraction intent if AI service is available
|
||||
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
|
||||
|
||||
# Build base prompt
|
||||
adaptive_prompt = f"""
|
||||
{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
|
||||
|
||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||
|
||||
TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.
|
||||
|
||||
{extraction_intent}
|
||||
|
||||
REQUIREMENTS:
|
||||
1. Analyze the document content provided in the context below
|
||||
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
|
||||
3. Create one or more JSON document entries based on the content structure
|
||||
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
|
||||
5. Generate appropriate filenames for each document
|
||||
|
||||
{structure_instruction}
|
||||
|
||||
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||
{json.dumps(json_example, indent=2)}
|
||||
|
||||
Requirements:
|
||||
- Preserve all original data - do not summarize or interpret
|
||||
- Use the exact JSON format shown above
|
||||
- Maintain data integrity and structure
|
||||
|
||||
Content Types to Extract:
|
||||
1. Tables: Extract all rows and columns with proper headers
|
||||
2. Lists: Extract all items with proper nesting
|
||||
3. Headings: Extract with appropriate levels
|
||||
4. Paragraphs: Extract as structured text
|
||||
5. Code: Extract code blocks with language identification
|
||||
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||
|
||||
Image Analysis Requirements:
|
||||
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||
- Describe everything you see in the image
|
||||
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||
- If the image is too small, corrupted, or unclear, explain this
|
||||
- Always provide feedback - never return empty responses
|
||||
|
||||
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||
|
||||
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||
""".strip()
|
||||
|
||||
# Add renderer-specific guidelines if provided
|
||||
if renderer:
|
||||
try:
|
||||
if hasattr(renderer, 'getExtractionGuidelines'):
|
||||
formatGuidelines = renderer.getExtractionGuidelines()
|
||||
adaptive_prompt = f"{adaptive_prompt}\n\n{formatGuidelines}".strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Save extraction prompt to debug file - only if debug enabled
|
||||
if services:
|
||||
try:
|
||||
debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
||||
if debug_enabled:
|
||||
import os
|
||||
from datetime import datetime, UTC
|
||||
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
|
||||
if not os.path.isabs(logDir):
|
||||
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
logDir = os.path.join(gatewayDir, logDir)
|
||||
debug_root = os.path.join(logDir, 'debug')
|
||||
os.makedirs(debug_root, exist_ok=True)
|
||||
with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
|
||||
f.write(adaptive_prompt)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return adaptive_prompt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None, services=None) -> str:
|
||||
"""
|
||||
Parse user prompt to extract the core extraction intent.
|
||||
"""
|
||||
if not aiService:
|
||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||
|
||||
try:
|
||||
analysis_prompt = f"""
|
||||
Analyze this user request and extract the core extraction intent:
|
||||
|
||||
User request: "{userPrompt}"
|
||||
Target format: {outputFormat}
|
||||
|
||||
Extract the main intent and requirements for document processing. Focus on:
|
||||
1. What content needs to be extracted
|
||||
2. How it should be organized
|
||||
3. Any specific requirements or preferences
|
||||
|
||||
Respond with a clear, concise statement of the extraction intent.
|
||||
"""
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationTypeEnum.DATA_GENERATE
|
||||
|
||||
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
||||
response = await aiService.aiObjects.call(request)
|
||||
|
||||
if response and response.content:
|
||||
return response.content.strip()
|
||||
else:
|
||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||
|
||||
except Exception as e:
|
||||
services.utils.debugLogToFile(f"Extraction intent analysis failed: {str(e)}", "PROMPT_BUILDER")
|
||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||
|
||||
|
|
@ -299,6 +299,7 @@ class GenerationService:
|
|||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
|
||||
"""
|
||||
Render extracted JSON content to the specified output format.
|
||||
Always uses unified "documents" array format.
|
||||
|
||||
Args:
|
||||
extractedContent: Structured JSON document from AI extraction
|
||||
|
|
@ -315,31 +316,25 @@ class GenerationService:
|
|||
if not isinstance(extractedContent, dict):
|
||||
raise ValueError("extractedContent must be a JSON dictionary")
|
||||
|
||||
# Check if this is a multi-document structure
|
||||
if "documents" in extractedContent and len(extractedContent["documents"]) > 1:
|
||||
# Multiple documents - use multi-file renderer
|
||||
generated_documents = await self._renderMultiFileReport(extractedContent, outputFormat, title, userPrompt, aiService)
|
||||
# For multi-document, return the first document's content and mime type
|
||||
if generated_documents:
|
||||
return generated_documents[0]["content"], generated_documents[0]["mime_type"]
|
||||
else:
|
||||
raise ValueError("No documents could be rendered")
|
||||
elif "documents" in extractedContent and len(extractedContent["documents"]) == 1:
|
||||
# Single document in documents array - extract sections
|
||||
single_doc = extractedContent["documents"][0]
|
||||
if "sections" not in single_doc:
|
||||
raise ValueError("Document must contain 'sections' field")
|
||||
# Create content for single document renderer
|
||||
contentToRender = {
|
||||
"sections": single_doc["sections"],
|
||||
"metadata": extractedContent.get("metadata", {}),
|
||||
"continuation": extractedContent.get("continuation", None)
|
||||
}
|
||||
elif "sections" in extractedContent:
|
||||
# Direct sections format
|
||||
contentToRender = extractedContent
|
||||
else:
|
||||
raise ValueError("extractedContent must contain 'sections' field or 'documents' array")
|
||||
# Unified approach: Always expect "documents" array (single doc = n=1)
|
||||
if "documents" not in extractedContent:
|
||||
raise ValueError("extractedContent must contain 'documents' array")
|
||||
|
||||
documents = extractedContent["documents"]
|
||||
if len(documents) == 0:
|
||||
raise ValueError("No documents found in 'documents' array")
|
||||
|
||||
# Use first document for rendering
|
||||
single_doc = documents[0]
|
||||
if "sections" not in single_doc:
|
||||
raise ValueError("Document must contain 'sections' field")
|
||||
|
||||
# Create content for single document renderer
|
||||
contentToRender = {
|
||||
"sections": single_doc["sections"],
|
||||
"metadata": extractedContent.get("metadata", {}),
|
||||
"continuation": extractedContent.get("continuation", None)
|
||||
}
|
||||
|
||||
# Get the appropriate renderer for the format
|
||||
renderer = self._getFormatRenderer(outputFormat)
|
||||
|
|
@ -362,171 +357,18 @@ class GenerationService:
|
|||
outputFormat: str,
|
||||
userPrompt: str,
|
||||
title: str,
|
||||
promptAnalysis: Dict[str, Any],
|
||||
aiService=None
|
||||
) -> str:
|
||||
"""Get adaptive extraction prompt based on AI analysis."""
|
||||
from .subPromptBuilder import buildAdaptiveExtractionPrompt
|
||||
return await buildAdaptiveExtractionPrompt(
|
||||
"""Get adaptive extraction prompt."""
|
||||
from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
|
||||
return await buildExtractionPrompt(
|
||||
outputFormat=outputFormat,
|
||||
userPrompt=userPrompt,
|
||||
title=title,
|
||||
promptAnalysis=promptAnalysis,
|
||||
aiService=aiService,
|
||||
services=self.services
|
||||
)
|
||||
|
||||
async def getGenerationPrompt(
|
||||
self,
|
||||
outputFormat: str,
|
||||
userPrompt: str,
|
||||
title: str
|
||||
) -> str:
|
||||
"""Get generation prompt for enhancing extracted JSON content."""
|
||||
from .subPromptBuilder import buildGenerationPrompt
|
||||
return await buildGenerationPrompt(
|
||||
outputFormat=outputFormat,
|
||||
userPrompt=userPrompt,
|
||||
title=title
|
||||
)
|
||||
|
||||
|
||||
async def renderAdaptiveReport(
|
||||
self,
|
||||
extractedContent: Dict[str, Any],
|
||||
outputFormat: str,
|
||||
title: str,
|
||||
userPrompt: str = None,
|
||||
aiService=None,
|
||||
isMultiFile: bool = False
|
||||
) -> Union[Tuple[str, str], List[Dict[str, Any]]]:
|
||||
"""Render report adaptively based on content structure."""
|
||||
|
||||
# Start timing for generation
|
||||
startTime = time.time()
|
||||
|
||||
try:
|
||||
if isMultiFile and "documents" in extractedContent:
|
||||
result = await self._renderMultiFileReport(
|
||||
extractedContent, outputFormat, title, userPrompt, aiService
|
||||
)
|
||||
else:
|
||||
result = await self._renderSingleFileReport(
|
||||
extractedContent, outputFormat, title, userPrompt, aiService
|
||||
)
|
||||
|
||||
# Calculate timing and emit stats
|
||||
endTime = time.time()
|
||||
processingTime = endTime - startTime
|
||||
|
||||
# Calculate bytes (rough estimation)
|
||||
if isinstance(result, tuple):
|
||||
content, mime_type = result
|
||||
bytesReceived = len(content.encode('utf-8')) if isinstance(content, str) else len(content)
|
||||
elif isinstance(result, list):
|
||||
bytesReceived = sum(len(str(doc).encode('utf-8')) for doc in result)
|
||||
else:
|
||||
bytesReceived = len(str(result).encode('utf-8'))
|
||||
|
||||
# Use internal generation model for pricing
|
||||
modelName = "internal_generation"
|
||||
model = modelRegistry.getModel(modelName)
|
||||
priceUsd = model.calculatePriceUsd(processingTime, 0, bytesReceived)
|
||||
|
||||
aiResponse = AiCallResponse(
|
||||
content="", # No content for generation stats needed
|
||||
modelName=modelName,
|
||||
priceUsd=priceUsd,
|
||||
processingTime=processingTime,
|
||||
bytesSent=0, # Input is already processed
|
||||
bytesReceived=bytesReceived,
|
||||
errorCount=0
|
||||
)
|
||||
|
||||
self.services.workflow.storeWorkflowStat(
|
||||
self.services.currentWorkflow,
|
||||
aiResponse,
|
||||
f"generation.render.{outputFormat}"
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
# Calculate timing for error case
|
||||
endTime = time.time()
|
||||
processingTime = endTime - startTime
|
||||
|
||||
# Use internal generation model for pricing
|
||||
modelName = "internal_generation"
|
||||
model = modelRegistry.getModel(modelName)
|
||||
priceUsd = model.calculatePriceUsd(processingTime, 0, 0)
|
||||
|
||||
aiResponse = AiCallResponse(
|
||||
content="", # No content for generation stats needed
|
||||
modelName=modelName,
|
||||
priceUsd=priceUsd,
|
||||
processingTime=processingTime,
|
||||
bytesSent=0,
|
||||
bytesReceived=0,
|
||||
errorCount=1
|
||||
)
|
||||
|
||||
self.services.workflow.storeWorkflowStat(
|
||||
self.services.currentWorkflow,
|
||||
aiResponse,
|
||||
f"generation.render.{outputFormat}"
|
||||
)
|
||||
|
||||
raise
|
||||
|
||||
async def _renderMultiFileReport(
|
||||
self,
|
||||
extractedContent: Dict[str, Any],
|
||||
outputFormat: str,
|
||||
title: str,
|
||||
userPrompt: str = None,
|
||||
aiService=None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Render multiple documents from extracted content."""
|
||||
|
||||
generated_documents = []
|
||||
|
||||
for doc_data in extractedContent.get("documents", []):
|
||||
# Use existing single-file renderer for each document
|
||||
renderer = self._getFormatRenderer(outputFormat)
|
||||
if not renderer:
|
||||
continue
|
||||
|
||||
# Render individual document
|
||||
rendered_content, mime_type = await renderer.render(
|
||||
extractedContent={"sections": doc_data["sections"]},
|
||||
title=doc_data["title"],
|
||||
userPrompt=userPrompt,
|
||||
aiService=aiService
|
||||
)
|
||||
|
||||
generated_documents.append({
|
||||
"filename": doc_data["filename"],
|
||||
"content": rendered_content,
|
||||
"mime_type": mime_type,
|
||||
"title": doc_data["title"]
|
||||
})
|
||||
|
||||
return generated_documents
|
||||
|
||||
async def _renderSingleFileReport(
|
||||
self,
|
||||
extractedContent: Dict[str, Any],
|
||||
outputFormat: str,
|
||||
title: str,
|
||||
userPrompt: str = None,
|
||||
aiService=None
|
||||
) -> Tuple[str, str]:
|
||||
"""Render single file report (existing functionality)."""
|
||||
# Use existing renderReport method
|
||||
return await self.renderReport(
|
||||
extractedContent, outputFormat, title, userPrompt, aiService
|
||||
)
|
||||
|
||||
def _getFormatRenderer(self, output_format: str):
|
||||
"""Get the appropriate renderer for the specified format using auto-discovery."""
|
||||
|
|
|
|||
|
|
@ -14,10 +14,10 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
|||
"properties": {
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"required": ["title", "splitStrategy"],
|
||||
"required": ["title", "split_strategy"],
|
||||
"properties": {
|
||||
"title": {"type": "string", "description": "Document title"},
|
||||
"splitStrategy": {
|
||||
"split_strategy": {
|
||||
"type": "string",
|
||||
"enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
|
||||
"description": "Strategy for splitting content into multiple files"
|
||||
|
|
@ -437,7 +437,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
|
|||
return False
|
||||
|
||||
metadata = json_data["metadata"]
|
||||
if not isinstance(metadata, dict) or "title" not in metadata or "splitStrategy" not in metadata:
|
||||
if not isinstance(metadata, dict) or "title" not in metadata or "split_strategy" not in metadata:
|
||||
return False
|
||||
|
||||
documents = json_data["documents"]
|
||||
|
|
|
|||
|
|
@ -1,397 +0,0 @@
|
|||
"""
|
||||
Prompt builder for AI document generation and extraction.
|
||||
This module builds prompts for AI services to extract and generate documents.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List, TYPE_CHECKING
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
||||
|
||||
# Type hint for renderer parameter
|
||||
if TYPE_CHECKING:
|
||||
from .renderers.rendererBaseTemplate import BaseRenderer
|
||||
_RendererLike = BaseRenderer
|
||||
else:
|
||||
_RendererLike = Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Centralized JSON structure template for document generation
|
||||
JSON_STRUCTURE_TEMPLATE = """{
|
||||
"metadata": {
|
||||
"title": "{{DOCUMENT_TITLE}}",
|
||||
"splitStrategy": "single_document",
|
||||
"source_documents": [],
|
||||
"extraction_method": "ai_generation"
|
||||
},
|
||||
"documents": [{
|
||||
"id": "doc_1",
|
||||
"title": "{{DOCUMENT_TITLE}}",
|
||||
"filename": "document.json",
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_1",
|
||||
"content_type": "heading|paragraph|table|list|code",
|
||||
"elements": [
|
||||
// heading: {"level": 1, "text": "..."}
|
||||
// paragraph: {"text": "..."}
|
||||
// table: {"headers": [...], "rows": [[...]], "caption": "..."}
|
||||
// list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
|
||||
// code: {"code": "...", "language": "..."}
|
||||
],
|
||||
"order": 1
|
||||
}
|
||||
]
|
||||
}],
|
||||
"continuation": null,
|
||||
}"""
|
||||
|
||||
async def buildAdaptiveExtractionPrompt(
|
||||
outputFormat: str,
|
||||
userPrompt: str,
|
||||
title: str,
|
||||
promptAnalysis: Dict[str, Any],
|
||||
aiService=None,
|
||||
services=None
|
||||
) -> str:
|
||||
"""
|
||||
Build adaptive extraction prompt based on AI analysis.
|
||||
Uses multi-file or single-file approach based on analysis.
|
||||
"""
|
||||
|
||||
# Multi-file example data instead of schema
|
||||
multi_file_example = {
|
||||
"metadata": {
|
||||
"title": "Multi-Document Example",
|
||||
"splitStrategy": "by_section",
|
||||
"source_documents": ["doc_001"],
|
||||
"extraction_method": "ai_extraction"
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": "doc_section_1",
|
||||
"title": "Section 1 Title",
|
||||
"filename": "section_1.xlsx",
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_1",
|
||||
"content_type": "heading",
|
||||
"elements": [
|
||||
{
|
||||
"level": 1,
|
||||
"text": "1. SECTION TITLE"
|
||||
}
|
||||
],
|
||||
"order": 1
|
||||
},
|
||||
{
|
||||
"id": "section_2",
|
||||
"content_type": "paragraph",
|
||||
"elements": [
|
||||
{
|
||||
"text": "This is the actual content that should be extracted from the document."
|
||||
}
|
||||
],
|
||||
"order": 2
|
||||
},
|
||||
{
|
||||
"id": "section_3",
|
||||
"content_type": "table",
|
||||
"elements": [
|
||||
{
|
||||
"headers": ["Column 1", "Column 2"],
|
||||
"rows": [["Value 1", "Value 2"]]
|
||||
}
|
||||
],
|
||||
"order": 3
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# UNIFIED APPROACH: Always use multi-document format (single doc = multi with n=1)
|
||||
adaptive_prompt = f"""
|
||||
{services.ai.sanitizePromptContent(userPrompt, 'userinput')}
|
||||
|
||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||
|
||||
TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.
|
||||
|
||||
REQUIREMENTS:
|
||||
1. Analyze the document content provided in the context below
|
||||
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
|
||||
3. Create one or more JSON document entries based on the content structure
|
||||
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
|
||||
5. Generate appropriate filenames for each document
|
||||
|
||||
CRITICAL: You MUST return a JSON structure with a "documents" array, NOT a "sections" array.
|
||||
|
||||
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||
{json.dumps(multi_file_example, indent=2)}
|
||||
|
||||
IMPORTANT: The JSON must have a "documents" key containing an array of document objects. Each document object must have:
|
||||
- "id": unique identifier
|
||||
- "title": document title
|
||||
- "filename": appropriate filename for the document
|
||||
- "sections": array of content sections
|
||||
|
||||
DO NOT return a JSON with "sections" at the root level. Return a JSON with "documents" at the root level.
|
||||
|
||||
INSTRUCTIONS:
|
||||
- For single document requests: Create one document with all content in its sections
|
||||
- For multi-document requests: Create multiple documents, each with relevant sections
|
||||
- Use actual section titles, headings, and text from the document
|
||||
- Create meaningful filenames based on content
|
||||
- Ensure each section contains the complete content for that part
|
||||
- Do not use generic placeholder text like "Section 1", "Section 2"
|
||||
- Extract real headings, paragraphs, lists, and other content elements
|
||||
- CRITICAL: Return JSON with "documents" array, not "sections" array
|
||||
|
||||
CONTEXT (Document Content):
|
||||
|
||||
Content Types to Extract:
|
||||
1. Tables: Extract all rows and columns with proper headers
|
||||
2. Lists: Extract all items with proper nesting
|
||||
3. Headings: Extract with appropriate levels
|
||||
4. Paragraphs: Extract as structured text
|
||||
5. Code: Extract code blocks with language identification
|
||||
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||
|
||||
Image Analysis Requirements:
|
||||
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||
- Describe everything you see in the image
|
||||
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||
- If the image is too small, corrupted, or unclear, explain this
|
||||
- Always provide feedback - never return empty responses
|
||||
|
||||
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||
|
||||
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||
""".strip()
|
||||
|
||||
return adaptive_prompt
|
||||
|
||||
async def buildGenerationPrompt(
|
||||
outputFormat: str,
|
||||
userPrompt: str,
|
||||
title: str
|
||||
) -> str:
|
||||
"""Build the unified generation prompt using a single JSON template."""
|
||||
# Create a template with the actual title
|
||||
json_template = JSON_STRUCTURE_TEMPLATE.replace("{{DOCUMENT_TITLE}}", title)
|
||||
|
||||
# Always use the proper generation prompt template with LOOP_INSTRUCTION
|
||||
result = f"""Generate structured JSON content for document creation.
|
||||
|
||||
USER CONTEXT: "{userPrompt}"
|
||||
DOCUMENT TITLE: "{title}"
|
||||
TARGET FORMAT: {outputFormat}
|
||||
|
||||
LOOP_INSTRUCTION
|
||||
|
||||
RULES:
|
||||
- Follow the template structure below exactly; emit only one JSON object in the response
|
||||
- Fill sections with content based on the user request
|
||||
- Use appropriate content_type
|
||||
|
||||
Return ONLY valid JSON matching this structure (template below). Do not include any prose before/after. Use this as the single template reference for your output:
|
||||
{json_template}
|
||||
"""
|
||||
|
||||
return result.strip()
|
||||
|
||||
async def buildExtractionPrompt(
|
||||
outputFormat: str,
|
||||
renderer: _RendererLike,
|
||||
userPrompt: str,
|
||||
title: str,
|
||||
aiService=None,
|
||||
services=None
|
||||
) -> str:
|
||||
"""
|
||||
Build the final extraction prompt by combining:
|
||||
- Parsed extraction intent from user prompt (using AI)
|
||||
- Generic cross-format instructions (filename header + real-data policy)
|
||||
- Format-specific guidelines snippet provided by the renderer
|
||||
|
||||
The AI must place a single filename header at the very top:
|
||||
FILENAME: <safe-file-name-with-extension>
|
||||
followed by a blank line and then ONLY the document content according to the target format.
|
||||
"""
|
||||
|
||||
# Parse user prompt to separate extraction intent from generation format using AI
|
||||
extractionIntent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services)
|
||||
|
||||
# Import JSON schema for structured output
|
||||
from .subJsonSchema import get_document_subJsonSchema
|
||||
jsonSchema = get_document_subJsonSchema()
|
||||
|
||||
# Generic block for JSON extraction - use mixed example data showing different content types
|
||||
example_data = {
|
||||
"metadata": {
|
||||
"title": "Example Document",
|
||||
"author": "AI Assistant",
|
||||
"source_documents": ["document_001"],
|
||||
"extraction_method": "ai_extraction"
|
||||
},
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_001",
|
||||
"content_type": "heading",
|
||||
"elements": [
|
||||
{
|
||||
"level": 1,
|
||||
"text": "1. INTRODUCTION"
|
||||
}
|
||||
],
|
||||
"order": 1,
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"id": "section_002",
|
||||
"content_type": "paragraph",
|
||||
"elements": [
|
||||
{
|
||||
"text": "This is a sample paragraph with actual content that should be extracted from the document."
|
||||
}
|
||||
],
|
||||
"order": 2,
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"id": "section_003",
|
||||
"content_type": "table",
|
||||
"elements": [
|
||||
{
|
||||
"headers": ["Column 1", "Column 2", "Column 3"],
|
||||
"rows": [
|
||||
["Value 1", "Value 2", "Value 3"],
|
||||
["Value 4", "Value 5", "Value 6"]
|
||||
]
|
||||
}
|
||||
],
|
||||
"order": 3,
|
||||
"metadata": {}
|
||||
}
|
||||
],
|
||||
"summary": "",
|
||||
"tags": []
|
||||
}
|
||||
|
||||
genericIntro = f"""
|
||||
{extractionIntent}
|
||||
|
||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||
|
||||
TASK: Extract the actual content from the document and organize it into structured sections.
|
||||
|
||||
REQUIREMENTS:
|
||||
1. Analyze the document content provided in the context below
|
||||
2. Extract all content and organize it into logical sections
|
||||
3. Create structured JSON with sections containing the extracted content
|
||||
4. Preserve the original structure and data
|
||||
|
||||
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||
{json.dumps(example_data, indent=2)}
|
||||
|
||||
Requirements:
|
||||
- Preserve all original data - do not summarize or interpret
|
||||
- Use the exact JSON format shown above
|
||||
- Maintain data integrity and structure
|
||||
|
||||
Content Types to Extract:
|
||||
1. Tables: Extract all rows and columns with proper headers
|
||||
2. Lists: Extract all items with proper nesting
|
||||
3. Headings: Extract with appropriate levels
|
||||
4. Paragraphs: Extract as structured text
|
||||
5. Code: Extract code blocks with language identification
|
||||
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||
|
||||
Image Analysis Requirements:
|
||||
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||
- Describe everything you see in the image
|
||||
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||
- If the image is too small, corrupted, or unclear, explain this
|
||||
- Always provide feedback - never return empty responses
|
||||
|
||||
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||
|
||||
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||
|
||||
DO NOT return a schema description - return actual extracted content in the JSON format shown above.
|
||||
"""
|
||||
|
||||
# Get format-specific guidelines from renderer
|
||||
formatGuidelines = ""
|
||||
try:
|
||||
if hasattr(renderer, 'getExtractionGuidelines'):
|
||||
formatGuidelines = renderer.getExtractionGuidelines()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Combine all parts
|
||||
finalPrompt = f"{genericIntro}\n\n{formatGuidelines}".strip()
|
||||
|
||||
# Save extraction prompt to debug file - only if debug enabled
|
||||
try:
|
||||
debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
||||
if debug_enabled:
|
||||
import os
|
||||
from datetime import datetime, UTC
|
||||
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||
# Use configured log directory instead of hardcoded test-chat
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
|
||||
if not os.path.isabs(logDir):
|
||||
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
logDir = os.path.join(gatewayDir, logDir)
|
||||
debug_root = os.path.join(logDir, 'debug')
|
||||
os.makedirs(debug_root, exist_ok=True)
|
||||
with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
|
||||
f.write(finalPrompt)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return finalPrompt
|
||||
|
||||
|
||||
|
||||
|
||||
async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None, services=None) -> str:
|
||||
"""
|
||||
Parse user prompt to extract the core extraction intent.
|
||||
"""
|
||||
if not aiService:
|
||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||
|
||||
try:
|
||||
analysis_prompt = f"""
|
||||
Analyze this user request and extract the core extraction intent:
|
||||
|
||||
User request: "{userPrompt}"
|
||||
Target format: {outputFormat}
|
||||
|
||||
Extract the main intent and requirements for document processing. Focus on:
|
||||
1. What content needs to be extracted
|
||||
2. How it should be organized
|
||||
3. Any specific requirements or preferences
|
||||
|
||||
Respond with a clear, concise statement of the extraction intent.
|
||||
"""
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationTypeEnum.DATA_GENERATE
|
||||
|
||||
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
||||
response = await aiService.aiObjects.call(request)
|
||||
|
||||
if response and response.content:
|
||||
return response.content.strip()
|
||||
else:
|
||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||
|
||||
except Exception as e:
|
||||
services.utils.debugLogToFile(f"Extraction intent analysis failed: {str(e)}", "PROMPT_BUILDER")
|
||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
"""
|
||||
Prompt builder for document generation.
|
||||
This module builds prompts for generating documents from extracted content.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Centralized JSON structure template for document generation.
# NOTE: "{{DOCUMENT_TITLE}}" is a placeholder substituted via str.replace by
# the prompt builder. The "// ..." lines inside "elements" are guidance for
# the AI model, not valid JSON — this template is never parsed directly.
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "{{DOCUMENT_TITLE}}",
      "filename": "document.json",
      "sections": [
        {
          "id": "section_1",
          "content_type": "heading|paragraph|table|list|code",
          "elements": [
            // heading: {"level": 1, "text": "..."}
            // paragraph: {"text": "..."}
            // table: {"headers": [...], "rows": [[...]], "caption": "..."}
            // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
            // code: {"code": "...", "language": "..."}
          ],
          "order": 1
        }
      ]
    }
  ],
  "continuation": null
}"""
||||
async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None
) -> str:
    """Assemble the unified document-generation prompt.

    The prompt embeds the user's request, the target format, a title
    instruction, and the shared JSON template. When extracted document
    content is supplied it is prepended so the model grounds its output
    in the source material.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.).
        userPrompt: User's original prompt for document generation.
        title: Title for the document (substituted into the template).
        extracted_content: Optional extracted content from documents to
            prepend to the prompt.

    Returns:
        Complete generation prompt string (stripped of outer whitespace).
    """
    titleInstruction = f"Use the following title: \"{title}\""
    filledTemplate = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title)

    # Core prompt: generation rules plus the LOOP_INSTRUCTION marker and the
    # JSON template the model must follow.
    parts = [
        f"""Generate structured JSON content for document creation.

USER CONTEXT: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE INSTRUCTION: {titleInstruction}

LOOP_INSTRUCTION

RULES:
- Follow the template structure below exactly; emit only one JSON object in the response
- Fill sections with content based on the user request
- Use appropriate content_type

{filledTemplate}
"""
    ]

    # Ground the generation in source material when we have any.
    if extracted_content:
        parts.insert(0, f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

""")

    return "".join(parts).strip()
|
|
@ -20,61 +20,6 @@ class WorkflowService:
|
|||
self.interfaceDbApp = serviceCenter.interfaceDbApp
|
||||
self._progressLogger = None
|
||||
|
||||
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
|
||||
"""
|
||||
Summarize chat messages from last to first message with status="first"
|
||||
|
||||
Args:
|
||||
messages: List of chat messages to summarize
|
||||
|
||||
Returns:
|
||||
str: Summary of the chat in user's language
|
||||
"""
|
||||
try:
|
||||
# Get messages from last to first, stopping at first message with status="first"
|
||||
relevantMessages = []
|
||||
for msg in reversed(messages):
|
||||
relevantMessages.append(msg)
|
||||
if msg.status == "first":
|
||||
break
|
||||
|
||||
# Create prompt for AI
|
||||
prompt = f"""
|
||||
You are an AI assistant providing a summary of a chat conversation.
|
||||
Please respond in '{self.user.language}' language.
|
||||
|
||||
Chat History:
|
||||
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
|
||||
|
||||
Instructions:
|
||||
1. Summarize the conversation's key points and outcomes
|
||||
2. Be concise but informative
|
||||
3. Use a professional but friendly tone
|
||||
4. Focus on important decisions and next steps if any
|
||||
|
||||
LOOP_INSTRUCTION
|
||||
|
||||
Please provide a comprehensive summary of this conversation."""
|
||||
|
||||
# Get summary using AI service through proper main service interface
|
||||
|
||||
return await self.services.ai.callAiDocuments(
|
||||
prompt=prompt,
|
||||
documents=None,
|
||||
options=AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_GENERATE,
|
||||
priority=PriorityEnum.SPEED,
|
||||
processingMode=ProcessingModeEnum.BASIC,
|
||||
compressPrompt=True,
|
||||
compressContext=False,
|
||||
maxCost=0.01
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error summarizing chat: {str(e)}")
|
||||
return f"Error summarizing chat: {str(e)}"
|
||||
|
||||
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
|
||||
"""Get ChatDocuments from a list of document references using all three formats."""
|
||||
try:
|
||||
|
|
@ -928,7 +873,9 @@ Please provide a comprehensive summary of this conversation."""
|
|||
def _getProgressLogger(self):
|
||||
"""Get or create the progress logger instance"""
|
||||
if self._progressLogger is None:
|
||||
self._progressLogger = ProgressLogger(self, self.workflow)
|
||||
# Use currentWorkflow from self.services instead of self.workflow (which is self)
|
||||
workflow = getattr(self.services, 'currentWorkflow', None)
|
||||
self._progressLogger = ProgressLogger(self, workflow)
|
||||
return self._progressLogger
|
||||
|
||||
def createProgressLogger(self, workflow) -> ProgressLogger:
|
||||
|
|
|
|||
|
|
@ -42,15 +42,22 @@ class MethodAi(MethodBase):
|
|||
"""
|
||||
try:
|
||||
# Init progress logger
|
||||
operationId = f"ai_process_{self.services.currentWorkflow.id}_{int(time.time())}"
|
||||
workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
|
||||
operationId = f"ai_process_{workflowId}_{int(time.time())}"
|
||||
|
||||
# Start progress tracking
|
||||
self.services.workflow.progressLogStart(
|
||||
operationId,
|
||||
"Generate",
|
||||
"AI Processing",
|
||||
f"Format: {parameters.get('resultType', 'txt')}"
|
||||
)
|
||||
if hasattr(self.services, 'workflow') and self.services.workflow: # TODO: Entfernen für PROD! (block)
|
||||
try:
|
||||
self.services.workflow.progressLogStart(
|
||||
operationId,
|
||||
"Generate",
|
||||
"AI Processing",
|
||||
f"Format: {parameters.get('resultType', 'txt')}"
|
||||
)
|
||||
except Exception as e:
|
||||
# Silently skip progress tracking errors (e.g., in test environments)
|
||||
logger.debug(f"Skipping progress logging: {str(e)}")
|
||||
|
||||
|
||||
# Debug logging to see what parameters are received
|
||||
logger.info(f"MethodAi.process received parameters: {parameters}")
|
||||
|
|
|
|||
369
test4_method_ai_operations.py
Normal file
369
test4_method_ai_operations.py
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for methodAi operations.
|
||||
Tests all OperationType's with various prompts through the workflow action interface.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List
|
||||
|
||||
# Add the gateway to path
|
||||
sys.path.append(os.path.dirname(__file__))
|
||||
|
||||
from modules.datamodels.datamodelAi import OperationTypeEnum
|
||||
from modules.datamodels.datamodelChat import ChatWorkflow, ChatDocument
|
||||
from modules.datamodels.datamodelUam import User
|
||||
|
||||
|
||||
class MethodAiOperationsTester:
    """Test all operation types through methodAi.process() action.

    Integration-test harness: builds a real services stack as the root user,
    persists a throwaway workflow, then drives MethodAi.process() with one
    canned prompt per OperationTypeEnum and records/saves the outcomes.
    """

    def __init__(self):
        # Use root user for testing (has full access to everything)
        from modules.interfaces.interfaceDbAppObjects import getRootInterface
        rootInterface = getRootInterface()
        self.testUser = rootInterface.currentUser

        # Populated later by initialize(); collected results accumulate here.
        self.services = None
        self.methodAi = None
        self.testResults = []

        # Create logs directory if it doesn't exist
        self.logsDir = os.path.join(os.path.dirname(__file__), "..", "local", "logs")
        os.makedirs(self.logsDir, exist_ok=True)

        # Create modeltest subdirectory
        self.modelTestDir = os.path.join(self.logsDir, "modeltest")
        os.makedirs(self.modelTestDir, exist_ok=True)

        # Test prompts for each operation type.
        # Each entry maps an OperationTypeEnum to the parameters passed to
        # methodAi.process(): the prompt, the expected result type, and an
        # optional document list (file reference or URL).
        self.testPrompts = {
            OperationTypeEnum.PLAN: {
                "aiPrompt": "Create a 5-step plan to organize a project meeting and include the manual for the project management office.",
                "resultType": "json"
            },
            OperationTypeEnum.DATA_ANALYSE: {
                "aiPrompt": "Analyze the following text and extract the main topics and key points: 'Machine learning is transforming healthcare by enabling early disease detection through pattern recognition in medical images.'",
                "resultType": "json"
            },
            OperationTypeEnum.DATA_GENERATE: {
                "aiPrompt": "Generate the first 9000 prime numbers.",
                "resultType": "txt"
            },
            OperationTypeEnum.DATA_EXTRACT: {
                "aiPrompt": "Extract all email addresses and phone numbers from the following text: 'Contact us at support@example.com or call 123-456-7890. For sales, email sales@example.com or call 987-654-3210.'",
                "resultType": "json"
            },
            OperationTypeEnum.IMAGE_ANALYSE: {
                "aiPrompt": "Analyze this image and describe what you see, including any text or numbers visible.",
                "resultType": "json",
                # Only attach the sample image when it actually exists on disk.
                "documentList": ["_testdata_photo_2025-06-03_13-05-52.jpg"] if os.path.exists(os.path.join(self.logsDir, "_testdata_photo_2025-06-03_13-05-52.jpg")) else []
            },
            OperationTypeEnum.IMAGE_GENERATE: {
                "aiPrompt": "A beautiful sunset over the ocean with purple and orange hues",
                "resultType": "png"
            },
            OperationTypeEnum.WEB_SEARCH: {
                "aiPrompt": "Find recent articles about ValueOn AG in Switzeerland in 2025",
                "resultType": "json"
            },
            OperationTypeEnum.WEB_CRAWL: {
                "aiPrompt": "Extract who works in this company",
                "resultType": "json",
                "documentList": ["https://www.valueon.com"]
            }
        }

    async def initialize(self):
        """Initialize services and methodAi.

        Creates the services stack for the root user, persists a fresh test
        workflow in the database (required for access control), then builds
        MethodAi on top of the prepared services.
        """
        print("🔧 Initializing services...")

        # Set logging level to DEBUG to see debug messages
        import logging
        logging.getLogger().setLevel(logging.DEBUG)

        # Import and initialize services - use the same approach as routeChatPlayground
        import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects
        interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)

        # Import and initialize services
        from modules.features.chatPlayground.mainChatPlayground import getServices

        # Get services first
        self.services = getServices(self.testUser, None)

        # Now create AND SAVE workflow in database using the interface
        import uuid
        import time
        currentTimestamp = time.time()

        testWorkflow = ChatWorkflow(
            id=str(uuid.uuid4()),
            name="Test Workflow",
            status="running",
            startedAt=currentTimestamp,
            lastActivity=currentTimestamp,
            currentRound=1,
            currentTask=0,
            currentAction=0,
            totalTasks=0,
            totalActions=0,
            mandateId=self.testUser.mandateId,
            messageIds=[],
            workflowMode="React",
            maxSteps=5
        )

        # SAVE workflow to database so it exists for access control
        # Convert ChatWorkflow to dict for createWorkflow
        workflowDict = testWorkflow.model_dump()
        interfaceDbChat.createWorkflow(workflowDict)

        # Set the workflow in services
        self.services.currentWorkflow = testWorkflow

        # Debug: Print workflow status
        print(f"Debug: services.currentWorkflow is set: {hasattr(self.services, 'currentWorkflow') and self.services.currentWorkflow is not None}")
        if self.services.currentWorkflow:
            print(f"Debug: Workflow ID: {self.services.currentWorkflow.id}")

        # Import and initialize methodAi AFTER setting workflow
        from modules.workflows.methods.methodAi import MethodAi
        self.methodAi = MethodAi(self.services)

        # Verify methodAi has access to the workflow
        if hasattr(self.methodAi, 'services'):
            print(f"Debug: methodAi.services.currentWorkflow is set: {hasattr(self.methodAi.services, 'currentWorkflow') and self.methodAi.services.currentWorkflow is not None}")

        print("✅ Services initialized")
        print(f"📁 Results will be saved to: {self.modelTestDir}")

    async def testOperation(self, operationType: OperationTypeEnum) -> Dict[str, Any]:
        """Test a specific operation type.

        Runs methodAi.process() with the canned prompt for *operationType*,
        times it, inspects returned documents, and appends a result record
        to self.testResults.

        Args:
            operationType: The operation type to exercise.

        Returns:
            Dict describing the outcome (status, timing, document info, error).
        """
        print(f"\n{'='*80}")
        print(f"TESTING OPERATION: {operationType.value}")
        print(f"{'='*80}")

        startTime = asyncio.get_event_loop().time()

        # Get test prompt for this operation
        testConfig = self.testPrompts.get(operationType, {})

        if not testConfig:
            result = {
                "operationType": operationType.value,
                "status": "ERROR",
                "error": "No test configuration found for this operation type",
                "processingTime": 0.0
            }
            self.testResults.append(result)
            return result

        print(f"Prompt: {testConfig.get('aiPrompt', 'N/A')}")
        print(f"Result Type: {testConfig.get('resultType', 'txt')}")

        try:
            # Prepare parameters
            parameters = {
                "aiPrompt": testConfig.get("aiPrompt"),
                "resultType": testConfig.get("resultType", "txt")
            }

            # Add document list if provided
            if "documentList" in testConfig and testConfig["documentList"]:
                parameters["documentList"] = testConfig["documentList"]

            # Ensure workflow is still set in both self.services AND methodAi.services
            # (workarounds for the workflow occasionally being cleared between runs)
            if not self.services.currentWorkflow or (hasattr(self, 'methodAi') and hasattr(self.methodAi, 'services') and not self.methodAi.services.currentWorkflow):
                print(f"⚠️ Warning: Workflow is None, trying to re-set it...")
                import time
                import uuid
                currentTimestamp = time.time()
                testWorkflow = ChatWorkflow(
                    id=str(uuid.uuid4()),
                    name="Test Workflow",
                    status="running",
                    startedAt=currentTimestamp,
                    lastActivity=currentTimestamp,
                    currentRound=1,
                    currentTask=0,
                    currentAction=0,
                    totalTasks=0,
                    totalActions=0,
                    mandateId="test_mandate",
                    messageIds=[],
                    workflowMode="React",
                    maxSteps=5
                )
                self.services.currentWorkflow = testWorkflow
                # Also set in methodAi.services if it exists
                if hasattr(self, 'methodAi') and hasattr(self.methodAi, 'services'):
                    self.methodAi.services.currentWorkflow = testWorkflow

            # Call methodAi.process()
            print(f"Calling methodAi.process()...")
            print(f"Debug: Current workflow ID before call: {self.services.currentWorkflow.id if self.services.currentWorkflow else 'None'}")
            print(f"Debug: methodAi.services.currentWorkflow: {self.methodAi.services.currentWorkflow.id if hasattr(self.methodAi, 'services') and self.methodAi.services.currentWorkflow else 'None/NotSet'}")
            print(f"Debug: Is same services object? {self.services is self.methodAi.services}")
            print(f"Debug: services id: {id(self.services)}")
            print(f"Debug: methodAi.services id: {id(self.methodAi.services)}")

            # Final safety check: ensure methodAi.services has the workflow
            if hasattr(self.methodAi, 'services') and not self.methodAi.services.currentWorkflow:
                print(f"⚠️ Fixing: Setting workflow in methodAi.services...")
                self.methodAi.services.currentWorkflow = self.services.currentWorkflow

            actionResult = await self.methodAi.process(parameters)

            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            # Analyze result
            result = {
                "operationType": operationType.value,
                "status": "SUCCESS" if actionResult.success else "ERROR",
                "processingTime": round(processingTime, 2),
                "hasDocuments": len(actionResult.documents) > 0 if actionResult.documents else False,
                "documentCount": len(actionResult.documents) if actionResult.documents else 0,
                "error": actionResult.error if not actionResult.success else None
            }

            # Extract document information (only the first document is inspected)
            if actionResult.documents:
                doc = actionResult.documents[0]
                result["documentName"] = doc.documentName
                result["mimeType"] = doc.mimeType
                result["dataSize"] = len(doc.documentData) if doc.documentData else 0
                result["dataPreview"] = str(doc.documentData)[:200] + "..." if len(str(doc.documentData)) > 200 else str(doc.documentData)

            print(f"✅ Status: {result['status']}")
            print(f"⏱️ Processing time: {result['processingTime']}s")
            print(f"📄 Documents: {result.get('documentCount', 0)}")

            if actionResult.success:
                if result.get('documentName'):
                    print(f"📄 Saved: {result['documentName']}")
                    print(f"📄 MIME type: {result.get('mimeType')}")
                    print(f"📄 Size: {result.get('dataSize')} bytes")

                # Try to decode if it's JSON
                if result.get('mimeType') == 'application/json':
                    try:
                        import json
                        jsonData = json.loads(actionResult.documents[0].documentData)
                        result["isValidJson"] = True
                        result["jsonKeys"] = list(jsonData.keys()) if isinstance(jsonData, dict) else "Not a dict"
                        print(f"✅ Valid JSON with keys: {result['jsonKeys']}")
                    except:
                        result["isValidJson"] = False
                        print(f"⚠️ Not valid JSON")
            else:
                print(f"❌ Error: {result.get('error')}")

            self.testResults.append(result)
            return result

        except Exception as e:
            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            result = {
                "operationType": operationType.value,
                "status": "EXCEPTION",
                "processingTime": round(processingTime, 2),
                "error": str(e),
                "hasDocuments": False
            }

            print(f"💥 EXCEPTION: {str(e)}")
            self.testResults.append(result)
            return result

    async def testAllOperations(self):
        """Test all operation types.

        NOTE(review): currently restricted to DATA_GENERATE only — the other
        operation types in self.testPrompts are not exercised here.
        """
        print(f"\n{'='*80}")
        print("STARTING METHODAI OPERATIONS TESTS - DATA_GENERATE ONLY")
        print(f"{'='*80}")
        print("Testing DATA_GENERATE operation type...")

        # Test only DATA_GENERATE
        await self.testOperation(OperationTypeEnum.DATA_GENERATE)
        print(f"\n{'─'*80}")

        # Print summary
        self.printSummary()

    def printSummary(self):
        """Print test summary and persist all results as a timestamped JSON file."""
        print(f"\n{'='*80}")
        print("TEST SUMMARY")
        print(f"{'='*80}")

        # Bucket results by outcome status.
        successfulTests = [r for r in self.testResults if r["status"] == "SUCCESS"]
        failedTests = [r for r in self.testResults if r["status"] == "ERROR"]
        exceptionTests = [r for r in self.testResults if r["status"] == "EXCEPTION"]

        print(f"\nTotal tests: {len(self.testResults)}")
        print(f"✅ Successful: {len(successfulTests)}")
        print(f"❌ Failed: {len(failedTests)}")
        print(f"💥 Exceptions: {len(exceptionTests)}")

        if successfulTests:
            print(f"\n{'─'*80}")
            print("SUCCESSFUL TESTS")
            print(f"{'─'*80}")
            for result in successfulTests:
                print(f"✅ {result['operationType']}: {result['processingTime']}s")

        if failedTests:
            print(f"\n{'─'*80}")
            print("FAILED TESTS")
            print(f"{'─'*80}")
            for result in failedTests:
                print(f"❌ {result['operationType']}: {result.get('error', 'Unknown error')}")

        if exceptionTests:
            print(f"\n{'─'*80}")
            print("EXCEPTIONS")
            print(f"{'─'*80}")
            for result in exceptionTests:
                print(f"💥 {result['operationType']}: {result.get('error', 'Unknown error')}")

        # Save results
        import json
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        resultsFile = os.path.join(self.modelTestDir, f"method_ai_operations_test_{timestamp}.json")

        with open(resultsFile, 'w', encoding='utf-8') as f:
            json.dump({
                "timestamp": timestamp,
                "summary": {
                    "total": len(self.testResults),
                    "successful": len(successfulTests),
                    "failed": len(failedTests),
                    "exceptions": len(exceptionTests)
                },
                "results": self.testResults
            }, f, indent=2, ensure_ascii=False)

        print(f"\n📄 Results saved to: {resultsFile}")
||||
async def main():
    """Run methodAI operations tests: build the harness, initialize, execute."""
    tester = MethodAiOperationsTester()
    await tester.initialize()
    await tester.testAllOperations()

    separator = '=' * 80
    print(f"\n{separator}")
    print("TESTING COMPLETED")
    print(f"{separator}")
|
||||
|
||||
# Script entry point: run the async test driver.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
|
|
@ -1,107 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to demonstrate the new operation type rating system.
|
||||
This shows how models are now sorted by their capability ratings for specific operation types.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from modules.datamodels.datamodelAi import OperationTypeEnum, createOperationTypeRatings, AiCallOptions, PriorityEnum, ProcessingModeEnum
|
||||
from modules.aicore.aicorePluginPerplexity import AiPerplexity
|
||||
from modules.aicore.aicorePluginTavily import ConnectorWeb
|
||||
from modules.aicore.aicorePluginAnthropic import AiAnthropic
|
||||
from modules.aicore.aicorePluginOpenai import AiOpenai
|
||||
from modules.aicore.aicorePluginInternal import AiInternal
|
||||
from modules.aicore.aicoreModelSelector import ModelSelector
|
||||
|
||||
def testOperationTypeRatings():
    """Test the new operation type rating system.

    Collects models from every connector, asks the ModelSelector for a
    rating-sorted failover list per operation type, and prints the top
    candidates with their ratings and cost. Also exercises the
    createOperationTypeRatings helper. Output-only; nothing is asserted.
    """
    print("🧪 Testing Operation Type Rating System")
    print("=" * 50)

    # Initialize connectors
    perplexity = AiPerplexity()
    tavily = ConnectorWeb()
    anthropic = AiAnthropic()
    openai = AiOpenai()
    internal = AiInternal()
    modelSelector = ModelSelector()

    # Get all models
    allModels = (perplexity.getModels() + tavily.getModels() +
                 anthropic.getModels() + openai.getModels() + internal.getModels())

    print(f"📊 Total models available: {len(allModels)}")
    print()

    # Test different operation types
    testCases = [
        (OperationTypeEnum.WEB_RESEARCH, "Web Research"),
        (OperationTypeEnum.WEB_NEWS, "Web News"),
        (OperationTypeEnum.WEB_QUESTIONS, "Web Questions"),
        (OperationTypeEnum.WEB_SEARCH, "Web Search"),
        (OperationTypeEnum.DATA_ANALYSE, "Data Analysis tasks"),
        (OperationTypeEnum.DATA_GENERATE, "Data Generation tasks"),
        (OperationTypeEnum.DATA_EXTRACT, "Data Extraction tasks"),
        (OperationTypeEnum.PLAN, "Planning tasks")
    ]

    for operationType, description in testCases:
        print(f"🎯 Testing: {description} ({operationType.value})")
        print("-" * 40)

        # Create AI call options
        options = AiCallOptions(
            operationType=operationType,
            priority=PriorityEnum.BALANCED,
            processingMode=ProcessingModeEnum.BASIC
        )

        # Get failover model list (sorted by rating)
        failoverModels = modelSelector.getFailoverModelList(
            prompt="Test prompt",
            context="Test context",
            options=options,
            availableModels=allModels
        )

        if failoverModels:
            print(f"✅ Found {len(failoverModels)} suitable models:")
            for i, model in enumerate(failoverModels[:5]):  # Show top 5
                # Get the rating for this operation type (0 when the model
                # declares no rating for it)
                rating = 0
                for ot_rating in model.operationTypes:
                    if ot_rating.operationType == operationType:
                        rating = ot_rating.rating
                        break

                print(f" {i+1}. {model.displayName}")
                print(f" Rating: {rating}/10 | Speed: {model.speedRating}/10 | Quality: {model.qualityRating}/10")
                print(f" Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
        else:
            print("❌ No suitable models found")

        print()

    # Test the helper function
    print("🔧 Testing Helper Function")
    print("-" * 30)

    # Create operation type ratings using the helper
    ratings = createOperationTypeRatings(
        (OperationTypeEnum.WEB_RESEARCH, 10),
        (OperationTypeEnum.WEB_NEWS, 8),
        (OperationTypeEnum.DATA_ANALYSE, 6)
    )

    print("Created ratings:")
    for rating in ratings:
        print(f" {rating.operationType.value}: {rating.rating}/10")

    print()
    print("✅ All tests completed successfully!")
|
||||
# Script entry point: run the rating-system smoke test.
if __name__ == "__main__":
    testOperationTypeRatings()
|
||||
Loading…
Reference in a new issue