streamlined extraction and generation prompts
This commit is contained in:
parent
ffdaf2a326
commit
be2934d54a
18 changed files with 1077 additions and 970 deletions
254
function_call_diagram.md
Normal file
254
function_call_diagram.md
Normal file
|
|
@ -0,0 +1,254 @@
|
||||||
|
# Complete Function Call Diagram
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
graph TB
|
||||||
|
subgraph AI_Service["AI Service Modules"]
|
||||||
|
MA[mainServiceAi<br/>AiService]
|
||||||
|
SC[subCoreAi<br/>SubCoreAi]
|
||||||
|
SDG[subDocumentGeneration<br/>SubDocumentGeneration]
|
||||||
|
SDP[subDocumentProcessing<br/>SubDocumentProcessing]
|
||||||
|
SU[subSharedAiUtils<br/>Utilities]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph EXT_Service["Extraction Service Modules"]
|
||||||
|
MSE[mainServiceExtraction<br/>ExtractionService]
|
||||||
|
SPE[subPromptBuilderExtraction<br/>buildExtractionPrompt]
|
||||||
|
SP[subPipeline<br/>runExtraction]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph GEN_Service["Generation Service Modules"]
|
||||||
|
MSG[mainServiceGeneration<br/>GenerationService]
|
||||||
|
SPG[subPromptBuilderGeneration<br/>buildGenerationPrompt]
|
||||||
|
SJ[subJsonSchema<br/>Schemas]
|
||||||
|
end
|
||||||
|
|
||||||
|
%% subCoreAi calls
|
||||||
|
SC -->|_buildGenerationPrompt| SPG
|
||||||
|
SC -->|callAiDocuments| SDP
|
||||||
|
SC -->|sanitizePromptContent| SU
|
||||||
|
|
||||||
|
%% subDocumentGeneration calls
|
||||||
|
SDG -->|processDocumentsWithContinuation| SDP
|
||||||
|
SDG -->|buildGenerationPrompt| SPG
|
||||||
|
SDG -->|renderReport| MSG
|
||||||
|
SDG -->|sanitizePromptContent| SU
|
||||||
|
|
||||||
|
%% subDocumentProcessing calls
|
||||||
|
SDP -->|extractContent 3x| MSE
|
||||||
|
SDP -->|_applyMerging 3x| SP
|
||||||
|
SDP -->|readImage| SC
|
||||||
|
|
||||||
|
%% mainServiceExtraction calls
|
||||||
|
MSE -->|runExtraction| SP
|
||||||
|
|
||||||
|
%% subPromptBuilderExtraction calls
|
||||||
|
SPE -->|get_document_subJsonSchema| SJ
|
||||||
|
SPE -->|sanitizePromptContent| SU
|
||||||
|
|
||||||
|
%% mainServiceGeneration calls utilities
|
||||||
|
MSG -->|utility functions| SU
|
||||||
|
|
||||||
|
%% subCoreAi detailed calls
|
||||||
|
SC -.->|aiObjects.call| AI_Interface["AiObjects Interface"]
|
||||||
|
SDP -.->|aiObjects.call| AI_Interface
|
||||||
|
|
||||||
|
%% Style
|
||||||
|
classDef aiClass fill:#e1f5ff,stroke:#0066cc,stroke-width:2px
|
||||||
|
classDef extClass fill:#fff5e1,stroke:#cc6600,stroke-width:2px
|
||||||
|
classDef genClass fill:#e1ffe1,stroke:#006600,stroke-width:2px
|
||||||
|
classDef utilClass fill:#f0f0f0,stroke:#666,stroke-width:2px
|
||||||
|
classDef interfaceClass fill:#ffe1f5,stroke:#cc0066,stroke-width:2px
|
||||||
|
|
||||||
|
class MA,SC,SDG,SDP,SU aiClass
|
||||||
|
class MSE,SPE,SP extClass
|
||||||
|
class MSG,SPG,SJ genClass
|
||||||
|
class AI_Interface interfaceClass
|
||||||
|
```
|
||||||
|
|
||||||
|
## Detailed Call Map with Function Names
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
graph LR
|
||||||
|
%% Nodes
|
||||||
|
SC[subCoreAi]
|
||||||
|
SDG[subDocumentGeneration]
|
||||||
|
SDP[subDocumentProcessing]
|
||||||
|
SU[subSharedAiUtils]
|
||||||
|
SPE[subPromptBuilderExtraction]
|
||||||
|
SPG[subPromptBuilderGeneration]
|
||||||
|
MSE[mainServiceExtraction]
|
||||||
|
MSG[mainServiceGeneration]
|
||||||
|
SP[subPipeline]
|
||||||
|
SJ[subJsonSchema]
|
||||||
|
|
||||||
|
%% subCoreAi function calls
|
||||||
|
SC -->|"_buildGenerationPrompt()<br/>calls"| SPG
|
||||||
|
SC -->|"callAiDocuments()<br/>calls callAiText()"| SDP
|
||||||
|
SC -->|"sanitizePromptContent()"| SU
|
||||||
|
|
||||||
|
%% subDocumentGeneration function calls
|
||||||
|
SDG -->|"_processDocumentsUnified()<br/>calls"| SDP
|
||||||
|
SDG -->|"_processDocument()<br/>calls"| SPG
|
||||||
|
SDG -->|"_processDocument()<br/>calls"| MSG
|
||||||
|
SDG -->|"sanitizePromptContent()"| SU
|
||||||
|
|
||||||
|
%% subDocumentProcessing function calls
|
||||||
|
SDP -->|"extractContent()"| MSE
|
||||||
|
SDP -->|"_mergePartResults()<br/>_convertPartResultsToJson()<br/>_mergeChunkResultsJson()<br/>all call"| SP
|
||||||
|
SDP -->|"_processChunksWithMapping()<br/>calls readImage()"| SC
|
||||||
|
|
||||||
|
%% Extraction service calls
|
||||||
|
MSE -->|"extractContent()<br/>calls"| SP
|
||||||
|
|
||||||
|
%% Prompt builder calls
|
||||||
|
SPE -->|"get_document_subJsonSchema()"| SJ
|
||||||
|
SPE -->|"sanitizePromptContent()"| SU
|
||||||
|
|
||||||
|
%% Generation service calls
|
||||||
|
MSG -->|"uses utility functions"| SU
|
||||||
|
|
||||||
|
classDef aiModule fill:#e1f5ff,stroke:#0066cc
|
||||||
|
classDef extModule fill:#fff5e1,stroke:#cc6600
|
||||||
|
classDef genModule fill:#e1ffe1,stroke:#006600
|
||||||
|
|
||||||
|
class SC,SDG,SDP,SU aiModule
|
||||||
|
class MSE,SPE,SP extModule
|
||||||
|
class MSG,SPG,SJ genModule
|
||||||
|
```
|
||||||
|
|
||||||
|
## Call Flow by Module
|
||||||
|
|
||||||
|
### 1. subCoreAi (SubCoreAi Class)
|
||||||
|
**Calls Out:**
|
||||||
|
- `buildGenerationPrompt()` → subPromptBuilderGeneration (line 363-366)
|
||||||
|
- `callAiText()` → subDocumentProcessing (line 453)
|
||||||
|
- `renderReport()` → mainServiceGeneration (line 478-482)
|
||||||
|
- `sanitizePromptContent()` → subSharedAiUtils (line 61, via services.ai)
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- mainServiceAi (creates instance)
|
||||||
|
- subDocumentProcessing._processChunksWithMapping (calls readImage at line 672-675)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. subDocumentGeneration (SubDocumentGeneration Class)
|
||||||
|
**Calls Out:**
|
||||||
|
- `processDocumentsWithContinuation()` → subDocumentProcessing (line 110)
|
||||||
|
- `buildGenerationPrompt()` → subPromptBuilderGeneration (line 330)
|
||||||
|
- `renderReport()` → mainServiceGeneration (line 392)
|
||||||
|
- `sanitizePromptContent()` → subSharedAiUtils (line 466)
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- mainServiceAi (creates instance)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. subDocumentProcessing (SubDocumentProcessing Class)
|
||||||
|
**Calls Out:**
|
||||||
|
- `extractContent()` → mainServiceExtraction (lines 78, 131, 220)
|
||||||
|
- `_applyMerging()` → subPipeline (lines 1044, 1095, 1232, 1293, 1345)
|
||||||
|
- `readImage()` → subCoreAi (line 672-675)
|
||||||
|
- `sanitizePromptContent()` → subSharedAiUtils (via self.services.ai)
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- mainServiceAi (creates instance)
|
||||||
|
- subCoreAi.callAiDocuments (calls callAiText at line 453)
|
||||||
|
- subDocumentGeneration._processDocumentsUnified (calls processDocumentsWithContinuation)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. mainServiceExtraction (ExtractionService Class)
|
||||||
|
**Calls Out:**
|
||||||
|
- `runExtraction()` → subPipeline (line 61)
|
||||||
|
- Uses ExtractorRegistry from subRegistry
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- subDocumentProcessing.extractContent (3 times)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5. subPromptBuilderExtraction
|
||||||
|
**Calls Out:**
|
||||||
|
- `get_document_subJsonSchema()` → subJsonSchema (line 172)
|
||||||
|
- `sanitizePromptContent()` → subSharedAiUtils (via services.ai)
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- mainServiceGeneration (indirectly via getAdaptiveExtractionPrompt)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 6. mainServiceGeneration (GenerationService Class)
|
||||||
|
**Calls Out:**
|
||||||
|
- `get_renderer()` → renderers.registry (line 501)
|
||||||
|
- Utility functions from subDocumentUtility
|
||||||
|
- Uses modelRegistry (external)
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- subCoreAi.callAiDocuments (calls renderReport)
|
||||||
|
- subDocumentGeneration._processDocument (calls renderReport)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 7. subPromptBuilderGeneration
|
||||||
|
**Calls Out:**
|
||||||
|
- Returns prompt template string
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- subCoreAi._buildGenerationPrompt (line 363-366)
|
||||||
|
- subDocumentGeneration._processDocument (line 330)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 8. subPipeline
|
||||||
|
**Calls Out:**
|
||||||
|
- Creates IntelligentTokenAwareMerger from subMerger (line 96)
|
||||||
|
- Uses mergers from merging submodules
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- mainServiceExtraction.extractContent (calls runExtraction)
|
||||||
|
- subDocumentProcessing (calls _applyMerging 5 times)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 9. subSharedAiUtils
|
||||||
|
**Functions Provided:**
|
||||||
|
- `buildPromptWithPlaceholders()`
|
||||||
|
- `sanitizePromptContent()`
|
||||||
|
- `extractTextFromContentParts()`
|
||||||
|
- `reduceText()`
|
||||||
|
- `determineCallType()`
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- subCoreAi (imports and calls functions)
|
||||||
|
- subDocumentGeneration (via services.ai.sanitizePromptContent)
|
||||||
|
- subPromptBuilderExtraction (via services.ai.sanitizePromptContent)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 10. subJsonSchema
|
||||||
|
**Functions Provided:**
|
||||||
|
- `get_document_subJsonSchema()`
|
||||||
|
- `get_multi_document_subJsonSchema()`
|
||||||
|
|
||||||
|
**Called By:**
|
||||||
|
- subPromptBuilderExtraction.buildExtractionPrompt (line 172)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Circular Dependencies
|
||||||
|
|
||||||
|
**AI Service Loop:**
|
||||||
|
1. subDocumentProcessing → subCoreAi.readImage() (for image processing)
|
||||||
|
2. subDocumentProcessing → mainServiceExtraction (for extraction)
|
||||||
|
3. mainServiceExtraction → subPipeline (for processing)
|
||||||
|
4. subPipeline creates IntelligentTokenAwareMerger
|
||||||
|
|
||||||
|
**Flow:**
|
||||||
|
```
|
||||||
|
subDocumentProcessing.extractContent()
|
||||||
|
→ mainServiceExtraction.extractContent()
|
||||||
|
→ subPipeline.runExtraction()
|
||||||
|
→ returns ContentExtracted
|
||||||
|
→ processed by subDocumentProcessing
|
||||||
|
→ calls subPipeline._applyMerging()
|
||||||
|
```
|
||||||
|
|
@ -127,7 +127,7 @@ class AiService:
|
||||||
"""Planning AI call for task planning, action planning, action selection, etc."""
|
"""Planning AI call for task planning, action planning, action selection, etc."""
|
||||||
await self._ensureAiObjectsInitialized()
|
await self._ensureAiObjectsInitialized()
|
||||||
# Always use "json" for planning calls since they return JSON
|
# Always use "json" for planning calls since they return JSON
|
||||||
return await self.coreAi.callAiPlanning(prompt, placeholders, "json")
|
return await self.coreAi.callAiPlanning(prompt, placeholders)
|
||||||
|
|
||||||
async def callAiDocuments(
|
async def callAiDocuments(
|
||||||
self,
|
self,
|
||||||
|
|
|
||||||
|
|
@ -12,37 +12,26 @@ from modules.services.serviceAi.subSharedAiUtils import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Generic continuation instruction for all prompts with JSON responses
|
||||||
|
# Used by _callAiWithLooping() to replace LOOP_INSTRUCTION placeholder
|
||||||
|
LOOP_INSTRUCTION_TEXT = """
|
||||||
|
MANDATORY RULE:
|
||||||
|
Return ONLY raw JSON (no ```json blocks, no text before/after)
|
||||||
|
|
||||||
|
CONTINUATION REQUIREMENT:
|
||||||
|
Your response must be a valid JSON object with a "continuation" field.
|
||||||
|
|
||||||
|
- If you can complete the FULL request: Set {"continuation": null}
|
||||||
|
- If you MUST stop early (due to token limits): Set {"continuation": {"last_data_items": "brief summary of what was delivered for context", "next_instruction": "what to deliver next to complete the request"}}
|
||||||
|
|
||||||
|
The "continuation" field controls whether this AI call continues in a loop or stops.
|
||||||
|
Refer to the json template below to see where to set the "continuation" information.
|
||||||
|
"""
|
||||||
|
|
||||||
# Rebuild the model to resolve forward references
|
# Rebuild the model to resolve forward references
|
||||||
AiCallRequest.model_rebuild()
|
AiCallRequest.model_rebuild()
|
||||||
|
|
||||||
|
|
||||||
# Loop instruction texts for different formats
|
|
||||||
LoopInstructionTexts = {
|
|
||||||
"json": """
|
|
||||||
CRITICAL LIMITS: <TOKEN_LIMIT> tokens total (reserve 20% for JSON structure)
|
|
||||||
|
|
||||||
MANDATORY RULES:
|
|
||||||
1. STOP at approximately 80% of limit to ensure valid JSON completion
|
|
||||||
2. Return ONLY raw JSON (no ```json blocks, no text before/after)
|
|
||||||
|
|
||||||
CONTINUATION REQUIREMENTS:
|
|
||||||
Refer to the json object below where to set the "continuation" information:
|
|
||||||
- If you can complete the full request: {"continuation": null}
|
|
||||||
- If you must stop early: {
|
|
||||||
"continuation": {
|
|
||||||
"last_data_items": "delivered last data for context (copy them)",
|
|
||||||
"next_instruction": "instruction for next data to deliver"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
BE CONSERVATIVE: Stop generating content when you reach approximately 3200-3500 characters to ensure JSON completion.
|
|
||||||
""",
|
|
||||||
# Add more formats here as needed
|
|
||||||
# "xml": "...",
|
|
||||||
# "text": "...",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class SubCoreAi:
|
class SubCoreAi:
|
||||||
"""Core AI operations including image analysis, text generation, and planning calls."""
|
"""Core AI operations including image analysis, text generation, and planning calls."""
|
||||||
|
|
||||||
|
|
@ -142,8 +131,7 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
self,
|
self,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
options: AiCallOptions,
|
options: AiCallOptions,
|
||||||
debugPrefix: str = "ai_call",
|
debugPrefix: str = "ai_call"
|
||||||
loopInstructionFormat: str = None
|
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Shared core function for AI calls with looping system.
|
Shared core function for AI calls with looping system.
|
||||||
|
|
@ -154,7 +142,6 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
prompt: The prompt to send to AI
|
prompt: The prompt to send to AI
|
||||||
options: AI call configuration options
|
options: AI call configuration options
|
||||||
debugPrefix: Prefix for debug file names
|
debugPrefix: Prefix for debug file names
|
||||||
loopInstructionFormat: If provided, replaces LOOP_INSTRUCTION placeholder and includes in continuation prompts
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Complete AI response after all iterations
|
Complete AI response after all iterations
|
||||||
|
|
@ -162,18 +149,12 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
max_iterations = 100 # Prevent infinite loops
|
max_iterations = 100 # Prevent infinite loops
|
||||||
iteration = 0
|
iteration = 0
|
||||||
accumulatedContent = []
|
accumulatedContent = []
|
||||||
|
lastContinuationData = None
|
||||||
|
|
||||||
logger.debug(f"Starting AI call with looping (debug prefix: {debugPrefix}, loopInstructionFormat: {loopInstructionFormat is not None})")
|
logger.debug(f"Starting AI call with looping (debug prefix: {debugPrefix})")
|
||||||
|
|
||||||
|
# Use generic LOOP_INSTRUCTION_TEXT
|
||||||
# Determine loopInstruction based on loopInstructionFormat (before iterations)
|
loopInstruction = LOOP_INSTRUCTION_TEXT if ("LOOP_INSTRUCTION" in prompt) else ""
|
||||||
if not loopInstructionFormat:
|
|
||||||
loopInstruction = ""
|
|
||||||
elif loopInstructionFormat in LoopInstructionTexts:
|
|
||||||
loopInstruction = LoopInstructionTexts[loopInstructionFormat]
|
|
||||||
else:
|
|
||||||
logger.error(f"Unsupported loopInstructionFormat for prompt: {loopInstructionFormat}")
|
|
||||||
loopInstruction = ""
|
|
||||||
|
|
||||||
|
|
||||||
while iteration < max_iterations:
|
while iteration < max_iterations:
|
||||||
|
|
@ -182,18 +163,25 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
|
|
||||||
# Build iteration prompt
|
# Build iteration prompt
|
||||||
if iteration == 1:
|
if iteration == 1:
|
||||||
|
# First iteration - replace LOOP_INSTRUCTION with standardized instruction
|
||||||
if "LOOP_INSTRUCTION" in prompt:
|
if "LOOP_INSTRUCTION" in prompt:
|
||||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", loopInstruction)
|
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", loopInstruction)
|
||||||
else:
|
else:
|
||||||
iterationPrompt = prompt
|
iterationPrompt = prompt
|
||||||
elif loopInstruction and iteration > 1:
|
|
||||||
continuationContent = self._buildContinuationContent(accumulatedContent, iteration)
|
|
||||||
if "LOOP_INSTRUCTION" in prompt:
|
|
||||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationContent}\n\n{loopInstruction}")
|
|
||||||
else:
|
|
||||||
iterationPrompt = prompt
|
|
||||||
else:
|
else:
|
||||||
iterationPrompt = prompt
|
# Subsequent iterations - include continuation data if available
|
||||||
|
if lastContinuationData and isinstance(lastContinuationData, dict):
|
||||||
|
continuationPrompt = self._buildContinuationPrompt(lastContinuationData, iteration)
|
||||||
|
if "LOOP_INSTRUCTION" in prompt:
|
||||||
|
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationPrompt}\n\n{loopInstruction}")
|
||||||
|
else:
|
||||||
|
iterationPrompt = prompt
|
||||||
|
else:
|
||||||
|
# No continuation data - re-send original prompt
|
||||||
|
if "LOOP_INSTRUCTION" in prompt:
|
||||||
|
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", loopInstruction)
|
||||||
|
else:
|
||||||
|
iterationPrompt = prompt
|
||||||
|
|
||||||
# Make AI call
|
# Make AI call
|
||||||
try:
|
try:
|
||||||
|
|
@ -234,33 +222,35 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
logger.warning(f"Iteration {iteration}: Empty response, stopping")
|
logger.warning(f"Iteration {iteration}: Empty response, stopping")
|
||||||
break
|
break
|
||||||
|
|
||||||
# Check if this is a continuation response (only for supported formats)
|
accumulatedContent.append(result)
|
||||||
if loopInstructionFormat in LoopInstructionTexts:
|
|
||||||
|
# Check if this is a continuation response (only when LOOP_INSTRUCTION was used)
|
||||||
|
if loopInstruction:
|
||||||
try:
|
try:
|
||||||
# Extract JSON substring if wrapped (e.g., ```json ... ```)
|
# Extract JSON substring if wrapped (e.g., ```json ... ```)
|
||||||
extracted = self.services.utils.jsonExtractString(result)
|
extracted = self.services.utils.jsonExtractString(result)
|
||||||
# Try to parse as JSON to check for continuation attribute
|
|
||||||
parsed_result = json.loads(extracted)
|
parsed_result = json.loads(extracted)
|
||||||
if isinstance(parsed_result, dict) and parsed_result.get("continuation") is not None:
|
|
||||||
# This is a continuation response
|
if isinstance(parsed_result, dict):
|
||||||
accumulatedContent.append(result)
|
continuation = parsed_result.get("continuation")
|
||||||
logger.debug(f"Iteration {iteration}: Continuation detected in JSON, continuing...")
|
|
||||||
continue
|
if continuation is None:
|
||||||
else:
|
# Final response - break loop
|
||||||
# This is the final response (continuation is null or missing)
|
logger.debug(f"Iteration {iteration}: Final response received (continuation: null)")
|
||||||
accumulatedContent.append(result)
|
break
|
||||||
logger.debug(f"Iteration {iteration}: Final response received")
|
else:
|
||||||
break
|
# Continuation detected - extract data for next iteration
|
||||||
|
lastContinuationData = continuation if isinstance(continuation, dict) else None
|
||||||
|
logger.debug(f"Iteration {iteration}: Continuation detected, continuing...")
|
||||||
|
continue
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# Not JSON, treat as final response
|
# Not JSON, treat as final response
|
||||||
accumulatedContent.append(result)
|
logger.warning(f"Iteration {iteration}: Non-JSON response - treating as final")
|
||||||
logger.warning(f"Iteration {iteration}: Non-JSON response received")
|
|
||||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_error_non_json_response_iteration_{iteration}")
|
self.services.utils.writeDebugFile(result, f"{debugPrefix}_error_non_json_response_iteration_{iteration}")
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# This is the final response
|
# No loop instruction format - treat as final response
|
||||||
accumulatedContent.append(result)
|
logger.debug(f"Iteration {iteration}: Final response received (no loop format)")
|
||||||
logger.debug(f"Iteration {iteration}: Final response received")
|
|
||||||
break
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -279,51 +269,26 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
logger.info(f"AI call completed: {len(accumulatedContent)} parts from {iteration} iterations")
|
logger.info(f"AI call completed: {len(accumulatedContent)} parts from {iteration} iterations")
|
||||||
return final_result
|
return final_result
|
||||||
|
|
||||||
def _buildContinuationContent(
|
def _buildContinuationPrompt(
|
||||||
self,
|
self,
|
||||||
accumulatedContent: List[str],
|
continuationData: dict,
|
||||||
iteration: int
|
iteration: int
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Build continuation content for follow-up iterations.
|
Build standardized continuation prompt from continuation data dict.
|
||||||
|
This replaces the complex _buildContinuationContent method with a simpler approach.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
continuationData: Dictionary containing last_data_items and next_instruction
|
||||||
|
iteration: Current iteration number
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted continuation prompt string
|
||||||
"""
|
"""
|
||||||
# Extract continuation description from the last response
|
last_data_items = continuationData.get("last_data_items", "")
|
||||||
continuation_description = ""
|
next_instruction = continuationData.get("next_instruction", "")
|
||||||
if accumulatedContent:
|
|
||||||
try:
|
|
||||||
last_response = accumulatedContent[-1]
|
|
||||||
# Use the same JSON extraction logic as the main loop
|
|
||||||
extracted = self.services.utils.jsonExtractString(last_response)
|
|
||||||
parsed_response = json.loads(extracted)
|
|
||||||
if isinstance(parsed_response, dict):
|
|
||||||
# Check for continuation at root level or in metadata
|
|
||||||
continuation = parsed_response.get("continuation")
|
|
||||||
if continuation is None and "metadata" in parsed_response:
|
|
||||||
continuation = parsed_response["metadata"].get("continuation")
|
|
||||||
|
|
||||||
if continuation:
|
|
||||||
continuation_description = continuation
|
|
||||||
except (json.JSONDecodeError, KeyError, ValueError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Extract specific attributes from continuation object
|
continuation_prompt = f"""CONTINUATION REQUEST (Iteration {iteration}):
|
||||||
last_data_items = ""
|
|
||||||
next_instruction = ""
|
|
||||||
|
|
||||||
if continuation_description:
|
|
||||||
try:
|
|
||||||
if isinstance(continuation_description, str):
|
|
||||||
continuation_obj = json.loads(continuation_description)
|
|
||||||
else:
|
|
||||||
continuation_obj = continuation_description
|
|
||||||
|
|
||||||
if isinstance(continuation_obj, dict):
|
|
||||||
last_data_items = continuation_obj.get("last_data_items", "")
|
|
||||||
next_instruction = continuation_obj.get("next_instruction", "")
|
|
||||||
except (json.JSONDecodeError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
continuation_content = f"""CONTINUATION REQUEST (Iteration {iteration}):
|
|
||||||
You are continuing a previous response. DO NOT repeat any previous content.
|
You are continuing a previous response. DO NOT repeat any previous content.
|
||||||
|
|
||||||
{f"Already delivered data: {last_data_items}" if last_data_items else "No previous data specified"}
|
{f"Already delivered data: {last_data_items}" if last_data_items else "No previous data specified"}
|
||||||
|
|
@ -331,12 +296,10 @@ You are continuing a previous response. DO NOT repeat any previous content.
|
||||||
{f"Your task to deliver: {next_instruction}" if next_instruction else "No specific task provided"}
|
{f"Your task to deliver: {next_instruction}" if next_instruction else "No specific task provided"}
|
||||||
|
|
||||||
CRITICAL REQUIREMENTS:
|
CRITICAL REQUIREMENTS:
|
||||||
- Start from the exact point specified in continuation instructions
|
- Start from the exact point specified above
|
||||||
- DO NOT repeat any previous content
|
- DO NOT repeat any previous content"""
|
||||||
- BE CONSERVATIVE: Stop at approximately 3200-3500 characters to ensure JSON completion
|
|
||||||
- ALWAYS include continuation field - set to null if complete, or provide next instruction if incomplete
|
return continuation_prompt
|
||||||
"""
|
|
||||||
return continuation_content
|
|
||||||
|
|
||||||
def _mergeJsonContent(self, accumulatedContent: List[str]) -> str:
|
def _mergeJsonContent(self, accumulatedContent: List[str]) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -387,40 +350,12 @@ CRITICAL REQUIREMENTS:
|
||||||
logger.error(f"Error merging JSON content: {str(e)}")
|
logger.error(f"Error merging JSON content: {str(e)}")
|
||||||
return accumulatedContent[0] # Return first response on error
|
return accumulatedContent[0] # Return first response on error
|
||||||
|
|
||||||
async def _buildGenerationPrompt(
|
|
||||||
self,
|
|
||||||
prompt: str,
|
|
||||||
extracted_content: Optional[str],
|
|
||||||
outputFormat: str,
|
|
||||||
title: str
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Build generation prompt for document generation.
|
|
||||||
"""
|
|
||||||
from modules.services.serviceGeneration.subPromptBuilder import buildGenerationPrompt
|
|
||||||
|
|
||||||
# Build the generation prompt using the existing system
|
|
||||||
generation_prompt = await buildGenerationPrompt(
|
|
||||||
outputFormat=outputFormat,
|
|
||||||
userPrompt=prompt,
|
|
||||||
title=title
|
|
||||||
)
|
|
||||||
|
|
||||||
# If we have extracted content, prepend it to the prompt
|
|
||||||
if extracted_content:
|
|
||||||
generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
|
|
||||||
{extracted_content}
|
|
||||||
|
|
||||||
{generation_prompt}"""
|
|
||||||
|
|
||||||
return generation_prompt
|
|
||||||
|
|
||||||
# Planning AI Call
|
# Planning AI Call
|
||||||
async def callAiPlanning(
|
async def callAiPlanning(
|
||||||
self,
|
self,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
placeholders: Optional[List[PromptPlaceholder]] = None,
|
placeholders: Optional[List[PromptPlaceholder]] = None
|
||||||
loopInstructionFormat: Optional[str] = None
|
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Planning AI call for task planning, action planning, action selection, etc.
|
Planning AI call for task planning, action planning, action selection, etc.
|
||||||
|
|
@ -429,7 +364,6 @@ CRITICAL REQUIREMENTS:
|
||||||
Args:
|
Args:
|
||||||
prompt: The planning prompt
|
prompt: The planning prompt
|
||||||
placeholders: Optional list of placeholder replacements
|
placeholders: Optional list of placeholder replacements
|
||||||
loopInstructionFormat: Optional loop instruction format
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Planning JSON response
|
Planning JSON response
|
||||||
|
|
@ -452,7 +386,7 @@ CRITICAL REQUIREMENTS:
|
||||||
full_prompt = prompt
|
full_prompt = prompt
|
||||||
|
|
||||||
# Use shared core function with planning-specific debug prefix
|
# Use shared core function with planning-specific debug prefix
|
||||||
return await self._callAiWithLooping(full_prompt, options, "plan", loopInstructionFormat=loopInstructionFormat)
|
return await self._callAiWithLooping(full_prompt, options, "plan")
|
||||||
|
|
||||||
# Document Generation AI Call
|
# Document Generation AI Call
|
||||||
async def callAiDocuments(
|
async def callAiDocuments(
|
||||||
|
|
@ -461,8 +395,7 @@ CRITICAL REQUIREMENTS:
|
||||||
documents: Optional[List[ChatDocument]] = None,
|
documents: Optional[List[ChatDocument]] = None,
|
||||||
options: Optional[AiCallOptions] = None,
|
options: Optional[AiCallOptions] = None,
|
||||||
outputFormat: Optional[str] = None,
|
outputFormat: Optional[str] = None,
|
||||||
title: Optional[str] = None,
|
title: Optional[str] = None
|
||||||
loopInstructionFormat: Optional[str] = None
|
|
||||||
) -> Union[str, Dict[str, Any]]:
|
) -> Union[str, Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Document generation AI call for all non-planning calls.
|
Document generation AI call for all non-planning calls.
|
||||||
|
|
@ -494,8 +427,10 @@ CRITICAL REQUIREMENTS:
|
||||||
else:
|
else:
|
||||||
logger.debug("No documents provided - using direct generation")
|
logger.debug("No documents provided - using direct generation")
|
||||||
extracted_content = None
|
extracted_content = None
|
||||||
generation_prompt = await self._buildGenerationPrompt(prompt, extracted_content, outputFormat, title)
|
logger.debug(f"[DEBUG] title value: {title}, type: {type(title)}")
|
||||||
generated_json = await self._callAiWithLooping(generation_prompt, options, "document_generation", loopInstructionFormat=loopInstructionFormat)
|
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
|
||||||
|
generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content)
|
||||||
|
generated_json = await self._callAiWithLooping(generation_prompt, options, "document_generation")
|
||||||
|
|
||||||
# Parse the generated JSON (extract fenced/embedded JSON first)
|
# Parse the generated JSON (extract fenced/embedded JSON first)
|
||||||
try:
|
try:
|
||||||
|
|
@ -552,7 +487,7 @@ CRITICAL REQUIREMENTS:
|
||||||
result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options)
|
result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options)
|
||||||
else:
|
else:
|
||||||
# Use shared core function for direct text calls
|
# Use shared core function for direct text calls
|
||||||
result = await self._callAiWithLooping(prompt, options, "text", loopInstructionFormat=None)
|
result = await self._callAiWithLooping(prompt, options, "text")
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -48,11 +48,7 @@ class SubDocumentGeneration:
|
||||||
Dict with generated documents and metadata in unified structure
|
Dict with generated documents and metadata in unified structure
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# 1. Analyze prompt intent
|
# 1. Get unified extraction prompt
|
||||||
promptAnalysis = await self._analyzePromptIntent(prompt, self)
|
|
||||||
logger.info(f"Prompt analysis result: {promptAnalysis}")
|
|
||||||
|
|
||||||
# 2. Get unified extraction prompt
|
|
||||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||||
generationService = GenerationService(self.services)
|
generationService = GenerationService(self.services)
|
||||||
|
|
||||||
|
|
@ -60,17 +56,16 @@ class SubDocumentGeneration:
|
||||||
outputFormat=outputFormat,
|
outputFormat=outputFormat,
|
||||||
userPrompt=prompt,
|
userPrompt=prompt,
|
||||||
title=title,
|
title=title,
|
||||||
promptAnalysis=promptAnalysis,
|
|
||||||
aiService=self
|
aiService=self
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3. Process with unified pipeline (always multi-file approach)
|
# 2. Process with unified pipeline (always multi-file approach)
|
||||||
aiResponse = await self._processDocumentsUnified(
|
aiResponse = await self._processDocumentsUnified(
|
||||||
documents, extractionPrompt, options
|
documents, extractionPrompt, options
|
||||||
)
|
)
|
||||||
|
|
||||||
# 4. Return unified result structure
|
# 3. Return unified result structure
|
||||||
return await self._buildUnifiedResult(aiResponse, outputFormat, title, promptAnalysis)
|
return await self._buildUnifiedResult(aiResponse, outputFormat, title)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in unified document generation: {str(e)}")
|
logger.error(f"Error in unified document generation: {str(e)}")
|
||||||
|
|
@ -263,9 +258,8 @@ class SubDocumentGeneration:
|
||||||
self,
|
self,
|
||||||
aiResponse: Dict[str, Any],
|
aiResponse: Dict[str, Any],
|
||||||
outputFormat: str,
|
outputFormat: str,
|
||||||
title: str,
|
title: str
|
||||||
promptAnalysis: Dict[str, Any]
|
) -> Dict[str, Any]:
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
"""
|
||||||
Build unified result structure that always returns array-based format.
|
Build unified result structure that always returns array-based format.
|
||||||
Content is always a multi-document structure.
|
Content is always a multi-document structure.
|
||||||
|
|
@ -296,7 +290,6 @@ class SubDocumentGeneration:
|
||||||
"is_multi_file": len(generatedDocuments) > 1,
|
"is_multi_file": len(generatedDocuments) > 1,
|
||||||
"format": outputFormat,
|
"format": outputFormat,
|
||||||
"title": title,
|
"title": title,
|
||||||
"split_strategy": promptAnalysis.get("strategy", "single"),
|
|
||||||
"total_documents": len(generatedDocuments),
|
"total_documents": len(generatedDocuments),
|
||||||
"processed_documents": len(generatedDocuments)
|
"processed_documents": len(generatedDocuments)
|
||||||
}
|
}
|
||||||
|
|
@ -313,7 +306,7 @@ class SubDocumentGeneration:
|
||||||
outputFormat: str,
|
outputFormat: str,
|
||||||
title: str,
|
title: str,
|
||||||
documentIndex: int
|
documentIndex: int
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Process individual document with content enhancement and rendering.
|
Process individual document with content enhancement and rendering.
|
||||||
"""
|
"""
|
||||||
|
|
@ -326,12 +319,12 @@ class SubDocumentGeneration:
|
||||||
enhancedContent = docData # Default to original
|
enhancedContent = docData # Default to original
|
||||||
if docData.get("sections"):
|
if docData.get("sections"):
|
||||||
try:
|
try:
|
||||||
# Get generation prompt
|
# Get generation prompt directly
|
||||||
generationPrompt = await generationService.getGenerationPrompt(
|
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
|
||||||
|
generationPrompt = await buildGenerationPrompt(
|
||||||
outputFormat=outputFormat,
|
outputFormat=outputFormat,
|
||||||
userPrompt=title,
|
userPrompt=title,
|
||||||
title=docData.get("title", title),
|
title=docData.get("title", title)
|
||||||
aiService=self
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare the AI call
|
# Prepare the AI call
|
||||||
|
|
@ -454,57 +447,6 @@ class SubDocumentGeneration:
|
||||||
# Process documents with JSON merging
|
# Process documents with JSON merging
|
||||||
return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
|
return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
|
||||||
|
|
||||||
async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]:
|
|
||||||
"""Use AI to analyze user prompt and determine processing requirements."""
|
|
||||||
if not ai_service:
|
|
||||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
|
||||||
|
|
||||||
try:
|
|
||||||
analysis_prompt = f"""
|
|
||||||
Analyze this user request and determine if it requires multiple file output or single file output.
|
|
||||||
|
|
||||||
User request: "{self.services.ai.sanitizePromptContent(prompt, 'userinput')}"
|
|
||||||
|
|
||||||
Respond with JSON only in this exact format:
|
|
||||||
{{
|
|
||||||
"is_multi_file": true/false,
|
|
||||||
"strategy": "single|per_entity|by_section|by_criteria|custom",
|
|
||||||
"criteria": "description of how to split content",
|
|
||||||
"file_naming_pattern": "suggested pattern for filenames",
|
|
||||||
"reasoning": "brief explanation of the analysis"
|
|
||||||
}}
|
|
||||||
|
|
||||||
Consider:
|
|
||||||
- Does the user want separate files for different entities (customers, products, etc.)?
|
|
||||||
- Does the user want to split content into multiple documents?
|
|
||||||
- What would be the most logical way to organize the content?
|
|
||||||
- What language is the request in? (analyze in the original language)
|
|
||||||
|
|
||||||
Return only the JSON response.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
|
||||||
request_options = AiCallOptions()
|
|
||||||
request_options.operationType = OperationTypeEnum.DATA_GENERATE
|
|
||||||
|
|
||||||
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
|
||||||
response = await ai_service.aiObjects.call(request)
|
|
||||||
|
|
||||||
if response and response.content:
|
|
||||||
# Extract JSON from response
|
|
||||||
result = response.content.strip()
|
|
||||||
json_match = re.search(r'\{.*\}', result, re.DOTALL)
|
|
||||||
if json_match:
|
|
||||||
result = json_match.group(0)
|
|
||||||
|
|
||||||
analysis = json.loads(result)
|
|
||||||
return analysis
|
|
||||||
else:
|
|
||||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file")
|
|
||||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
|
||||||
|
|
||||||
async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
|
async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -289,6 +289,11 @@ class SubDocumentProcessing:
|
||||||
def _buildContinuationPrompt(self, base_prompt: str) -> str:
|
def _buildContinuationPrompt(self, base_prompt: str) -> str:
|
||||||
"""
|
"""
|
||||||
Build a prompt that includes partial results continuation instructions.
|
Build a prompt that includes partial results continuation instructions.
|
||||||
|
|
||||||
|
NOTE: This uses a different continuation pattern than SubCoreAi:
|
||||||
|
- SubCoreAi uses "continuation": null/dict for generic JSON responses
|
||||||
|
- This uses "continue": true/false + "continuation_context" for document sections
|
||||||
|
- Kept separate because it's tightly coupled to document processing needs
|
||||||
"""
|
"""
|
||||||
continuation_instructions = """
|
continuation_instructions = """
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,9 +29,11 @@ def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, st
|
||||||
|
|
||||||
full_prompt = prompt
|
full_prompt = prompt
|
||||||
for placeholder, content in placeholders.items():
|
for placeholder, content in placeholders.items():
|
||||||
# Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
|
# Skip if content is None or empty
|
||||||
full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
|
if content is None:
|
||||||
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
|
continue
|
||||||
|
# Replace {{KEY:placeholder}}
|
||||||
|
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
|
||||||
|
|
||||||
return full_prompt
|
return full_prompt
|
||||||
|
|
||||||
|
|
|
||||||
219
modules/services/serviceExtraction/subPromptBuilderExtraction.py
Normal file
219
modules/services/serviceExtraction/subPromptBuilderExtraction.py
Normal file
|
|
@ -0,0 +1,219 @@
|
||||||
|
"""
|
||||||
|
Prompt builder for document extraction.
|
||||||
|
This module builds prompts for extracting content from documents.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
||||||
|
|
||||||
|
# Type hint for renderer parameter
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from modules.services.serviceGeneration.renderers.rendererBaseTemplate import BaseRenderer
|
||||||
|
_RendererLike = BaseRenderer
|
||||||
|
else:
|
||||||
|
_RendererLike = Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def buildExtractionPrompt(
|
||||||
|
outputFormat: str,
|
||||||
|
userPrompt: str,
|
||||||
|
title: str,
|
||||||
|
aiService=None,
|
||||||
|
services=None,
|
||||||
|
renderer: _RendererLike = None
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Build unified extraction prompt for extracting content from documents.
|
||||||
|
Always uses multi-file format (single doc = multi with n=1).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
outputFormat: Target output format
|
||||||
|
userPrompt: User's prompt describing what to extract
|
||||||
|
title: Document title
|
||||||
|
aiService: Optional AI service for intent parsing
|
||||||
|
services: Services instance
|
||||||
|
renderer: Optional renderer for format-specific guidelines
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Complete extraction prompt string
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Unified multi-file example (single doc = multi with n=1)
|
||||||
|
json_example = {
|
||||||
|
"metadata": {
|
||||||
|
"title": "Multi-Document Example",
|
||||||
|
"split_strategy": "by_section",
|
||||||
|
"source_documents": ["doc_001"],
|
||||||
|
"extraction_method": "ai_extraction"
|
||||||
|
},
|
||||||
|
"documents": [
|
||||||
|
{
|
||||||
|
"id": "doc_section_1",
|
||||||
|
"title": "Section 1 Title",
|
||||||
|
"filename": "section_1.xlsx",
|
||||||
|
"sections": [
|
||||||
|
{
|
||||||
|
"id": "section_1",
|
||||||
|
"content_type": "heading",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"level": 1,
|
||||||
|
"text": "1. SECTION TITLE"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_2",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"text": "This is the actual content that should be extracted from the document."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_3",
|
||||||
|
"content_type": "table",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"headers": ["Column 1", "Column 2"],
|
||||||
|
"rows": [["Value 1", "Value 2"]]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 3
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
structure_instruction = "CRITICAL: You MUST return a JSON structure with a \"documents\" array. For single documents, create one document entry with all sections."
|
||||||
|
|
||||||
|
# Parse extraction intent if AI service is available
|
||||||
|
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
|
||||||
|
|
||||||
|
# Build base prompt
|
||||||
|
adaptive_prompt = f"""
|
||||||
|
{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
|
||||||
|
|
||||||
|
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||||
|
|
||||||
|
TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.
|
||||||
|
|
||||||
|
{extraction_intent}
|
||||||
|
|
||||||
|
REQUIREMENTS:
|
||||||
|
1. Analyze the document content provided in the context below
|
||||||
|
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
|
||||||
|
3. Create one or more JSON document entries based on the content structure
|
||||||
|
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
|
||||||
|
5. Generate appropriate filenames for each document
|
||||||
|
|
||||||
|
{structure_instruction}
|
||||||
|
|
||||||
|
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||||
|
{json.dumps(json_example, indent=2)}
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Preserve all original data - do not summarize or interpret
|
||||||
|
- Use the exact JSON format shown above
|
||||||
|
- Maintain data integrity and structure
|
||||||
|
|
||||||
|
Content Types to Extract:
|
||||||
|
1. Tables: Extract all rows and columns with proper headers
|
||||||
|
2. Lists: Extract all items with proper nesting
|
||||||
|
3. Headings: Extract with appropriate levels
|
||||||
|
4. Paragraphs: Extract as structured text
|
||||||
|
5. Code: Extract code blocks with language identification
|
||||||
|
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||||
|
|
||||||
|
Image Analysis Requirements:
|
||||||
|
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||||
|
- Describe everything you see in the image
|
||||||
|
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||||
|
- If the image is too small, corrupted, or unclear, explain this
|
||||||
|
- Always provide feedback - never return empty responses
|
||||||
|
|
||||||
|
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||||
|
|
||||||
|
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||||
|
""".strip()
|
||||||
|
|
||||||
|
# Add renderer-specific guidelines if provided
|
||||||
|
if renderer:
|
||||||
|
try:
|
||||||
|
if hasattr(renderer, 'getExtractionGuidelines'):
|
||||||
|
formatGuidelines = renderer.getExtractionGuidelines()
|
||||||
|
adaptive_prompt = f"{adaptive_prompt}\n\n{formatGuidelines}".strip()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Save extraction prompt to debug file - only if debug enabled
|
||||||
|
if services:
|
||||||
|
try:
|
||||||
|
debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
||||||
|
if debug_enabled:
|
||||||
|
import os
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
|
||||||
|
if not os.path.isabs(logDir):
|
||||||
|
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
logDir = os.path.join(gatewayDir, logDir)
|
||||||
|
debug_root = os.path.join(logDir, 'debug')
|
||||||
|
os.makedirs(debug_root, exist_ok=True)
|
||||||
|
with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(adaptive_prompt)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return adaptive_prompt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None, services=None) -> str:
|
||||||
|
"""
|
||||||
|
Parse user prompt to extract the core extraction intent.
|
||||||
|
"""
|
||||||
|
if not aiService:
|
||||||
|
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||||
|
|
||||||
|
try:
|
||||||
|
analysis_prompt = f"""
|
||||||
|
Analyze this user request and extract the core extraction intent:
|
||||||
|
|
||||||
|
User request: "{userPrompt}"
|
||||||
|
Target format: {outputFormat}
|
||||||
|
|
||||||
|
Extract the main intent and requirements for document processing. Focus on:
|
||||||
|
1. What content needs to be extracted
|
||||||
|
2. How it should be organized
|
||||||
|
3. Any specific requirements or preferences
|
||||||
|
|
||||||
|
Respond with a clear, concise statement of the extraction intent.
|
||||||
|
"""
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationTypeEnum.DATA_GENERATE
|
||||||
|
|
||||||
|
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
||||||
|
response = await aiService.aiObjects.call(request)
|
||||||
|
|
||||||
|
if response and response.content:
|
||||||
|
return response.content.strip()
|
||||||
|
else:
|
||||||
|
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
services.utils.debugLogToFile(f"Extraction intent analysis failed: {str(e)}", "PROMPT_BUILDER")
|
||||||
|
return f"Extract content from the provided documents and create a {outputFormat} report."
|
||||||
|
|
||||||
|
|
@ -299,6 +299,7 @@ class GenerationService:
|
||||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
|
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Render extracted JSON content to the specified output format.
|
Render extracted JSON content to the specified output format.
|
||||||
|
Always uses unified "documents" array format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
extractedContent: Structured JSON document from AI extraction
|
extractedContent: Structured JSON document from AI extraction
|
||||||
|
|
@ -315,31 +316,25 @@ class GenerationService:
|
||||||
if not isinstance(extractedContent, dict):
|
if not isinstance(extractedContent, dict):
|
||||||
raise ValueError("extractedContent must be a JSON dictionary")
|
raise ValueError("extractedContent must be a JSON dictionary")
|
||||||
|
|
||||||
# Check if this is a multi-document structure
|
# Unified approach: Always expect "documents" array (single doc = n=1)
|
||||||
if "documents" in extractedContent and len(extractedContent["documents"]) > 1:
|
if "documents" not in extractedContent:
|
||||||
# Multiple documents - use multi-file renderer
|
raise ValueError("extractedContent must contain 'documents' array")
|
||||||
generated_documents = await self._renderMultiFileReport(extractedContent, outputFormat, title, userPrompt, aiService)
|
|
||||||
# For multi-document, return the first document's content and mime type
|
documents = extractedContent["documents"]
|
||||||
if generated_documents:
|
if len(documents) == 0:
|
||||||
return generated_documents[0]["content"], generated_documents[0]["mime_type"]
|
raise ValueError("No documents found in 'documents' array")
|
||||||
else:
|
|
||||||
raise ValueError("No documents could be rendered")
|
# Use first document for rendering
|
||||||
elif "documents" in extractedContent and len(extractedContent["documents"]) == 1:
|
single_doc = documents[0]
|
||||||
# Single document in documents array - extract sections
|
if "sections" not in single_doc:
|
||||||
single_doc = extractedContent["documents"][0]
|
raise ValueError("Document must contain 'sections' field")
|
||||||
if "sections" not in single_doc:
|
|
||||||
raise ValueError("Document must contain 'sections' field")
|
# Create content for single document renderer
|
||||||
# Create content for single document renderer
|
contentToRender = {
|
||||||
contentToRender = {
|
"sections": single_doc["sections"],
|
||||||
"sections": single_doc["sections"],
|
"metadata": extractedContent.get("metadata", {}),
|
||||||
"metadata": extractedContent.get("metadata", {}),
|
"continuation": extractedContent.get("continuation", None)
|
||||||
"continuation": extractedContent.get("continuation", None)
|
}
|
||||||
}
|
|
||||||
elif "sections" in extractedContent:
|
|
||||||
# Direct sections format
|
|
||||||
contentToRender = extractedContent
|
|
||||||
else:
|
|
||||||
raise ValueError("extractedContent must contain 'sections' field or 'documents' array")
|
|
||||||
|
|
||||||
# Get the appropriate renderer for the format
|
# Get the appropriate renderer for the format
|
||||||
renderer = self._getFormatRenderer(outputFormat)
|
renderer = self._getFormatRenderer(outputFormat)
|
||||||
|
|
@ -362,171 +357,18 @@ class GenerationService:
|
||||||
outputFormat: str,
|
outputFormat: str,
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
title: str,
|
title: str,
|
||||||
promptAnalysis: Dict[str, Any],
|
|
||||||
aiService=None
|
aiService=None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Get adaptive extraction prompt based on AI analysis."""
|
"""Get adaptive extraction prompt."""
|
||||||
from .subPromptBuilder import buildAdaptiveExtractionPrompt
|
from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
|
||||||
return await buildAdaptiveExtractionPrompt(
|
return await buildExtractionPrompt(
|
||||||
outputFormat=outputFormat,
|
outputFormat=outputFormat,
|
||||||
userPrompt=userPrompt,
|
userPrompt=userPrompt,
|
||||||
title=title,
|
title=title,
|
||||||
promptAnalysis=promptAnalysis,
|
|
||||||
aiService=aiService,
|
aiService=aiService,
|
||||||
services=self.services
|
services=self.services
|
||||||
)
|
)
|
||||||
|
|
||||||
async def getGenerationPrompt(
|
|
||||||
self,
|
|
||||||
outputFormat: str,
|
|
||||||
userPrompt: str,
|
|
||||||
title: str
|
|
||||||
) -> str:
|
|
||||||
"""Get generation prompt for enhancing extracted JSON content."""
|
|
||||||
from .subPromptBuilder import buildGenerationPrompt
|
|
||||||
return await buildGenerationPrompt(
|
|
||||||
outputFormat=outputFormat,
|
|
||||||
userPrompt=userPrompt,
|
|
||||||
title=title
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def renderAdaptiveReport(
|
|
||||||
self,
|
|
||||||
extractedContent: Dict[str, Any],
|
|
||||||
outputFormat: str,
|
|
||||||
title: str,
|
|
||||||
userPrompt: str = None,
|
|
||||||
aiService=None,
|
|
||||||
isMultiFile: bool = False
|
|
||||||
) -> Union[Tuple[str, str], List[Dict[str, Any]]]:
|
|
||||||
"""Render report adaptively based on content structure."""
|
|
||||||
|
|
||||||
# Start timing for generation
|
|
||||||
startTime = time.time()
|
|
||||||
|
|
||||||
try:
|
|
||||||
if isMultiFile and "documents" in extractedContent:
|
|
||||||
result = await self._renderMultiFileReport(
|
|
||||||
extractedContent, outputFormat, title, userPrompt, aiService
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
result = await self._renderSingleFileReport(
|
|
||||||
extractedContent, outputFormat, title, userPrompt, aiService
|
|
||||||
)
|
|
||||||
|
|
||||||
# Calculate timing and emit stats
|
|
||||||
endTime = time.time()
|
|
||||||
processingTime = endTime - startTime
|
|
||||||
|
|
||||||
# Calculate bytes (rough estimation)
|
|
||||||
if isinstance(result, tuple):
|
|
||||||
content, mime_type = result
|
|
||||||
bytesReceived = len(content.encode('utf-8')) if isinstance(content, str) else len(content)
|
|
||||||
elif isinstance(result, list):
|
|
||||||
bytesReceived = sum(len(str(doc).encode('utf-8')) for doc in result)
|
|
||||||
else:
|
|
||||||
bytesReceived = len(str(result).encode('utf-8'))
|
|
||||||
|
|
||||||
# Use internal generation model for pricing
|
|
||||||
modelName = "internal_generation"
|
|
||||||
model = modelRegistry.getModel(modelName)
|
|
||||||
priceUsd = model.calculatePriceUsd(processingTime, 0, bytesReceived)
|
|
||||||
|
|
||||||
aiResponse = AiCallResponse(
|
|
||||||
content="", # No content for generation stats needed
|
|
||||||
modelName=modelName,
|
|
||||||
priceUsd=priceUsd,
|
|
||||||
processingTime=processingTime,
|
|
||||||
bytesSent=0, # Input is already processed
|
|
||||||
bytesReceived=bytesReceived,
|
|
||||||
errorCount=0
|
|
||||||
)
|
|
||||||
|
|
||||||
self.services.workflow.storeWorkflowStat(
|
|
||||||
self.services.currentWorkflow,
|
|
||||||
aiResponse,
|
|
||||||
f"generation.render.{outputFormat}"
|
|
||||||
)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
# Calculate timing for error case
|
|
||||||
endTime = time.time()
|
|
||||||
processingTime = endTime - startTime
|
|
||||||
|
|
||||||
# Use internal generation model for pricing
|
|
||||||
modelName = "internal_generation"
|
|
||||||
model = modelRegistry.getModel(modelName)
|
|
||||||
priceUsd = model.calculatePriceUsd(processingTime, 0, 0)
|
|
||||||
|
|
||||||
aiResponse = AiCallResponse(
|
|
||||||
content="", # No content for generation stats needed
|
|
||||||
modelName=modelName,
|
|
||||||
priceUsd=priceUsd,
|
|
||||||
processingTime=processingTime,
|
|
||||||
bytesSent=0,
|
|
||||||
bytesReceived=0,
|
|
||||||
errorCount=1
|
|
||||||
)
|
|
||||||
|
|
||||||
self.services.workflow.storeWorkflowStat(
|
|
||||||
self.services.currentWorkflow,
|
|
||||||
aiResponse,
|
|
||||||
f"generation.render.{outputFormat}"
|
|
||||||
)
|
|
||||||
|
|
||||||
raise
|
|
||||||
|
|
||||||
async def _renderMultiFileReport(
|
|
||||||
self,
|
|
||||||
extractedContent: Dict[str, Any],
|
|
||||||
outputFormat: str,
|
|
||||||
title: str,
|
|
||||||
userPrompt: str = None,
|
|
||||||
aiService=None
|
|
||||||
) -> List[Dict[str, Any]]:
|
|
||||||
"""Render multiple documents from extracted content."""
|
|
||||||
|
|
||||||
generated_documents = []
|
|
||||||
|
|
||||||
for doc_data in extractedContent.get("documents", []):
|
|
||||||
# Use existing single-file renderer for each document
|
|
||||||
renderer = self._getFormatRenderer(outputFormat)
|
|
||||||
if not renderer:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Render individual document
|
|
||||||
rendered_content, mime_type = await renderer.render(
|
|
||||||
extractedContent={"sections": doc_data["sections"]},
|
|
||||||
title=doc_data["title"],
|
|
||||||
userPrompt=userPrompt,
|
|
||||||
aiService=aiService
|
|
||||||
)
|
|
||||||
|
|
||||||
generated_documents.append({
|
|
||||||
"filename": doc_data["filename"],
|
|
||||||
"content": rendered_content,
|
|
||||||
"mime_type": mime_type,
|
|
||||||
"title": doc_data["title"]
|
|
||||||
})
|
|
||||||
|
|
||||||
return generated_documents
|
|
||||||
|
|
||||||
async def _renderSingleFileReport(
|
|
||||||
self,
|
|
||||||
extractedContent: Dict[str, Any],
|
|
||||||
outputFormat: str,
|
|
||||||
title: str,
|
|
||||||
userPrompt: str = None,
|
|
||||||
aiService=None
|
|
||||||
) -> Tuple[str, str]:
|
|
||||||
"""Render single file report (existing functionality)."""
|
|
||||||
# Use existing renderReport method
|
|
||||||
return await self.renderReport(
|
|
||||||
extractedContent, outputFormat, title, userPrompt, aiService
|
|
||||||
)
|
|
||||||
|
|
||||||
def _getFormatRenderer(self, output_format: str):
|
def _getFormatRenderer(self, output_format: str):
|
||||||
"""Get the appropriate renderer for the specified format using auto-discovery."""
|
"""Get the appropriate renderer for the specified format using auto-discovery."""
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,10 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
"properties": {
|
"properties": {
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": ["title", "splitStrategy"],
|
"required": ["title", "split_strategy"],
|
||||||
"properties": {
|
"properties": {
|
||||||
"title": {"type": "string", "description": "Document title"},
|
"title": {"type": "string", "description": "Document title"},
|
||||||
"splitStrategy": {
|
"split_strategy": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
|
"enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
|
||||||
"description": "Strategy for splitting content into multiple files"
|
"description": "Strategy for splitting content into multiple files"
|
||||||
|
|
@ -437,7 +437,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
metadata = json_data["metadata"]
|
metadata = json_data["metadata"]
|
||||||
if not isinstance(metadata, dict) or "title" not in metadata or "splitStrategy" not in metadata:
|
if not isinstance(metadata, dict) or "title" not in metadata or "split_strategy" not in metadata:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
documents = json_data["documents"]
|
documents = json_data["documents"]
|
||||||
|
|
|
||||||
|
|
@ -1,397 +0,0 @@
|
||||||
"""
|
|
||||||
Prompt builder for AI document generation and extraction.
|
|
||||||
This module builds prompts for AI services to extract and generate documents.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, Optional, List, TYPE_CHECKING
|
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
|
||||||
|
|
||||||
# Type hint for renderer parameter
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from .renderers.rendererBaseTemplate import BaseRenderer
|
|
||||||
_RendererLike = BaseRenderer
|
|
||||||
else:
|
|
||||||
_RendererLike = Any
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Centralized JSON structure template for document generation
|
|
||||||
JSON_STRUCTURE_TEMPLATE = """{
|
|
||||||
"metadata": {
|
|
||||||
"title": "{{DOCUMENT_TITLE}}",
|
|
||||||
"splitStrategy": "single_document",
|
|
||||||
"source_documents": [],
|
|
||||||
"extraction_method": "ai_generation"
|
|
||||||
},
|
|
||||||
"documents": [{
|
|
||||||
"id": "doc_1",
|
|
||||||
"title": "{{DOCUMENT_TITLE}}",
|
|
||||||
"filename": "document.json",
|
|
||||||
"sections": [
|
|
||||||
{
|
|
||||||
"id": "section_1",
|
|
||||||
"content_type": "heading|paragraph|table|list|code",
|
|
||||||
"elements": [
|
|
||||||
// heading: {"level": 1, "text": "..."}
|
|
||||||
// paragraph: {"text": "..."}
|
|
||||||
// table: {"headers": [...], "rows": [[...]], "caption": "..."}
|
|
||||||
// list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
|
|
||||||
// code: {"code": "...", "language": "..."}
|
|
||||||
],
|
|
||||||
"order": 1
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}],
|
|
||||||
"continuation": null,
|
|
||||||
}"""
|
|
||||||
|
|
||||||
async def buildAdaptiveExtractionPrompt(
|
|
||||||
outputFormat: str,
|
|
||||||
userPrompt: str,
|
|
||||||
title: str,
|
|
||||||
promptAnalysis: Dict[str, Any],
|
|
||||||
aiService=None,
|
|
||||||
services=None
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Build adaptive extraction prompt based on AI analysis.
|
|
||||||
Uses multi-file or single-file approach based on analysis.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Multi-file example data instead of schema
|
|
||||||
multi_file_example = {
|
|
||||||
"metadata": {
|
|
||||||
"title": "Multi-Document Example",
|
|
||||||
"splitStrategy": "by_section",
|
|
||||||
"source_documents": ["doc_001"],
|
|
||||||
"extraction_method": "ai_extraction"
|
|
||||||
},
|
|
||||||
"documents": [
|
|
||||||
{
|
|
||||||
"id": "doc_section_1",
|
|
||||||
"title": "Section 1 Title",
|
|
||||||
"filename": "section_1.xlsx",
|
|
||||||
"sections": [
|
|
||||||
{
|
|
||||||
"id": "section_1",
|
|
||||||
"content_type": "heading",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"level": 1,
|
|
||||||
"text": "1. SECTION TITLE"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 1
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "section_2",
|
|
||||||
"content_type": "paragraph",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"text": "This is the actual content that should be extracted from the document."
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 2
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "section_3",
|
|
||||||
"content_type": "table",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"headers": ["Column 1", "Column 2"],
|
|
||||||
"rows": [["Value 1", "Value 2"]]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 3
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
# UNIFIED APPROACH: Always use multi-document format (single doc = multi with n=1)
|
|
||||||
adaptive_prompt = f"""
|
|
||||||
{services.ai.sanitizePromptContent(userPrompt, 'userinput')}
|
|
||||||
|
|
||||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
|
||||||
|
|
||||||
TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.
|
|
||||||
|
|
||||||
REQUIREMENTS:
|
|
||||||
1. Analyze the document content provided in the context below
|
|
||||||
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
|
|
||||||
3. Create one or more JSON document entries based on the content structure
|
|
||||||
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
|
|
||||||
5. Generate appropriate filenames for each document
|
|
||||||
|
|
||||||
CRITICAL: You MUST return a JSON structure with a "documents" array, NOT a "sections" array.
|
|
||||||
|
|
||||||
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
|
||||||
{json.dumps(multi_file_example, indent=2)}
|
|
||||||
|
|
||||||
IMPORTANT: The JSON must have a "documents" key containing an array of document objects. Each document object must have:
|
|
||||||
- "id": unique identifier
|
|
||||||
- "title": document title
|
|
||||||
- "filename": appropriate filename for the document
|
|
||||||
- "sections": array of content sections
|
|
||||||
|
|
||||||
DO NOT return a JSON with "sections" at the root level. Return a JSON with "documents" at the root level.
|
|
||||||
|
|
||||||
INSTRUCTIONS:
|
|
||||||
- For single document requests: Create one document with all content in its sections
|
|
||||||
- For multi-document requests: Create multiple documents, each with relevant sections
|
|
||||||
- Use actual section titles, headings, and text from the document
|
|
||||||
- Create meaningful filenames based on content
|
|
||||||
- Ensure each section contains the complete content for that part
|
|
||||||
- Do not use generic placeholder text like "Section 1", "Section 2"
|
|
||||||
- Extract real headings, paragraphs, lists, and other content elements
|
|
||||||
- CRITICAL: Return JSON with "documents" array, not "sections" array
|
|
||||||
|
|
||||||
CONTEXT (Document Content):
|
|
||||||
|
|
||||||
Content Types to Extract:
|
|
||||||
1. Tables: Extract all rows and columns with proper headers
|
|
||||||
2. Lists: Extract all items with proper nesting
|
|
||||||
3. Headings: Extract with appropriate levels
|
|
||||||
4. Paragraphs: Extract as structured text
|
|
||||||
5. Code: Extract code blocks with language identification
|
|
||||||
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
|
||||||
|
|
||||||
Image Analysis Requirements:
|
|
||||||
- If you cannot analyze an image for any reason, explain why in the JSON response
|
|
||||||
- Describe everything you see in the image
|
|
||||||
- Include all text content, tables, logos, graphics, layout, and visual elements
|
|
||||||
- If the image is too small, corrupted, or unclear, explain this
|
|
||||||
- Always provide feedback - never return empty responses
|
|
||||||
|
|
||||||
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
|
||||||
|
|
||||||
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
|
||||||
""".strip()
|
|
||||||
|
|
||||||
return adaptive_prompt
|
|
||||||
|
|
||||||
async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str
) -> str:
    """Assemble the document-generation prompt around the shared JSON template.

    Args:
        outputFormat: Target output format (e.g. html, pdf, docx).
        userPrompt: The user's original generation request, quoted verbatim.
        title: Document title; substituted into the template placeholder and
            echoed in the prompt header.

    Returns:
        The complete prompt string, stripped of surrounding whitespace.
    """
    # Inject the concrete title into the shared structure template.
    filled_template = JSON_STRUCTURE_TEMPLATE.replace("{{DOCUMENT_TITLE}}", title)

    # NOTE(review): LOOP_INSTRUCTION is emitted literally — presumably replaced
    # by a downstream stage; confirm before relying on it.
    prompt_text = f"""Generate structured JSON content for document creation.

USER CONTEXT: "{userPrompt}"
DOCUMENT TITLE: "{title}"
TARGET FORMAT: {outputFormat}

LOOP_INSTRUCTION

RULES:
- Follow the template structure below exactly; emit only one JSON object in the response
- Fill sections with content based on the user request
- Use appropriate content_type

Return ONLY valid JSON matching this structure (template below). Do not include any prose before/after. Use this as the single template reference for your output:
{filled_template}
"""

    return prompt_text.strip()
|
|
||||||
|
|
||||||
async def buildExtractionPrompt(
|
|
||||||
outputFormat: str,
|
|
||||||
renderer: _RendererLike,
|
|
||||||
userPrompt: str,
|
|
||||||
title: str,
|
|
||||||
aiService=None,
|
|
||||||
services=None
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Build the final extraction prompt by combining:
|
|
||||||
- Parsed extraction intent from user prompt (using AI)
|
|
||||||
- Generic cross-format instructions (filename header + real-data policy)
|
|
||||||
- Format-specific guidelines snippet provided by the renderer
|
|
||||||
|
|
||||||
The AI must place a single filename header at the very top:
|
|
||||||
FILENAME: <safe-file-name-with-extension>
|
|
||||||
followed by a blank line and then ONLY the document content according to the target format.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Parse user prompt to separate extraction intent from generation format using AI
|
|
||||||
extractionIntent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services)
|
|
||||||
|
|
||||||
# Import JSON schema for structured output
|
|
||||||
from .subJsonSchema import get_document_subJsonSchema
|
|
||||||
jsonSchema = get_document_subJsonSchema()
|
|
||||||
|
|
||||||
# Generic block for JSON extraction - use mixed example data showing different content types
|
|
||||||
example_data = {
|
|
||||||
"metadata": {
|
|
||||||
"title": "Example Document",
|
|
||||||
"author": "AI Assistant",
|
|
||||||
"source_documents": ["document_001"],
|
|
||||||
"extraction_method": "ai_extraction"
|
|
||||||
},
|
|
||||||
"sections": [
|
|
||||||
{
|
|
||||||
"id": "section_001",
|
|
||||||
"content_type": "heading",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"level": 1,
|
|
||||||
"text": "1. INTRODUCTION"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 1,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "section_002",
|
|
||||||
"content_type": "paragraph",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"text": "This is a sample paragraph with actual content that should be extracted from the document."
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 2,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "section_003",
|
|
||||||
"content_type": "table",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"headers": ["Column 1", "Column 2", "Column 3"],
|
|
||||||
"rows": [
|
|
||||||
["Value 1", "Value 2", "Value 3"],
|
|
||||||
["Value 4", "Value 5", "Value 6"]
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 3,
|
|
||||||
"metadata": {}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"summary": "",
|
|
||||||
"tags": []
|
|
||||||
}
|
|
||||||
|
|
||||||
genericIntro = f"""
|
|
||||||
{extractionIntent}
|
|
||||||
|
|
||||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
|
||||||
|
|
||||||
TASK: Extract the actual content from the document and organize it into structured sections.
|
|
||||||
|
|
||||||
REQUIREMENTS:
|
|
||||||
1. Analyze the document content provided in the context below
|
|
||||||
2. Extract all content and organize it into logical sections
|
|
||||||
3. Create structured JSON with sections containing the extracted content
|
|
||||||
4. Preserve the original structure and data
|
|
||||||
|
|
||||||
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
|
||||||
{json.dumps(example_data, indent=2)}
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
- Preserve all original data - do not summarize or interpret
|
|
||||||
- Use the exact JSON format shown above
|
|
||||||
- Maintain data integrity and structure
|
|
||||||
|
|
||||||
Content Types to Extract:
|
|
||||||
1. Tables: Extract all rows and columns with proper headers
|
|
||||||
2. Lists: Extract all items with proper nesting
|
|
||||||
3. Headings: Extract with appropriate levels
|
|
||||||
4. Paragraphs: Extract as structured text
|
|
||||||
5. Code: Extract code blocks with language identification
|
|
||||||
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
|
||||||
|
|
||||||
Image Analysis Requirements:
|
|
||||||
- If you cannot analyze an image for any reason, explain why in the JSON response
|
|
||||||
- Describe everything you see in the image
|
|
||||||
- Include all text content, tables, logos, graphics, layout, and visual elements
|
|
||||||
- If the image is too small, corrupted, or unclear, explain this
|
|
||||||
- Always provide feedback - never return empty responses
|
|
||||||
|
|
||||||
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
|
||||||
|
|
||||||
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
|
||||||
|
|
||||||
DO NOT return a schema description - return actual extracted content in the JSON format shown above.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Get format-specific guidelines from renderer
|
|
||||||
formatGuidelines = ""
|
|
||||||
try:
|
|
||||||
if hasattr(renderer, 'getExtractionGuidelines'):
|
|
||||||
formatGuidelines = renderer.getExtractionGuidelines()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Combine all parts
|
|
||||||
finalPrompt = f"{genericIntro}\n\n{formatGuidelines}".strip()
|
|
||||||
|
|
||||||
# Save extraction prompt to debug file - only if debug enabled
|
|
||||||
try:
|
|
||||||
debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
|
||||||
if debug_enabled:
|
|
||||||
import os
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
|
||||||
# Use configured log directory instead of hardcoded test-chat
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
|
|
||||||
if not os.path.isabs(logDir):
|
|
||||||
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
logDir = os.path.join(gatewayDir, logDir)
|
|
||||||
debug_root = os.path.join(logDir, 'debug')
|
|
||||||
os.makedirs(debug_root, exist_ok=True)
|
|
||||||
with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
|
|
||||||
f.write(finalPrompt)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return finalPrompt
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None, services=None) -> str:
|
|
||||||
"""
|
|
||||||
Parse user prompt to extract the core extraction intent.
|
|
||||||
"""
|
|
||||||
if not aiService:
|
|
||||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
|
||||||
|
|
||||||
try:
|
|
||||||
analysis_prompt = f"""
|
|
||||||
Analyze this user request and extract the core extraction intent:
|
|
||||||
|
|
||||||
User request: "{userPrompt}"
|
|
||||||
Target format: {outputFormat}
|
|
||||||
|
|
||||||
Extract the main intent and requirements for document processing. Focus on:
|
|
||||||
1. What content needs to be extracted
|
|
||||||
2. How it should be organized
|
|
||||||
3. Any specific requirements or preferences
|
|
||||||
|
|
||||||
Respond with a clear, concise statement of the extraction intent.
|
|
||||||
"""
|
|
||||||
request_options = AiCallOptions()
|
|
||||||
request_options.operationType = OperationTypeEnum.DATA_GENERATE
|
|
||||||
|
|
||||||
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
|
||||||
response = await aiService.aiObjects.call(request)
|
|
||||||
|
|
||||||
if response and response.content:
|
|
||||||
return response.content.strip()
|
|
||||||
else:
|
|
||||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
services.utils.debugLogToFile(f"Extraction intent analysis failed: {str(e)}", "PROMPT_BUILDER")
|
|
||||||
return f"Extract content from the provided documents and create a {outputFormat} report."
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,89 @@
|
||||||
|
"""
|
||||||
|
Prompt builder for document generation.
|
||||||
|
This module builds prompts for generating documents from extracted content.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Centralized JSON structure template for document generation.
# {{DOCUMENT_TITLE}} is replaced by buildGenerationPrompt(); the // lines are
# per-content-type guidance for the AI, so the template is NOT valid JSON as-is.
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "{{DOCUMENT_TITLE}}",
      "filename": "document.json",
      "sections": [
        {
          "id": "section_1",
          "content_type": "heading|paragraph|table|list|code",
          "elements": [
            // heading: {"level": 1, "text": "..."}
            // paragraph: {"text": "..."}
            // table: {"headers": [...], "rows": [[...]], "caption": "..."}
            // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
            // code: {"code": "...", "language": "..."}
          ],
          "order": 1
        }
      ]
    }
  ],
  "continuation": null
}"""


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None
) -> str:
    """
    Build the unified generation prompt around TEMPLATE_JSON_DOCUMENT_GENERATION.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.)
        userPrompt: User's original prompt for document generation
        title: Title injected verbatim into the template and the title directive
        extracted_content: Optional extracted document content prepended to the prompt

    Returns:
        Complete generation prompt string
    """
    # The title is always injected verbatim; there is no auto-generated-title path.
    title_directive = f"Use the following title: \"{title}\""
    filled_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title)

    # NOTE(review): LOOP_INSTRUCTION is emitted literally — presumably replaced
    # by a downstream stage; confirm before relying on it.
    prompt_body = f"""Generate structured JSON content for document creation.

USER CONTEXT: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE INSTRUCTION: {title_directive}

LOOP_INSTRUCTION

RULES:
- Follow the template structure below exactly; emit only one JSON object in the response
- Fill sections with content based on the user request
- Use appropriate content_type

{filled_template}
"""

    # When extracted source material is supplied, it goes ahead of the instructions.
    prefix = f"EXTRACTED CONTENT FROM DOCUMENTS:\n{extracted_content}\n\n" if extracted_content else ""
    return (prefix + prompt_body).strip()
|
||||||
|
|
||||||
|
|
@ -20,61 +20,6 @@ class WorkflowService:
|
||||||
self.interfaceDbApp = serviceCenter.interfaceDbApp
|
self.interfaceDbApp = serviceCenter.interfaceDbApp
|
||||||
self._progressLogger = None
|
self._progressLogger = None
|
||||||
|
|
||||||
    async def summarizeChat(self, messages: List[ChatMessage]) -> str:
        """
        Summarize chat messages from last to first message with status="first".

        Walks the history backwards, collecting messages until (and including)
        the most recent message flagged status="first", then asks the AI
        service for a summary in the user's language.

        Args:
            messages: List of chat messages to summarize

        Returns:
            str: Summary of the chat in user's language; on any failure an
            error string is returned instead (this method never raises)
        """
        try:
            # Get messages from last to first, stopping at first message with status="first"
            relevantMessages = []
            for msg in reversed(messages):
                relevantMessages.append(msg)
                if msg.status == "first":
                    break

            # Create prompt for AI.
            # reversed(relevantMessages) restores chronological order, since the
            # list above was collected newest-first.
            # NOTE(review): LOOP_INSTRUCTION is emitted literally — presumably
            # replaced by a later stage; confirm.
            prompt = f"""
You are an AI assistant providing a summary of a chat conversation.
Please respond in '{self.user.language}' language.

Chat History:
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}

Instructions:
1. Summarize the conversation's key points and outcomes
2. Be concise but informative
3. Use a professional but friendly tone
4. Focus on important decisions and next steps if any

LOOP_INSTRUCTION

Please provide a comprehensive summary of this conversation."""

            # Get summary using AI service through proper main service interface

            # Fixed call options; see AiCallOptions for field semantics.
            return await self.services.ai.callAiDocuments(
                prompt=prompt,
                documents=None,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_GENERATE,
                    priority=PriorityEnum.SPEED,
                    processingMode=ProcessingModeEnum.BASIC,
                    compressPrompt=True,
                    compressContext=False,
                    maxCost=0.01
                )
            )

        except Exception as e:
            logger.error(f"Error summarizing chat: {str(e)}")
            # The error is returned as the "summary" text rather than raised.
            return f"Error summarizing chat: {str(e)}"
|
|
||||||
|
|
||||||
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
|
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
|
||||||
"""Get ChatDocuments from a list of document references using all three formats."""
|
"""Get ChatDocuments from a list of document references using all three formats."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -928,7 +873,9 @@ Please provide a comprehensive summary of this conversation."""
|
||||||
def _getProgressLogger(self):
|
def _getProgressLogger(self):
|
||||||
"""Get or create the progress logger instance"""
|
"""Get or create the progress logger instance"""
|
||||||
if self._progressLogger is None:
|
if self._progressLogger is None:
|
||||||
self._progressLogger = ProgressLogger(self, self.workflow)
|
# Use currentWorkflow from self.services instead of self.workflow (which is self)
|
||||||
|
workflow = getattr(self.services, 'currentWorkflow', None)
|
||||||
|
self._progressLogger = ProgressLogger(self, workflow)
|
||||||
return self._progressLogger
|
return self._progressLogger
|
||||||
|
|
||||||
def createProgressLogger(self, workflow) -> ProgressLogger:
|
def createProgressLogger(self, workflow) -> ProgressLogger:
|
||||||
|
|
|
||||||
|
|
@ -42,15 +42,22 @@ class MethodAi(MethodBase):
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
operationId = f"ai_process_{self.services.currentWorkflow.id}_{int(time.time())}"
|
workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
|
||||||
|
operationId = f"ai_process_{workflowId}_{int(time.time())}"
|
||||||
|
|
||||||
# Start progress tracking
|
# Start progress tracking
|
||||||
self.services.workflow.progressLogStart(
|
if hasattr(self.services, 'workflow') and self.services.workflow: # TODO: Entfernen für PROD! (block)
|
||||||
operationId,
|
try:
|
||||||
"Generate",
|
self.services.workflow.progressLogStart(
|
||||||
"AI Processing",
|
operationId,
|
||||||
f"Format: {parameters.get('resultType', 'txt')}"
|
"Generate",
|
||||||
)
|
"AI Processing",
|
||||||
|
f"Format: {parameters.get('resultType', 'txt')}"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
# Silently skip progress tracking errors (e.g., in test environments)
|
||||||
|
logger.debug(f"Skipping progress logging: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
# Debug logging to see what parameters are received
|
# Debug logging to see what parameters are received
|
||||||
logger.info(f"MethodAi.process received parameters: {parameters}")
|
logger.info(f"MethodAi.process received parameters: {parameters}")
|
||||||
|
|
|
||||||
369
test4_method_ai_operations.py
Normal file
369
test4_method_ai_operations.py
Normal file
|
|
@ -0,0 +1,369 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script for methodAi operations.
|
||||||
|
Tests all OperationType's with various prompts through the workflow action interface.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
|
# Add the gateway to path
|
||||||
|
sys.path.append(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelAi import OperationTypeEnum
|
||||||
|
from modules.datamodels.datamodelChat import ChatWorkflow, ChatDocument
|
||||||
|
from modules.datamodels.datamodelUam import User
|
||||||
|
|
||||||
|
|
||||||
|
class MethodAiOperationsTester:
|
||||||
|
"""Test all operation types through methodAi.process() action."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
# Use root user for testing (has full access to everything)
|
||||||
|
from modules.interfaces.interfaceDbAppObjects import getRootInterface
|
||||||
|
rootInterface = getRootInterface()
|
||||||
|
self.testUser = rootInterface.currentUser
|
||||||
|
|
||||||
|
self.services = None
|
||||||
|
self.methodAi = None
|
||||||
|
self.testResults = []
|
||||||
|
|
||||||
|
# Create logs directory if it doesn't exist
|
||||||
|
self.logsDir = os.path.join(os.path.dirname(__file__), "..", "local", "logs")
|
||||||
|
os.makedirs(self.logsDir, exist_ok=True)
|
||||||
|
|
||||||
|
# Create modeltest subdirectory
|
||||||
|
self.modelTestDir = os.path.join(self.logsDir, "modeltest")
|
||||||
|
os.makedirs(self.modelTestDir, exist_ok=True)
|
||||||
|
|
||||||
|
# Test prompts for each operation type
|
||||||
|
self.testPrompts = {
|
||||||
|
OperationTypeEnum.PLAN: {
|
||||||
|
"aiPrompt": "Create a 5-step plan to organize a project meeting and include the manual for the project management office.",
|
||||||
|
"resultType": "json"
|
||||||
|
},
|
||||||
|
OperationTypeEnum.DATA_ANALYSE: {
|
||||||
|
"aiPrompt": "Analyze the following text and extract the main topics and key points: 'Machine learning is transforming healthcare by enabling early disease detection through pattern recognition in medical images.'",
|
||||||
|
"resultType": "json"
|
||||||
|
},
|
||||||
|
OperationTypeEnum.DATA_GENERATE: {
|
||||||
|
"aiPrompt": "Generate the first 9000 prime numbers.",
|
||||||
|
"resultType": "txt"
|
||||||
|
},
|
||||||
|
OperationTypeEnum.DATA_EXTRACT: {
|
||||||
|
"aiPrompt": "Extract all email addresses and phone numbers from the following text: 'Contact us at support@example.com or call 123-456-7890. For sales, email sales@example.com or call 987-654-3210.'",
|
||||||
|
"resultType": "json"
|
||||||
|
},
|
||||||
|
OperationTypeEnum.IMAGE_ANALYSE: {
|
||||||
|
"aiPrompt": "Analyze this image and describe what you see, including any text or numbers visible.",
|
||||||
|
"resultType": "json",
|
||||||
|
"documentList": ["_testdata_photo_2025-06-03_13-05-52.jpg"] if os.path.exists(os.path.join(self.logsDir, "_testdata_photo_2025-06-03_13-05-52.jpg")) else []
|
||||||
|
},
|
||||||
|
OperationTypeEnum.IMAGE_GENERATE: {
|
||||||
|
"aiPrompt": "A beautiful sunset over the ocean with purple and orange hues",
|
||||||
|
"resultType": "png"
|
||||||
|
},
|
||||||
|
OperationTypeEnum.WEB_SEARCH: {
|
||||||
|
"aiPrompt": "Find recent articles about ValueOn AG in Switzeerland in 2025",
|
||||||
|
"resultType": "json"
|
||||||
|
},
|
||||||
|
OperationTypeEnum.WEB_CRAWL: {
|
||||||
|
"aiPrompt": "Extract who works in this company",
|
||||||
|
"resultType": "json",
|
||||||
|
"documentList": ["https://www.valueon.com"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
    async def initialize(self) -> None:
        """Initialize services and methodAi.

        Wires up the chat DB interface, the service container, a freshly
        persisted test workflow, and a MethodAi instance bound to those
        services. Must be awaited before testOperation() is called.
        """
        print("🔧 Initializing services...")

        # Set logging level to DEBUG to see debug messages
        import logging
        logging.getLogger().setLevel(logging.DEBUG)

        # Import and initialize services - use the same approach as routeChatPlayground
        import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects
        interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)

        # Import and initialize services
        from modules.features.chatPlayground.mainChatPlayground import getServices

        # Get services first
        self.services = getServices(self.testUser, None)

        # Now create AND SAVE workflow in database using the interface
        import uuid
        import time
        currentTimestamp = time.time()

        testWorkflow = ChatWorkflow(
            id=str(uuid.uuid4()),
            name="Test Workflow",
            status="running",
            startedAt=currentTimestamp,
            lastActivity=currentTimestamp,
            currentRound=1,
            currentTask=0,
            currentAction=0,
            totalTasks=0,
            totalActions=0,
            mandateId=self.testUser.mandateId,
            messageIds=[],
            workflowMode="React",
            maxSteps=5
        )

        # SAVE workflow to database so it exists for access control
        # Convert ChatWorkflow to dict for createWorkflow
        workflowDict = testWorkflow.model_dump()
        interfaceDbChat.createWorkflow(workflowDict)

        # Set the workflow in services
        self.services.currentWorkflow = testWorkflow

        # Debug: Print workflow status
        print(f"Debug: services.currentWorkflow is set: {hasattr(self.services, 'currentWorkflow') and self.services.currentWorkflow is not None}")
        if self.services.currentWorkflow:
            print(f"Debug: Workflow ID: {self.services.currentWorkflow.id}")

        # Import and initialize methodAi AFTER setting workflow
        # (MethodAi receives the same services object, so it sees the workflow).
        from modules.workflows.methods.methodAi import MethodAi
        self.methodAi = MethodAi(self.services)

        # Verify methodAi has access to the workflow
        if hasattr(self.methodAi, 'services'):
            print(f"Debug: methodAi.services.currentWorkflow is set: {hasattr(self.methodAi.services, 'currentWorkflow') and self.methodAi.services.currentWorkflow is not None}")

        print("✅ Services initialized")
        print(f"📁 Results will be saved to: {self.modelTestDir}")
|
||||||
|
|
||||||
|
    async def testOperation(self, operationType: OperationTypeEnum) -> Dict[str, Any]:
        """Test a specific operation type.

        Looks up the operation's prompt in self.testPrompts, runs it through
        methodAi.process(), and records a result dict (status, timing, document
        info) in self.testResults.

        Args:
            operationType: The operation to exercise.

        Returns:
            Dict[str, Any]: The recorded result; status is one of
            "SUCCESS", "ERROR", or "EXCEPTION" (this method never raises).
        """
        print(f"\n{'='*80}")
        print(f"TESTING OPERATION: {operationType.value}")
        print(f"{'='*80}")

        startTime = asyncio.get_event_loop().time()

        # Get test prompt for this operation
        testConfig = self.testPrompts.get(operationType, {})

        if not testConfig:
            # No prompt configured for this operation — record and bail out early.
            result = {
                "operationType": operationType.value,
                "status": "ERROR",
                "error": "No test configuration found for this operation type",
                "processingTime": 0.0
            }
            self.testResults.append(result)
            return result

        print(f"Prompt: {testConfig.get('aiPrompt', 'N/A')}")
        print(f"Result Type: {testConfig.get('resultType', 'txt')}")

        try:
            # Prepare parameters
            parameters = {
                "aiPrompt": testConfig.get("aiPrompt"),
                "resultType": testConfig.get("resultType", "txt")
            }

            # Add document list if provided
            if "documentList" in testConfig and testConfig["documentList"]:
                parameters["documentList"] = testConfig["documentList"]

            # Ensure workflow is still set in both self.services AND methodAi.services;
            # if it was lost, rebuild a throwaway workflow (not persisted to the DB,
            # unlike the one created in initialize()).
            if not self.services.currentWorkflow or (hasattr(self, 'methodAi') and hasattr(self.methodAi, 'services') and not self.methodAi.services.currentWorkflow):
                print(f"⚠️ Warning: Workflow is None, trying to re-set it...")
                import time
                import uuid
                currentTimestamp = time.time()
                testWorkflow = ChatWorkflow(
                    id=str(uuid.uuid4()),
                    name="Test Workflow",
                    status="running",
                    startedAt=currentTimestamp,
                    lastActivity=currentTimestamp,
                    currentRound=1,
                    currentTask=0,
                    currentAction=0,
                    totalTasks=0,
                    totalActions=0,
                    mandateId="test_mandate",
                    messageIds=[],
                    workflowMode="React",
                    maxSteps=5
                )
                self.services.currentWorkflow = testWorkflow
                # Also set in methodAi.services if it exists
                if hasattr(self, 'methodAi') and hasattr(self.methodAi, 'services'):
                    self.methodAi.services.currentWorkflow = testWorkflow

            # Call methodAi.process()
            print(f"Calling methodAi.process()...")
            print(f"Debug: Current workflow ID before call: {self.services.currentWorkflow.id if self.services.currentWorkflow else 'None'}")
            print(f"Debug: methodAi.services.currentWorkflow: {self.methodAi.services.currentWorkflow.id if hasattr(self.methodAi, 'services') and self.methodAi.services.currentWorkflow else 'None/NotSet'}")
            print(f"Debug: Is same services object? {self.services is self.methodAi.services}")
            print(f"Debug: services id: {id(self.services)}")
            print(f"Debug: methodAi.services id: {id(self.methodAi.services)}")

            # Final safety check: ensure methodAi.services has the workflow
            if hasattr(self.methodAi, 'services') and not self.methodAi.services.currentWorkflow:
                print(f"⚠️ Fixing: Setting workflow in methodAi.services...")
                self.methodAi.services.currentWorkflow = self.services.currentWorkflow

            actionResult = await self.methodAi.process(parameters)

            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            # Analyze result
            result = {
                "operationType": operationType.value,
                "status": "SUCCESS" if actionResult.success else "ERROR",
                "processingTime": round(processingTime, 2),
                "hasDocuments": len(actionResult.documents) > 0 if actionResult.documents else False,
                "documentCount": len(actionResult.documents) if actionResult.documents else 0,
                "error": actionResult.error if not actionResult.success else None
            }

            # Extract document information (only the first returned document is summarized)
            if actionResult.documents:
                doc = actionResult.documents[0]
                result["documentName"] = doc.documentName
                result["mimeType"] = doc.mimeType
                result["dataSize"] = len(doc.documentData) if doc.documentData else 0
                result["dataPreview"] = str(doc.documentData)[:200] + "..." if len(str(doc.documentData)) > 200 else str(doc.documentData)

            print(f"✅ Status: {result['status']}")
            print(f"⏱️ Processing time: {result['processingTime']}s")
            print(f"📄 Documents: {result.get('documentCount', 0)}")

            if actionResult.success:
                if result.get('documentName'):
                    print(f"📄 Saved: {result['documentName']}")
                    print(f"📄 MIME type: {result.get('mimeType')}")
                    print(f"📄 Size: {result.get('dataSize')} bytes")

                    # Try to decode if it's JSON
                    # NOTE(review): bare except below swallows every error from
                    # json.loads, including KeyboardInterrupt — consider
                    # narrowing to (json.JSONDecodeError, TypeError).
                    if result.get('mimeType') == 'application/json':
                        try:
                            import json
                            jsonData = json.loads(actionResult.documents[0].documentData)
                            result["isValidJson"] = True
                            result["jsonKeys"] = list(jsonData.keys()) if isinstance(jsonData, dict) else "Not a dict"
                            print(f"✅ Valid JSON with keys: {result['jsonKeys']}")
                        except:
                            result["isValidJson"] = False
                            print(f"⚠️ Not valid JSON")
            else:
                print(f"❌ Error: {result.get('error')}")

            self.testResults.append(result)
            return result

        except Exception as e:
            # Unexpected failure: record it with its own status so the summary
            # distinguishes exceptions from clean ERROR results.
            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            result = {
                "operationType": operationType.value,
                "status": "EXCEPTION",
                "processingTime": round(processingTime, 2),
                "error": str(e),
                "hasDocuments": False
            }

            print(f"💥 EXCEPTION: {str(e)}")
            self.testResults.append(result)
            return result
|
||||||
|
|
||||||
|
async def testAllOperations(self):
|
||||||
|
"""Test all operation types."""
|
||||||
|
print(f"\n{'='*80}")
|
||||||
|
print("STARTING METHODAI OPERATIONS TESTS - DATA_GENERATE ONLY")
|
||||||
|
print(f"{'='*80}")
|
||||||
|
print("Testing DATA_GENERATE operation type...")
|
||||||
|
|
||||||
|
# Test only DATA_GENERATE
|
||||||
|
await self.testOperation(OperationTypeEnum.DATA_GENERATE)
|
||||||
|
print(f"\n{'─'*80}")
|
||||||
|
|
||||||
|
# Print summary
|
||||||
|
self.printSummary()
|
||||||
|
|
||||||
|
def printSummary(self):
    """Print a categorized summary of all recorded test results and
    persist them as a timestamped JSON file under self.modelTestDir.
    """
    import json

    bar = "=" * 80
    rule = "─" * 80

    print("\n" + bar)
    print("TEST SUMMARY")
    print(bar)

    # Partition results by recorded status in a single pass
    # (insertion order within each bucket matches self.testResults).
    byStatus = {"SUCCESS": [], "ERROR": [], "EXCEPTION": []}
    for entry in self.testResults:
        if entry["status"] in byStatus:
            byStatus[entry["status"]].append(entry)

    successfulTests = byStatus["SUCCESS"]
    failedTests = byStatus["ERROR"]
    exceptionTests = byStatus["EXCEPTION"]

    print(f"\nTotal tests: {len(self.testResults)}")
    print(f"✅ Successful: {len(successfulTests)}")
    print(f"❌ Failed: {len(failedTests)}")
    print(f"💥 Exceptions: {len(exceptionTests)}")

    if successfulTests:
        print("\n" + rule)
        print("SUCCESSFUL TESTS")
        print(rule)
        for entry in successfulTests:
            print(f"✅ {entry['operationType']}: {entry['processingTime']}s")

    if failedTests:
        print("\n" + rule)
        print("FAILED TESTS")
        print(rule)
        for entry in failedTests:
            print(f"❌ {entry['operationType']}: {entry.get('error', 'Unknown error')}")

    if exceptionTests:
        print("\n" + rule)
        print("EXCEPTIONS")
        print(rule)
        for entry in exceptionTests:
            print(f"💥 {entry['operationType']}: {entry.get('error', 'Unknown error')}")

    # Persist the full result set together with a small summary header.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    resultsFile = os.path.join(self.modelTestDir, f"method_ai_operations_test_{timestamp}.json")
    payload = {
        "timestamp": timestamp,
        "summary": {
            "total": len(self.testResults),
            "successful": len(successfulTests),
            "failed": len(failedTests),
            "exceptions": len(exceptionTests)
        },
        "results": self.testResults
    }
    with open(resultsFile, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)

    print(f"\n📄 Results saved to: {resultsFile}")
|
|
||||||
|
async def main():
    """Entry point: initialize the tester and run all methodAI operation tests."""
    tester = MethodAiOperationsTester()

    await tester.initialize()
    await tester.testAllOperations()

    banner = "=" * 80
    print("\n" + banner)
    print("TESTING COMPLETED")
    print(banner)
|
|
||||||
|
# Script entry point: drive the async test run via the asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
||||||
|
|
||||||
|
|
@ -1,107 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Test script to demonstrate the new operation type rating system.
|
|
||||||
This shows how models are now sorted by their capability ratings for specific operation types.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
|
|
||||||
from modules.datamodels.datamodelAi import OperationTypeEnum, createOperationTypeRatings, AiCallOptions, PriorityEnum, ProcessingModeEnum
|
|
||||||
from modules.aicore.aicorePluginPerplexity import AiPerplexity
|
|
||||||
from modules.aicore.aicorePluginTavily import ConnectorWeb
|
|
||||||
from modules.aicore.aicorePluginAnthropic import AiAnthropic
|
|
||||||
from modules.aicore.aicorePluginOpenai import AiOpenai
|
|
||||||
from modules.aicore.aicorePluginInternal import AiInternal
|
|
||||||
from modules.aicore.aicoreModelSelector import ModelSelector
|
|
||||||
|
|
||||||
def testOperationTypeRatings():
    """Exercise the operation type rating system end to end.

    Collects the models advertised by every connector, asks the
    ModelSelector for a rating-sorted failover list per operation type,
    and prints the top candidates with their rating, speed, quality and
    cost. Finally demonstrates the createOperationTypeRatings helper.
    """
    print("🧪 Testing Operation Type Rating System")
    print("=" * 50)

    # Initialize connectors
    perplexity = AiPerplexity()
    tavily = ConnectorWeb()
    anthropic = AiAnthropic()
    openai = AiOpenai()
    internal = AiInternal()
    modelSelector = ModelSelector()

    # Gather every model from all configured connectors in one pass
    # (replaces the long `+`-chained concatenation of getModels() calls).
    connectors = (perplexity, tavily, anthropic, openai, internal)
    allModels = [model for connector in connectors for model in connector.getModels()]

    print(f"📊 Total models available: {len(allModels)}")
    print()

    # Operation types to exercise, paired with a human-readable label.
    testCases = [
        (OperationTypeEnum.WEB_RESEARCH, "Web Research"),
        (OperationTypeEnum.WEB_NEWS, "Web News"),
        (OperationTypeEnum.WEB_QUESTIONS, "Web Questions"),
        (OperationTypeEnum.WEB_SEARCH, "Web Search"),
        (OperationTypeEnum.DATA_ANALYSE, "Data Analysis tasks"),
        (OperationTypeEnum.DATA_GENERATE, "Data Generation tasks"),
        (OperationTypeEnum.DATA_EXTRACT, "Data Extraction tasks"),
        (OperationTypeEnum.PLAN, "Planning tasks")
    ]

    for operationType, description in testCases:
        print(f"🎯 Testing: {description} ({operationType.value})")
        print("-" * 40)

        # Create AI call options for this operation type.
        options = AiCallOptions(
            operationType=operationType,
            priority=PriorityEnum.BALANCED,
            processingMode=ProcessingModeEnum.BASIC
        )

        # Get failover model list (sorted by rating).
        failoverModels = modelSelector.getFailoverModelList(
            prompt="Test prompt",
            context="Test context",
            options=options,
            availableModels=allModels
        )

        if failoverModels:
            print(f"✅ Found {len(failoverModels)} suitable models:")
            for i, model in enumerate(failoverModels[:5]):  # Show top 5
                # Rating for this operation type; defaults to 0 when the
                # model does not list it. (Idiomatic next() replaces the
                # manual search loop; `otRating` follows the file's
                # camelCase convention, previously `ot_rating`.)
                rating = next(
                    (otRating.rating for otRating in model.operationTypes
                     if otRating.operationType == operationType),
                    0
                )

                print(f"   {i+1}. {model.displayName}")
                print(f"      Rating: {rating}/10 | Speed: {model.speedRating}/10 | Quality: {model.qualityRating}/10")
                print(f"      Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
        else:
            print("❌ No suitable models found")

        print()

    # Test the helper function
    print("🔧 Testing Helper Function")
    print("-" * 30)

    # Create operation type ratings using the helper.
    ratings = createOperationTypeRatings(
        (OperationTypeEnum.WEB_RESEARCH, 10),
        (OperationTypeEnum.WEB_NEWS, 8),
        (OperationTypeEnum.DATA_ANALYSE, 6)
    )

    print("Created ratings:")
    for rating in ratings:
        print(f"   {rating.operationType.value}: {rating.rating}/10")

    print()
    print("✅ All tests completed successfully!")
|
||||||
# Allow this module to be executed directly as a standalone demo script.
if __name__ == "__main__":
    testOperationTypeRatings()
|
|
||||||
Loading…
Reference in a new issue