Merge pull request #81 from valueonag/feat/coding-path
Feat/coding path
Commit: c61255c12e
49 changed files with 12431 additions and 4921 deletions
@@ -18,6 +18,9 @@ from modules.connectors.connectorDbPostgre import DatabaseConnector

logger = logging.getLogger(__name__)

# TODO TESTING: Override maxTokens for all models during testing
# Set to None to disable override, or set to an integer (e.g., 20000) to override all models
TESTING_MAX_TOKENS_OVERRIDE: Optional[int] = None  # TODO TESTING: Set to None to disable


class ModelRegistry:
    """Dynamic registry for AI models from all connectors."""
@@ -50,6 +53,12 @@ class ModelRegistry:
            logger.error(errorMsg)
            raise ValueError(errorMsg)

        # TODO TESTING: Override maxTokens if testing override is enabled
        if TESTING_MAX_TOKENS_OVERRIDE is not None and model.maxTokens > TESTING_MAX_TOKENS_OVERRIDE:
            originalMaxTokens = model.maxTokens
            model.maxTokens = TESTING_MAX_TOKENS_OVERRIDE
            logger.debug(f"TESTING: Overrode maxTokens for {model.displayName}: {originalMaxTokens} -> {TESTING_MAX_TOKENS_OVERRIDE}")

        # Use displayName as the key (must be unique)
        self._models[model.displayName] = model
        logger.debug(f"Registered model: {model.displayName} (name: {model.name}) from {connectorType}")
@@ -118,6 +127,12 @@ class ModelRegistry:
                logger.error(errorMsg)
                raise ValueError(errorMsg)

            # TODO TESTING: Override maxTokens if testing override is enabled
            if TESTING_MAX_TOKENS_OVERRIDE is not None and model.maxTokens > TESTING_MAX_TOKENS_OVERRIDE:
                originalMaxTokens = model.maxTokens
                model.maxTokens = TESTING_MAX_TOKENS_OVERRIDE
                logger.debug(f"TESTING: Overrode maxTokens for {model.displayName}: {originalMaxTokens} -> {TESTING_MAX_TOKENS_OVERRIDE}")

            # Use displayName as the key (must be unique)
            self._models[model.displayName] = model
        except Exception as e:
@@ -6,8 +6,6 @@ from enum import Enum

# Import ContentPart for runtime use (needed for Pydantic model rebuilding)
from modules.datamodels.datamodelExtraction import ContentPart
# Import JSON utilities for safe conversion
from modules.shared.jsonUtils import extractJsonString, tryParseJson, repairBrokenJson

# Operation Types
class OperationTypeEnum(str, Enum):
@@ -258,3 +256,70 @@ class JsonAccumulationState(BaseModel):
        description="KPI definitions with current values: [{id, description, jsonPath, targetValue, currentValue}, ...]"
    )


class ContinuationContext(BaseModel):
    """Pydantic model for continuation context information."""
    section_count: int
    delivered_summary: str
    template_structure: Optional[str] = None
    last_complete_part: Optional[str] = None
    incomplete_part: Optional[str] = None
    last_raw_json: Optional[str] = None
    overlap_context: Optional[str] = None  # From jsonContinuation.getContexts() - innermost element containing cut
    hierarchy_context: Optional[str] = None  # From jsonContinuation.getContexts() - full structure from root to cut


class JsonContinuationContexts(BaseModel):
    """
    Pydantic model for JSON continuation contexts.

    Contains contexts for truncated JSON strings:
    - overlapContext: The innermost object/array element containing the cut point (for merging)
    - hierarchyContext: Full structure from root to cut WITHOUT budget limitations (for internal use)
    - hierarchyContextForPrompt: Full structure from root to cut WITH budget limitations (for prompts)
    - completePart: Valid JSON with all structures properly closed
    - jsonParsingSuccess: True if completePart is valid parseable JSON
    """
    overlapContext: str = Field(description="The innermost object/array element containing the cut point (for merging)")
    hierarchyContext: str = Field(description="Full structure from root to cut WITHOUT budget limitations (for internal use)")
    hierarchyContextForPrompt: str = Field(description="Full structure from root to cut WITH budget limitations (for prompts)")
    completePart: str = Field(description="Valid JSON with all structures properly closed")
    jsonParsingSuccess: bool = Field(default=False, description="True if completePart is valid parseable JSON")


class SectionPromptArgs(BaseModel):
    """Type-safe arguments for section content prompt builder."""
    section: Dict[str, Any]
    contentParts: List[ContentPart]
    userPrompt: str
    generationHint: str
    allSections: List[Dict[str, Any]]
    sectionIndex: int
    isAggregation: bool
    language: str


class ChapterStructurePromptArgs(BaseModel):
    """Type-safe arguments for chapter structure prompt builder."""
    userPrompt: str
    contentParts: List[ContentPart] = Field(default_factory=list)
    outputFormat: str


class CodeContentPromptArgs(BaseModel):
    """Type-safe arguments for code content prompt builder."""
    filename: str
    fileType: str
    functions: List[Dict] = Field(default_factory=list)
    classes: List[Dict] = Field(default_factory=list)
    dependencies: List[str] = Field(default_factory=list)
    metadata: Dict[str, Any] = Field(default_factory=dict)
    userPrompt: str
    contentParts: List[ContentPart] = Field(default_factory=list)
    contextInfo: str = ""


class CodeStructurePromptArgs(BaseModel):
    """Type-safe arguments for code structure prompt builder."""
    userPrompt: str
    contentParts: List[ContentPart] = Field(default_factory=list)
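Since these prompt-argument and context models are plain Pydantic models, required fields fail fast at construction time. A minimal sketch of how they might be exercised, assuming the models above are importable (the literal values are hypothetical, not from the PR):

```python
from pydantic import ValidationError

# Build a continuation context; jsonParsingSuccess defaults to False.
contexts = JsonContinuationContexts(
    overlapContext='[" 27241", " 27253",',
    hierarchyContext='{"elements": [{"type": "table", "content": {"rows": [',
    hierarchyContextForPrompt='{"elements": [{"type": "table", ...',
    completePart='{"elements": []}',
)
assert contexts.jsonParsingSuccess is False

# Missing required fields raise immediately instead of failing later in a prompt builder.
try:
    ChapterStructurePromptArgs(userPrompt="Summarize the codebase")
except ValidationError as e:
    print(len(e.errors()), "validation error(s)")  # outputFormat is required
```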
@@ -89,6 +89,131 @@ async def getPermissions(
    )


@router.get("/permissions/all", response_model=Dict[str, Any])
@limiter.limit("30/minute")
async def getAllPermissions(
    request: Request,
    context: Optional[str] = Query(None, description="Context type: UI or RESOURCE (if not provided, returns both)"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """
    Get all RBAC permissions for the current user for UI and/or RESOURCE contexts.
    This endpoint is optimized for UI initialization to avoid multiple API calls.

    Query Parameters:
    - context: Optional context filter. If "UI", returns only UI permissions.
      If "RESOURCE", returns only RESOURCE permissions.
      If not provided, returns both UI and RESOURCE permissions.

    Returns:
    - Dictionary with structure:
      {
          "ui": {
              "item1": UserPermissions,
              "item2": UserPermissions,
              ...
          },
          "resource": {
              "item1": UserPermissions,
              "item2": UserPermissions,
              ...
          }
      }
      If context is specified, only that context is returned.

    Example:
    - GET /api/rbac/permissions/all
    - GET /api/rbac/permissions/all?context=UI
    - GET /api/rbac/permissions/all?context=RESOURCE
    """
    try:
        # Get interface and RBAC permissions
        interface = getInterface(currentUser)
        if not interface.rbac:
            raise HTTPException(
                status_code=500,
                detail="RBAC interface not available"
            )

        # Determine which contexts to fetch
        contextsToFetch = []
        if context:
            try:
                accessContext = AccessRuleContext(context.upper())
                if accessContext in [AccessRuleContext.UI, AccessRuleContext.RESOURCE]:
                    contextsToFetch = [accessContext]
                else:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Context '{context}' must be UI or RESOURCE for this endpoint"
                    )
            except ValueError:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid context '{context}'. Must be UI or RESOURCE"
                )
        else:
            # Return both UI and RESOURCE if no context specified
            contextsToFetch = [AccessRuleContext.UI, AccessRuleContext.RESOURCE]

        result: Dict[str, Any] = {}

        # Get all access rules for user's roles
        roleLabels = currentUser.roleLabels or []
        if not roleLabels:
            # User has no roles, return empty permissions
            for ctx in contextsToFetch:
                result[ctx.value.lower()] = {}
            return result

        # Get all access rules for user's roles and requested contexts
        allRules: Dict[AccessRuleContext, List[AccessRule]] = {}
        for ctx in contextsToFetch:
            allRules[ctx] = []
            # Get all rules for user's roles in this context
            for roleLabel in roleLabels:
                rules = interface.getAccessRules(
                    roleLabel=roleLabel,
                    context=ctx,
                    pagination=None
                )
                allRules[ctx].extend(rules)

        # Build result: for each context, collect all unique items and calculate permissions
        for ctx in contextsToFetch:
            result[ctx.value.lower()] = {}

            # Collect all unique items from rules
            items = set()
            for rule in allRules[ctx]:
                if rule.item:
                    items.add(rule.item)

            # For each item, calculate user permissions
            for item in sorted(items):
                permissions = interface.rbac.getUserPermissions(currentUser, ctx, item)
                # Only include if user has view permission
                if permissions.view:
                    result[ctx.value.lower()][item] = {
                        "view": permissions.view,
                        "read": permissions.read.value if permissions.read else None,
                        "create": permissions.create.value if permissions.create else None,
                        "update": permissions.update.value if permissions.update else None,
                        "delete": permissions.delete.value if permissions.delete else None
                    }

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting all RBAC permissions: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get all permissions: {str(e)}"
        )


@router.get("/rules", response_model=PaginatedResponse)
@limiter.limit("30/minute")
async def getAccessRules(
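Since the endpoint exists to let a UI fetch every permission in one round trip, a client-side sketch may help; this assumes an httpx client, a locally running server, and a bearer token (none of which are specified in the PR):

```python
import httpx

BASE_URL = "http://localhost:8000"             # assumption
headers = {"Authorization": "Bearer <token>"}  # assumption

with httpx.Client(base_url=BASE_URL, headers=headers) as client:
    # One call instead of one request per item
    resp = client.get("/api/rbac/permissions/all", params={"context": "UI"})
    resp.raise_for_status()
    permissions = resp.json()

    # Only items with view permission are returned, so membership
    # alone can drive UI visibility.
    uiItems = permissions.get("ui", {})
    if "dashboard" in uiItems:  # "dashboard" is a hypothetical item
        print("dashboard permissions:", uiItems["dashboard"])
```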
File diff suppressed because it is too large
@@ -1,376 +0,0 @@
# Parallel Processing Refactoring Concept

## Current State (Sequential)

### Chapter Sections Structure Generation (`_generateChapterSectionsStructure`)
- **Current**: Processes chapters sequentially, one after another
- **Flow**:
  1. Iterate through documents
  2. For each document, iterate through chapters
  3. For each chapter, generate sections structure using AI
  4. Update progress after each chapter

### Section Content Generation (`_fillChapterSections`)
- **Current**: Processes chapters sequentially, sections within each chapter sequentially
- **Flow**:
  1. Iterate through documents
  2. For each document, iterate through chapters
  3. For each chapter, iterate through sections
  4. For each section, generate content using AI
  5. Update progress after each section

## Desired State (Parallel)

### Chapter Sections Structure Generation
- **Target**: Process all chapters in parallel
- **Requirements**:
  - Maintain chapter order in final result
  - Each chapter can be processed independently
  - Progress updates should reflect parallel processing
  - Errors in one chapter should not stop others

### Section Content Generation
- **Target**: Process sections within each chapter in parallel
- **Requirements**:
  - Maintain section order within each chapter
  - Sections within a chapter can be processed independently
  - Chapters still processed sequentially (to maintain order)
  - Progress updates should reflect parallel processing
  - Errors in one section should not stop others

## Implementation Strategy

### Phase 1: Chapter Sections Structure Generation Parallelization

#### Step 1.1: Extract Single Chapter Processing
- **Create**: `_generateSingleChapterSectionsStructure()` method
- **Purpose**: Process one chapter independently
- **Parameters**:
  - `chapter`: Chapter dict
  - `chapterIndex`: Index for ordering
  - `chapterId`, `chapterLevel`, `chapterTitle`: Chapter metadata
  - `generationHint`: Generation instructions
  - `contentPartIds`, `contentPartInstructions`: Content part info
  - `contentParts`: Full content parts list
  - `userPrompt`: User's original prompt
  - `language`: Language for generation
  - `parentOperationId`: For progress logging
- **Returns**: None (modifies chapter dict in place)
- **Error Handling**: Logs errors, raises exception to be caught by caller

#### Step 1.2: Refactor Main Method
- **Modify**: `_generateChapterSectionsStructure()`
- **Changes**:
  1. Collect all chapters with their indices
  2. Create async tasks for each chapter using `_generateSingleChapterSectionsStructure`
  3. Use `asyncio.gather()` to execute all tasks in parallel
  4. Process results in order (using `zip` with original order)
  5. Handle errors per chapter (don't fail entire operation)
  6. Update progress after each chapter completes

#### Step 1.3: Progress Reporting
- **Maintain**: Overall progress tracking
- **Update**: Progress after each chapter completes (not sequentially)
- **Format**: "Chapter X/Y completed" or "Chapter X/Y error"

### Phase 2: Section Content Generation Parallelization

#### Step 2.1: Extract Single Section Processing
- **Create**: `_processSingleSection()` method
- **Purpose**: Process one section independently
- **Parameters**:
  - `section`: Section dict
  - `sectionIndex`: Index for ordering
  - `totalSections`: Total sections in chapter
  - `chapterIndex`: Chapter index
  - `totalChapters`: Total chapters
  - `chapterId`: Chapter ID
  - `chapterOperationId`: Chapter progress operation ID
  - `fillOperationId`: Overall fill operation ID
  - `contentParts`: Full content parts list
  - `userPrompt`: User's original prompt
  - `all_sections_list`: All sections for context
  - `language`: Language for generation
  - `calculateOverallProgress`: Function to calculate overall progress
- **Returns**: `List[Dict[str, Any]]` (elements for the section)
- **Error Handling**: Returns error element instead of raising (see the sketch below)
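Returning an error element instead of raising is what keeps one failed section from aborting the whole chapter once sections run under `asyncio.gather()`. A minimal sketch of the pattern (the element shape and `generateSectionElements` are illustrative, not the project's actual schema):

```python
from typing import Any, Dict, List

async def processSingleSectionSafe(section: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Turn failures into data so the caller never sees an exception."""
    try:
        return await generateSectionElements(section)  # hypothetical AI call
    except Exception as e:
        # The error element takes the section's slot, so ordering survives.
        return [{"type": "error", "sectionId": section.get("id"), "content": str(e)}]
```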

#### Step 2.2: Extract Section Processing Logic
- **Create**: Helper methods for different processing paths:
  - `_processSectionAggregation()`: Handle aggregation path (multiple parts)
  - `_processSectionGeneration()`: Handle generation without parts (only generationHint)
  - `_processSectionParts()`: Handle individual part processing
- **Purpose**: Keep logic organized and reusable

#### Step 2.3: Refactor Main Method
- **Modify**: `_fillChapterSections()`
- **Changes**:
  1. Keep sequential chapter processing (maintains order)
  2. For each chapter, collect all sections with indices
  3. Create async tasks for each section using `_processSingleSection`
  4. Use `asyncio.gather()` to execute all section tasks in parallel
  5. Process results in order (using `zip` with original order)
  6. Assign elements to sections in correct order
  7. Update progress after each section completes
  8. Handle errors per section (don't fail entire chapter)

#### Step 2.4: Progress Reporting
- **Maintain**: Hierarchical progress tracking
- **Update**:
  - Section progress: After each section completes
  - Chapter progress: After all sections in chapter complete
  - Overall progress: After each section/chapter completes
- **Format**: "Chapter X/Y, Section A/B completed"

## Key Considerations

### Order Preservation
- **Chapters**: Must maintain document order → process chapters sequentially
- **Sections**: Must maintain chapter order → process sections sequentially within chapter
- **Solution**: Use `asyncio.gather()` with ordered task list, then `zip` results with original order

### Error Handling
- **Chapters**: Error in one chapter should not stop others
- **Sections**: Error in one section should not stop others
- **Solution**: Use `return_exceptions=True` in `asyncio.gather()`, check `isinstance(result, Exception)` (see the sketch below)
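A minimal sketch of the gather-and-zip pattern described above (`processSingleChapter` is a stand-in for the extracted per-chapter method):

```python
import asyncio
from typing import Any, Dict, List

async def generateAllChapters(chapters: List[Dict[str, Any]]) -> None:
    # Tasks are created in document order.
    tasks = [processSingleChapter(chapter, index) for index, chapter in enumerate(chapters)]

    # return_exceptions=True: a failing chapter yields its exception as a
    # result instead of cancelling the sibling tasks.
    results = await asyncio.gather(*tasks, return_exceptions=True)

    # gather() returns results in task order, so zip restores the pairing.
    for chapter, result in zip(chapters, results):
        if isinstance(result, Exception):
            chapter["error"] = str(result)  # record, don't fail the operation
```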

### Progress Reporting
- **Challenge**: Progress updates happen out of order
- **Solution**: Update progress when each task completes, not sequentially (see the sketch below)
- **Format**: Show completed count, not sequential position
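One way to get completion-order progress while keeping input-order results is a counting wrapper around each task; a sketch with illustrative names:

```python
import asyncio

async def withProgress(coro, counter: dict, total: int):
    """Await the task, then report how many tasks have finished so far."""
    result = await coro
    counter["done"] += 1
    # Completed count, not positional index: tasks finish out of order.
    print(f"{counter['done']}/{total} completed")
    return result

async def runWithProgress(coros: list):
    counter = {"done": 0}
    wrapped = [withProgress(c, counter, len(coros)) for c in coros]
    # Results still come back in input order.
    return await asyncio.gather(*wrapped, return_exceptions=True)
```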

### Shared State
- **Chapters**: Modify chapter dicts in place (safe, each chapter is independent)
- **Sections**: Return elements, assign to sections in order (safe, each section is independent)
- **Content Parts**: Read-only, passed to all tasks (safe)

### Dependencies
- **Chapters**: No dependencies between chapters
- **Sections**: No dependencies between sections (each is self-contained)
- **Solution**: All tasks can run truly in parallel

## Implementation Steps

### Step 1: Clean Current Code
1. Ensure current sequential implementation is correct
2. Fix any existing bugs
3. Verify all tests pass

### Step 2: Implement Chapter Parallelization
1. Create `_generateSingleChapterSectionsStructure()` method
2. Extract chapter processing logic
3. Refactor `_generateChapterSectionsStructure()` to use parallel processing
4. Test with single chapter
5. Test with multiple chapters
6. Verify order preservation
7. Verify error handling

### Step 3: Implement Section Parallelization
1. Create `_processSingleSection()` method
2. Extract section processing logic into helper methods
3. Refactor `_fillChapterSections()` to use parallel processing for sections
4. Test with single section
5. Test with multiple sections
6. Test with multiple chapters
7. Verify order preservation
8. Verify error handling

### Step 4: Testing & Validation
1. Test with various document structures
2. Test error scenarios
3. Verify progress reporting accuracy
4. Performance testing (compare sequential vs parallel)
5. Verify final output order matches input order

## Code Structure

### New Methods to Create
```python
async def _generateSingleChapterSectionsStructure(
    self,
    chapter: Dict[str, Any],
    chapterIndex: int,
    chapterId: str,
    chapterLevel: int,
    chapterTitle: str,
    generationHint: str,
    contentPartIds: List[str],
    contentPartInstructions: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    language: str,
    parentOperationId: str
) -> None:
    """Generate sections structure for a single chapter (used for parallel processing)."""
    # Extract logic from current sequential loop
    # Modify chapter dict in place
    # Handle errors internally, raise if critical
    ...

async def _processSingleSection(
    self,
    section: Dict[str, Any],
    sectionIndex: int,
    totalSections: int,
    chapterIndex: int,
    totalChapters: int,
    chapterId: str,
    chapterOperationId: str,
    fillOperationId: str,
    contentParts: List[ContentPart],
    userPrompt: str,
    all_sections_list: List[Dict[str, Any]],
    language: str,
    calculateOverallProgress: Callable
) -> List[Dict[str, Any]]:
    """Process a single section and return its elements."""
    # Extract logic from current sequential loop
    # Return elements list
    # Return error element on failure (don't raise)
    ...

async def _processSectionAggregation(
    self,
    section: Dict[str, Any],
    sectionId: str,
    sectionTitle: str,
    sectionIndex: int,
    totalSections: int,
    chapterId: str,
    chapterOperationId: str,
    fillOperationId: str,
    contentPartIds: List[str],
    contentFormats: Dict[str, str],
    contentParts: List[ContentPart],
    userPrompt: str,
    generationHint: str,
    all_sections_list: List[Dict[str, Any]],
    language: str
) -> List[Dict[str, Any]]:
    """Process section with aggregation (multiple parts together)."""
    # Extract aggregation logic
    # Return elements list
    ...

async def _processSectionGeneration(
    self,
    section: Dict[str, Any],
    sectionId: str,
    sectionTitle: str,
    sectionIndex: int,
    totalSections: int,
    chapterId: str,
    chapterOperationId: str,
    fillOperationId: str,
    contentType: str,
    userPrompt: str,
    generationHint: str,
    all_sections_list: List[Dict[str, Any]],
    language: str
) -> List[Dict[str, Any]]:
    """Process section generation without content parts (only generationHint)."""
    # Extract generation logic
    # Return elements list
    ...

async def _processSectionParts(
    self,
    section: Dict[str, Any],
    sectionId: str,
    sectionTitle: str,
    sectionIndex: int,
    totalSections: int,
    chapterId: str,
    chapterOperationId: str,
    fillOperationId: str,
    contentPartIds: List[str],
    contentFormats: Dict[str, str],
    contentParts: List[ContentPart],
    contentType: str,
    useAiCall: bool,
    generationHint: str,
    userPrompt: str,
    all_sections_list: List[Dict[str, Any]],
    language: str
) -> List[Dict[str, Any]]:
    """Process individual parts in a section."""
    # Extract individual part processing logic
    # Return elements list
    ...
```

### Modified Methods

```python
async def _generateChapterSectionsStructure(
    self,
    chapterStructure: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    parentOperationId: str
) -> Dict[str, Any]:
    """Generate sections structure for all chapters in parallel."""
    # Collect chapters with indices
    # Create tasks
    # Execute in parallel
    # Process results in order
    # Update progress
    ...

async def _fillChapterSections(
    self,
    chapterStructure: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    fillOperationId: str
) -> Dict[str, Any]:
    """Fill sections with content, processing sections in parallel within each chapter."""
    # Process chapters sequentially
    # For each chapter, process sections in parallel
    # Maintain order
    # Update progress
    ...
```

## Testing Strategy

### Unit Tests
1. Test `_generateSingleChapterSectionsStructure` independently
2. Test `_processSingleSection` independently
3. Test helper methods independently

### Integration Tests
1. Test parallel chapter processing with multiple chapters
2. Test parallel section processing with multiple sections
3. Test error handling (one chapter/section fails)
4. Test order preservation (see the sketch below)
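A minimal order-preservation test using pytest-asyncio (an assumed dev dependency) and stand-in coroutines rather than the real methods:

```python
import asyncio
import pytest

async def fakeSection(index: int) -> int:
    # Later sections finish first, so completion order differs from input order.
    await asyncio.sleep(0.01 * (5 - index))
    return index

@pytest.mark.asyncio
async def test_gather_preserves_input_order():
    results = await asyncio.gather(*(fakeSection(i) for i in range(5)))
    assert results == [0, 1, 2, 3, 4]
```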

### Performance Tests
1. Measure sequential vs parallel execution time
2. Verify parallel processing is faster
3. Check resource usage (memory, CPU)

## Risk Mitigation

### Risks
1. **Order not preserved**: Use `zip` with original order
2. **Race conditions**: No shared mutable state between tasks
3. **Progress reporting incorrect**: Update progress when tasks complete
4. **Errors not handled**: Use `return_exceptions=True` and check results
5. **Performance degradation**: Test and measure, fall back to sequential if needed

### Safety Measures
1. Keep sequential implementation as fallback (commented out)
2. Add feature flag to enable/disable parallel processing (sketched below)
3. Extensive logging for debugging
4. Gradual rollout (test with small datasets first)
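The feature flag in item 2 can be as small as an environment switch; a sketch (the variable name and `processSingleSection` are assumptions):

```python
import asyncio
import os

# Hypothetical flag; defaults to the safer sequential path.
PARALLEL_SECTIONS_ENABLED = os.getenv("PARALLEL_SECTIONS_ENABLED", "false").lower() == "true"

async def fillSections(sections: list) -> list:
    if PARALLEL_SECTIONS_ENABLED:
        return await asyncio.gather(*(processSingleSection(s) for s in sections))
    # Sequential fallback
    return [await processSingleSection(s) for s in sections]
```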

## Migration Path

1. **Phase 1**: Implement chapter parallelization, test thoroughly
2. **Phase 2**: Implement section parallelization, test thoroughly
3. **Phase 3**: Enable both in production with monitoring
4. **Phase 4**: Remove sequential fallback code (if stable)

## Notes

- All async methods must use `await` correctly
- Progress updates happen asynchronously (may appear out of order in logs)
- Final result order is guaranteed by processing results in order
- Error handling is per-task, not global
- No shared mutable state between parallel tasks (read-only contentParts, independent chapter/section dicts)
@@ -1,78 +0,0 @@
# Module Structure - serviceAi

## Overview

The `mainServiceAi.py` module was split into several submodules to improve clarity.

## Module Structure

### Main Module
- **mainServiceAi.py** (~800 lines)
  - Initialization (`__init__`, `create`, `ensureAiObjectsInitialized`)
  - Public API (`callAiPlanning`, `callAiContent`)
  - Routing to submodules
  - Helper methods

### Submodules

1. **subJsonResponseHandling.py** (already existing)
   - JSON response merging
   - Section merging
   - Fragment detection

2. **subResponseParsing.py** (~200 lines)
   - `ResponseParser.extractSectionsFromResponse()` - extracts sections from AI responses
   - `ResponseParser.shouldContinueGeneration()` - decides whether generation should continue
   - `ResponseParser._isStuckInLoop()` - loop detection
   - `ResponseParser.extractDocumentMetadata()` - extracts metadata
   - `ResponseParser.buildFinalResultFromSections()` - builds the final JSON

3. **subDocumentIntents.py** (~300 lines)
   - `DocumentIntentAnalyzer.clarifyDocumentIntents()` - analyzes document intents
   - `DocumentIntentAnalyzer.resolvePreExtractedDocument()` - resolves pre-extracted documents
   - `DocumentIntentAnalyzer._buildIntentAnalysisPrompt()` - builds the intent analysis prompt

4. **subContentExtraction.py** (~600 lines)
   - `ContentExtractor.extractAndPrepareContent()` - extracts and prepares content
   - `ContentExtractor.extractTextFromImage()` - vision AI for images
   - `ContentExtractor.processTextContentWithAi()` - AI processing of text
   - `ContentExtractor._isBinary()` - helper for binary checks

5. **subStructureGeneration.py** (~200 lines)
   - `StructureGenerator.generateStructure()` - generates the document structure
   - `StructureGenerator._buildStructurePrompt()` - builds the structure prompt

6. **subStructureFilling.py** (~400 lines)
   - `StructureFiller.fillStructure()` - fills the structure with content
   - `StructureFiller._buildSectionGenerationPrompt()` - builds the section generation prompt
   - `StructureFiller._findContentPartById()` - helper for ContentPart lookup
   - `StructureFiller._needsAggregation()` - decides whether aggregation is needed

7. **subAiCallLooping.py** (~400 lines)
   - `AiCallLooper.callAiWithLooping()` - main looping logic
   - `AiCallLooper._defineKpisFromPrompt()` - KPI definition

## Usage

All submodules are used through the main `AiService` module:

```python
# Initialization
aiService = await AiService.create(serviceCenter)

# Submodules are initialized automatically
# aiService.responseParser
# aiService.intentAnalyzer
# aiService.contentExtractor
# etc.
```

## Migration

The public API remains unchanged. Internal methods were moved into submodules:

- `_extractSectionsFromResponse` → `responseParser.extractSectionsFromResponse`
- `_clarifyDocumentIntents` → `intentAnalyzer.clarifyDocumentIntents`
- `_extractAndPrepareContent` → `contentExtractor.extractAndPrepareContent`
- etc.
@@ -222,18 +222,6 @@ Respond with ONLY a JSON object in this exact format:
            prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts, useCaseId
        )

    async def _defineKpisFromPrompt(
        self,
        userPrompt: str,
        rawJsonString: Optional[str],
        continuationContext: Dict[str, Any],
        debugPrefix: str = "kpi"
    ) -> List[Dict[str, Any]]:
        """Delegate to AiCallLooper."""
        return await self.aiCallLooper._defineKpisFromPrompt(
            userPrompt, rawJsonString, continuationContext, debugPrefix
        )

    # JSON merging logic moved to subJsonResponseHandling.py

    def _extractSectionsFromResponse(
modules/services/serviceAi/merge_1.txt (new file, 661 lines)

@@ -0,0 +1,661 @@
================================================================================
JSON MERGE OPERATION #1
================================================================================
Timestamp: 2026-01-06T20:08:23.213372

INPUT:
Accumulated length: 33682 chars
New Fragment length: 27012 chars
Accumulated: 306 lines (showing first 5 and last 5)
{
  "elements": [
    {
      "type": "table",
      "content": {
... (296 lines omitted) ...
[" 26821", " 26833", " 26839", " 26849", " 26861", " 26863", " 26879", " 26881", " 26891", " 26893"],
[" 26903", " 26921", " 26927", " 26947", " 26951", " 26953", " 26959", " 26981", " 26987", " 26993"],
[" 27011", " 27017", " 27031", " 27043", " 27059", " 27061", " 27067", " 27073", " 27077", " 27091"],
[" 27103", " 27107", " 27109", " 27127", " 27143", " 27179", " 27191", " 27197", " 27211", " 27239"],
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",

New Fragment: 248 lines (showing first 5 and last 5)
```json
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283", " 27299", " 27329", " 27337"],
[" 27361", " 27367", " 27397", " 27407", " 27409", " 27427", " 27431", " 27437", " 27449", " 27457"],
[" 27479", " 27481", " 27487", " 27509", " 27527", " 27529", " 27539", " 27541", " 27551", " 27581"],
[" 27583", " 27611", " 27617", " 27631", " 27647", " 27653", " 27673", " 27689", " 27691", " 27697"],
... (238 lines omitted) ...
      }
    }
  ]
}
```

Normalized Accumulated (33682 chars)
(showing first 5 and last 5 of 306 lines)
{
  "elements": [
    {
      "type": "table",
      "content": {
... (296 lines omitted) ...
[" 26821", " 26833", " 26839", " 26849", " 26861", " 26863", " 26879", " 26881", " 26891", " 26893"],
[" 26903", " 26921", " 26927", " 26947", " 26951", " 26953", " 26959", " 26981", " 26987", " 26993"],
[" 27011", " 27017", " 27031", " 27043", " 27059", " 27061", " 27067", " 27073", " 27077", " 27091"],
[" 27103", " 27107", " 27109", " 27127", " 27143", " 27179", " 27191", " 27197", " 27211", " 27239"],
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",

Normalized New Fragment (27000 chars)
(showing first 5 and last 5 of 246 lines)
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283", " 27299", " 27329", " 27337"],
[" 27361", " 27367", " 27397", " 27407", " 27409", " 27427", " 27431", " 27437", " 27449", " 27457"],
[" 27479", " 27481", " 27487", " 27509", " 27527", " 27529", " 27539", " 27541", " 27551", " 27581"],
[" 27583", " 27611", " 27617", " 27631", " 27647", " 27653", " 27673", " 27689", " 27691", " 27697"],
[" 27701", " 27733", " 27737", " 27739", " 27743", " 27749", " 27751", " 27763", " 27767", " 27773"],
... (236 lines omitted) ...
        ]
      }
    }
  ]
}
STEP: PHASE 1
Description: Finding overlap between JSON strings
⏳ In progress...

Overlap Detection (string (exact)):
Overlap length: 70
✅ Found overlap of 70 chars
Accumulated suffix (COMPLETE, 70 chars):
============================================================================
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
============================================================================
Fragment prefix (70 chars, 1 lines)
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",

Overlap found (70 chars):
Accumulated suffix: [" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
Fragment prefix: [" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
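PHASE 1 and PHASE 2 amount to suffix/prefix matching followed by concatenation. A minimal re-implementation sketch (not the project's actual jsonContinuation code):

```python
def findOverlap(accumulated: str, fragment: str, maxLen: int = 2000) -> int:
    """Length of the longest suffix of `accumulated` that is a prefix of `fragment`."""
    limit = min(len(accumulated), len(fragment), maxLen)
    for size in range(limit, 0, -1):
        if accumulated.endswith(fragment[:size]):
            return size
    return 0

def mergeFragments(accumulated: str, fragment: str) -> str:
    """Append the fragment, dropping its overlapping prefix once."""
    overlap = findOverlap(accumulated, fragment)
    return accumulated + fragment[overlap:]

# In the log above this finds the 70-char row prefix and removes the duplicate.
```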
STEP: PHASE 2
Description: Merging strings (overlap: 70 chars)
⏳ In progress...

Merged String (60612 chars)
(showing first 5 and last 5 of 551 lines)
{
  "elements": [
    {
      "type": "table",
      "content": {
... (541 lines omitted) ...
        ]
      }
    }
  ]
}

STEP: PHASE 3
Description: Returning merged string (may be unclosed)
⏳ In progress...

Returning merged string (preserving incomplete element at end for next iteration)

================================================================================
MERGE RESULT: ✅ SUCCESS
================================================================================
Final result length: 60612 chars
Final result (COMPLETE):
================================================================================
{
  "elements": [
    {
      "type": "table",
      "content": {
"headers": ["Spalte 1", "Spalte 2", "Spalte 3", "Spalte 4", "Spalte 5", "Spalte 6", "Spalte 7", "Spalte 8", "Spalte 9", "Spalte 10"],
|
||||
"rows": [
|
||||
[" 2", " 3", " 5", " 7", " 11", " 13", " 17", " 19", " 23", " 29"],
|
||||
[" 31", " 37", " 41", " 43", " 47", " 53", " 59", " 61", " 67", " 71"],
|
||||
[" 73", " 79", " 83", " 89", " 97", " 101", " 103", " 107", " 109", " 113"],
|
||||
[" 127", " 131", " 137", " 139", " 149", " 151", " 157", " 163", " 167", " 173"],
|
||||
[" 179", " 181", " 191", " 193", " 197", " 199", " 211", " 223", " 227", " 229"],
|
||||
[" 233", " 239", " 241", " 251", " 257", " 263", " 269", " 271", " 277", " 281"],
|
||||
[" 283", " 293", " 307", " 311", " 313", " 317", " 331", " 337", " 347", " 349"],
|
||||
[" 353", " 359", " 367", " 373", " 379", " 383", " 389", " 397", " 401", " 409"],
|
||||
[" 419", " 421", " 431", " 433", " 439", " 443", " 449", " 457", " 461", " 463"],
|
||||
[" 467", " 479", " 487", " 491", " 499", " 503", " 509", " 521", " 523", " 541"],
|
||||
[" 547", " 557", " 563", " 569", " 571", " 577", " 587", " 593", " 599", " 601"],
|
||||
[" 607", " 613", " 617", " 619", " 631", " 641", " 643", " 647", " 653", " 659"],
|
||||
[" 661", " 673", " 677", " 683", " 691", " 701", " 709", " 719", " 727", " 733"],
|
||||
[" 739", " 743", " 751", " 757", " 761", " 769", " 773", " 787", " 797", " 809"],
|
||||
[" 811", " 821", " 823", " 827", " 829", " 839", " 853", " 857", " 859", " 863"],
|
||||
[" 877", " 881", " 883", " 887", " 907", " 911", " 919", " 929", " 937", " 941"],
|
||||
[" 947", " 953", " 967", " 971", " 977", " 983", " 991", " 997", " 1009", " 1013"],
|
||||
[" 1019", " 1021", " 1031", " 1033", " 1039", " 1049", " 1051", " 1061", " 1063", " 1069"],
|
||||
[" 1087", " 1091", " 1093", " 1097", " 1103", " 1109", " 1117", " 1123", " 1129", " 1151"],
|
||||
[" 1153", " 1163", " 1171", " 1181", " 1187", " 1193", " 1201", " 1213", " 1217", " 1223"],
|
||||
[" 1229", " 1231", " 1237", " 1249", " 1259", " 1277", " 1279", " 1283", " 1289", " 1291"],
|
||||
[" 1297", " 1301", " 1303", " 1307", " 1319", " 1321", " 1327", " 1361", " 1367", " 1373"],
|
||||
[" 1381", " 1399", " 1409", " 1423", " 1427", " 1429", " 1433", " 1439", " 1447", " 1451"],
|
||||
[" 1453", " 1459", " 1471", " 1481", " 1483", " 1487", " 1489", " 1493", " 1499", " 1511"],
|
||||
[" 1523", " 1531", " 1543", " 1549", " 1553", " 1559", " 1567", " 1571", " 1579", " 1583"],
|
||||
[" 1597", " 1601", " 1607", " 1609", " 1613", " 1619", " 1621", " 1627", " 1637", " 1657"],
|
||||
[" 1663", " 1667", " 1669", " 1693", " 1697", " 1699", " 1709", " 1721", " 1723", " 1733"],
|
||||
[" 1741", " 1747", " 1753", " 1759", " 1777", " 1783", " 1787", " 1789", " 1801", " 1811"],
|
||||
[" 1823", " 1831", " 1847", " 1861", " 1867", " 1871", " 1873", " 1877", " 1879", " 1889"],
|
||||
[" 1901", " 1907", " 1913", " 1931", " 1933", " 1949", " 1951", " 1973", " 1979", " 1987"],
|
||||
[" 1993", " 1997", " 1999", " 2003", " 2011", " 2017", " 2027", " 2029", " 2039", " 2053"],
|
||||
[" 2063", " 2069", " 2081", " 2083", " 2087", " 2089", " 2099", " 2111", " 2113", " 2129"],
|
||||
[" 2131", " 2137", " 2141", " 2143", " 2153", " 2161", " 2179", " 2203", " 2207", " 2213"],
|
||||
[" 2221", " 2237", " 2239", " 2243", " 2251", " 2267", " 2269", " 2273", " 2281", " 2287"],
|
||||
[" 2293", " 2297", " 2309", " 2311", " 2333", " 2339", " 2341", " 2347", " 2351", " 2357"],
|
||||
[" 2371", " 2377", " 2381", " 2383", " 2389", " 2393", " 2399", " 2411", " 2417", " 2423"],
|
||||
[" 2437", " 2441", " 2447", " 2459", " 2467", " 2473", " 2477", " 2503", " 2521", " 2531"],
|
||||
[" 2539", " 2543", " 2549", " 2551", " 2557", " 2579", " 2591", " 2593", " 2609", " 2617"],
|
||||
[" 2621", " 2633", " 2647", " 2657", " 2659", " 2663", " 2671", " 2677", " 2683", " 2687"],
|
||||
[" 2689", " 2693", " 2699", " 2707", " 2711", " 2713", " 2719", " 2729", " 2731", " 2741"],
|
||||
[" 2749", " 2753", " 2767", " 2777", " 2789", " 2791", " 2797", " 2801", " 2803", " 2819"],
|
||||
[" 2833", " 2837", " 2843", " 2851", " 2857", " 2861", " 2879", " 2887", " 2897", " 2903"],
|
||||
[" 2909", " 2917", " 2927", " 2939", " 2953", " 2957", " 2963", " 2969", " 2971", " 2999"],
|
||||
[" 3001", " 3011", " 3019", " 3023", " 3037", " 3041", " 3049", " 3061", " 3067", " 3079"],
|
||||
[" 3083", " 3089", " 3109", " 3119", " 3121", " 3137", " 3163", " 3167", " 3169", " 3181"],
|
||||
[" 3187", " 3191", " 3203", " 3209", " 3217", " 3221", " 3229", " 3251", " 3253", " 3257"],
|
||||
[" 3259", " 3271", " 3299", " 3301", " 3307", " 3313", " 3319", " 3323", " 3329", " 3331"],
|
||||
[" 3343", " 3347", " 3359", " 3361", " 3371", " 3373", " 3389", " 3391", " 3407", " 3413"],
|
||||
[" 3433", " 3449", " 3457", " 3461", " 3463", " 3467", " 3469", " 3491", " 3499", " 3511"],
|
||||
[" 3517", " 3527", " 3529", " 3533", " 3539", " 3541", " 3547", " 3557", " 3559", " 3571"],
|
||||
[" 3581", " 3583", " 3593", " 3607", " 3613", " 3617", " 3623", " 3631", " 3637", " 3643"],
|
||||
[" 3659", " 3671", " 3673", " 3677", " 3691", " 3697", " 3701", " 3709", " 3719", " 3727"],
|
||||
[" 3733", " 3739", " 3761", " 3767", " 3769", " 3779", " 3793", " 3797", " 3803", " 3821"],
|
||||
[" 3823", " 3833", " 3847", " 3851", " 3853", " 3863", " 3877", " 3881", " 3889", " 3907"],
|
||||
[" 3911", " 3917", " 3919", " 3923", " 3929", " 3931", " 3943", " 3947", " 3967", " 3989"],
|
||||
[" 4001", " 4003", " 4007", " 4013", " 4019", " 4021", " 4027", " 4049", " 4051", " 4057"],
|
||||
[" 4073", " 4079", " 4091", " 4093", " 4099", " 4111", " 4127", " 4129", " 4133", " 4139"],
|
||||
[" 4153", " 4157", " 4159", " 4177", " 4201", " 4211", " 4217", " 4219", " 4229", " 4231"],
|
||||
[" 4241", " 4243", " 4253", " 4259", " 4261", " 4271", " 4273", " 4283", " 4289", " 4297"],
|
||||
[" 4327", " 4337", " 4339", " 4349", " 4357", " 4363", " 4373", " 4391", " 4397", " 4409"],
|
||||
[" 4421", " 4423", " 4441", " 4447", " 4451", " 4457", " 4463", " 4481", " 4483", " 4493"],
|
||||
[" 4507", " 4513", " 4517", " 4519", " 4523", " 4547", " 4549", " 4561", " 4567", " 4583"],
|
||||
[" 4591", " 4597", " 4603", " 4621", " 4637", " 4639", " 4643", " 4649", " 4651", " 4657"],
|
||||
[" 4663", " 4673", " 4679", " 4691", " 4703", " 4721", " 4723", " 4729", " 4733", " 4751"],
|
||||
[" 4759", " 4783", " 4787", " 4789", " 4793", " 4799", " 4801", " 4813", " 4817", " 4831"],
|
||||
[" 4861", " 4871", " 4877", " 4889", " 4903", " 4909", " 4919", " 4931", " 4933", " 4937"],
|
||||
[" 4943", " 4951", " 4957", " 4967", " 4969", " 4973", " 4987", " 4993", " 4999", " 5003"],
|
||||
[" 5009", " 5011", " 5021", " 5023", " 5039", " 5051", " 5059", " 5077", " 5081", " 5087"],
|
||||
[" 5099", " 5101", " 5107", " 5113", " 5119", " 5147", " 5153", " 5167", " 5171", " 5179"],
|
||||
[" 5189", " 5197", " 5209", " 5227", " 5231", " 5233", " 5237", " 5261", " 5273", " 5279"],
|
||||
[" 5281", " 5297", " 5303", " 5309", " 5323", " 5333", " 5347", " 5351", " 5381", " 5387"],
|
||||
[" 5393", " 5399", " 5407", " 5413", " 5417", " 5419", " 5431", " 5437", " 5441", " 5443"],
|
||||
[" 5449", " 5471", " 5477", " 5479", " 5483", " 5501", " 5503", " 5507", " 5519", " 5521"],
|
||||
[" 5527", " 5531", " 5557", " 5563", " 5569", " 5573", " 5581", " 5591", " 5623", " 5639"],
|
||||
[" 5641", " 5647", " 5651", " 5653", " 5657", " 5659", " 5669", " 5683", " 5689", " 5693"],
|
||||
[" 5701", " 5711", " 5717", " 5737", " 5741", " 5743", " 5749", " 5779", " 5783", " 5791"],
|
||||
[" 5801", " 5807", " 5813", " 5821", " 5827", " 5839", " 5843", " 5849", " 5851", " 5857"],
|
||||
[" 5861", " 5867", " 5869", " 5879", " 5881", " 5897", " 5903", " 5923", " 5927", " 5939"],
|
||||
[" 5953", " 5981", " 5987", " 6007", " 6011", " 6029", " 6037", " 6043", " 6047", " 6053"],
|
||||
[" 6067", " 6073", " 6079", " 6089", " 6091", " 6101", " 6113", " 6121", " 6131", " 6133"],
|
||||
[" 6143", " 6151", " 6163", " 6173", " 6197", " 6199", " 6203", " 6211", " 6217", " 6221"],
|
||||
[" 6229", " 6247", " 6257", " 6263", " 6269", " 6271", " 6277", " 6287", " 6299", " 6301"],
|
||||
[" 6311", " 6317", " 6323", " 6329", " 6337", " 6343", " 6353", " 6359", " 6361", " 6367"],
|
||||
[" 6373", " 6379", " 6389", " 6397", " 6421", " 6427", " 6449", " 6451", " 6469", " 6473"],
|
||||
[" 6481", " 6491", " 6521", " 6529", " 6547", " 6551", " 6553", " 6563", " 6569", " 6571"],
|
||||
[" 6577", " 6581", " 6599", " 6607", " 6619", " 6637", " 6653", " 6659", " 6661", " 6673"],
|
||||
[" 6679", " 6689", " 6691", " 6701", " 6703", " 6709", " 6719", " 6733", " 6737", " 6761"],
|
||||
[" 6763", " 6779", " 6781", " 6791", " 6793", " 6803", " 6823", " 6827", " 6829", " 6833"],
|
||||
[" 6841", " 6857", " 6863", " 6869", " 6871", " 6883", " 6899", " 6907", " 6911", " 6917"],
|
||||
[" 6947", " 6949", " 6959", " 6961", " 6967", " 6971", " 6977", " 6983", " 6991", " 6997"],
|
||||
[" 7001", " 7013", " 7019", " 7027", " 7039", " 7043", " 7057", " 7069", " 7079", " 7103"],
|
||||
[" 7109", " 7121", " 7127", " 7129", " 7151", " 7159", " 7177", " 7187", " 7193", " 7207"],
|
||||
[" 7211", " 7213", " 7219", " 7229", " 7237", " 7243", " 7247", " 7253", " 7283", " 7297"],
|
||||
[" 7307", " 7309", " 7321", " 7331", " 7333", " 7349", " 7351", " 7369", " 7393", " 7411"],
|
||||
[" 7417", " 7433", " 7451", " 7457", " 7459", " 7477", " 7481", " 7487", " 7489", " 7499"],
|
||||
[" 7507", " 7517", " 7523", " 7529", " 7537", " 7541", " 7547", " 7549", " 7559", " 7561"],
|
||||
[" 7573", " 7577", " 7583", " 7589", " 7591", " 7603", " 7607", " 7621", " 7639", " 7643"],
|
||||
[" 7649", " 7669", " 7673", " 7681", " 7687", " 7691", " 7699", " 7703", " 7717", " 7723"],
|
||||
[" 7727", " 7741", " 7753", " 7757", " 7759", " 7789", " 7793", " 7817", " 7823", " 7829"],
|
||||
[" 7841", " 7853", " 7867", " 7873", " 7877", " 7879", " 7883", " 7901", " 7907", " 7919"],
|
||||
[" 7927", " 7933", " 7937", " 7949", " 7951", " 7963", " 7993", " 8009", " 8011", " 8017"],
|
||||
[" 8039", " 8053", " 8059", " 8069", " 8081", " 8087", " 8089", " 8093", " 8101", " 8111"],
|
||||
[" 8117", " 8123", " 8147", " 8161", " 8167", " 8171", " 8179", " 8191", " 8209", " 8219"],
|
||||
[" 8221", " 8231", " 8233", " 8237", " 8243", " 8263", " 8269", " 8273", " 8287", " 8291"],
|
||||
[" 8293", " 8297", " 8311", " 8317", " 8329", " 8353", " 8363", " 8369", " 8377", " 8387"],
|
||||
[" 8389", " 8419", " 8423", " 8429", " 8431", " 8443", " 8447", " 8461", " 8467", " 8501"],
|
||||
[" 8513", " 8521", " 8527", " 8537", " 8539", " 8543", " 8563", " 8573", " 8581", " 8597"],
|
||||
[" 8599", " 8609", " 8623", " 8627", " 8629", " 8641", " 8647", " 8663", " 8669", " 8677"],
|
||||
[" 8681", " 8689", " 8693", " 8699", " 8707", " 8713", " 8719", " 8731", " 8737", " 8741"],
|
||||
[" 8747", " 8753", " 8761", " 8779", " 8783", " 8803", " 8807", " 8819", " 8821", " 8831"],
|
||||
[" 8837", " 8839", " 8849", " 8861", " 8863", " 8867", " 8887", " 8893", " 8923", " 8929"],
|
||||
[" 8933", " 8941", " 8951", " 8963", " 8969", " 8971", " 8999", " 9001", " 9007", " 9011"],
|
||||
[" 9013", " 9029", " 9041", " 9043", " 9049", " 9059", " 9067", " 9091", " 9103", " 9109"],
|
||||
[" 9127", " 9133", " 9137", " 9151", " 9157", " 9161", " 9173", " 9181", " 9187", " 9199"],
|
||||
[" 9203", " 9209", " 9221", " 9227", " 9239", " 9241", " 9257", " 9277", " 9281", " 9283"],
|
||||
[" 9293", " 9311", " 9319", " 9323", " 9337", " 9341", " 9343", " 9349", " 9371", " 9377"],
|
||||
[" 9391", " 9397", " 9403", " 9413", " 9419", " 9421", " 9431", " 9433", " 9437", " 9439"],
|
||||
[" 9461", " 9463", " 9467", " 9473", " 9479", " 9491", " 9497", " 9511", " 9521", " 9533"],
|
||||
[" 9539", " 9547", " 9551", " 9587", " 9601", " 9613", " 9619", " 9623", " 9629", " 9631"],
|
||||
[" 9643", " 9649", " 9661", " 9677", " 9679", " 9689", " 9697", " 9719", " 9721", " 9733"],
|
||||
[" 9739", " 9743", " 9749", " 9767", " 9769", " 9781", " 9787", " 9791", " 9803", " 9811"],
|
||||
[" 9817", " 9829", " 9833", " 9839", " 9851", " 9857", " 9859", " 9871", " 9883", " 9887"],
|
||||
[" 9901", " 9907", " 9923", " 9929", " 9931", " 9941", " 9949", " 9967", " 9973", " 10007"],
|
||||
[" 10009", " 10037", " 10039", " 10061", " 10067", " 10069", " 10079", " 10091", " 10093", " 10099"],
|
||||
[" 10103", " 10111", " 10133", " 10139", " 10141", " 10151", " 10159", " 10163", " 10169", " 10177"],
|
||||
[" 10181", " 10193", " 10211", " 10223", " 10243", " 10247", " 10253", " 10259", " 10267", " 10271"],
|
||||
[" 10273", " 10289", " 10301", " 10303", " 10313", " 10321", " 10331", " 10333", " 10337", " 10343"],
|
||||
[" 10357", " 10369", " 10391", " 10399", " 10427", " 10429", " 10433", " 10453", " 10457", " 10459"],
|
||||
[" 10463", " 10477", " 10487", " 10499", " 10501", " 10513", " 10529", " 10531", " 10559", " 10567"],
|
||||
[" 10589", " 10597", " 10601", " 10607", " 10613", " 10627", " 10631", " 10639", " 10651", " 10657"],
|
||||
[" 10663", " 10667", " 10687", " 10691", " 10709", " 10711", " 10723", " 10729", " 10733", " 10739"],
|
||||
[" 10753", " 10771", " 10781", " 10789", " 10799", " 10831", " 10837", " 10847", " 10853", " 10859"],
|
||||
[" 10861", " 10867", " 10883", " 10889", " 10891", " 10903", " 10909", " 10937", " 10939", " 10949"],
|
||||
[" 10957", " 10973", " 10979", " 10987", " 10993", " 11003", " 11027", " 11047", " 11057", " 11059"],
|
||||
[" 11069", " 11071", " 11083", " 11087", " 11093", " 11113", " 11117", " 11119", " 11131", " 11149"],
|
||||
[" 11159", " 11161", " 11171", " 11173", " 11177", " 11197", " 11213", " 11239", " 11243", " 11251"],
|
||||
[" 11257", " 11261", " 11273", " 11279", " 11287", " 11299", " 11311", " 11317", " 11321", " 11329"],
|
||||
[" 11351", " 11353", " 11369", " 11383", " 11393", " 11399", " 11411", " 11423", " 11437", " 11443"],
|
||||
[" 11447", " 11467", " 11471", " 11483", " 11489", " 11491", " 11497", " 11503", " 11519", " 11527"],
|
||||
[" 11549", " 11551", " 11579", " 11587", " 11593", " 11597", " 11617", " 11621", " 11633", " 11657"],
|
||||
[" 11677", " 11681", " 11689", " 11699", " 11701", " 11717", " 11719", " 11731", " 11743", " 11777"],
|
||||
[" 11779", " 11783", " 11789", " 11801", " 11807", " 11813", " 11821", " 11827", " 11831", " 11833"],
|
||||
[" 11839", " 11863", " 11867", " 11887", " 11897", " 11903", " 11909", " 11923", " 11927", " 11933"],
|
||||
[" 11939", " 11941", " 11953", " 11959", " 11969", " 11971", " 11981", " 11987", " 12007", " 12011"],
|
||||
[" 12037", " 12041", " 12043", " 12049", " 12071", " 12073", " 12097", " 12101", " 12107", " 12109"],
|
||||
[" 12113", " 12119", " 12143", " 12149", " 12157", " 12161", " 12163", " 12197", " 12203", " 12211"],
|
||||
[" 12227", " 12239", " 12241", " 12251", " 12253", " 12263", " 12269", " 12277", " 12281", " 12289"],
|
||||
[" 12301", " 12323", " 12329", " 12343", " 12347", " 12373", " 12377", " 12379", " 12391", " 12401"],
|
||||
[" 12409", " 12413", " 12421", " 12433", " 12437", " 12451", " 12457", " 12473", " 12479", " 12487"],
|
||||
[" 12491", " 12497", " 12503", " 12511", " 12517", " 12527", " 12539", " 12541", " 12547", " 12553"],
|
||||
[" 12569", " 12577", " 12583", " 12589", " 12601", " 12611", " 12613", " 12619", " 12637", " 12641"],
|
||||
[" 12647", " 12653", " 12659", " 12671", " 12689", " 12697", " 12703", " 12713", " 12721", " 12739"],
|
||||
[" 12743", " 12757", " 12763", " 12781", " 12791", " 12799", " 12809", " 12821", " 12823", " 12829"],
|
||||
[" 12841", " 12853", " 12889", " 12893", " 12899", " 12907", " 12911", " 12917", " 12919", " 12923"],
|
||||
[" 12941", " 12953", " 12959", " 12967", " 12973", " 12979", " 12983", " 13001", " 13003", " 13007"],
|
||||
[" 13009", " 13033", " 13037", " 13043", " 13049", " 13063", " 13093", " 13099", " 13103", " 13109"],
|
||||
[" 13121", " 13127", " 13147", " 13151", " 13159", " 13163", " 13171", " 13177", " 13183", " 13187"],
|
||||
[" 13217", " 13219", " 13229", " 13241", " 13249", " 13259", " 13267", " 13291", " 13297", " 13309"],
|
||||
[" 13313", " 13327", " 13331", " 13337", " 13339", " 13367", " 13381", " 13397", " 13399", " 13411"],
|
||||
[" 13417", " 13421", " 13441", " 13451", " 13457", " 13463", " 13469", " 13477", " 13487", " 13499"],
|
||||
[" 13513", " 13523", " 13537", " 13553", " 13567", " 13577", " 13591", " 13597", " 13613", " 13619"],
|
||||
[" 13627", " 13633", " 13649", " 13669", " 13679", " 13681", " 13687", " 13691", " 13693", " 13697"],
|
||||
[" 13709", " 13711", " 13721", " 13723", " 13729", " 13751", " 13757", " 13759", " 13763", " 13781"],
|
||||
[" 13789", " 13799", " 13807", " 13829", " 13831", " 13841", " 13859", " 13873", " 13877", " 13879"],
|
||||
[" 13883", " 13901", " 13903", " 13907", " 13913", " 13921", " 13931", " 13933", " 13963", " 13967"],
|
||||
[" 13997", " 13999", " 14009", " 14011", " 14029", " 14033", " 14051", " 14057", " 14071", " 14081"],
|
||||
[" 14083", " 14087", " 14107", " 14143", " 14149", " 14153", " 14159", " 14173", " 14177", " 14197"],
|
||||
[" 14207", " 14221", " 14243", " 14249", " 14251", " 14281", " 14293", " 14303", " 14321", " 14323"],
|
||||
[" 14327", " 14341", " 14347", " 14369", " 14387", " 14389", " 14401", " 14407", " 14411", " 14419"],
|
||||
[" 14423", " 14431", " 14437", " 14447", " 14449", " 14461", " 14479", " 14489", " 14503", " 14519"],
|
||||
[" 14533", " 14537", " 14543", " 14549", " 14551", " 14557", " 14561", " 14563", " 14591", " 14593"],
|
||||
[" 14621", " 14627", " 14629", " 14633", " 14639", " 14653", " 14657", " 14669", " 14683", " 14699"],
|
||||
[" 14713", " 14717", " 14723", " 14731", " 14737", " 14741", " 14747", " 14753", " 14759", " 14767"],
|
||||
[" 14771", " 14779", " 14783", " 14797", " 14813", " 14821", " 14827", " 14831", " 14843", " 14851"],
|
||||
[" 14867", " 14869", " 14879", " 14887", " 14891", " 14897", " 14923", " 14929", " 14939", " 14947"],
|
||||
[" 14951", " 14957", " 14969", " 14983", " 15013", " 15017", " 15031", " 15053", " 15061", " 15073"],
|
||||
[" 15077", " 15083", " 15091", " 15101", " 15107", " 15121", " 15131", " 15137", " 15139", " 15149"],
|
||||
[" 15161", " 15173", " 15187", " 15193", " 15199", " 15217", " 15227", " 15233", " 15241", " 15259"],
|
||||
[" 15263", " 15269", " 15271", " 15277", " 15287", " 15289", " 15299", " 15307", " 15313", " 15319"],
|
||||
[" 15329", " 15331", " 15349", " 15359", " 15361", " 15373", " 15377", " 15383", " 15391", " 15401"],
|
||||
[" 15413", " 15427", " 15439", " 15443", " 15451", " 15461", " 15467", " 15473", " 15493", " 15497"],
|
||||
[" 15511", " 15527", " 15541", " 15551", " 15559", " 15569", " 15581", " 15583", " 15601", " 15607"],
|
||||
[" 15619", " 15629", " 15641", " 15643", " 15647", " 15649", " 15661", " 15667", " 15671", " 15679"],
|
||||
[" 15683", " 15727", " 15731", " 15733", " 15737", " 15739", " 15749", " 15761", " 15767", " 15773"],
|
||||
[" 15787", " 15791", " 15797", " 15803", " 15809", " 15817", " 15823", " 15859", " 15877", " 15881"],
|
||||
[" 15887", " 15889", " 15901", " 15907", " 15913", " 15919", " 15923", " 15937", " 15959", " 15971"],
|
||||
[" 15973", " 15991", " 16001", " 16007", " 16033", " 16057", " 16061", " 16063", " 16067", " 16069"],
|
||||
[" 16073", " 16087", " 16091", " 16097", " 16103", " 16111", " 16127", " 16139", " 16141", " 16183"],
|
||||
[" 16187", " 16189", " 16193", " 16217", " 16223", " 16229", " 16231", " 16249", " 16253", " 16267"],
|
||||
[" 16273", " 16301", " 16319", " 16333", " 16339", " 16349", " 16361", " 16363", " 16369", " 16381"],
|
||||
[" 16411", " 16417", " 16421", " 16427", " 16433", " 16447", " 16451", " 16453", " 16477", " 16481"],
|
||||
[" 16487", " 16493", " 16519", " 16529", " 16547", " 16553", " 16561", " 16567", " 16573", " 16603"],
|
||||
[" 16607", " 16619", " 16631", " 16633", " 16649", " 16651", " 16657", " 16661", " 16673", " 16691"],
|
||||
[" 16693", " 16699", " 16703", " 16729", " 16741", " 16747", " 16759", " 16763", " 16787", " 16811"],
|
||||
[" 16823", " 16829", " 16831", " 16843", " 16871", " 16879", " 16883", " 16889", " 16901", " 16903"],
|
||||
[" 16921", " 16927", " 16931", " 16937", " 16943", " 16963", " 16979", " 16981", " 16987", " 16993"],
|
||||
[" 17011", " 17021", " 17027", " 17029", " 17033", " 17041", " 17047", " 17053", " 17077", " 17093"],
|
||||
[" 17099", " 17107", " 17117", " 17123", " 17137", " 17159", " 17167", " 17183", " 17189", " 17191"],
|
||||
[" 17203", " 17207", " 17209", " 17231", " 17239", " 17257", " 17291", " 17293", " 17299", " 17317"],
|
||||
[" 17321", " 17327", " 17333", " 17341", " 17351", " 17359", " 17377", " 17383", " 17387", " 17389"],
|
||||
[" 17393", " 17401", " 17417", " 17419", " 17431", " 17443", " 17449", " 17467", " 17471", " 17477"],
|
||||
[" 17483", " 17489", " 17491", " 17497", " 17509", " 17519", " 17539", " 17551", " 17569", " 17573"],
|
||||
[" 17579", " 17581", " 17597", " 17599", " 17609", " 17623", " 17627", " 17657", " 17659", " 17669"],
|
||||
[" 17681", " 17683", " 17707", " 17713", " 17729", " 17737", " 17747", " 17749", " 17761", " 17783"],
|
||||
[" 17789", " 17791", " 17807", " 17827", " 17837", " 17839", " 17851", " 17863", " 17881", " 17891"],
|
||||
[" 17903", " 17909", " 17911", " 17921", " 17923", " 17929", " 17939", " 17957", " 17959", " 17971"],
|
||||
[" 17977", " 17981", " 17987", " 17989", " 18013", " 18041", " 18043", " 18047", " 18049", " 18059"],
|
||||
[" 18061", " 18077", " 18089", " 18097", " 18119", " 18121", " 18127", " 18131", " 18133", " 18143"],
|
||||
[" 18149", " 18169", " 18181", " 18191", " 18199", " 18211", " 18217", " 18223", " 18229", " 18233"],
|
||||
[" 18251", " 18253", " 18257", " 18269", " 18287", " 18289", " 18301", " 18307", " 18311", " 18313"],
|
||||
[" 18329", " 18341", " 18353", " 18367", " 18371", " 18379", " 18397", " 18401", " 18413", " 18427"],
|
||||
[" 18433", " 18439", " 18443", " 18451", " 18457", " 18461", " 18481", " 18493", " 18503", " 18517"],
|
||||
[" 18521", " 18523", " 18539", " 18541", " 18553", " 18583", " 18587", " 18593", " 18617", " 18637"],
|
||||
[" 18661", " 18671", " 18679", " 18691", " 18701", " 18713", " 18719", " 18731", " 18743", " 18749"],
|
||||
[" 18757", " 18773", " 18787", " 18793", " 18797", " 18803", " 18839", " 18859", " 18869", " 18899"],
|
||||
[" 18911", " 18913", " 18917", " 18919", " 18947", " 18959", " 18973", " 18979", " 19001", " 19009"],
|
||||
[" 19013", " 19031", " 19037", " 19051", " 19069", " 19073", " 19079", " 19081", " 19087", " 19121"],
|
||||
[" 19139", " 19141", " 19157", " 19163", " 19181", " 19183", " 19207", " 19211", " 19213", " 19219"],
|
||||
[" 19231", " 19237", " 19249", " 19259", " 19267", " 19273", " 19289", " 19301", " 19309", " 19319"],
|
||||
[" 19333", " 19373", " 19379", " 19381", " 19387", " 19391", " 19403", " 19417", " 19421", " 19423"],
|
||||
[" 19427", " 19429", " 19433", " 19441", " 19447", " 19457", " 19463", " 19469", " 19471", " 19477"],
|
||||
[" 19483", " 19501", " 19507", " 19531", " 19541", " 19543", " 19553", " 19559", " 19571", " 19577"],
|
||||
[" 19583", " 19597", " 19603", " 19609", " 19661", " 19681", " 19687", " 19697", " 19699", " 19709"],
|
||||
[" 19717", " 19727", " 19739", " 19751", " 19753", " 19759", " 19763", " 19777", " 19793", " 19801"],
|
||||
[" 19813", " 19819", " 19841", " 19843", " 19853", " 19861", " 19867", " 19889", " 19891", " 19913"],
|
||||
[" 19919", " 19927", " 19937", " 19949", " 19961", " 19963", " 19973", " 19979", " 19991", " 19993"],
|
||||
[" 19997", " 20011", " 20021", " 20023", " 20029", " 20047", " 20051", " 20063", " 20071", " 20089"],
|
||||
[" 20101", " 20107", " 20113", " 20117", " 20123", " 20129", " 20143", " 20147", " 20149", " 20161"],
|
||||
[" 20173", " 20177", " 20183", " 20201", " 20219", " 20231", " 20233", " 20249", " 20261", " 20269"],
|
||||
[" 20287", " 20297", " 20323", " 20327", " 20333", " 20341", " 20347", " 20353", " 20357", " 20359"],
|
||||
[" 20369", " 20389", " 20393", " 20399", " 20407", " 20411", " 20431", " 20441", " 20443", " 20477"],
|
||||
[" 20479", " 20483", " 20507", " 20509", " 20521", " 20533", " 20543", " 20549", " 20551", " 20563"],
|
||||
[" 20593", " 20599", " 20611", " 20627", " 20639", " 20641", " 20663", " 20681", " 20693", " 20707"],
|
||||
[" 20717", " 20719", " 20731", " 20743", " 20747", " 20749", " 20753", " 20759", " 20771", " 20773"],
|
||||
[" 20789", " 20807", " 20809", " 20849", " 20857", " 20873", " 20879", " 20887", " 20897", " 20899"],
|
||||
[" 20903", " 20921", " 20929", " 20939", " 20947", " 20959", " 20963", " 20981", " 20983", " 21001"],
|
||||
[" 21011", " 21013", " 21017", " 21019", " 21023", " 21031", " 21059", " 21061", " 21067", " 21089"],
|
||||
[" 21101", " 21107", " 21121", " 21139", " 21143", " 21149", " 21157", " 21163", " 21169", " 21179"],
|
||||
[" 21187", " 21191", " 21193", " 21211", " 21221", " 21227", " 21247", " 21269", " 21277", " 21283"],
|
||||
[" 21313", " 21317", " 21319", " 21323", " 21341", " 21347", " 21377", " 21379", " 21383", " 21391"],
|
||||
[" 21397", " 21401", " 21407", " 21419", " 21433", " 21467", " 21481", " 21487", " 21491", " 21493"],
|
||||
[" 21499", " 21503", " 21517", " 21521", " 21523", " 21529", " 21557", " 21559", " 21563", " 21569"],
|
||||
[" 21577", " 21587", " 21589", " 21599", " 21601", " 21611", " 21613", " 21617", " 21647", " 21649"],
|
||||
[" 21661", " 21673", " 21683", " 21701", " 21713", " 21727", " 21737", " 21739", " 21751", " 21757"],
|
||||
[" 21767", " 21773", " 21787", " 21799", " 21803", " 21817", " 21821", " 21839", " 21841", " 21851"],
|
||||
[" 21859", " 21863", " 21871", " 21881", " 21893", " 21911", " 21929", " 21937", " 21943", " 21961"],
|
||||
[" 21977", " 21991", " 21997", " 22003", " 22013", " 22027", " 22031", " 22037", " 22039", " 22051"],
|
||||
[" 22063", " 22067", " 22073", " 22079", " 22091", " 22093", " 22109", " 22111", " 22123", " 22129"],
|
||||
[" 22133", " 22147", " 22153", " 22157", " 22159", " 22171", " 22189", " 22193", " 22229", " 22247"],
|
||||
[" 22259", " 22271", " 22273", " 22277", " 22279", " 22283", " 22291", " 22303", " 22307", " 22343"],
|
||||
[" 22349", " 22367", " 22369", " 22381", " 22391", " 22397", " 22409", " 22433", " 22441", " 22447"],
|
||||
[" 22453", " 22469", " 22481", " 22483", " 22501", " 22511", " 22531", " 22541", " 22543", " 22549"],
|
||||
[" 22567", " 22571", " 22573", " 22613", " 22619", " 22621", " 22637", " 22639", " 22643", " 22651"],
|
||||
[" 22669", " 22679", " 22691", " 22697", " 22699", " 22709", " 22717", " 22721", " 22727", " 22739"],
|
||||
[" 22741", " 22751", " 22769", " 22777", " 22783", " 22787", " 22807", " 22811", " 22817", " 22853"],
|
||||
[" 22859", " 22861", " 22871", " 22877", " 22901", " 22907", " 22921", " 22937", " 22943", " 22961"],
|
||||
[" 22963", " 22973", " 22993", " 23003", " 23011", " 23017", " 23021", " 23027", " 23029", " 23039"],
|
||||
[" 23041", " 23053", " 23057", " 23059", " 23063", " 23071", " 23081", " 23087", " 23099", " 23117"],
|
||||
[" 23131", " 23143", " 23159", " 23167", " 23173", " 23189", " 23197", " 23201", " 23203", " 23209"],
|
||||
[" 23227", " 23251", " 23269", " 23279", " 23291", " 23293", " 23297", " 23311", " 23321", " 23327"],
|
||||
[" 23333", " 23339", " 23357", " 23369", " 23371", " 23399", " 23417", " 23431", " 23447", " 23459"],
|
||||
[" 23473", " 23497", " 23509", " 23531", " 23537", " 23539", " 23549", " 23557", " 23561", " 23563"],
|
||||
[" 23567", " 23581", " 23593", " 23599", " 23603", " 23609", " 23623", " 23627", " 23629", " 23633"],
|
||||
[" 23663", " 23669", " 23671", " 23677", " 23687", " 23689", " 23719", " 23741", " 23743", " 23747"],
|
||||
[" 23753", " 23761", " 23767", " 23773", " 23789", " 23801", " 23813", " 23819", " 23827", " 23831"],
|
||||
[" 23833", " 23857", " 23869", " 23873", " 23879", " 23887", " 23893", " 23899", " 23909", " 23911"],
|
||||
[" 23917", " 23929", " 23957", " 23971", " 23977", " 23981", " 23993", " 24001", " 24007", " 24019"],
|
||||
[" 24023", " 24029", " 24043", " 24049", " 24061", " 24071", " 24077", " 24083", " 24091", " 24097"],
|
||||
[" 24103", " 24107", " 24109", " 24113", " 24121", " 24133", " 24137", " 24151", " 24169", " 24179"],
|
||||
[" 24181", " 24197", " 24203", " 24223", " 24229", " 24239", " 24247", " 24251", " 24281", " 24317"],
|
||||
[" 24329", " 24337", " 24359", " 24371", " 24373", " 24379", " 24391", " 24407", " 24413", " 24419"],
|
||||
[" 24421", " 24439", " 24443", " 24469", " 24473", " 24481", " 24499", " 24509", " 24517", " 24527"],
|
||||
[" 24533", " 24547", " 24551", " 24571", " 24593", " 24611", " 24623", " 24631", " 24659", " 24671"],
|
||||
[" 24677", " 24683", " 24691", " 24697", " 24709", " 24733", " 24749", " 24763", " 24767", " 24781"],
|
||||
[" 24793", " 24799", " 24809", " 24821", " 24841", " 24847", " 24851", " 24859", " 24877", " 24889"],
|
||||
[" 24907", " 24917", " 24919", " 24923", " 24943", " 24953", " 24967", " 24971", " 24977", " 24979"],
|
||||
[" 24989", " 25013", " 25031", " 25033", " 25037", " 25057", " 25073", " 25087", " 25097", " 25111"],
|
||||
[" 25117", " 25121", " 25127", " 25147", " 25153", " 25163", " 25169", " 25171", " 25183", " 25189"],
|
||||
[" 25219", " 25229", " 25237", " 25243", " 25247", " 25253", " 25261", " 25301", " 25303", " 25307"],
|
||||
[" 25309", " 25321", " 25339", " 25343", " 25349", " 25357", " 25367", " 25373", " 25391", " 25409"],
|
||||
[" 25411", " 25423", " 25439", " 25447", " 25453", " 25457", " 25463", " 25469", " 25471", " 25523"],
|
||||
[" 25537", " 25541", " 25561", " 25577", " 25579", " 25583", " 25589", " 25601", " 25603", " 25609"],
|
||||
[" 25621", " 25633", " 25639", " 25643", " 25657", " 25667", " 25673", " 25679", " 25693", " 25703"],
|
||||
[" 25717", " 25733", " 25741", " 25747", " 25759", " 25763", " 25771", " 25793", " 25799", " 25801"],
|
||||
[" 25819", " 25841", " 25847", " 25849", " 25867", " 25873", " 25889", " 25903", " 25913", " 25919"],
|
||||
[" 25931", " 25933", " 25939", " 25943", " 25951", " 25969", " 25981", " 25997", " 25999", " 26003"],
|
||||
[" 26017", " 26021", " 26029", " 26041", " 26053", " 26083", " 26099", " 26107", " 26111", " 26113"],
|
||||
[" 26119", " 26141", " 26153", " 26161", " 26171", " 26177", " 26183", " 26189", " 26203", " 26209"],
|
||||
[" 26227", " 26237", " 26249", " 26251", " 26261", " 26263", " 26267", " 26293", " 26297", " 26309"],
|
||||
[" 26317", " 26321", " 26339", " 26347", " 26357", " 26371", " 26387", " 26393", " 26399", " 26407"],
|
||||
[" 26417", " 26423", " 26431", " 26437", " 26449", " 26459", " 26479", " 26489", " 26497", " 26501"],
|
||||
[" 26513", " 26539", " 26557", " 26561", " 26573", " 26591", " 26597", " 26627", " 26633", " 26641"],
|
||||
[" 26647", " 26669", " 26681", " 26683", " 26687", " 26693", " 26699", " 26701", " 26711", " 26713"],
|
||||
[" 26717", " 26723", " 26729", " 26731", " 26737", " 26759", " 26777", " 26783", " 26801", " 26813"],
|
||||
[" 26821", " 26833", " 26839", " 26849", " 26861", " 26863", " 26879", " 26881", " 26891", " 26893"],
|
||||
[" 26903", " 26921", " 26927", " 26947", " 26951", " 26953", " 26959", " 26981", " 26987", " 26993"],
|
||||
[" 27011", " 27017", " 27031", " 27043", " 27059", " 27061", " 27067", " 27073", " 27077", " 27091"],
|
||||
[" 27103", " 27107", " 27109", " 27127", " 27143", " 27179", " 27191", " 27197", " 27211", " 27239"],
|
||||
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283", " 27299", " 27329", " 27337"],
|
||||
[" 27361", " 27367", " 27397", " 27407", " 27409", " 27427", " 27431", " 27437", " 27449", " 27457"],
|
||||
[" 27479", " 27481", " 27487", " 27509", " 27527", " 27529", " 27539", " 27541", " 27551", " 27581"],
|
||||
[" 27583", " 27611", " 27617", " 27631", " 27647", " 27653", " 27673", " 27689", " 27691", " 27697"],
|
||||
[" 27701", " 27733", " 27737", " 27739", " 27743", " 27749", " 27751", " 27763", " 27767", " 27773"],
|
||||
[" 27779", " 27791", " 27793", " 27799", " 27803", " 27809", " 27817", " 27823", " 27827", " 27847"],
|
||||
[" 27851", " 27883", " 27893", " 27901", " 27917", " 27919", " 27941", " 27943", " 27947", " 27953"],
|
||||
[" 27961", " 27967", " 27983", " 27997", " 28001", " 28019", " 28027", " 28031", " 28051", " 28057"],
|
||||
[" 28069", " 28081", " 28087", " 28097", " 28099", " 28109", " 28111", " 28123", " 28151", " 28163"],
|
||||
[" 28181", " 28183", " 28201", " 28211", " 28219", " 28229", " 28277", " 28279", " 28283", " 28289"],
|
||||
[" 28297", " 28307", " 28309", " 28319", " 28349", " 28351", " 28387", " 28393", " 28403", " 28409"],
|
||||
[" 28411", " 28429", " 28433", " 28439", " 28447", " 28463", " 28477", " 28493", " 28499", " 28513"],
|
||||
[" 28517", " 28537", " 28541", " 28547", " 28549", " 28559", " 28571", " 28573", " 28579", " 28591"],
|
||||
[" 28597", " 28603", " 28607", " 28619", " 28621", " 28627", " 28631", " 28643", " 28649", " 28657"],
|
||||
[" 28661", " 28663", " 28669", " 28687", " 28697", " 28703", " 28711", " 28723", " 28729", " 28751"],
|
||||
[" 28753", " 28759", " 28771", " 28789", " 28793", " 28807", " 28813", " 28817", " 28837", " 28843"],
|
||||
[" 28859", " 28867", " 28871", " 28879", " 28901", " 28909", " 28921", " 28927", " 28933", " 28949"],
|
||||
[" 28961", " 28979", " 29009", " 29017", " 29021", " 29023", " 29027", " 29033", " 29059", " 29063"],
|
||||
[" 29077", " 29101", " 29123", " 29129", " 29131", " 29137", " 29147", " 29153", " 29167", " 29173"],
|
||||
[" 29179", " 29191", " 29201", " 29207", " 29209", " 29221", " 29231", " 29243", " 29251", " 29269"],
|
||||
[" 29287", " 29297", " 29303", " 29311", " 29327", " 29333", " 29339", " 29347", " 29363", " 29383"],
|
||||
[" 29387", " 29389", " 29399", " 29401", " 29411", " 29423", " 29429", " 29437", " 29443", " 29453"],
|
||||
[" 29473", " 29483", " 29501", " 29527", " 29531", " 29537", " 29567", " 29569", " 29573", " 29581"],
|
||||
[" 29587", " 29599", " 29611", " 29629", " 29633", " 29641", " 29663", " 29669", " 29671", " 29683"],
|
||||
[" 29717", " 29723", " 29741", " 29753", " 29759", " 29761", " 29789", " 29803", " 29819", " 29833"],
|
||||
[" 29837", " 29851", " 29863", " 29867", " 29873", " 29879", " 29881", " 29917", " 29921", " 29927"],
|
||||
[" 29947", " 29959", " 29983", " 29989", " 30011", " 30013", " 30029", " 30047", " 30059", " 30071"],
|
||||
[" 30089", " 30091", " 30097", " 30103", " 30109", " 30113", " 30119", " 30133", " 30137", " 30139"],
|
||||
[" 30161", " 30169", " 30181", " 30187", " 30197", " 30203", " 30211", " 30223", " 30241", " 30253"],
|
||||
[" 30259", " 30269", " 30271", " 30293", " 30307", " 30313", " 30319", " 30323", " 30341", " 30347"],
|
||||
[" 30367", " 30389", " 30391", " 30403", " 30427", " 30431", " 30449", " 30467", " 30469", " 30491"],
|
||||
[" 30493", " 30497", " 30509", " 30517", " 30529", " 30539", " 30553", " 30557", " 30559", " 30577"],
|
||||
[" 30593", " 30631", " 30637", " 30643", " 30649", " 30661", " 30671", " 30677", " 30689", " 30697"],
|
||||
[" 30703", " 30707", " 30713", " 30727", " 30757", " 30763", " 30773", " 30781", " 30803", " 30809"],
|
||||
[" 30817", " 30829", " 30839", " 30841", " 30851", " 30853", " 30859", " 30869", " 30871", " 30881"],
|
||||
[" 30893", " 30911", " 30931", " 30937", " 30941", " 30949", " 30971", " 30977", " 30983", " 31013"],
|
||||
[" 31019", " 31033", " 31039", " 31051", " 31063", " 31069", " 31079", " 31081", " 31091", " 31121"],
|
||||
[" 31123", " 31139", " 31147", " 31151", " 31153", " 31159", " 31177", " 31181", " 31183", " 31189"],
|
||||
[" 31193", " 31219", " 31223", " 31231", " 31237", " 31247", " 31249", " 31253", " 31259", " 31267"],
|
||||
[" 31271", " 31277", " 31307", " 31319", " 31321", " 31327", " 31333", " 31337", " 31357", " 31379"],
|
||||
[" 31387", " 31391", " 31393", " 31397", " 31469", " 31477", " 31481", " 31489", " 31511", " 31513"],
|
||||
[" 31517", " 31531", " 31541", " 31543", " 31547", " 31567", " 31573", " 31583", " 31601", " 31607"],
|
||||
[" 31627", " 31643", " 31649", " 31657", " 31663", " 31667", " 31687", " 31699", " 31721", " 31723"],
|
||||
[" 31727", " 31729", " 31741", " 31751", " 31769", " 31771", " 31793", " 31799", " 31817", " 31847"],
|
||||
[" 31849", " 31859", " 31873", " 31883", " 31891", " 31907", " 31957", " 31963", " 31973", " 31981"],
|
||||
[" 31991", " 32003", " 32009", " 32027", " 32029", " 32051", " 32057", " 32059", " 32063", " 32069"],
|
||||
[" 32077", " 32083", " 32089", " 32099", " 32117", " 32119", " 32141", " 32143", " 32159", " 32173"],
|
||||
[" 32183", " 32189", " 32191", " 32203", " 32213", " 32233", " 32237", " 32251", " 32257", " 32261"],
|
||||
[" 32297", " 32299", " 32303", " 32309", " 32321", " 32323", " 32327", " 32341", " 32353", " 32359"],
|
||||
[" 32363", " 32369", " 32371", " 32377", " 32381", " 32401", " 32411", " 32413", " 32423", " 32429"],
|
||||
[" 32441", " 32443", " 32467", " 32479", " 32491", " 32497", " 32503", " 32507", " 32531", " 32533"],
|
||||
[" 32537", " 32561", " 32563", " 32569", " 32573", " 32579", " 32587", " 32603", " 32609", " 32611"],
|
||||
[" 32621", " 32633", " 32647", " 32653", " 32687", " 32693", " 32707", " 32713", " 32717", " 32719"],
|
||||
[" 32749", " 32771", " 32779", " 32783", " 32789", " 32797", " 32801", " 32803", " 32831", " 32833"],
|
||||
[" 32839", " 32843", " 32869", " 32887", " 32909", " 32911", " 32917", " 32933", " 32939", " 32941"],
|
||||
[" 32957", " 32969", " 32971", " 32983", " 32987", " 32993", " 32999", " 33013", " 33023", " 33029"],
|
||||
[" 33037", " 33049", " 33053", " 33071", " 33073", " 33083", " 33091", " 33107", " 33113", " 33119"],
|
||||
[" 33149", " 33151", " 33161", " 33179", " 33181", " 33191", " 33199", " 33203", " 33211", " 33223"],
|
||||
[" 33247", " 33287", " 33289", " 33301", " 33311", " 33317", " 33329", " 33331", " 33343", " 33347"],
|
||||
[" 33349", " 33353", " 33359", " 33377", " 33391", " 33403", " 33409", " 33413", " 33427", " 33457"],
|
||||
[" 33461", " 33469", " 33479", " 33487", " 33493", " 33503", " 33521", " 33529", " 33533", " 33547"],
|
||||
[" 33563", " 33569", " 33577", " 33581", " 33587", " 33589", " 33599", " 33601", " 33613", " 33617"],
|
||||
[" 33619", " 33623", " 33629", " 33637", " 33641", " 33647", " 33679", " 33703", " 33713", " 33721"],
|
||||
[" 33739", " 33749", " 33751", " 33757", " 33767", " 33769", " 33773", " 33791", " 33797", " 33809"],
|
||||
[" 33811", " 33827", " 33829", " 33851", " 33857", " 33863", " 33871", " 33889", " 33893", " 33911"],
|
||||
[" 33923", " 33931", " 33937", " 33941", " 33961", " 33967", " 33997", " 34019", " 34031", " 34033"],
|
||||
[" 34039", " 34057", " 34061", " 34123", " 34127", " 34129", " 34141", " 34147", " 34157", " 34159"],
|
||||
[" 34171", " 34183", " 34211", " 34213", " 34217", " 34231", " 34253", " 34259", " 34261", " 34267"],
|
||||
[" 34273", " 34283", " 34297", " 34301", " 34303", " 34313", " 34319", " 34327", " 34337", " 34351"],
|
||||
[" 34361", " 34367", " 34369", " 34381", " 34403", " 34421", " 34429", " 34439", " 34457", " 34469"],
|
||||
[" 34471", " 34483", " 34487", " 34499", " 34501", " 34511", " 34513", " 34519", " 34537", " 34543"],
|
||||
[" 34549", " 34583", " 34589", " 34591", " 34603", " 34607", " 34613", " 34631", " 34649", " 34651"],
|
||||
[" 34667", " 34673", " 34679", " 34687", " 34693", " 34703", " 34721", " 34729", " 34739", " 34747"],
|
||||
[" 34757", " 34759", " 34763", " 34781", " 34807", " 34819", " 34841", " 34843", " 34847", " 34849"],
|
||||
[" 34871", " 34877", " 34883", " 34897", " 34913", " 34919", " 34939", " 34949", " 34961", " 34963"],
|
||||
[" 34981", " 35023", " 35027", " 35051", " 35053", " 35059", " 35069", " 35081", " 35083", " 35089"],
|
||||
[" 35099", " 35107", " 35111", " 35117", " 35129", " 35141", " 35149", " 35153", " 35159", " 35171"],
|
||||
[" 35201", " 35221", " 35227", " 35251", " 35257", " 35267", " 35279", " 35281", " 35291", " 35311"],
|
||||
[" 35317", " 35323", " 35327", " 35339", " 35353", " 35363", " 35381", " 35393", " 35401", " 35407"],
|
||||
[" 35419", " 35423", " 35437", " 35447", " 35449", " 35461", " 35491", " 35507", " 35509", " 35521"],
|
||||
[" 35527", " 35531", " 35533", " 35537", " 35543", " 35569", " 35573", " 35591", " 35593", " 35597"],
|
||||
[" 35603", " 35617", " 35671", " 35677", " 35729", " 35731", " 35747", " 35753", " 35759", " 35771"],
|
||||
[" 35797", " 35801", " 35803", " 35809", " 35831", " 35837", " 35839", " 35851", " 35863", " 35869"],
|
||||
[" 35879", " 35897", " 35899", " 35911", " 35923", " 35933", " 35951", " 35963", " 35969", " 35977"],
|
||||
[" 35983", " 35993", " 35999", " 36007", " 36011", " 36013", " 36017", " 36037", " 36061", " 36067"],
|
||||
[" 36073", " 36083", " 36097", " 36107", " 36109", " 36131", " 36137", " 36151", " 36161", " 36187"],
|
||||
[" 36191", " 36209", " 36217", " 36229", " 36241", " 36251", " 36263", " 36269", " 36277", " 36293"],
|
||||
[" 36299", " 36307", " 36313", " 36319", " 36341", " 36343", " 36353", " 36373", " 36383", " 36389"],
|
||||
[" 36433", " 36451", " 36457", " 36467", " 36469", " 36473", " 36479", " 36493", " 36497", " 36523"],
|
||||
[" 36527", " 36529", " 36541", " 36551", " 36559", " 36563", " 36571", " 36583", " 36587", " 36599"],
|
||||
[" 36607", " 36629", " 36637", " 36643", " 36653", " 36671", " 36677", " 36683", " 36691", " 36697"],
|
||||
[" 36709", " 36713", " 36721", " 36739", " 36749", " 36761", " 36767", " 36779", " 36781", " 36787"],
|
||||
[" 36791", " 36793", " 36809", " 36821", " 36833", " 36847", " 36857", " 36871", " 36877", " 36887"],
|
||||
[" 36899", " 36901", " 36913", " 36919", " 36923", " 36929", " 36931", " 36943", " 36947", " 36973"],
|
||||
[" 36979", " 36997", " 37003", " 37013", " 37019", " 37021", " 37039", " 37049", " 37057", " 37061"],
|
||||
[" 37087", " 37097", " 37117", " 37123", " 37139", " 37159", " 37171", " 37181", " 37189", " 37199"],
|
||||
[" 37201", " 37217", " 37223", " 37243", " 37253", " 37273", " 37277", " 37307", " 37309", " 37313"],
|
||||
[" 37321", " 37337", " 37339", " 37357", " 37361", " 37363", " 37369", " 37379", " 37397", " 37409"],
|
||||
[" 37423", " 37441", " 37447", " 37463", " 37483", " 37489", " 37493", " 37501", " 37507", " 37511"],
|
||||
[" 37517", " 37529", " 37537", " 37547", " 37549", " 37561", " 37567", " 37571", " 37573", " 37579"],
|
||||
[" 37589", " 37591", " 37607", " 37619", " 37633", " 37643", " 37649", " 37657", " 37663", " 37691"],
|
||||
[" 37693", " 37699", " 37717", " 37747", " 37781", " 37783", " 37799", " 37811", " 37813", " 37831"],
|
||||
[" 37847", " 37853", " 37861", " 37871", " 37879", " 37889", " 37897", " 37907", " 37951", " 37957"],
|
||||
[" 37963", " 37967", " 37987", " 37991", " 37993", " 37997", " 38011", " 38039", " 38047", " 38053"],
|
||||
[" 38069", " 38083", " 38113", " 38119", " 38149", " 38153", " 38167", " 38177", " 38183", " 38189"],
|
||||
[" 38197", " 38201", " 38219", " 38231", " 38237", " 38239", " 38261", " 38273", " 38281", " 38287"],
|
||||
[" 38299", " 38303", " 38317", " 38321", " 38327", " 38329", " 38333", " 38351", " 38371", " 38377"],
|
||||
[" 38393", " 38431", " 38447", " 38449", " 38453", " 38459", " 38461", " 38501", " 38543", " 38557"],
|
||||
[" 38561", " 38567", " 38569", " 38593", " 38603", " 38609", " 38611", " 38629", " 38639", " 38651"],
|
||||
[" 38653", " 38669", " 38671", " 38677", " 38693", " 38699", " 38707", " 38711", " 38713", " 38723"],
|
||||
[" 38729", " 38737", " 38747", " 38749", " 38767", " 38783", " 38791", " 38803", " 38821", " 38833"],
|
||||
[" 38839", " 38851", " 38861", " 38867", " 38873", " 38891", " 38903", " 38917", " 38921", " 38923"],
|
||||
[" 38933", " 38953", " 38959", " 38971", " 38977", " 38993", " 39019", " 39023", " 39041", " 39043"],
|
||||
[" 39047", " 39079", " 39089", " 39097", " 39103", " 39107", " 39113", " 39119", " 39133", " 39139"],
|
||||
[" 39157", " 39161", " 39163", " 39181", " 39191", " 39199", " 39209", " 39217", " 39227", " 39229"],
|
||||
[" 39233", " 39239", " 39241", " 39251", " 39293", " 39301", " 39313", " 39317", " 39323", " 39341"],
|
||||
[" 39343", " 39359", " 39367", " 39371", " 39373", " 39383", " 39397", " 39409", " 39419", " 39439"],
|
||||
[" 39443", " 39451", " 39461", " 39499", " 39503", " 39509", " 39511", " 39521", " 39541", " 39551"],
|
||||
[" 39563", " 39569", " 39581", " 39607", " 39619", " 39623", " 39631", " 39659", " 39667", " 39671"],
|
||||
[" 39679", " 39703", " 39709", " 39719", " 39727", " 39733", " 39749", " 39761", " 39769", " 39779"],
|
||||
[" 39791", " 39799", " 39821", " 39827", " 39829", " 39839", " 39841", " 39847", " 39857", " 39863"],
|
||||
[" 39869", " 39877", " 39883", " 39887", " 39901", " 39929", " 39937", " 39953", " 39971", " 39979"],
|
||||
[" 39983", " 39989", " 40009", " 40013", " 40031", " 40037", " 40039", " 40063", " 40087", " 40093"],
|
||||
[" 40099", " 40111", " 40123", " 40127", " 40129", " 40151", " 40153", " 40163", " 40169", " 40177"],
|
||||
[" 40189", " 40193", " 40213", " 40231", " 40237", " 40241", " 40253", " 40277", " 40283", " 40289"],
|
||||
[" 40343", " 40351", " 40357", " 40361", " 40387", " 40423", " 40427", " 40429", " 40433", " 40459"],
|
||||
[" 40471", " 40483", " 40487", " 40493", " 40499", " 40507", " 40519", " 40529", " 40531", " 40543"],
|
||||
[" 40559", " 40577", " 40583", " 40591", " 40597", " 40609", " 40627", " 40637", " 40639", " 40693"],
|
||||
[" 40697", " 40699", " 40709", " 40739", " 40751", " 40759", " 40763", " 40771", " 40787", " 40801"],
|
||||
[" 40813", " 40819", " 40823", " 40829", " 40841", " 40847", " 40849", " 40853", " 40867", " 40879"],
|
||||
[" 40883", " 40897", " 40903", " 40927", " 40933", " 40939", " 40949", " 40961", " 40973", " 40993"],
|
||||
[" 41011", " 41017", " 41023", " 41039", " 41047", " 41051", " 41057", " 41077", " 41081", " 41113"],
|
||||
[" 41117", " 41131", " 41141", " 41143", " 41149", " 41161", " 41177", " 41179", " 41183", " 41189"],
|
||||
[" 41201", " 41203", " 41213", " 41221", " 41227", " 41231", " 41233", " 41243", " 41257", " 41263"],
|
||||
[" 41269", " 41281", " 41299", " 41333", " 41341", " 41351", " 41357", " 41381", " 41387", " 41389"],
|
||||
[" 41399", " 41411", " 41413", " 41443", " 41453", " 41467", " 41479", " 41491", " 41507", " 41513"],
|
||||
[" 41519", " 41521", " 41539", " 41543", " 41549", " 41579", " 41593", " 41597", " 41603", " 41609"],
|
||||
[" 41611", " 41617", " 41621", " 41627", " 41641", " 41647", " 41651", " 41659", " 41669", " 41681"],
|
||||
[" 41687", " 41719", " 41729", " 41737", " 41759", " 41761", " 41771", " 41777", " 41801", " 41809"],
|
||||
[" 41813", " 41843", " 41849", " 41851", " 41863", " 41879", " 41887", " 41893", " 41897", " 41903"],
|
||||
[" 41911", " 41927", " 41941", " 41947", " 41953", " 41957", " 41959", " 41969", " 41981", " 41983"],
|
||||
[" 41999", " 42013", " 42017", " 42019", " 42023", " 42043", " 42061", " 42071", " 42073", " 42083"],
|
||||
[" 42089", " 42101", " 42131", " 42139", " 42157", " 42169", " 42179", " 42181", " 42187", " 42193"],
|
||||
[" 42197", " 42209", " 42221", " 42223", " 42227", " 42239", " 42257", " 42281", " 42283", " 42293"],
|
||||
[" 42299", " 42307", " 42323", " 42331", " 42337", " 42349", " 42359", " 42373", " 42379", " 42391"],
|
||||
[" 42397", " 42403", " 42407", " 42409", " 42433", " 42437", " 42443", " 42451", " 42457", " 42461"],
|
||||
[" 42463", " 42467", " 42473", " 42487", " 42491", " 42499", " 42509", " 42533", " 42557", " 42569"],
|
||||
[" 42571", " 42577", " 42589", " 42611", " 42641", " 42643", " 42649", " 42667", " 42677", " 42683"],
|
||||
[" 42689", " 42697", " 42701", " 42703", " 42709", " 42719", " 42727", " 42737", " 42743", " 42751"],
|
||||
[" 42767", " 42773", " 42787", " 42793", " 42797", " 42821", " 42829", " 42839", " 42841", " 42853"],
|
||||
[" 42859", " 42863", " 42899", " 42901", " 42923", " 42929", " 42937", " 42943", " 42953", " 42961"],
|
||||
[" 42967", " 42979", " 42989", " 43003", " 43013", " 43019", " 43037", " 43049", " 43051", " 43063"],
|
||||
[" 43067", " 43093", " 43103", " 43117", " 43133", " 43151", " 43159", " 43177", " 43189", " 43201"],
|
||||
[" 43207", " 43223", " 43237", " 43261", " 43271", " 43283", " 43291", " 43313", " 43319", " 43321"],
|
||||
[" 43331", " 43391", " 43397", " 43399", " 43403", " 43411", " 43427", " 43441", " 43451", " 43457"],
|
||||
[" 43481", " 43487", " 43499", " 43517", " 43541", " 43543", " 43573", " 43577", " 43579", " 43591"],
|
||||
[" 43597", " 43607", " 43609", " 43613", " 43627", " 43633", " 43649", " 43651", " 43661", " 43669"],
|
||||
[" 43691", " 43711", " 43717", " 43721", " 43753", " 43759", " 43777", " 43781", " 43783", " 43787"],
|
||||
[" 43789", " 43793", " 43801", " 43853", " 43867", " 43889", " 43891", " 43913", " 43933", " 43943"],
|
||||
[" 43951", " 43961", " 43963", " 43969", " 43973", " 43987", " 43991", " 43997", " 44017", " 44021"],
|
||||
[" 44027", " 44029", " 44041", " 44053", " 44059", " 44071", " 44087", " 44089", " 44101", " 44111"],
|
||||
[" 44119", " 44123", " 44129", " 44131", " 44159", " 44171", " 44179", " 44189", " 44201", " 44203"],
|
||||
[" 44207", " 44221", " 44249", " 44257", " 44263", " 44267", " 44269", " 44273", " 44279", " 44281"],
|
||||
[" 44293", " 44351", " 44357", " 44371", " 44381", " 44383", " 44389", " 44417", " 44449", " 44453"],
|
||||
[" 44483", " 44491", " 44497", " 44501", " 44507", " 44519", " 44531", " 44533", " 44537", " 44543"],
|
||||
[" 44549", " 44563", " 44579", " 44587", " 44617", " 44621", " 44623", " 44633", " 44641", " 44647"],
|
||||
[" 44651", " 44657", " 44683", " 44687", " 44699", " 44701", " 44711", " 44729", " 44741", " 44753"],
|
||||
[" 44771", " 44773", " 44777", " 44789", " 44797", " 44809", " 44819", " 44839", " 44843", " 44851"],
|
||||
[" 44867", " 44879", " 44887", " 44893", " 44909", " 44917", " 44927", " 44939", " 44953", " 44959"],
|
||||
[" 44963", " 44971", " 44983", " 44987", " 45007", " 45013", " 45053", " 45061", " 45077", " 45083"],
|
||||
[" 45119", " 45121", " 45127", " 45131", " 45137", " 45139", " 45161", " 45179", " 45181", " 45191"],
|
||||
[" 45197", " 45233", " 45247", " 45259", " 45263", " 45281", " 45289", " 45293", " 45307", " 45317"],
|
||||
[" 45319", " 45329", " 45337", " 45341", " 45343", " 45361", " 45377", " 45389", " 45403", " 45413"],
|
||||
[" 45427", " 45433", " 45439", " 45481", " 45491", " 45497", " 45503", " 45523", " 45533", " 45541"],
|
||||
[" 45553", " 45557", " 45569", " 45587", " 45589", " 45599", " 45613", " 45631", " 45641", " 45659"],
|
||||
[" 45667", " 45673", " 45677", " 45691", " 45697", " 45707", " 45737", " 45751", " 45757", " 45763"],
|
||||
[" 45767", " 45779", " 45817", " 45821", " 45823", " 45827", " 45833", " 45841", " 45853", " 45863"],
|
||||
[" 45869", " 45887", " 45893", " 45943", " 45949", " 45953", " 45959", " 45971", " 45979", " 45989"],
|
||||
[" 46021", " 46027", " 46049", " 46051", " 46061", " 46073", " 46091", " 46093", " 46099", " 46103"],
|
||||
[" 46133", " 46141", " 46147", " 46153", " 46171", " 46181", " 46183", " 46187", " 46199", " 46219"],
|
||||
[" 46229", " 46237", " 46261", " 46271", " 46273", " 46279", " 46301", " 46307", " 46309", " 46327"],
|
||||
[" 46337", " 46349", " 46351", " 46381", " 46399", " 46411", " 46439", " 46441", " 46447", " 46451"],
|
||||
[" 46457", " 46471", " 46477", " 46489", " 46499", " 46507", " 46511", " 46523", " 46549", " 46559"],
|
||||
[" 46567", " 46573", " 46589", " 46591", " 46601", " 46619", " 46633", " 46639", " 46643", " 46649"],
|
||||
[" 46663", " 46679", " 46681", " 46687", " 46691", " 46703", " 46723", " 46727", " 46747", " 46751"],
|
||||
[" 46757", " 46769", " 46771", " 46807", " 46811", " 46817", " 46819", " 46829", " 46831", " 46853"],
|
||||
[" 46861", " 46867", " 46877", " 46889", " 46901", " 46919", " 46933", " 46957", " 46993", " 46997"],
|
||||
[" 47017", " 47041", " 47051", " 47057", " 47059", " 47087", " 47093", " 47111", " 47119", " 47123"],
|
||||
[" 47129", " 47137", " 47143", " 47147", " 47149", " 47161", " 47189", " 47207", " 47221", " 47237"],
|
||||
[" 47251", " 47269", " 47279", " 47287", " 47293", " 47297", " 47303", " 47309", " 47317", " 47339"],
|
||||
[" 47351", " 47353", " 47363", " 47381", " 47387", " 47389", " 47407", " 47417", " 47419", " 47431"],
|
||||
[" 47441", " 47459", " 47491", " 47497", " 47501", " 47507", " 47513", " 47521", " 47527", " 47533"],
|
||||
[" 47543", " 47563", " 47569", " 47581", " 47591", " 47599", " 47609", " 47623", " 47629", " 47639"],
|
||||
[" 47653", " 47657", " 47659", " 47681", " 47699", " 47701", " 47711", " 47713", " 47717", " 47737"],
|
||||
[" 47741", " 47743", " 47777", " 47779", " 47791", " 47797", " 47807", " 47809", " 47819", " 47837"],
|
||||
[" 47843", " 47857", " 47869", " 47881", " 47903", " 47911", " 47917", " 47933", " 47939", " 47947"],
|
||||
[" 47951", " 47963", " 47969", " 47977", " 47981", " 48017", " 48023", " 48029", " 48049", " 48073"],
|
||||
[" 48079", " 48091", " 48109", " 48119", " 48121", " 48131", " 48157", " 48163", " 48179", " 48187"],
|
||||
[" 48193", " 48197", " 48221", " 48239", " 48247", " 48259", " 48271", " 48281", " 48299", " 48311"],
|
||||
[" 48313", " 48337", " 48341", " 48353", " 48371", " 48383", " 48397", " 48407", " 48409", " 48413"],
|
||||
[" 48437", " 48449", " 48463", " 48473", " 48479", " 48481", " 48487", " 48491", " 48497", " 48523"],
|
||||
[" 48527", " 48533", " 48539", " 48541", " 48563", " 48571", " 48589", " 48593", " 48611", " 48619"],
|
||||
[" 48623", " 48647", " 48649", " 48661", " 48673", " 48677", " 48679", " 48731", " 48733", " 48751"],
|
||||
[" 48757", " 48761", " 48767", " 48779", " 48781", " 48787", " 48799", " 48809", " 48817", " 48821"],
|
||||
[" 48823", " 48847", " 48857", " 48869", " 48871", " 48883", " 48889", " 48907", " 48947", " 48953"],
|
||||
[" 48973", " 48989", " 48991", " 49003", " 49009", " 49019", " 49031", " 49033", " 49037", " 49043"],
|
||||
[" 49057", " 49069", " 49081", " 49103", " 49109", " 49117", " 49121", " 49123", " 49139", " 49157"],
|
||||
[" 49169", " 49171", " 49177", " 49193", " 49199", " 49201", " 49207", " 49211", " 49223", " 49253"],
|
||||
[" 49261", " 49277", " 49279", " 49297", " 49307", " 49331", " 49333", " 49339", " 49363", " 49367"],
|
||||
[" 49369", " 49391", " 49393", " 49409", " 49411", " 49417", " 49429", " 49433", " 49451", " 49459"],
|
||||
[" 49463", " 49477", " 49481", " 49499", " 49523", " 49529", " 49531", " 49537", " 49547", " 49549"],
|
||||
[" 49559", " 49597", " 49603", " 49613", " 49627", " 49633", " 49639", " 49663", " 49667", " 49669"],
|
||||
[" 49681", " 49697", " 49711", " 49727", " 49739", " 49741", " 49747", " 49757", " 49783", " 49787"],
|
||||
[" 49789", " 49801", " 49807", " 49811", " 49823", " 49831", " 49843", " 49853", " 49871", " 49877"],
|
||||
[" 49891", " 49919", " 49921", " 49927", " 49937", " 49939", " 49943", " 49957", " 49991", " 49993"],
|
||||
[" 49999", " 50021", " 50023", " 50033", " 50047", " 50051", " 50053", " 50069", " 50077", " 50087"],
|
||||
[" 50093", " 50101", " 50111", " 50119", " 50123", " 50129", " 50131", " 50147", " 50153", " 50159"],
|
||||
[" 50177", " 50207", " 50221", " 50227", " 50231", " 50261", " 50263", " 50273", " 50287", " 50291"],
|
||||
[" 50311", " 50321", " 50329", " 50333", " 50341", " 50359", " 50363", " 50377", " 50383", " 50387"],
|
||||
[" 50411", " 50417", " 50423", " 50441", " 50459", " 50461", " 50497", " 50503", " 50513", " 50527"],
|
||||
[" 50539", " 50543", " 50549", " 50551", " 50581", " 50587", " 50591", " 50593", " 50599", " 50627"],
|
||||
[" 50647", " 50651", " 50671", " 50683", " 50707", " 50723", " 50741", " 50753", " 50767", " 50773"],
|
||||
[" 50777", " 50789", " 50821", " 50833", " 50839", " 50849", " 50857", " 50867", " 50873", " 50891"],
|
||||
[" 50893", " 50909", " 50923", " 50929", " 50951", " 50957", " 50969", " 50971", " 50989", " 50993"],
|
||||
[" 51001", " 51031", " 51043", " 51047", " 51059", " 51061", " 51071", " 51109", " 51131", " 51133"],
|
||||
[" 51137", " 51151", " 51157", " 51169", " 51193", " 51197", " 51199", " 51203", " 51217", " 51229"],
|
||||
[" 51239", " 51241", " 51257", " 51263", " 51283", " 51287", " 51307", " 51329", " 51341", " 51343"],
|
||||
[" 51347", " 51349", " 51361", " 51383", " 51407", " 51413", " 51419", " 51421", " 51427", " 51431"],
|
||||
[" 51437", " 51439", " 51449", " 51461", " 51473", " 51479", " 51481", " 51487", " 51503", " 51511"],
|
||||
[" 51517", " 51521", " 51539", " 51551", " 51563", " 51577", " 51581", " 51593", " 51599", " 51607"],
|
||||
[" 51613", " 51631", " 51637", " 51647", " 51659", " 51673", " 51679", " 51683", " 51691", " 51713"],
|
||||
[" 51719", " 51721", " 51749", " 51767", " 51769", " 51787", " 51797", " 51803", " 51817", " 51827"],
|
||||
[" 51829", " 51839", " 51853", " 51859", " 51869", " 51871", " 51893", " 51899", " 51907", " 51913"],
|
||||
[" 51929", " 51941", " 51949", " 51971", " 51973", " 51977", " 51991", " 52009", " 52021", " 52027"],
|
||||
[" 52051", " 52057", " 52067", " 52069", " 52081", " 52103", " 52121", " 52127", " 52147", " 52153"],
|
||||
[" 52163", " 52177", " 52181", " 52183", " 52189", " 52201", " 52223", " 52237", " 52249", " 52253"],
|
||||
[" 52259", " 52267", " 52289", " 52291", " 52301", " 52313", " 52321", " 52361", " 52363", " 52369"],
|
||||
[" 52379", " 52387", " 52391", " 52433", " 52453", " 52457", " 52489", " 52501", " 52511", " 52517"],
|
||||
[" 52529", " 52541", " 52543", " 52553", " 52561", " 52567", " 52571", " 52579", " 52583", " 52609"],
|
||||
[" 52627", " 52631", " 52639", " 52667", " 52673", " 52691", " 52697", " 52709", " 52711", " 52721"],
|
||||
[" 52727", " 52733", " 52747", " 52757", " 52769", " 52783", " 52807", " 52813", " 52817"]
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
================================================================================
239 modules/services/serviceAi/subAiCallLooping-flow.md Normal file
@@ -0,0 +1,239 @@
# AI Call Iteration Flow - JSON Merging System

This document describes the iteration flow for handling large JSON responses from AI that may be truncated and need to be merged across multiple iterations.

## Overview

When an AI response is too large, it may be truncated (cut) at an arbitrary point. The iteration system:
1. Detects incomplete JSON
2. Requests continuation from the AI
3. Merges the continuation with the existing JSON
4. Repeats until complete or max failures reached

---

## Key Variables

| Variable | Type | Purpose |
|----------|------|---------|
| `jsonBase` | `str \| None` | The merged JSON string (CUT version for overlap matching) |
| `candidateJson` | `str` | Temporary holder for merged result until validated |
| `lastValidCompletePart` | `str \| None` | Fallback - last successfully parsed CLOSED JSON |
| `lastOverlapContext` | `str` | Context for retry/continuation prompts |
| `lastHierarchyContextForPrompt` | `str` | Context for retry/continuation prompts |
| `mergeFailCount` | `int` | Global counter (max 3 failures) |

---

## Key Distinction: hierarchyContext vs completePart

| Field | Description | Use Case |
|-------|-------------|----------|
| `hierarchyContext` | **CUT JSON** - truncated at cut point | Used as `jsonBase` for merging with next AI fragment |
| `completePart` | **CLOSED JSON** - all structures properly closed | Used for validation, parsing, and fallback |

**Why this matters:**
- The next AI fragment starts with an **overlap** that matches the CUT point
- If we used `completePart` (closed), the overlap detection would FAIL
- We must use `hierarchyContext` (cut) so overlap matching works correctly (see the sketch below)
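
To make the distinction concrete, here is a minimal sketch assuming a hypothetical response that was cut mid-array (the JSON values are illustrative, not taken from a real run):

```python
import json

# Hypothetical AI response, truncated (cut) in the middle of an array.
truncated = '{"chapters": [{"title": "Intro", "sections": ["a", "b"'

# CUT variant (hierarchyContext): kept exactly as received, so the next
# fragment's overlap can be matched character-for-character at the cut point.
hierarchyContext = truncated

# CLOSED variant (completePart): all open structures closed so it parses.
completePart = '{"chapters": [{"title": "Intro", "sections": ["a", "b"]}]}'

json.loads(completePart)        # parses - usable for validation and fallback
# json.loads(hierarchyContext)  # would raise - it is deliberately left open
```

If the next fragment begins with `"b", "c"`, that prefix is only found at the end of the CUT variant; the CLOSED variant has already appended `]}]}`, so the overlap search would miss.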
---

## Flow Steps

### Step 1: BUILD PROMPT

**Location:** `subAiCallLooping.py` lines 163-212
**Function:** `buildContinuationContext()` from `modules/shared/jsonUtils.py`

- **First iteration:** Use original prompt
- **Continuation:** `buildContinuationContext(allSections, lastRawResponse, ...)`
  - Internally calls `getContexts(lastRawResponse)` to get overlap/hierarchy
  - Builds continuation prompt with `overlapContext` + `hierarchyContextForPrompt` (see the sketch after this list)
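
The exact wording lives in `buildContinuationContext()`; the following is a simplified, hypothetical sketch of how such a continuation prompt could be assembled. The function name and prompt text below are illustrative; only `overlapContext` and `hierarchyContextForPrompt` come from the real contexts:

```python
def buildContinuationPromptSketch(basePrompt: str,
                                  overlapContext: str,
                                  hierarchyContextForPrompt: str) -> str:
    """Illustrative only: combine the base prompt with the cut-point contexts."""
    return (
        f"{basePrompt}\n\n"
        "Your previous answer was cut off. Continue the JSON exactly where it stopped.\n"
        f"Structure so far, truncated at the cut point:\n{hierarchyContextForPrompt}\n\n"
        f"Repeat this overlap verbatim, then continue:\n{overlapContext}"
    )
```

Asking the model to repeat the overlap verbatim is what makes the suffix/prefix matching in Step 4 possible.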
### Step 2: CALL AI

**Location:** `subAiCallLooping.py` lines 214-299
**Function:** `self.aiService.callAi(request)`

- Returns `response.content` as `result`
- NOTE: Do NOT update `lastRawResponse` yet! (only after successful merge)

### Step 4: MERGE

**Location:** `subAiCallLooping.py` lines 338-396
**Function:** `JsonResponseHandler.mergeJsonStringsWithOverlap()` from `modules/services/serviceAi/subJsonResponseHandling.py`

```
IF first iteration (jsonBase is None):
    → candidateJson = result
ELSE:
    → mergedJsonString, hasOverlap = mergeJsonStringsWithOverlap(jsonBase, result)

    IF hasOverlap = False (MERGE FAILED):
        → mergeFailCount++
        → If mergeFailCount >= 3: return lastValidCompletePart (fallback)
        → Else: continue (retry with unchanged jsonBase AND lastRawResponse!)
    ELSE:
        → candidateJson = mergedJsonString (don't update jsonBase yet!)

→ lastRawResponse = candidateJson (ONLY after first iteration or successful merge!)

TRY DIRECT PARSE of candidateJson:
    IF parse succeeds:
        → jsonBase = candidateJson (commit)
        → FINISHED! Return normalized result
    ELSE:
        → Proceed to Step 5
```
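
The real merge lives in `ModularJsonMerger` and is structure-aware; as a rough mental model, the textual overlap idea can be sketched like this (a simplified stand-in, not the actual implementation; `minOverlap` is an assumed illustrative threshold):

```python
from typing import Tuple

def mergeWithOverlapSketch(jsonBase: str, fragment: str,
                           minOverlap: int = 20) -> Tuple[str, bool]:
    """Find the longest suffix of jsonBase that is a prefix of fragment,
    then splice the remainder of the fragment onto the base.

    Returns (merged, hasOverlap)."""
    maxLen = min(len(jsonBase), len(fragment))
    for size in range(maxLen, minOverlap - 1, -1):
        if jsonBase.endswith(fragment[:size]):
            return jsonBase + fragment[size:], True
    return jsonBase, False  # no overlap found - the caller counts a merge failure
```

Requiring a minimum overlap length guards against accidental one- or two-character matches that would splice the fragment at the wrong position.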
### Step 5: GET CONTEXTS

**Location:** `subAiCallLooping.py` lines 420-427
**Function:** `getContexts()` from `modules/shared/jsonContinuation.py`

```python
contexts = getContexts(candidateJson)
```

Returns `JsonContinuationContexts`:
- `overlapContext`: `""` if JSON is complete (no cut point)
- `hierarchyContext`: CUT JSON (for merging with next fragment)
- `hierarchyContextForPrompt`: CUT JSON with budget limits (for prompts)
- `completePart`: CLOSED JSON (repaired if needed)
- `jsonParsingSuccess`: `True` if completePart is valid JSON

**Enhancement:** If the original JSON is already complete → `overlapContext = ""`.
This signals "JSON is complete, no more continuation needed".
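
For example, for a response cut inside a nested object, the returned model could look roughly like this (the exact values depend on the repair and budget logic in `jsonContinuation.py`; the ones below are illustrative):

```python
from modules.shared.jsonContinuation import getContexts

contexts = getContexts('{"items": [{"id": 1}, {"id": 2, "na')
# Roughly (illustrative values):
# contexts.overlapContext      -> '{"id": 2, "na'                        (innermost cut element)
# contexts.hierarchyContext    -> '{"items": [{"id": 1}, {"id": 2, "na'  (CUT)
# contexts.completePart        -> '{"items": [{"id": 1}]}'               (CLOSED, repaired)
# contexts.jsonParsingSuccess  -> True
```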
### Step 6: DECIDE

**Location:** `subAiCallLooping.py` lines 429-528

#### Case A: `jsonParsingSuccess=true` AND `overlapContext=""`
**→ FINISHED**
- JSON is complete (no cut point)
- `jsonBase = contexts.completePart` (use CLOSED version for final result)
- Return `completePart` as result

#### Case B: `jsonParsingSuccess=true` AND `overlapContext!=""`
**→ CONTINUE to next iteration**
- JSON parseable but has cut point
- `jsonBase = contexts.hierarchyContext` ← **CUT version for next merge!**
- `lastValidCompletePart = contexts.completePart` ← **CLOSED version for fallback**
- Store contexts for next prompt
- `mergeFailCount = 0` (reset on success)
- `lastRawResponse = jsonBase`
- Continue to next iteration

#### Case C: `jsonParsingSuccess=false`
**→ RETRY with same prompt**
- Do NOT update `jsonBase` (keep previous valid state)
- `mergeFailCount++`
- If `mergeFailCount >= 3`: return `lastValidCompletePart` (fallback)
- Else: continue (retry with unchanged jsonBase/lastRawResponse)
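
Putting Steps 4-6 together, the whole loop reduces to roughly the following skeleton (a simplified sketch of the loop in `subAiCallLooping.py`, reusing the overlap sketch from Step 4; `callAi` and `buildPrompt` stand in for the real service calls, and error handling and progress logging are omitted):

```python
from modules.shared.jsonContinuation import getContexts
from modules.shared.jsonUtils import tryParseJson

MAX_MERGE_FAILS = 3

async def iterateUntilCompleteSketch(callAi, buildPrompt, originalPrompt):
    """Simplified decide loop: merge, parse, then finish / continue / retry."""
    jsonBase = None
    lastValidCompletePart = None
    mergeFailCount = 0
    prompt = originalPrompt

    while True:
        result = await callAi(prompt)

        # Step 4: MERGE (first iteration takes the response as-is)
        if jsonBase is None:
            candidateJson, hasOverlap = result, True
        else:
            candidateJson, hasOverlap = mergeWithOverlapSketch(jsonBase, result)

        if not hasOverlap:  # merge failed
            mergeFailCount += 1
            if mergeFailCount >= MAX_MERGE_FAILS:
                return lastValidCompletePart  # fallback
            continue  # retry with unchanged jsonBase and prompt

        parsed, parseError, _ = tryParseJson(candidateJson)
        if parseError is None and parsed:
            return candidateJson  # direct parse succeeded - FINISHED

        # Step 5: GET CONTEXTS, Step 6: DECIDE
        contexts = getContexts(candidateJson)
        if contexts.jsonParsingSuccess and not contexts.overlapContext:
            return contexts.completePart               # Case A: FINISHED
        if contexts.jsonParsingSuccess:
            jsonBase = contexts.hierarchyContext       # Case B: CUT base for merge
            lastValidCompletePart = contexts.completePart
            mergeFailCount = 0
            prompt = buildPrompt(contexts)             # continuation prompt
            continue
        mergeFailCount += 1                            # Case C: RETRY, same prompt
        if mergeFailCount >= MAX_MERGE_FAILS:
            return lastValidCompletePart
```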
---

## Flow Diagram

```
┌───────────────────────────────────────────────────────────────┐
│                       ITERATION START                         │
└───────────────────────────┬───────────────────────────────────┘
                            │
┌───────────────────────────▼───────────────────────────────────┐
│ STEP 1: BUILD PROMPT                                          │
│ - First: original prompt                                      │
│ - Next: buildContinuationContext(lastRawResponse)             │
└───────────────────────────┬───────────────────────────────────┘
                            │
┌───────────────────────────▼───────────────────────────────────┐
│ STEP 2: CALL AI → result                                      │
└───────────────────────────┬───────────────────────────────────┘
                            │
┌───────────────────────────▼───────────────────────────────────┐
│ STEP 4: MERGE jsonBase + result → candidateJson               │
└───────────────────────────┬───────────────────────────────────┘
                            │
               ┌────────────▼────────────┐
               │        Merge OK?        │
               └────────────┬────────────┘
                            │
      ┌─────────────────────┼─────────────────────┐
      │ NO                  │ YES                 │
      ▼                     ▼                     │
┌──────────────┐   ┌──────────────────┐           │
│ fails++      │   │ TRY DIRECT PARSE │           │
│ if >=3:      │   │ of candidateJson │           │
│   RETURN     │   └────────┬─────────┘           │
│   fallback   │            │                     │
│ else: RETRY  │   ┌────────▼─────────┐           │
│ (continue)   │   │    Parse OK?     │           │
└──────────────┘   └────────┬─────────┘           │
                            │                     │
      ┌─────────────────────┼─────────────────────┐
      │ YES                 │ NO                  │
      ▼                     ▼                     │
┌──────────────┐   ┌──────────────────────────────┐
│ FINISHED ✓   │   │ STEP 5: getContexts()        │
│ Return       │   │ → jsonParsingSuccess         │
│ normalized   │   │ → overlapContext             │
│ result       │   └────────────┬─────────────────┘
└──────────────┘                │
                   ┌────────────▼────────────────────┐
                   │ STEP 6: DECIDE                  │
                   └────────────┬────────────────────┘
                                │
        ┌───────────────────────┼────────────────────────┐
        │                       │                        │
        ▼                       ▼                        ▼
┌───────────────────┐ ┌───────────────────────┐ ┌───────────────────┐
│ success=true      │ │ success=true          │ │ success=false     │
│ overlap=""        │ │ overlap!=""           │ │                   │
│ ─────────────     │ │ ─────────────────     │ │ ─────────────     │
│ FINISHED ✓        │ │ CONTINUE              │ │ RETRY             │
│                   │ │                       │ │                   │
│ jsonBase =        │ │ jsonBase =            │ │ jsonBase unchanged│
│   completePart    │ │   hierarchyContext    │ │ fails++           │
│   (CLOSED)        │ │   (CUT for merge!)    │ │                   │
│                   │ │                       │ │ if >=3: fallback  │
│ Return result     │ │ fallback =            │ │ else: retry       │
│                   │ │   completePart        │ │                   │
│                   │ │   (CLOSED)            │ │                   │
│                   │ │                       │ │                   │
│                   │ │ Next iteration →      │ │                   │
└───────────────────┘ └───────────────────────┘ └───────────────────┘
```

---

## Files Involved

| File | Purpose |
|------|---------|
| `modules/services/serviceAi/subAiCallLooping.py` | Main iteration loop |
| `modules/shared/jsonContinuation.py` | `getContexts()` - context extraction & repair |
| `modules/shared/jsonUtils.py` | `buildContinuationContext()` - prompt building |
| `modules/services/serviceAi/subJsonResponseHandling.py` | `mergeJsonStringsWithOverlap()` |
| `modules/services/serviceAi/subJsonMerger.py` | `ModularJsonMerger` - actual merge logic |
| `modules/datamodels/datamodelAi.py` | `JsonContinuationContexts` model |

---

## Error Handling

### Merge Failures
- Max 3 consecutive failures allowed
- On failure: retry with unchanged `jsonBase` (previous valid state)
- After 3 failures: return `lastValidCompletePart` as fallback

### Parse Failures
- If `getContexts()` cannot produce valid JSON: increment fail counter
- Retry with same prompt (don't update jsonBase)
- After 3 failures: return `lastValidCompletePart` as fallback

### Fallback Strategy
- `lastValidCompletePart` stores the last successfully parsed CLOSED JSON
- Always available as fallback when things go wrong
- Ensures we return valid JSON even after multiple failures
@@ -7,17 +7,60 @@ Handles AI calls with looping and repair logic, including:
 - Looping with JSON repair and continuation
 - KPI definition and tracking
 - Progress tracking and iteration management
 
+FLOW LOGIC
+
+VARIABLES:
+- jsonBase: str (merged JSON so far, starts empty)
+- lastValidCompletePart: str (fallback for failures)
+- mergeFailCount: int = 0 (max 3)
+
+FLOW:
+┌─────────────────────────────────────────────────────────────────┐
+│ 1. BUILD PROMPT                                                 │
+│    - First: original prompt                                     │
+│    - Next: buildContinuationContext(lastRawResponse)            │
+├─────────────────────────────────────────────────────────────────┤
+│ 2. CALL AI → response fragment                                  │
+├─────────────────────────────────────────────────────────────────┤
+│ 4. MERGE jsonBase + response                                    │
+│    ├─ FAILS: repeat prompt, fails++ (if >=3 return fallback)    │
+│    └─ SUCCEEDS: try parse                                       │
+│       ├─ SUCCEEDS: FINISHED                                     │
+│       └─ FAILS: → step 5                                        │
+├─────────────────────────────────────────────────────────────────┤
+│ 5. GET CONTEXTS (merge OK, parse failed)                        │
+│    getContexts(mergedJson) →                                    │
+│    - If no cut point: overlapContext = ""                       │
+│    - Store contexts for next iteration                          │
+├─────────────────────────────────────────────────────────────────┤
+│ 6. DECIDE                                                       │
+│    ├─ jsonParsingSuccess=true AND overlapContext="":            │
+│    │    FINISHED. return completePart                           │
+│    ├─ jsonParsingSuccess=true AND overlapContext!="":           │
+│    │    CONTINUE, fails=0                                       │
+│    └─ ELSE: repeat prompt, fails++                              │
+└─────────────────────────────────────────────────────────────────┘
+
 """
 
 import json
 import logging
 from typing import Dict, Any, List, Optional, Callable
 
-from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, JsonAccumulationState
+from modules.datamodels.datamodelAi import (
+    AiCallRequest, AiCallOptions
+)
 from modules.datamodels.datamodelExtraction import ContentPart
-from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
 from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
 from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry
 from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
+from modules.shared.jsonContinuation import getContexts
+from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
+from modules.shared.jsonUtils import tryParseJson
+from modules.shared.jsonUtils import closeJsonStructures
+from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
 
 logger = logging.getLogger(__name__)
@@ -86,9 +129,18 @@ class AiCallLooper:
         iteration = 0
         allSections = []  # Accumulate all sections across iterations
         lastRawResponse = None  # Store last raw JSON response for continuation
         documentMetadata = None  # Store document metadata (title, filename) from first iteration
         accumulationState = None  # Track accumulation state for string accumulation
         accumulatedDirectJson = []  # Accumulate JSON strings for direct return use cases (chapter_structure, code_structure)
 
+        # JSON Base Iteration System:
+        # - jsonBase: the merged JSON string (replaces accumulatedDirectJson array)
+        # - After each iteration, new response is merged with jsonBase
+        # - On merge success: check if complete, store contexts for next iteration
+        # - On merge fail: retry with same prompt, increment fails
+        jsonBase = None  # Merged JSON string (starts None, set on first response)
+
+        # Merge fail tracking - stop after 3 consecutive merge failures
+        MAX_MERGE_FAILS = 3
+        mergeFailCount = 0  # Global counter for merge failures across entire loop
+        lastValidCompletePart = None  # Store last successfully parsed completePart for fallback
+
         # Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID)
         parentOperationId = operationId  # Use the parent's operationId directly
@@ -112,29 +164,49 @@
             # CRITICAL: Build continuation prompt if we have sections OR if we have a previous response (even if broken)
             # This ensures continuation prompts are built even when JSON is so broken that no sections can be extracted
             if (len(allSections) > 0 or lastRawResponse) and promptBuilder and promptArgs:
+                # Extract templateStructure and basePrompt from promptArgs (REQUIRED)
+                templateStructure = promptArgs.get("templateStructure")
+                if not templateStructure:
+                    raise ValueError(
+                        f"templateStructure is REQUIRED in promptArgs for use case '{useCaseId}'. "
+                        "Prompt creation functions must return (prompt, templateStructure) tuple."
+                    )
+
+                basePrompt = promptArgs.get("basePrompt")
+                if not basePrompt:
+                    # Fallback: use prompt parameter (should be the same)
+                    basePrompt = prompt
+                    logger.warning(
+                        f"basePrompt not found in promptArgs for use case '{useCaseId}', "
+                        "using prompt parameter instead. This may indicate a bug."
+                    )
+
                 # This is a continuation - build continuation context with raw JSON and rebuild prompt
-                continuationContext = buildContinuationContext(allSections, lastRawResponse)
+                continuationContext = buildContinuationContext(
+                    allSections, lastRawResponse, useCaseId, templateStructure
+                )
                 if not lastRawResponse:
                     logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
 
-                # For section_content, pass all promptArgs (it uses buildSectionPromptWithContinuation which needs all args)
-                # For other use cases (chapter_structure, code_structure), filter to only accepted parameters
-                if useCaseId == "section_content":
-                    # Pass all promptArgs plus continuationContext for section_content
-                    iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
-                else:
-                    # Filter promptArgs to only include parameters that buildGenerationPrompt accepts
-                    # buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services
-                    filteredPromptArgs = {
-                        k: v for k, v in promptArgs.items()
-                        if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services']
-                    }
-                    # Always include services if available
-                    if not filteredPromptArgs.get('services') and hasattr(self, 'services'):
-                        filteredPromptArgs['services'] = self.services
-
-                    # Rebuild prompt with continuation context using the provided prompt builder
-                    iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext)
+                # Store valid completePart from continuation context for fallback on merge failures
+                # Use getContexts to check if completePart is parseable and store it
+                if lastRawResponse and not lastValidCompletePart:
+                    try:
+                        contexts = getContexts(lastRawResponse)
+                        if contexts.jsonParsingSuccess and contexts.completePart:
+                            lastValidCompletePart = contexts.completePart
+                            logger.debug(f"Iteration {iteration}: Stored initial valid completePart ({len(lastValidCompletePart)} chars)")
+                    except Exception as e:
+                        logger.debug(f"Iteration {iteration}: Failed to extract completePart: {e}")
+
+                # Unified prompt builder call: Continuation builders only need continuationContext, templateStructure, and basePrompt
+                # All initial context (section, userPrompt, etc.) is already in basePrompt, so promptArgs is not needed
+                # Extract templateStructure and basePrompt from promptArgs (they're explicit parameters)
+                iterationPrompt = await promptBuilder(
+                    continuationContext=continuationContext,
+                    templateStructure=templateStructure,
+                    basePrompt=basePrompt
+                )
             else:
                 # First iteration - use original prompt
                 iterationPrompt = prompt
@@ -155,14 +227,17 @@
             )
 
             # Write the ACTUAL prompt sent to AI
-            # For section content generation: only write one prompt file (first iteration)
+            # For section content generation: write prompt for first iteration and continuation iterations
             # For document generation: write prompt for each iteration
             isSectionContent = "_section_" in debugPrefix
-            if iteration == 1 or not isSectionContent:
-                if iteration == 1:
-                    self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
-                elif not isSectionContent:
-                    self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
+            if iteration == 1:
+                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
+            elif isSectionContent:
+                # Save continuation prompts for section_content debugging
+                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
+            else:
+                # Document generation - save all iteration prompts
+                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
 
             response = await self.aiService.callAi(request)
             result = response.content
@ -183,13 +258,16 @@ class AiCallLooper:
|
|||
self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})")
|
||||
|
||||
# Write raw AI response to debug file
|
||||
# For section content generation: only write one response file (first iteration)
|
||||
# For section content generation: write response for first iteration and continuation iterations
|
||||
# For document generation: write response for each iteration
|
||||
if iteration == 1 or not isSectionContent:
|
||||
if iteration == 1:
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
|
||||
elif not isSectionContent:
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
|
||||
if iteration == 1:
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
|
||||
elif isSectionContent:
|
||||
# Save continuation responses for section_content debugging
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
|
||||
else:
|
||||
# Document generation - save all iteration responses
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
|
||||
|
||||
# Emit stats for this iteration (only if workflow exists and has id)
|
||||
if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id:
|
||||
|

@@ -229,319 +307,230 @@ class AiCallLooper:
                    self.services.chat.progressLogFinish(iterationOperationId, True)
                    return result

-                # Store raw response for continuation (even if broken)
-                lastRawResponse = result
-
-                # Parse JSON for use case handling
-                parsedJsonForUseCase = None
-                extractedJsonForUseCase = None
-                try:
-                    extractedJsonForUseCase = extractJsonString(result)
-                    parsedJson, parseError, _ = tryParseJson(extractedJsonForUseCase)
-                    if parseError is None and parsedJson:
-                        parsedJsonForUseCase = parsedJson
-                except Exception:
-                    pass
                # NOTE: Do NOT update lastRawResponse here!
                # lastRawResponse should only be updated after successful merge
                # This ensures retry iterations use the correct base context

-                # Handle use cases that return JSON directly (no section extraction needed)
-                directReturnUseCases = ["section_content", "chapter_structure", "code_structure", "code_content", "image_batch"]
-                if useCaseId in directReturnUseCases:
-                    # For chapter_structure, code_structure, and section_content, check completeness and support looping
-                    loopingUseCases = ["chapter_structure", "code_structure", "section_content"]
-                    if useCaseId in loopingUseCases:
-                        # If parsing failed (e.g., invalid JSON with comments or truncated JSON), continue looping to get valid JSON
-                        if not parsedJsonForUseCase:
-                            logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON parsing failed (likely incomplete/truncated), continuing iteration to complete")
-                            # Accumulate response for merging in next iteration
-                            accumulatedDirectJson.append(result)
-
-                            # Continue to next iteration - continuation prompt builder will handle the rest
-                            if iterationOperationId:
-                                self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation")
-                                self.services.chat.progressLogFinish(iterationOperationId, True)
-                            continue
-
-                        # Check completeness if we have parsed JSON
-                        isComplete = JsonResponseHandler.isJsonComplete(parsedJsonForUseCase)
-
-                        if not isComplete:
-                            logger.warning(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is incomplete, continuing for continuation")
-                            # Accumulate response for merging in next iteration
-                            accumulatedDirectJson.append(result)
-
-                            # Continue to next iteration - continuation prompt builder will handle the rest
-                            if iterationOperationId:
-                                self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation")
-                                self.services.chat.progressLogFinish(iterationOperationId, True)
-                            continue
-                        else:
-                            # JSON is complete - merge accumulated responses if any
-                            if accumulatedDirectJson:
-                                logger.info(f"Iteration {iteration}: Merging {len(accumulatedDirectJson) + 1} accumulated responses")
-                                # Merge accumulated JSON strings with current response
-                                mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result
-                                for prevJson in accumulatedDirectJson[1:]:
-                                    mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson)
-                                # Finally merge with current response
-                                mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result)
-
-                                # Re-parse merged JSON
-                                try:
-                                    extractedMerged = extractJsonString(mergedJsonString)
-                                    parsedMerged, parseError, _ = tryParseJson(extractedMerged)
-                                    if parseError is None and parsedMerged:
-                                        parsedJsonForUseCase = parsedMerged
-                                        result = mergedJsonString
-                                        logger.info(f"Successfully merged and parsed {len(accumulatedDirectJson) + 1} JSON fragments")
-                                except Exception as e:
-                                    logger.warning(f"Failed to parse merged JSON, using last response: {e}")
-
-                            logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is complete")
                # Check if use case supports direct return (all registered use cases do)
                if useCase and not useCase.requiresExtraction:
                    # =====================================================================
                    # ITERATION FLOW (Simplified)
                    # =====================================================================
                    # Step 4: MERGE jsonBase + new response
                    #   - FAILS: repeat prompt, increment fail count (if >= 3, return fallback)
                    #   - SUCCEEDS: try parse
                    #       - SUCCEEDS: FINISHED
                    #       - FAILS: proceed to Step 5
                    # Step 5: GET CONTEXTS (merge OK, parse failed)
                    #   - getContexts() with repair
                    #   - If no cut point: overlapContext = ""
                    # Step 6: DECIDE
                    #   - jsonParsingSuccess=true AND overlapContext="": FINISHED
                    #   - jsonParsingSuccess=true AND overlapContext!="": continue, fails=0
                    #   - ELSE: repeat prompt, increment fail count
                    # =====================================================================
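
To make the comment block above concrete, here is a minimal, illustrative sketch of the Step 4-6 control loop. It is not the implementation: it assumes a merge function returning (mergedString, hasOverlap) and a getContexts() result exposing the jsonParsingSuccess, completePart, overlapContext, and hierarchyContext fields used in this file, and MAX_MERGE_FAILS = 3 is only implied by the "if >= 3, return fallback" note.

    import json

    MAX_MERGE_FAILS = 3  # assumption, implied by the flow comment above

    def iterate(fragments, mergeWithOverlap, getContexts):
        jsonBase = None             # committed, known-good accumulated JSON string
        lastValidCompletePart = ""  # closed/repaired fallback
        fails = 0
        for fragment in fragments:  # each fragment is one AI response
            # Step 4: merge the committed base with the new fragment
            if jsonBase is None:
                candidate, ok = fragment, True
            else:
                candidate, ok = mergeWithOverlap(jsonBase, fragment)
            if not ok:
                fails += 1
                if fails >= MAX_MERGE_FAILS:
                    return lastValidCompletePart or (jsonBase or "")
                continue  # retry with the same prompt, jsonBase unchanged
            try:
                return json.dumps(json.loads(candidate))  # direct parse succeeded: finished
            except ValueError:
                pass
            # Step 5/6: repair-and-inspect, then decide
            ctx = getContexts(candidate)
            if ctx.jsonParsingSuccess and ctx.overlapContext == "":
                return ctx.completePart          # complete, no cut point
            if ctx.jsonParsingSuccess:
                jsonBase = ctx.hierarchyContext  # commit the CUT version for the next merge
                lastValidCompletePart = ctx.completePart
                fails = 0
                continue                         # ask the model to continue
            fails += 1
            if fails >= MAX_MERGE_FAILS:
                return lastValidCompletePart or (jsonBase or "")
        return lastValidCompletePart or (jsonBase or "")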

                    # STEP 4: MERGE jsonBase + new response
                    # Use candidateJson to hold merged result until we confirm it's valid
                    candidateJson = None

-                    logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - returning JSON directly")
-                    if iterationOperationId:
-                        self.services.chat.progressLogFinish(iterationOperationId, True)
-
-                    # For section_content, return raw result to allow merging of multiple JSON blocks
-                    # The merging logic in subStructureFilling.py will handle extraction and merging
-                    if useCaseId == "section_content":
-                        final_json = result  # Return raw response to preserve all JSON blocks
                    if jsonBase is None:
                        # First iteration - candidate is the current result
                        candidateJson = result
                        logger.debug(f"Iteration {iteration}: First response, candidateJson ({len(candidateJson)} chars)")
                    else:
-                        final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
-
-                    # Write final result for chapter structure and code structure (section_content skips it)
-                    if useCaseId in ["chapter_structure", "code_structure"]:
-                        self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
-
-                    return final_json
-
-                # Extract sections from response (handles both valid and broken JSON)
-                # Only for document generation (JSON responses)
-                # CRITICAL: Pass allSections and accumulationState to enable string accumulation
-                extractedSections, wasJsonComplete, parsedResult, accumulationState = self.responseParser.extractSectionsFromResponse(
-                    result, iteration, debugPrefix, allSections, accumulationState
-                )
-
-                # CRITICAL: Merge sections BEFORE KPI validation
-                # This ensures sections are preserved even if KPI validation fails
-                if extractedSections:
-                    allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration)
-
-                # Define KPIs if we just entered accumulation mode (iteration 1, incomplete JSON)
-                if accumulationState and accumulationState.isAccumulationMode and iteration == 1 and not accumulationState.kpis:
-                    logger.info(f"Iteration {iteration}: Defining KPIs for accumulation tracking")
-                    continuationContext = buildContinuationContext(allSections, result)
-                    # Pass raw response string from first iteration for KPI definition
-                    kpiDefinitions = await self._defineKpisFromPrompt(
-                        userPrompt or prompt,
-                        result,  # Pass raw JSON string from first iteration
-                        continuationContext,
-                        debugPrefix
-                    )
-                    # Initialize KPIs with currentValue = 0
-                    accumulationState.kpis = [{**kpi, "currentValue": 0} for kpi in kpiDefinitions]
-                    logger.info(f"Defined {len(accumulationState.kpis)} KPIs: {[kpi.get('id') for kpi in accumulationState.kpis]}")
-
-                # Extract and validate KPIs (if in accumulation mode with KPIs defined)
-                if accumulationState and accumulationState.isAccumulationMode and accumulationState.kpis:
-                    # For KPI extraction, prefer accumulated JSON string over repaired JSON
-                    # because repairBrokenJson may lose data (e.g., empty rows array when JSON is incomplete)
-                    updatedKpis = []
-
-                    # First try to extract from parsedResult (repaired JSON)
-                    if parsedResult:
-                        try:
-                            updatedKpis = JsonResponseHandler.extractKpiValuesFromJson(
-                                parsedResult,
-                                accumulationState.kpis
                        # Merge jsonBase with new response
                        logger.info(f"Iteration {iteration}: Merging jsonBase ({len(jsonBase)} chars) with new response ({len(result)} chars)")
                        mergedJsonString, hasOverlap = JsonResponseHandler.mergeJsonStringsWithOverlap(jsonBase, result)

                        if not hasOverlap:
                            # MERGE FAILED - repeat prompt with unchanged jsonBase
                            mergeFailCount += 1
                            logger.warning(
                                f"Iteration {iteration}: Merge failed, no overlap found "
                                f"(fail {mergeFailCount}/{MAX_MERGE_FAILS})"
                            )
-                            # Check if we got meaningful values (non-zero)
-                            hasValidValues = any(kpi.get("currentValue", 0) > 0 for kpi in updatedKpis)
-                            if not hasValidValues and accumulationState.accumulatedJsonString:
-                                # Repaired JSON has empty values, try accumulated string
-                                logger.debug("Repaired JSON has empty KPI values, trying accumulated JSON string")
-                                updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson(
-                                    accumulationState.accumulatedJsonString,
-                                    accumulationState.kpis

                            if mergeFailCount >= MAX_MERGE_FAILS:
                                # Max failures reached - return last valid completePart
                                logger.error(
                                    f"Iteration {iteration}: Max merge failures ({MAX_MERGE_FAILS}) reached, "
                                    "returning last valid completePart"
                                )
                                if iterationOperationId:
                                    self.services.chat.progressLogFinish(iterationOperationId, False)

                                if lastValidCompletePart:
                                    try:
                                        extracted = extractJsonString(lastValidCompletePart)
                                        parsed, parseErr, _ = tryParseJson(extracted)
                                        if parseErr is None and parsed:
                                            normalized = self._normalizeJsonStructure(parsed, useCase)
                                            return json.dumps(normalized, indent=2, ensure_ascii=False)
                                    except Exception:
                                        pass
                                    return lastValidCompletePart
                                else:
                                    # No valid fallback - return whatever we have
                                    return jsonBase if jsonBase else ""

                            # Not at max failures - retry with same prompt (jsonBase unchanged)
                            if iterationOperationId:
                                self.services.chat.progressLogUpdate(
                                    iterationOperationId, 0.7,
                                    f"Merge failed ({mergeFailCount}/{MAX_MERGE_FAILS}), retrying"
                                )
                                self.services.chat.progressLogFinish(iterationOperationId, True)
                            continue

                        # MERGE SUCCEEDED - set candidate (don't update jsonBase yet!)
                        candidateJson = mergedJsonString
                        logger.debug(f"Iteration {iteration}: Merge succeeded, candidateJson ({len(candidateJson)} chars)")
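
The looper relies on JsonResponseHandler.mergeJsonStringsWithOverlap() here, but its body is not shown (the subJsonMerger.py diff is suppressed further below). A plausible minimal sketch of an overlap merge, assuming the continuation fragment restarts at the cut point and therefore shares a suffix/prefix overlap with the accumulated base:

    from typing import Tuple

    def mergeWithOverlap(base: str, fragment: str, minOverlap: int = 16) -> Tuple[str, bool]:
        """Sketch: find the longest suffix of `base` that is also a prefix of `fragment`."""
        maxLen = min(len(base), len(fragment))
        for size in range(maxLen, minOverlap - 1, -1):
            if base.endswith(fragment[:size]):
                return base + fragment[size:], True
        return base, False  # no overlap found: caller repeats the prompt

    merged, ok = mergeWithOverlap('{"rows": [1, 2, 3', ' [1, 2, 3, 4, 5]}', minOverlap=5)
    # merged == '{"rows": [1, 2, 3, 4, 5]}', ok is True

With a scheme like this, a failed lookup leaves the base untouched, which is why the caller above can simply repeat the prompt when hasOverlap is False.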

                    # Update lastRawResponse ONLY after we have a valid candidateJson
                    # (first iteration or successful merge - NOT on merge failure!)
                    # This ensures retry iterations use the correct base context
                    lastRawResponse = candidateJson

                    # Try direct parse of candidate
                    try:
                        extracted = extractJsonString(candidateJson)
                        parsed, parseErr, _ = tryParseJson(extracted)
                        if parseErr is None and parsed:
                            # Direct parse succeeded - FINISHED
                            # Commit candidate to jsonBase
                            jsonBase = candidateJson
                            logger.info(f"Iteration {iteration}: Direct parse succeeded, JSON is complete")
                            normalized = self._normalizeJsonStructure(parsed, useCase)
                            result = json.dumps(normalized, indent=2, ensure_ascii=False)

                            if iterationOperationId:
                                self.services.chat.progressLogFinish(iterationOperationId, True)

                            if not useCase.finalResultHandler:
                                raise ValueError(
                                    f"Use case '{useCaseId}' is missing required 'finalResultHandler' callback."
                                )
                            return useCase.finalResultHandler(
                                result, normalized, extracted, debugPrefix, self.services
                            )
                    except Exception as e:
                        logger.debug(f"Iteration {iteration}: Direct parse failed: {e}")

                    # STEP 5: GET CONTEXTS (merge OK, parse failed = cut JSON)
                    # Use candidateJson for context extraction
                    contexts = getContexts(candidateJson)
                    overlapInfo = "(empty=complete)" if contexts.overlapContext == "" else f"({len(contexts.overlapContext)} chars)"
                    logger.debug(
                        f"Iteration {iteration}: getContexts() -> "
                        f"jsonParsingSuccess={contexts.jsonParsingSuccess}, "
                        f"overlapContext={overlapInfo}"
                    )

                    # STEP 6: DECIDE based on jsonParsingSuccess and overlapContext
                    if contexts.jsonParsingSuccess and contexts.overlapContext == "":
                        # JSON is complete (no cut point) - FINISHED
                        # Use completePart for final result (closed, repaired JSON)
                        # No more merging needed, so we don't need the cut version
                        jsonBase = contexts.completePart
                        logger.info(f"Iteration {iteration}: jsonParsingSuccess=true, overlapContext='', JSON complete")

                        # Store and parse completePart
                        lastValidCompletePart = contexts.completePart

                        try:
                            extracted = extractJsonString(contexts.completePart)
                            parsed, parseErr, _ = tryParseJson(extracted)
                            if parseErr is None and parsed:
                                normalized = self._normalizeJsonStructure(parsed, useCase)
                                result = json.dumps(normalized, indent=2, ensure_ascii=False)

                                if iterationOperationId:
                                    self.services.chat.progressLogFinish(iterationOperationId, True)

                                if not useCase.finalResultHandler:
                                    raise ValueError(
                                        f"Use case '{useCaseId}' is missing required 'finalResultHandler' callback."
                                    )
                                return useCase.finalResultHandler(
                                    result, normalized, extracted, debugPrefix, self.services
                                )
                        except Exception as e:
-                            logger.debug(f"Error extracting KPIs from parsedResult: {e}")
-                            updatedKpis = []
                            logger.warning(f"Iteration {iteration}: Failed to parse completePart: {e}")

                        # Fallback: return completePart as-is
                        if iterationOperationId:
                            self.services.chat.progressLogFinish(iterationOperationId, True)
                        return contexts.completePart
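
For orientation, an assumed example of what the three contexts can look like for a response cut off mid-element (shapes inferred from the JsonContinuationContexts docstring; the exact output of getContexts() may differ):

    truncated = '{"documents": [{"sections": [{"rows": [{"a": 1}, {"a": 2'

    # completePart: the truncated content closed/repaired so it parses, e.g.
    #   '{"documents": [{"sections": [{"rows": [{"a": 1}]}]}]}'
    # overlapContext: the innermost element containing the cut point (used for merging), e.g.
    #   '{"a": 2'
    # hierarchyContext: the full structure from the root down to the cut point,
    # kept OPEN so the next fragment can be overlap-merged at the cut, e.g.
    #   '{"documents": [{"sections": [{"rows": [{"a": 1}, {"a": 2'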

-                            # If no parsedResult or extraction failed, try accumulated string
-                            if not updatedKpis and accumulationState.accumulatedJsonString:
-                                try:
-                                    updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson(
-                                        accumulationState.accumulatedJsonString,
-                                        accumulationState.kpis
-                                    )
-                                except Exception as e:
-                                    logger.debug(f"Error extracting KPIs from accumulated JSON string: {e}")
-                                    updatedKpis = []
-
-                            if updatedKpis:
-                                shouldProceed, reason = JsonResponseHandler.validateKpiProgression(
-                                    accumulationState,
-                                    updatedKpis
                    elif contexts.jsonParsingSuccess and contexts.overlapContext != "":
                        # JSON parseable but has cut point - CONTINUE to next iteration
                        # CRITICAL: Use hierarchyContext (CUT json) as jsonBase for next merge!
                        # - hierarchyContext = the truncated JSON at cut point (needed for overlap matching)
                        # - completePart = closed JSON (for validation/fallback only)
                        # The next AI fragment's overlap must match the CUT point, not closed structures
                        jsonBase = contexts.hierarchyContext
                        logger.info(
                            f"Iteration {iteration}: jsonParsingSuccess=true, overlapContext not empty, "
                            f"continuing iteration (jsonBase updated to hierarchyContext: {len(jsonBase)} chars)"
                        )

-                                if not shouldProceed:
-                                    logger.warning(f"Iteration {iteration}: KPI validation failed: {reason}")
                        # Store valid completePart as fallback (different from jsonBase!)
                        lastValidCompletePart = contexts.completePart

                        # Reset fail counter on successful progress
                        mergeFailCount = 0

                        # Update lastRawResponse for continuation prompt building
                        # Use the CUT version for prompt context as well
                        lastRawResponse = jsonBase

                        if iterationOperationId:
                            self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation")
                            self.services.chat.progressLogFinish(iterationOperationId, True)
                        continue

                    else:
                        # JSON not parseable after repair - repeat prompt, increment fails
                        # Do NOT update jsonBase - keep previous valid state
                        mergeFailCount += 1
                        logger.warning(
                            f"Iteration {iteration}: jsonParsingSuccess=false, "
                            f"repeat prompt (fail {mergeFailCount}/{MAX_MERGE_FAILS})"
                        )

                        if mergeFailCount >= MAX_MERGE_FAILS:
                            # Max failures reached - return last valid completePart
                            logger.error(
                                f"Iteration {iteration}: Max failures ({MAX_MERGE_FAILS}) reached, "
                                "returning last valid completePart"
                            )
                            if iterationOperationId:
                                self.services.chat.progressLogFinish(iterationOperationId, False)
-                                    if operationId:
-                                        self.services.chat.progressLogUpdate(operationId, 0.9, f"KPI validation failed: {reason} ({iteration} iterations)")
-                                    break

                            if lastValidCompletePart:
                                try:
                                    extracted = extractJsonString(lastValidCompletePart)
                                    parsed, parseErr, _ = tryParseJson(extracted)
                                    if parseErr is None and parsed:
                                        normalized = self._normalizeJsonStructure(parsed, useCase)
                                        return json.dumps(normalized, indent=2, ensure_ascii=False)
                                except Exception:
                                    pass
                                return lastValidCompletePart
                            else:
                                return jsonBase if jsonBase else ""

-                                # Update KPIs in accumulation state
-                                accumulationState.kpis = updatedKpis
-                                logger.info(f"Iteration {iteration}: KPIs updated: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
-
-                                # Check if all KPIs completed
-                                allCompleted = True
-                                for kpi in updatedKpis:
-                                    targetValue = kpi.get("targetValue", 0)
-                                    currentValue = kpi.get("currentValue", 0)
-                                    if currentValue < targetValue:
-                                        allCompleted = False
-                                        break
-
-                                if allCompleted:
-                                    logger.info(f"Iteration {iteration}: All KPIs completed, finishing accumulation")
-                                    wasJsonComplete = True  # Mark as complete to exit loop
-
-                # CRITICAL: Handle JSON fragments (continuation content)
-                # Fragment merging happens inside extractSectionsFromResponse
-                # If merge fails (returns wasJsonComplete=True), stop iterations and complete JSON
-                if not extractedSections and allSections:
-                    if wasJsonComplete:
-                        # Merge failed - stop iterations, complete JSON with available data
-                        logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - Stopping iterations, completing JSON with available data")
                        # Not at max - retry with same prompt
                        # Do NOT update jsonBase or lastRawResponse - keep previous for retry
-                        if iterationOperationId:
-                            self.services.chat.progressLogFinish(iterationOperationId, False)
-                        if operationId:
-                            self.services.chat.progressLogUpdate(operationId, 0.9, f"Merge failed, completing JSON ({iteration} iterations)")
-                        break

-                    # Fragment was detected and merged successfully
-                    logger.info(f"Iteration {iteration}: JSON fragment detected and merged, continuing")
-                    # Don't break - fragment was merged, continue to get more content if needed
-                    # Check if we should continue based on JSON completeness
-                    shouldContinue = self.responseParser.shouldContinueGeneration(
-                        allSections,
-                        iteration,
-                        wasJsonComplete,
-                        result
-                    )
-                    if shouldContinue:
                        if iterationOperationId:
-                            self.services.chat.progressLogUpdate(iterationOperationId, 0.8, "Fragment merged, continuing")
                            self.services.chat.progressLogUpdate(
                                iterationOperationId, 0.7,
                                f"Parse failed ({mergeFailCount}/{MAX_MERGE_FAILS}), retrying"
                            )
                            self.services.chat.progressLogFinish(iterationOperationId, True)
                        continue
-                    else:
-                        # Done - fragment was merged and JSON is complete
-                        if iterationOperationId:
-                            self.services.chat.progressLogFinish(iterationOperationId, True)
-                        if operationId:
-                            self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, fragment merged)")
-                        logger.info(f"Generation complete after {iteration} iterations: fragment merged")
-                        break
-
-                # Extract document metadata from first iteration if available
-                if iteration == 1 and parsedResult and not documentMetadata:
-                    documentMetadata = self.responseParser.extractDocumentMetadata(parsedResult)
-
-                # Update progress after parsing
-                if iterationOperationId:
-                    if extractedSections:
-                        self.services.chat.progressLogUpdate(iterationOperationId, 0.8, f"Extracted {len(extractedSections)} sections")
-
-                if not extractedSections:
-                    # CRITICAL: If JSON was incomplete/broken, continue even if no sections extracted
-                    # This allows the AI to retry and complete the broken JSON
-                    if not wasJsonComplete:
-                        logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt")
-                        continue
-                    # If JSON was complete but no sections extracted - check if it was a fragment
-                    # Fragments are handled above, so if we get here and it's complete, it's an error
-                    logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping")
-                    break
-
-                # NOTE: Section merging now happens BEFORE KPI validation (see above)
-                # This ensures sections are preserved even if KPI validation fails
-
-                # Calculate total bytes in merged content for progress display
-                merged_json_str = json.dumps(allSections, indent=2, ensure_ascii=False)
-                totalBytesGenerated = len(merged_json_str.encode('utf-8'))
-
-                # Update main operation with byte progress
-                if operationId:
-                    # Format bytes for display
-                    if totalBytesGenerated < 1024:
-                        bytesDisplay = f"{totalBytesGenerated}B"
-                    elif totalBytesGenerated < 1024 * 1024:
-                        bytesDisplay = f"{totalBytesGenerated / 1024:.1f}kB"
-                    else:
-                        bytesDisplay = f"{totalBytesGenerated / (1024 * 1024):.1f}MB"
-                    # Estimate progress based on iterations (rough estimate)
-                    estimatedProgress = min(0.9, 0.4 + (iteration * 0.1))
-                    self.services.chat.progressLogUpdate(operationId, estimatedProgress, f"Pipeline: {bytesDisplay} (iteration {iteration})")
-
-                # Log merged sections for debugging
-                # For section content generation: skip merged sections debug files (only one prompt/response needed)
-                isSectionContent = "_section_" in debugPrefix
-                if not isSectionContent:
-                    self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}")
-
-                # Check if we should continue (completion detection)
-                # Simple logic: JSON completeness determines continuation
-                shouldContinue = self.responseParser.shouldContinueGeneration(
-                    allSections,
-                    iteration,
-                    wasJsonComplete,
-                    result
-                )
-
-                if shouldContinue:
-                    # Finish iteration operation (will continue with next iteration)
-                    if iterationOperationId:
-                        # Show byte progress in iteration completion
-                        iterBytes = len(result.encode('utf-8')) if result else 0
-                        if iterBytes < 1024:
-                            iterBytesDisplay = f"{iterBytes}B"
-                        elif iterBytes < 1024 * 1024:
-                            iterBytesDisplay = f"{iterBytes / 1024:.1f}kB"
-                        else:
-                            iterBytesDisplay = f"{iterBytes / (1024 * 1024):.1f}MB"
-                        self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Completed ({iterBytesDisplay})")
-                        self.services.chat.progressLogFinish(iterationOperationId, True)
-                    continue
-                else:
-                    # Done - finish iteration and update main operation
-                    if iterationOperationId:
-                        # Show final byte count
-                        finalBytes = len(merged_json_str.encode('utf-8'))
-                        if finalBytes < 1024:
-                            finalBytesDisplay = f"{finalBytes}B"
-                        elif finalBytes < 1024 * 1024:
-                            finalBytesDisplay = f"{finalBytes / 1024:.1f}kB"
-                        else:
-                            finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB"
-                        self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Complete ({finalBytesDisplay})")
-                        self.services.chat.progressLogFinish(iterationOperationId, True)
-                    if operationId:
-                        # Show final size in main operation
-                        finalBytes = len(merged_json_str.encode('utf-8'))
-                        if finalBytes < 1024:
-                            finalBytesDisplay = f"{finalBytes}B"
-                        elif finalBytes < 1024 * 1024:
-                            finalBytesDisplay = f"{finalBytes / 1024:.1f}kB"
-                        else:
-                            finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB"
-                        self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete: {finalBytesDisplay} ({iteration} iterations, {len(allSections)} sections)")
-                    logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections")
-                    break

            except Exception as e:
                logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
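
The removed code above repeats the same B/kB/MB formatting ladder several times. A small helper of this shape (hypothetical, not part of the PR) would factor it out:

    def formatBytes(numBytes: int) -> str:
        """Format a byte count as B / kB / MB, mirroring the repeated ladder above."""
        if numBytes < 1024:
            return f"{numBytes}B"
        if numBytes < 1024 * 1024:
            return f"{numBytes / 1024:.1f}kB"
        return f"{numBytes / (1024 * 1024):.1f}MB"

    assert formatBytes(512) == "512B"
    assert formatBytes(2048) == "2.0kB"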
@@ -552,113 +541,135 @@ class AiCallLooper:
        if iteration >= maxIterations:
            logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")

-        # CRITICAL: Complete any incomplete structures in sections before building final result
-        # This ensures JSON is properly closed even if merge failed or iterations stopped early
-        allSections = JsonResponseHandler.completeIncompleteStructures(allSections)
-
-        # Build final result from accumulated sections
-        final_result = self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)
-
-        # Write final result to debug file
-        # For section content generation: skip final_result debug file (response already written)
-        isSectionContent = "_section_" in debugPrefix
-        if not isSectionContent:
-            self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
-
-        return final_result
        # This code path should never be reached because all registered use cases
        # return early when JSON is complete. It would only execute for use cases that
        # require section extraction, but no such use cases are currently registered.
        logger.error(f"Unexpected code path: reached end of loop without return for use case '{useCaseId}'")
        return result if result else ""

-    async def _defineKpisFromPrompt(
-        self,
-        userPrompt: str,
-        rawJsonString: Optional[str],
-        continuationContext: Dict[str, Any],
-        debugPrefix: str = "kpi"
-    ) -> List[Dict[str, Any]]:
    def _isJsonStringIncomplete(self, jsonString: str) -> bool:
        """
-        Make separate AI call to define KPIs based on user prompt and incomplete JSON.
        Check if a JSON string is incomplete (truncated) BEFORE closing/parsing.

        This is critical because if JSON is truncated, closing it makes it appear complete,
        but we need to detect the truncation to continue iteration.

        Args:
-            userPrompt: Original user prompt
-            rawJsonString: Raw JSON string from first iteration response
-            continuationContext: Continuation context (not used for JSON, kept for compatibility)
-            debugPrefix: Prefix for debug file names
            jsonString: JSON string to check

        Returns:
-            List of KPI definitions: [{"id": str, "description": str, "jsonPath": str, "targetValue": int}, ...]
            True if the JSON string appears incomplete/truncated, False otherwise
        """
-        # Use raw JSON string from first iteration response
-        if rawJsonString:
-            # Remove markdown code fences if present
-            from modules.shared.jsonUtils import stripCodeFences
-            incompleteJson = stripCodeFences(rawJsonString.strip())
-        else:
-            incompleteJson = "Not available"
        if not jsonString or not jsonString.strip():
            return False

        # Normalize JSON string
        normalized = stripCodeFences(normalizeJsonText(jsonString)).strip()
        if not normalized:
            return False

-        kpiDefinitionPrompt = f"""Analyze the user request and incomplete JSON to define KPIs (Key Performance Indicators) for tracking progress.
-
-User Request:
-{userPrompt}
-
-Delivered JSON part:
-{incompleteJson}
-
-Task: Define which JSON items should be tracked to measure completion progress.
-
-IMPORTANT: Analyze the Delivered JSON part structure to understand what is being tracked:
-1. Identify the structure type (table with rows, list with items, etc.)
-2. Determine what the jsonPath actually counts (number of rows, number of items, etc.)
-3. Calculate targetValue based on what is being tracked, NOT the total quantity requested
-
-For each trackable item, provide:
-- id: Unique identifier (use descriptive name)
-- description: What this KPI measures (be specific about what is counted)
-- jsonPath: Path to extract value from JSON (use dot notation with array indices, e.g., "documents[0].sections[1].elements[0].rows")
-- targetValue: Target value to reach (integer) - MUST match what jsonPath actually tracks (rows count, items count, etc.)
-
-Return ONLY valid JSON in this format:
-{{
-  "kpis": [
-    {{
-      "id": "unique_id",
-      "description": "Description of what is measured",
-      "jsonPath": "path.to.value",
-      "targetValue": 0
-    }}
-  ]
-}}
-
-If no trackable items can be identified, return: {{"kpis": []}}
-"""
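
The removed prompt above asks the model for jsonPath values in dot notation with array indices. The real extractKpiValuesFromJson() is not shown in this diff; a minimal resolver for that path style could look like this (illustrative sketch only):

    import re
    from typing import Any

    def resolveJsonPath(data: Any, path: str) -> Any:
        """Resolve a dot-notation path with array indices, e.g. 'documents[0].sections[1].rows'."""
        for part in path.split('.'):
            match = re.fullmatch(r'(\w+)((?:\[\d+\])*)', part)
            if not match:
                raise ValueError(f"Unsupported path segment: {part}")
            data = data[match.group(1)]
            for index in re.findall(r'\[(\d+)\]', match.group(2)):
                data = data[int(index)]
        return data

    doc = {"documents": [{"sections": [{"rows": [1, 2, 3]}]}]}
    assert len(resolveJsonPath(doc, "documents[0].sections[0].rows")) == 3

A KPI's currentValue would then be, for example, the length of the list the path resolves to.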
        # Find first '{' or '[' to start
        startIdx = -1
        for i, char in enumerate(normalized):
            if char in '{[':
                startIdx = i
                break

-        try:
-            request = AiCallRequest(
-                prompt=kpiDefinitionPrompt,
-                options=AiCallOptions(
-                    operationType=OperationTypeEnum.DATA_ANALYSE,
-                    priority=PriorityEnum.SPEED,
-                    processingMode=ProcessingModeEnum.BASIC
-                )
        if startIdx == -1:
            return False

        jsonContent = normalized[startIdx:]

        # Check if structures are balanced (all opened structures are closed)
        braceCount = 0
        bracketCount = 0
        inString = False
        escapeNext = False

        for char in jsonContent:
            if escapeNext:
                escapeNext = False
                continue

            if char == '\\':
                escapeNext = True
                continue

            if char == '"':
                inString = not inString
                continue

            if not inString:
                if char == '{':
                    braceCount += 1
                elif char == '}':
                    braceCount -= 1
                elif char == '[':
                    bracketCount += 1
                elif char == ']':
                    bracketCount -= 1

        # If structures are unbalanced, JSON is incomplete
        if braceCount > 0 or bracketCount > 0:
            return True

        # Check if JSON ends with an incomplete value (e.g., unclosed string, incomplete number, trailing comma)
        trimmed = jsonContent.rstrip()
        if not trimmed:
            return False

        # Check for trailing comma (might indicate incompleteness)
        if trimmed.endswith(','):
            # A trailing comma might indicate truncation, but could also be valid:
            # there may still be a closing bracket/brace after the comma
            return False  # Trailing comma alone doesn't mean incomplete

        # Check if it ends with an incomplete string (odd number of quotes)
        quoteCount = jsonContent.count('"')
        if quoteCount % 2 == 1:
            # Odd number of quotes - a string is not closed
            return True

        # Check if it ends mid-value (e.g., ends with "417 instead of a complete "4170")
        # Look for patterns that suggest truncation:
        # - Ends with incomplete number (e.g., "417)
        # - Ends with incomplete array element (e.g., ["417)
        # - Ends with incomplete object property (e.g., {"key": "val)

        # If JSON parses successfully without closing, it's complete
        parsed, parseErr, _ = tryParseJson(jsonContent)
        if parseErr is None:
            # Parses successfully - it's complete
            return False

        # If it doesn't parse, try closing it and see if that helps
        closed = closeJsonStructures(jsonContent)
        parsedClosed, parseErrClosed, _ = tryParseJson(closed)

        if parseErrClosed is None:
            # Only parses after closing - it was incomplete
            return True

        # Doesn't parse even after closing - might be malformed, but assume incomplete to be safe
        return True
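
A standalone illustration of the balance scan implemented above (simplified: no string or escape handling), showing why unbalanced braces signal a truncated response:

    def looksTruncated(jsonText: str) -> bool:
        depth = 0
        for char in jsonText:
            if char in '{[':
                depth += 1
            elif char in '}]':
                depth -= 1
        return depth > 0

    assert looksTruncated('{"rows": [{"a": 1}, {"a": 2') is True
    assert looksTruncated('{"rows": [{"a": 1}]}') is False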
    def _normalizeJsonStructure(self, parsed: Any, useCase) -> Any:
        """
        Normalize JSON structure to ensure consistent format before merging.
        Handles different response formats and converts them to expected structure.

        Args:
            parsed: Parsed JSON object (can be dict, list, or primitive)
            useCase: LoopingUseCase instance with jsonNormalizer callback

        Returns:
            Normalized JSON structure
        """
        # Use callback to normalize JSON structure (REQUIRED - no fallback)
        if not useCase or not useCase.jsonNormalizer:
            raise ValueError(
                f"Use case '{useCase.useCaseId if useCase else 'unknown'}' is missing required 'jsonNormalizer' callback. "
                "All use cases must provide a jsonNormalizer function."
            )

-        # Write KPI definition prompt to debug file
-        self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt")
-
-        checkWorkflowStopped(self.services)
-        response = await self.aiService.callAi(request)
-
-        # Write KPI definition response to debug file
-        self.services.utils.writeDebugFile(response.content, f"{debugPrefix}_kpi_definition_response")
-
-        # Parse response
-        extracted = extractJsonString(response.content)
-        kpiResponse = json.loads(extracted)
-
-        kpiDefinitions = kpiResponse.get("kpis", [])
-        logger.info(f"Defined {len(kpiDefinitions)} KPIs for tracking")
-
-        return kpiDefinitions
-
-        except Exception as e:
-            logger.warning(f"Failed to define KPIs: {e}, continuing without KPI tracking")
-            return []
        return useCase.jsonNormalizer(parsed, useCase.useCaseId)

2081  modules/services/serviceAi/subJsonMerger.py  Normal file
File diff suppressed because it is too large

@@ -12,13 +12,96 @@ from typing import Dict, Any, List, Optional, Callable

logger = logging.getLogger(__name__)

# Callback functions for use-case-specific logic

def _handleSectionContentFinalResult(result: str, parsedJsonForUseCase: Any, extractedJsonForUseCase: str,
                                     debugPrefix: str, services: Any) -> str:
    """Handle final result for section_content: return raw result to preserve all JSON blocks."""
    final_json = result  # Return raw response to preserve all JSON blocks
    # Write final merged result for section_content (overwrites iteration 1 response with complete merged result)
    if services and hasattr(services, 'utils') and hasattr(services.utils, 'writeDebugFile'):
        services.utils.writeDebugFile(final_json, f"{debugPrefix}_response")
    return final_json


def _handleChapterStructureFinalResult(result: str, parsedJsonForUseCase: Any, extractedJsonForUseCase: str,
                                       debugPrefix: str, services: Any) -> str:
    """Handle final result for chapter_structure: format JSON and write debug file."""
    import json
    final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
    # Write final result for chapter structure
    if services and hasattr(services, 'utils') and hasattr(services.utils, 'writeDebugFile'):
        services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
    return final_json


def _handleCodeStructureFinalResult(result: str, parsedJsonForUseCase: Any, extractedJsonForUseCase: str,
                                    debugPrefix: str, services: Any) -> str:
    """Handle final result for code_structure: format JSON and write debug file."""
    import json
    final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
    # Write final result for code structure
    if services and hasattr(services, 'utils') and hasattr(services.utils, 'writeDebugFile'):
        services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
    return final_json


def _handleCodeContentFinalResult(result: str, parsedJsonForUseCase: Any, extractedJsonForUseCase: str,
                                  debugPrefix: str, services: Any) -> str:
    """Handle final result for code_content: format JSON."""
    import json
    final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
    return final_json


def _normalizeSectionContentJson(parsed: Any, useCaseId: str) -> Any:
    """Normalize JSON structure for section_content use case."""
    # For section_content, expect {"elements": [...]} structure
    if isinstance(parsed, list):
        # Check if list contains strings (invalid format) or element objects
        if parsed and isinstance(parsed[0], str):
            # Invalid format - list of strings instead of elements
            # Try to convert strings to paragraph elements as fallback
            logger.debug(f"Received list of strings instead of elements array, converting to paragraph elements")
            elements = []
            for text in parsed:
                if isinstance(text, str) and text.strip():
                    elements.append({
                        "type": "paragraph",
                        "content": {
                            "text": text.strip()
                        }
                    })
            return {"elements": elements} if elements else {"elements": []}
        else:
            # Convert plain list of elements to elements structure
            return {"elements": parsed}
    elif isinstance(parsed, dict):
        # If it already has "elements", return as-is
        if "elements" in parsed:
            return parsed
        # If it has "type" and looks like an element, wrap in elements array
        elif parsed.get("type"):
            return {"elements": [parsed]}
        # Otherwise, assume it's already in correct format
        else:
            return parsed

    # For other use cases, return as-is (they have their own structures)
    return parsed


def _normalizeDefaultJson(parsed: Any, useCaseId: str) -> Any:
    """Default normalizer: return as-is."""
    return parsed

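Concretely, the three input shapes handled by _normalizeSectionContentJson() above normalize as follows (values derived directly from the function body):

    assert _normalizeSectionContentJson(["Hello"], "section_content") == {
        "elements": [{"type": "paragraph", "content": {"text": "Hello"}}]
    }
    assert _normalizeSectionContentJson([{"type": "table"}], "section_content") == {
        "elements": [{"type": "table"}]
    }
    assert _normalizeSectionContentJson({"type": "table"}, "section_content") == {
        "elements": [{"type": "table"}]
    }
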
@dataclass
class LoopingUseCase:
    """Configuration for a specific looping use case."""

    # Identification
-    useCaseId: str  # "section_content", "chapter_structure", "document_structure", "code_structure", "code_content", "image_batch"
    useCaseId: str  # "section_content", "chapter_structure", "code_structure", "code_content"

    # JSON Format Detection
    jsonTemplate: Dict[str, Any]  # Expected JSON structure template

@@ -39,6 +122,10 @@ class LoopingUseCase:
    # Result Building
    resultBuilder: Optional[Callable] = None  # Build final result from accumulated data

    # Use-case-specific handlers (callbacks to avoid if/elif chains in generic code)
    finalResultHandler: Optional[Callable] = None  # Handle final result formatting and debug file writing
    jsonNormalizer: Optional[Callable] = None  # Normalize JSON structure for this use case

    # Metadata
    supportsAccumulation: bool = True  # Whether this use case supports accumulation
    requiresExtraction: bool = False  # Whether this requires extraction (like sections)

@@ -124,6 +211,8 @@ class LoopingUseCaseRegistry:
            merger=None,
            continuationContextBuilder=None,  # Will use default continuation context
            resultBuilder=None,  # Return JSON directly
            finalResultHandler=_handleSectionContentFinalResult,
            jsonNormalizer=_normalizeSectionContentJson,
            supportsAccumulation=False,
            requiresExtraction=False
        ))
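
A hypothetical example of registering an additional use case with the two new required callbacks, following the registrations above (the "summary_list" id and its handler are invented for illustration):

    def _handleSummaryFinalResult(result, parsedJsonForUseCase, extractedJsonForUseCase,
                                  debugPrefix, services):
        import json
        return json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False)

    registry = LoopingUseCaseRegistry()  # assumed entry point, as in the class above
    registry.register(LoopingUseCase(
        useCaseId="summary_list",        # hypothetical id
        jsonTemplate={"summaries": []},
        detectionKeys=["summaries"],
        detectionPath="summaries",
        initialPromptBuilder=None,
        continuationPromptBuilder=None,
        accumulator=None,
        merger=None,
        continuationContextBuilder=None,
        resultBuilder=None,
        finalResultHandler=_handleSummaryFinalResult,
        jsonNormalizer=_normalizeDefaultJson,
        supportsAccumulation=False,
        requiresExtraction=False
    ))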
@@ -141,28 +230,13 @@ class LoopingUseCaseRegistry:
            merger=None,
            continuationContextBuilder=None,
            resultBuilder=None,  # Return JSON directly
            finalResultHandler=_handleChapterStructureFinalResult,
            jsonNormalizer=_normalizeDefaultJson,
            supportsAccumulation=False,
            requiresExtraction=False
        ))

-        # Use Case 3: Document Structure Generation
-        # Returns JSON with "documents[0].sections" structure, requires extraction and accumulation
-        self.register(LoopingUseCase(
-            useCaseId="document_structure",
-            jsonTemplate={"documents": [{"sections": []}]},
-            detectionKeys=["sections"],
-            detectionPath="documents[0].sections",
-            initialPromptBuilder=None,
-            continuationPromptBuilder=None,
-            accumulator=None,  # Will use default accumulator
-            merger=None,  # Will use default merger
-            continuationContextBuilder=None,
-            resultBuilder=None,  # Will use default result builder
-            supportsAccumulation=True,
-            requiresExtraction=True
-        ))
-
-        # Use Case 4: Code Structure Generation (NEW)
        # Use Case 3: Code Structure Generation
        self.register(LoopingUseCase(
            useCaseId="code_structure",
            jsonTemplate={

@@ -191,6 +265,8 @@ class LoopingUseCaseRegistry:
            merger=None,
            continuationContextBuilder=None,
            resultBuilder=None,
            finalResultHandler=_handleCodeStructureFinalResult,
            jsonNormalizer=_normalizeDefaultJson,
            supportsAccumulation=False,
            requiresExtraction=False
        ))

@@ -207,25 +283,11 @@ class LoopingUseCaseRegistry:
            merger=None,  # Will use default merger
            continuationContextBuilder=None,
            resultBuilder=None,  # Will use default result builder
            finalResultHandler=_handleCodeContentFinalResult,
            jsonNormalizer=_normalizeDefaultJson,
            supportsAccumulation=True,
            requiresExtraction=False
        ))

-        # Use Case 6: Image Batch Generation (NEW)
-        self.register(LoopingUseCase(
-            useCaseId="image_batch",
-            jsonTemplate={"images": []},
-            detectionKeys=["images"],
-            detectionPath="images",
-            initialPromptBuilder=None,
-            continuationPromptBuilder=None,
-            accumulator=None,  # Direct return
-            merger=None,
-            continuationContextBuilder=None,
-            resultBuilder=None,
-            supportsAccumulation=False,
-            requiresExtraction=False
-        ))

        logger.info(f"Registered {len(self.useCases)} default looping use cases")
@@ -213,15 +213,16 @@ class StructureFiller:
        if not isinstance(doc["language"], str) or len(doc["language"]) != 2:
            raise ValueError(f"Document {doc.get('id')} has invalid language format in filled structure: {doc['language']} - should be 2-character ISO 639-1 code")

-        for chapter in doc.get("chapters", []):
-            for section in chapter.get("sections", []):
-                # Validation 4.2: Section missing 'elements' field
-                if "elements" not in section:
-                    section["elements"] = []
-                    logger.info(f"Section {section.get('id')} missing 'elements' - created empty list")
-
-                # Validation 4.3: Section has empty elements list - ALLOW (intentionally empty is OK)
-                # No action needed - empty elements are allowed
        # CRITICAL: flattenedStructure has sections, not chapters!
        # After flattening, chapters are converted to sections, so we need to validate sections directly
        for section in doc.get("sections", []):
            # Validation 4.2: Section missing 'elements' field
            if "elements" not in section:
                section["elements"] = []
                logger.info(f"Section {section.get('id')} missing 'elements' - created empty list")

            # Validation 4.3: Section has empty elements list - ALLOW (intentionally empty is OK)
            # No action needed - empty elements are allowed

    # Finish the chat log
    self.services.chat.progressLogFinish(fillOperationId, True)

@@ -246,6 +247,7 @@ class StructureFiller:
        contentParts: List[ContentPart],
        userPrompt: str,
        language: str,
        outputFormat: str,
        parentOperationId: str,
        totalChapters: int
    ) -> None:

@@ -271,7 +273,8 @@ class StructureFiller:
            contentPartInstructions=contentPartInstructions,
            contentParts=contentParts,
            userPrompt=userPrompt,
-            language=language
            language=language,
            outputFormat=outputFormat
        )

        # AI call for chapter structure generation

@@ -372,6 +375,8 @@ class StructureFiller:
            docId = doc.get("id", "unknown")
            # Get language for this specific document
            docLanguage = self._getDocumentLanguage(chapterStructure, docId)
            # Get output format for this specific document
            docFormat = doc.get("outputFormat", "txt")

            for chapter in doc.get("chapters", []):
                chapterIndex += 1

@@ -382,7 +387,7 @@ class StructureFiller:
                contentPartIds, contentPartInstructions = self._extractContentPartInfo(chapter)

                # Create task for parallel processing with semaphore
-                async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage):
                async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage, docFormat):
                    checkWorkflowStopped(self.services)
                    async with semaphore:
                        return await self._generateSingleChapterSectionsStructure(

@@ -397,12 +402,13 @@ class StructureFiller:
                            contentParts=contentParts,
                            userPrompt=userPrompt,
                            language=docLanguage,  # Use document-specific language
                            outputFormat=docFormat,  # Use document-specific format
                            parentOperationId=parentOperationId,
                            totalChapters=totalChapters
                        )

                task = processChapterWithSemaphore(
-                    chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage
                    chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage, docFormat
                )
                chapterTasks.append((chapterIndex, chapter, task))

@@ -747,7 +753,7 @@ class StructureFiller:
                if processedExtractedParts:
                    logger.debug(f"Section {sectionId}: Aggregating {len(processedExtractedParts)} extracted parts with AI")
                    isAggregation = True
-                    generationPrompt = self._buildSectionGenerationPrompt(
                    generationPrompt, templateStructure = self._buildSectionGenerationPrompt(
                        section=section,
                        contentParts=processedExtractedParts,
                        userPrompt=userPrompt,

@@ -805,48 +811,8 @@ class StructureFiller:
                        f"{chapterId}_section_{sectionId}_response"
                    )
                else:
-                    async def buildSectionPromptWithContinuation(
-                        section: Dict[str, Any],
-                        contentParts: List[ContentPart],
-                        userPrompt: str,
-                        generationHint: str,
-                        allSections: List[Dict[str, Any]],
-                        sectionIndex: int,
-                        isAggregation: bool,
-                        continuationContext: Dict[str, Any],
-                        services: Any
-                    ) -> str:
-                        basePrompt = self._buildSectionGenerationPrompt(
-                            section=section,
-                            contentParts=contentParts,
-                            userPrompt=userPrompt,
-                            generationHint=generationHint,
-                            allSections=allSections,
-                            sectionIndex=sectionIndex,
-                            isAggregation=isAggregation,
-                            language=language
-                        )
-
-                        continuationInfo = continuationContext.get("delivered_summary", "")
-                        cutOffElement = continuationContext.get("cut_off_element", "")
-
-                        continuationPrompt = f"""{basePrompt}
-
---- CONTINUATION REQUEST ---
-The previous JSON response was incomplete. Please continue from where it stopped.
-
-PREVIOUSLY DELIVERED SUMMARY:
-{continuationInfo}
-
-LAST INCOMPLETE ELEMENT:
-{cutOffElement}
-
-TASK: Continue generating the JSON elements array from where it was cut off.
-Complete the incomplete element and continue with remaining elements.
-
-Return ONLY the continuation JSON (starting from the incomplete element).
-The JSON should be a fragment that can be merged with the previous response."""
-                        return continuationPrompt
                    # Use consolidated class method
                    buildSectionPromptWithContinuation = self.buildSectionPromptWithContinuation

                    options = AiCallOptions(
                        operationType=operationType,
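
The three identical inline builders in this file are replaced by a single class method whose body is not shown in this diff. From the removed inline version and the new promptArgs (continuationContext, templateStructure, basePrompt), the consolidated method might look roughly like the sketch below; field names on continuationContext follow the ContinuationContext model, and everything else is an assumption, not the PR's actual implementation.

    async def buildSectionPromptWithContinuation(self, continuationContext, templateStructure, basePrompt):
        """Sketch only: append a continuation request to the already-built base prompt."""
        deliveredSummary = continuationContext.get("delivered_summary", "")
        overlapContext = continuationContext.get("overlap_context", "")
        templateHint = f"\nEXPECTED TEMPLATE STRUCTURE:\n{templateStructure}\n" if templateStructure else ""
        return f"""{basePrompt}

    --- CONTINUATION REQUEST ---
    The previous JSON response was incomplete. Please continue from where it stopped.
    {templateHint}
    PREVIOUSLY DELIVERED SUMMARY:
    {deliveredSummary}

    CUT-OFF CONTEXT:
    {overlapContext}

    TASK: Continue generating the JSON elements array from where it was cut off.
    Return ONLY the continuation JSON so it can be merged with the previous response."""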
@@ -868,7 +834,8 @@ The JSON should be a fragment that can be merged with the previous response."""
                        "allSections": all_sections_list,
                        "sectionIndex": sectionIndex,
                        "isAggregation": isAggregation,
-                        "services": self.services
                        "templateStructure": templateStructure,
                        "basePrompt": generationPrompt
                    },
                    operationId=sectionOperationId,
                    userPrompt=userPrompt,

@@ -974,7 +941,7 @@ The JSON should be a fragment that can be merged with the previous response."""
            if len(contentPartIds) == 0 and useAiCall and generationHint:
                # Generate content from scratch using only generationHint
                logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only")
-                generationPrompt = self._buildSectionGenerationPrompt(
                generationPrompt, templateStructure = self._buildSectionGenerationPrompt(
                    section=section,
                    contentParts=[],
                    userPrompt=userPrompt,

@@ -1033,48 +1000,8 @@ The JSON should be a fragment that can be merged with the previous response."""
                else:
                    isAggregation = False

-                async def buildSectionPromptWithContinuation(
-                    section: Dict[str, Any],
-                    contentParts: List[ContentPart],
-                    userPrompt: str,
-                    generationHint: str,
-                    allSections: List[Dict[str, Any]],
-                    sectionIndex: int,
-                    isAggregation: bool,
-                    continuationContext: Dict[str, Any],
-                    services: Any
-                ) -> str:
-                    basePrompt = self._buildSectionGenerationPrompt(
-                        section=section,
-                        contentParts=contentParts,
-                        userPrompt=userPrompt,
-                        generationHint=generationHint,
-                        allSections=allSections,
-                        sectionIndex=sectionIndex,
-                        isAggregation=isAggregation,
-                        language=language
-                    )
-
-                    continuationInfo = continuationContext.get("delivered_summary", "")
-                    cutOffElement = continuationContext.get("cut_off_element", "")
-
-                    continuationPrompt = f"""{basePrompt}
-
---- CONTINUATION REQUEST ---
-The previous JSON response was incomplete. Please continue from where it stopped.
-
-PREVIOUSLY DELIVERED SUMMARY:
-{continuationInfo}
-
-LAST INCOMPLETE ELEMENT:
-{cutOffElement}
-
-TASK: Continue generating the JSON elements array from where it was cut off.
-Complete the incomplete element and continue with remaining elements.
-
-Return ONLY the continuation JSON (starting from the incomplete element).
-The JSON should be a fragment that can be merged with the previous response."""
-                    return continuationPrompt
                # Use consolidated class method
                buildSectionPromptWithContinuation = self.buildSectionPromptWithContinuation

                options = AiCallOptions(
                    operationType=operationType,

@@ -1086,7 +1013,7 @@ The JSON should be a fragment that can be merged with the previous response."""
                    prompt=generationPrompt,
                    options=options,
                    debugPrefix=f"{chapterId}_section_{sectionId}",
-                    promptBuilder=buildSectionPromptWithContinuation,
                    promptBuilder=self.buildSectionPromptWithContinuation,
                    promptArgs={
                        "section": section,
                        "contentParts": [],

@@ -1095,7 +1022,9 @@ The JSON should be a fragment that can be merged with the previous response."""
                        "allSections": all_sections_list,
                        "sectionIndex": sectionIndex,
                        "isAggregation": isAggregation,
-                        "services": self.services
                        "templateStructure": templateStructure,
                        "basePrompt": generationPrompt,
                        "language": language
                    },
                    operationId=sectionOperationId,
                    userPrompt=userPrompt,

@@ -1277,7 +1206,7 @@ The JSON should be a fragment that can be merged with the previous response."""
            if useAiCall and generationHint:
                # AI call with a single ContentPart (may now be a text part after Vision extraction)
                logger.debug(f"Processing section {sectionId}: Single extracted part with AI call")
-                generationPrompt = self._buildSectionGenerationPrompt(
                generationPrompt, templateStructure = self._buildSectionGenerationPrompt(
                    section=section,
                    contentParts=[part],
                    userPrompt=userPrompt,

@@ -1336,48 +1265,8 @@ The JSON should be a fragment that can be merged with the previous response."""
                else:
                    isAggregation = False

-                async def buildSectionPromptWithContinuation(
-                    section: Dict[str, Any],
-                    contentParts: List[ContentPart],
-                    userPrompt: str,
-                    generationHint: str,
-                    allSections: List[Dict[str, Any]],
-                    sectionIndex: int,
-                    isAggregation: bool,
-                    continuationContext: Dict[str, Any],
-                    services: Any
-                ) -> str:
-                    basePrompt = self._buildSectionGenerationPrompt(
-                        section=section,
-                        contentParts=contentParts,
-                        userPrompt=userPrompt,
-                        generationHint=generationHint,
-                        allSections=allSections,
-                        sectionIndex=sectionIndex,
-                        isAggregation=isAggregation,
-                        language=language
-                    )
-
-                    continuationInfo = continuationContext.get("delivered_summary", "")
-                    cutOffElement = continuationContext.get("cut_off_element", "")
-
-                    continuationPrompt = f"""{basePrompt}
-
---- CONTINUATION REQUEST ---
-The previous JSON response was incomplete. Please continue from where it stopped.
-
-PREVIOUSLY DELIVERED SUMMARY:
-{continuationInfo}
-
-LAST INCOMPLETE ELEMENT:
-{cutOffElement}
-
-TASK: Continue generating the JSON elements array from where it was cut off.
-Complete the incomplete element and continue with remaining elements.
-
-Return ONLY the continuation JSON (starting from the incomplete element).
-The JSON should be a fragment that can be merged with the previous response."""
-                    return continuationPrompt
                # Use consolidated class method
                buildSectionPromptWithContinuation = self.buildSectionPromptWithContinuation

                options = AiCallOptions(
                    operationType=operationType,

@@ -1389,7 +1278,7 @@ The JSON should be a fragment that can be merged with the previous response."""
                    prompt=generationPrompt,
                    options=options,
                    debugPrefix=f"{chapterId}_section_{sectionId}",
-                    promptBuilder=buildSectionPromptWithContinuation,
                    promptBuilder=self.buildSectionPromptWithContinuation,
                    promptArgs={
                        "section": section,
                        "contentParts": [part],

@@ -1398,7 +1287,10 @@ The JSON should be a fragment that can be merged with the previous response."""
                        "allSections": all_sections_list,
                        "sectionIndex": sectionIndex,
                        "isAggregation": isAggregation,
-                        "services": self.services
                        "services": self.services,
                        "templateStructure": templateStructure,
                        "basePrompt": generationPrompt,
                        "language": language
                    },
                    operationId=sectionOperationId,
                    userPrompt=userPrompt,
@@ -1639,104 +1531,88 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
maxConcurrent = self._getMaxConcurrentGeneration(options)
|
||||
sectionSemaphore = asyncio.Semaphore(maxConcurrent)
|
||||
|
||||
# Helper function to calculate overall progress
|
||||
def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections):
|
||||
"""Calculate overall progress: 0.0 to 1.0"""
|
||||
if totalChapters == 0:
|
||||
return 1.0
|
||||
|
||||
# Progress from completed chapters (0 to chapterIndex-1)
|
||||
completedChaptersProgress = chapterIndex / totalChapters
|
||||
|
||||
# Progress from current chapter (sectionIndex / totalSections)
|
||||
currentChapterProgress = (sectionIndex / totalSections) / totalChapters if totalSections > 0 else 0
|
||||
|
||||
return min(1.0, completedChaptersProgress + currentChapterProgress)
|
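A quick numeric check of the formula above (values chosen for illustration): with 4 chapters total, the first chapter finished, and 2 of 5 sections done in the current chapter, the helper reports 35% overall.

```python
# Worked example for calculateOverallProgress (illustrative values)
progress = min(1.0, 1 / 4 + (2 / 5) / 4)
print(round(progress, 2))  # 0.35 -> 25% from the finished chapter + 10% from the running one
```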
||||
# Collect ALL sections from ALL chapters for fully parallel processing
|
||||
# Each task carries: (docId, chapterId, chapterTitle, sectionIndex, section, docLanguage)
|
||||
allSectionTasks = []
|
||||
totalSections = len(all_sections_list)
|
||||
completedSections = [0] # Mutable counter for progress tracking
|
||||
|
||||
# Process chapters sequentially with chapter-level progress
|
||||
chapterIndex = 0
|
||||
for doc in chapterStructure.get("documents", []):
|
||||
docId = doc.get("id", "unknown")
|
||||
# Get language for this specific document
|
||||
docLanguage = self._getDocumentLanguage(chapterStructure, docId)
|
||||
|
||||
for chapter in doc.get("chapters", []):
|
||||
chapterIndex += 1
|
||||
chapterId = chapter.get("id", "unknown")
|
||||
chapterTitle = chapter.get("title", "Untitled Chapter")
|
||||
sections = chapter.get("sections", [])
|
||||
totalSections = len(sections)
|
||||
chapterSectionCount = len(sections)
|
||||
|
||||
# Start chapter operation
|
||||
chapterOperationId = f"{fillOperationId}_chapter_{chapterId}"
|
||||
self.services.chat.progressLogStart(
|
||||
chapterOperationId,
|
||||
"Chapter Generation",
|
||||
f"Chapter {chapterIndex}/{totalChapters}",
|
||||
chapterTitle,
|
||||
parentOperationId=fillOperationId
|
||||
for sectionIndex, section in enumerate(sections):
|
||||
allSectionTasks.append({
|
||||
"docId": docId,
|
||||
"chapterId": chapterId,
|
||||
"chapterTitle": chapterTitle,
|
||||
"sectionIndex": sectionIndex,
|
||||
"chapterSectionCount": chapterSectionCount,
|
||||
"section": section,
|
||||
"docLanguage": docLanguage
|
||||
})
|
||||
|
||||
logger.info(f"Starting FULLY PARALLEL section generation: {totalSections} sections across {totalChapters} chapters")
|
||||
|
||||
# Create task wrapper for each section with progress tracking
|
||||
async def processSectionWithSemaphore(taskInfo):
|
||||
checkWorkflowStopped(self.services)
|
||||
async with sectionSemaphore:
|
||||
result = await self._processSingleSection(
|
||||
section=taskInfo["section"],
|
||||
sectionIndex=taskInfo["sectionIndex"],
|
||||
totalSections=taskInfo["chapterSectionCount"],
|
||||
chapterIndex=0, # Not used for sequential logic anymore
|
||||
totalChapters=totalChapters,
|
||||
chapterId=taskInfo["chapterId"],
|
||||
chapterOperationId=fillOperationId, # Use fillOperationId as parent (no chapter-level ops in parallel mode)
|
||||
fillOperationId=fillOperationId,
|
||||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
all_sections_list=all_sections_list,
|
||||
language=taskInfo["docLanguage"],
|
||||
calculateOverallProgress=lambda *args: completedSections[0] / totalSections if totalSections > 0 else 1.0
|
||||
)
|
||||
|
||||
# Process sections within chapter in parallel with concurrency control
|
||||
sectionTasks = []
|
||||
for sectionIndex, section in enumerate(sections):
|
||||
# Create task wrapper with semaphore for parallel processing
|
||||
async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress):
|
||||
checkWorkflowStopped(self.services)
|
||||
async with sectionSemaphore:
|
||||
return await self._processSingleSection(
|
||||
section=section,
|
||||
sectionIndex=sectionIndex,
|
||||
totalSections=totalSections,
|
||||
chapterIndex=chapterIndex,
|
||||
totalChapters=totalChapters,
|
||||
chapterId=chapterId,
|
||||
chapterOperationId=chapterOperationId,
|
||||
fillOperationId=fillOperationId,
|
||||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
all_sections_list=all_sections_list,
|
||||
language=docLanguage, # Use document-specific language
|
||||
calculateOverallProgress=calculateOverallProgress
|
||||
)
|
||||
|
||||
task = processSectionWithSemaphore(
|
||||
section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress
|
||||
)
|
||||
sectionTasks.append((sectionIndex, section, task))
|
||||
|
||||
# Execute all section tasks in parallel with concurrency control
|
||||
if sectionTasks:
|
||||
# Create list of tasks (without indices for gather)
|
||||
tasks = [task for _, _, task in sectionTasks]
|
||||
|
||||
# Execute in parallel with error handling
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Process results in order and assign elements to sections
|
||||
for (originalIndex, originalSection, _), result in zip(sectionTasks, results):
|
||||
if isinstance(result, Exception):
|
||||
logger.error(f"Error processing section {originalSection.get('id')}: {str(result)}")
|
||||
# Set error element
|
||||
originalSection["elements"] = [{
|
||||
"type": "error",
|
||||
"message": f"Error processing section: {str(result)}",
|
||||
"sectionId": originalSection.get("id")
|
||||
}]
|
||||
else:
|
||||
# Assign elements to section in correct order
|
||||
originalSection["elements"] = result
|
||||
|
||||
# Finish chapter operation after all sections processed
|
||||
self.services.chat.progressLogFinish(chapterOperationId, True)
|
||||
|
||||
# Update overall progress after chapter completion
|
||||
overallProgress = chapterIndex / totalChapters if totalChapters > 0 else 1.0
|
||||
# Update progress after each section completes
|
||||
completedSections[0] += 1
|
||||
overallProgress = completedSections[0] / totalSections if totalSections > 0 else 1.0
|
||||
sectionId = taskInfo["section"].get("id", "unknown")
|
||||
self.services.chat.progressLogUpdate(
|
||||
fillOperationId,
|
||||
overallProgress,
|
||||
f"Chapter {chapterIndex}/{totalChapters} completed: {chapterTitle}"
|
||||
f"Section {completedSections[0]}/{totalSections} completed: {sectionId}"
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
# Create all tasks
|
||||
tasks = [processSectionWithSemaphore(taskInfo) for taskInfo in allSectionTasks]
|
||||
|
||||
# Execute ALL sections in parallel with concurrency control
|
||||
if tasks:
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Assign results back to sections
|
||||
for taskInfo, result in zip(allSectionTasks, results):
|
||||
section = taskInfo["section"]
|
||||
if isinstance(result, Exception):
|
||||
logger.error(f"Error processing section {section.get('id')}: {str(result)}")
|
||||
section["elements"] = [{
|
||||
"type": "error",
|
||||
"message": f"Error processing section: {str(result)}",
|
||||
"sectionId": section.get("id")
|
||||
}]
|
||||
else:
|
||||
section["elements"] = result if result is not None else []
|
||||
|
||||
logger.info(f"Completed FULLY PARALLEL section generation: {totalSections} sections")
|
||||
|
||||
return chapterStructure
|
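The core concurrency pattern used here, reduced to a self-contained sketch: a semaphore caps in-flight tasks while `asyncio.gather` preserves input order and captures exceptions per task rather than failing the whole batch.

```python
import asyncio
from typing import Awaitable, List

async def runBounded(coros: List[Awaitable], maxConcurrent: int) -> list:
    # Cap how many coroutines run at once; gather still returns results
    # in input order, with exceptions captured instead of raised.
    sem = asyncio.Semaphore(maxConcurrent)

    async def guarded(coro):
        async with sem:
            return await coro

    return await asyncio.gather(*(guarded(c) for c in coros), return_exceptions=True)
```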
||||
|
||||
|
|
@@ -1830,7 +1706,13 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
|
||||
# 2. Generated sections - adjust heading levels
|
||||
for section in chapter.get("sections", []):
|
||||
# CRITICAL: Ensure elements are preserved when flattening
|
||||
# _adjustSectionHeadingLevels uses deepcopy which should preserve elements,
|
||||
# but verify that elements exist in the source section
|
||||
adjusted_section = self._adjustSectionHeadingLevels(section)
|
||||
# Ensure elements are preserved (deepcopy should handle this, but double-check)
|
||||
if "elements" in section and "elements" not in adjusted_section:
|
||||
adjusted_section["elements"] = section["elements"]
|
||||
flattened_doc["sections"].append(adjusted_section)
|
||||
|
||||
result["documents"].append(flattened_doc)
|
||||
|
|
@@ -1868,9 +1750,10 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
contentPartInstructions: Dict[str, Any],
|
||||
contentParts: List[ContentPart],
|
||||
userPrompt: str,
|
||||
language: str = "en"
|
||||
language: str = "en",
|
||||
outputFormat: str = "txt"
|
||||
) -> str:
|
||||
"""Baue Prompt für Chapter-Sections-Struktur-Generierung."""
|
||||
"""Baue Prompt für Chapter-Sections-Struktur-Generierung, querying renderer for accepted section types."""
|
||||
# Build ContentParts index (IDs only, no previews!)
|
||||
contentPartsIndex = ""
|
||||
for partId in contentPartIds:
|
||||
|
|
@@ -1904,6 +1787,9 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
if not contentPartsIndex:
|
||||
contentPartsIndex = "\n(No content parts specified for this chapter)"
|
||||
|
||||
# Query renderer for accepted section types
|
||||
acceptedSectionTypes = self._getAcceptedSectionTypesForFormat(outputFormat)
|
||||
|
||||
prompt = f"""TASK: Generate Chapter Sections Structure
|
||||
|
||||
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
|
||||
|
|
@@ -1936,11 +1822,24 @@ If AVAILABLE CONTENT PARTS are listed above, then EVERY section that generates c
|
|||
## CONTENT TYPES
|
||||
Available content types for sections: table, bullet_list, heading, paragraph, code_block, image
|
||||
|
||||
useAiCall RULES:
|
||||
- useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed
|
||||
- useAiCall: false if Format is "object" or "reference" (direct insertion)
|
||||
- useAiCall: false if Format is "extracted" AND simple "include full text" instruction
|
||||
- useAiCall: true if no ContentPartIds provided (content must be generated from scratch); sections without ContentParts must have a clear, detailed generationHint explaining what content to generate
|
||||
## ACCEPTED SECTION TYPES FOR THIS FORMAT
|
||||
The document output format ({outputFormat}) accepts only the following section types:
|
||||
{', '.join(acceptedSectionTypes) if acceptedSectionTypes else 'All section types'}
|
||||
|
||||
**IMPORTANT**: Only create sections with content types from the accepted list above. Do not create sections with types that are not accepted by this format.
|
||||
|
||||
## FORMAT-APPROPRIATE SECTION STRUCTURE
|
||||
When determining which sections to create for this chapter, consider the document's output format ({outputFormat}) and ensure sections are structured appropriately for that format:
|
||||
- Different formats have different capabilities and constraints
|
||||
- Structure sections to match what the format can effectively represent
|
||||
- Consider what content types work best for each format
|
||||
- Ensure the section structure aligns with the format's strengths and limitations
|
||||
- Select content types that are well-suited for the target format
|
||||
- **CRITICAL**: Only use section types from the ACCEPTED SECTION TYPES list above
|
||||
|
||||
useAiCall RULE (simple):
|
||||
- useAiCall: true → Content needs AI processing (extract, transform, generate, filter, summarize)
|
||||
- useAiCall: false → Content can be inserted directly without changes (Format is "object" or "reference")
|
||||
|
||||
RETURN JSON:
|
||||
{{
|
||||
|
|
@@ -1948,10 +1847,9 @@ RETURN JSON:
|
|||
{{
|
||||
"id": "section_1",
|
||||
"content_type": "paragraph",
|
||||
"contentPartIds": ["extracted_part_1"],
|
||||
"generationHint": "Include full text",
|
||||
"useAiCall": false,
|
||||
"caption": "optional, only for image sections",
|
||||
"contentPartIds": ["extracted_part_id"],
|
||||
"generationHint": "Description of what to extract or generate",
|
||||
"useAiCall": true,
|
||||
"elements": []
|
||||
}}
|
||||
]
|
||||
|
|
@@ -1993,7 +1891,7 @@ Return only valid JSON. Do not include any explanatory text outside the JSON.
|
|||
sectionIndex: Optional[int] = None,
|
||||
isAggregation: bool = False,
|
||||
language: str = "en"
|
||||
) -> str:
|
||||
) -> tuple[str, str]:
|
||||
"""Baue Prompt für Section-Generierung mit vollständigem Kontext."""
|
||||
# Filter out None values
|
||||
validParts = [p for p in contentParts if p is not None]
|
||||
|
|
@@ -2102,8 +2000,16 @@ Return only valid JSON. Do not include any explanatory text outside the JSON.
|
|||
|
||||
contentStructureExample = self._getContentStructureExample(contentType)
|
||||
|
||||
# Special handling for image content type with IMAGE_GENERATE
|
||||
isImageGeneration = contentType == "image" and len(validParts) == 0
|
||||
# Create template structure explicitly (not extracted from prompt)
|
||||
# This ensures exact identity between initial and continuation prompts
|
||||
templateStructure = f"""{{
|
||||
"elements": [
|
||||
{{
|
||||
"type": "{contentType}",
|
||||
"content": {contentStructureExample}
|
||||
}}
|
||||
]
|
||||
}}"""
|
||||
|
||||
if isAggregation:
|
||||
prompt = f"""# TASK: Generate Section Content (Aggregation)
|
||||
|
|
@@ -2126,6 +2032,8 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
|
|||
5. For table: Extract all rows from the context. Return {{"headers": [...], "rows": []}} only if no data exists.
|
||||
6. Format based on content_type ({contentType}).
|
||||
7. No HTML/styling: Plain text only, no markup.
|
||||
8. CONTINUE UNTIL COMPLETE: Extract ALL data from the provided context. Do NOT stop early because you think the response might be too long. Do NOT truncate or abbreviate. Do not impose artificial limits on yourself.
|
||||
|
||||
|
||||
## OUTPUT FORMAT
|
||||
Return a JSON object with this structure:
|
||||
|
|
@@ -2177,6 +2085,7 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
|
|||
3. Format based on content_type ({contentType}).
|
||||
4. Return only valid JSON with "elements" array.
|
||||
5. No HTML/styling: Plain text only, no markup.
|
||||
6. CONTINUE UNTIL COMPLETE: Extract ALL data from the provided context. Do NOT stop early because you think the response might be too long. Do NOT truncate or abbreviate. Do not impose artificial limits on yourself.
|
||||
|
||||
## OUTPUT FORMAT
|
||||
Return a JSON object with this structure:
|
||||
|
|
@@ -2221,6 +2130,7 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
|
|||
3. The content should be relevant to the USER REQUEST and fit the context of surrounding sections.
|
||||
4. Return only valid JSON with "elements" array.
|
||||
5. No HTML/styling: Plain text only, no markup.
|
||||
6. CONTINUE UNTIL COMPLETE: Extract ALL data from the provided context. Do NOT stop early because you think the response might be too long. Do NOT truncate or abbreviate. Do not impose artificial limits on yourself.
|
||||
|
||||
## OUTPUT FORMAT
|
||||
Return a JSON object with this structure:
|
||||
|
|
@@ -2248,7 +2158,69 @@ Output requirements:
|
|||
## CONTEXT
|
||||
{contextText if contextText else ""}
|
||||
"""
|
||||
return prompt
|
||||
return prompt, templateStructure
|
||||
|
||||
async def buildSectionPromptWithContinuation(
|
||||
self,
|
||||
continuationContext: Any,
|
||||
templateStructure: str,
|
||||
basePrompt: str
|
||||
) -> str:
|
||||
"""Build section prompt with continuation context. Uses unified signature.
|
||||
|
||||
Single unified implementation for all section content generation contexts.
|
||||
|
||||
Note: All initial context (section, contentParts, userPrompt, etc.) is already
|
||||
contained in basePrompt. This function only adds continuation-specific instructions.
|
||||
"""
|
||||
# Extract continuation context fields (only what's needed for continuation)
|
||||
incompletePart = continuationContext.incomplete_part
|
||||
lastRawJson = continuationContext.last_raw_json
|
||||
|
||||
# Generate both overlap context and hierarchy context using jsonContinuation
|
||||
overlapContext = ""
|
||||
unifiedContext = ""
|
||||
if lastRawJson:
|
||||
# Get contexts directly from jsonContinuation
|
||||
from modules.shared.jsonContinuation import getContexts
|
||||
contexts = getContexts(lastRawJson)
|
||||
overlapContext = contexts.overlapContext
|
||||
unifiedContext = contexts.hierarchyContextForPrompt
|
||||
elif incompletePart:
|
||||
unifiedContext = incompletePart
|
||||
else:
|
||||
unifiedContext = "Unable to extract context - response was completely broken"
|
||||
|
||||
# Build unified continuation prompt format
|
||||
continuationPrompt = f"""{basePrompt}
|
||||
|
||||
--- CONTINUATION REQUEST ---
|
||||
The previous JSON response was incomplete. Continue from where it stopped.
|
||||
|
||||
Context showing structure hierarchy with cut point:
|
||||
```
|
||||
{unifiedContext}
|
||||
```
|
||||
|
||||
Overlap Requirement:
|
||||
To ensure proper merging, your response MUST start EXACTLY with the overlap context shown below, then continue with new content.
|
||||
|
||||
Overlap context (start your response with this exact text):
|
||||
```json
|
||||
{overlapContext if overlapContext else "No overlap context available"}
|
||||
```
|
||||
|
||||
TASK:
|
||||
1. Start your response EXACTLY with the overlap context shown above (character by character)
|
||||
2. Continue seamlessly from where the overlap context ends
|
||||
3. Complete the remaining content following the JSON structure template above
|
||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||
|
||||
CRITICAL:
|
||||
- Your response MUST begin with the exact overlap context text (this enables automatic merging)
|
||||
- Continue seamlessly after the overlap context with new content
|
||||
- Your response must be valid JSON matching the structure template above"""
|
||||
return continuationPrompt
|
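The overlap requirement exists so that responses can be spliced mechanically. A minimal sketch of that merge, assuming the model honored the instruction to re-emit the overlap verbatim (the production logic lives in `modules.shared.jsonContinuation`):

```python
def mergeWithOverlap(truncated: str, continuation: str, overlapContext: str) -> str:
    # Find where the overlap begins in the truncated response and splice the
    # continuation in at that point; the continuation re-emits the overlap
    # followed by the new content, so nothing is lost or duplicated.
    cutIndex = truncated.rfind(overlapContext)
    if cutIndex == -1 or not continuation.startswith(overlapContext):
        raise ValueError("Continuation does not line up with the overlap context")
    return truncated[:cutIndex] + continuation
```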
||||
|
||||
def _extractAndMergeMultipleJsonBlocks(self, responseText: str, contentType: str, sectionId: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
|
|
@@ -2547,4 +2519,38 @@ Output requirements:
|
|||
# (e.g., comparison of multiple documents)
|
||||
# Default: no aggregation for paragraph
|
||||
return False
|
||||
|
||||
def _getAcceptedSectionTypesForFormat(self, outputFormat: str) -> List[str]:
|
||||
"""
|
||||
Get accepted section types for a given output format by querying the renderer.
|
||||
|
||||
Args:
|
||||
outputFormat: Format name (e.g., 'csv', 'json', 'pdf')
|
||||
|
||||
Returns:
|
||||
List of accepted section content types (e.g., ["table", "code_block"])
|
||||
"""
|
||||
try:
|
||||
from modules.services.serviceGeneration.renderers.registry import getRenderer
|
||||
|
||||
# Get renderer for this format
|
||||
renderer = getRenderer(outputFormat, self.services)
|
||||
|
||||
if renderer and hasattr(renderer, 'getAcceptedSectionTypes'):
|
||||
# Query renderer for accepted types
|
||||
acceptedTypes = renderer.getAcceptedSectionTypes(outputFormat)
|
||||
if acceptedTypes:
|
||||
logger.debug(f"Renderer for format '{outputFormat}' accepts section types: {acceptedTypes}")
|
||||
return acceptedTypes
|
||||
|
||||
# Fallback: if no renderer or method not found, return all types
|
||||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
logger.debug(f"No renderer found for format '{outputFormat}' or method not available, using all section types")
|
||||
return list(supportedSectionTypes)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error querying renderer for accepted section types for format '{outputFormat}': {str(e)}")
|
||||
# Fallback: return all types
|
||||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
return list(supportedSectionTypes)
|
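A usage sketch with a stub renderer (hypothetical; the real renderers come from the registry):

```python
class CsvRendererStub:
    # Stand-in for a real renderer from the registry (illustrative only)
    def getAcceptedSectionTypes(self, outputFormat: str) -> list:
        return ["table", "code_block"]

accepted = CsvRendererStub().getAcceptedSectionTypes("csv")
print(accepted)  # ['table', 'code_block']
# The structure prompt then advertises only these types, so the model never
# plans e.g. an image section that a CSV renderer could not represent.
```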
||||
|
||||
|
|
|
|||
|
|
@@ -107,47 +107,71 @@ class StructureGenerator:
|
|||
resultFormat="json"
|
||||
)
|
||||
|
||||
structurePrompt, templateStructure = self._buildChapterStructurePrompt(
|
||||
userPrompt=userPrompt,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat
|
||||
)
|
||||
|
||||
# Create prompt builder for continuation support
|
||||
async def buildChapterStructurePromptWithContinuation(
|
||||
continuationContext: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
continuationContext: Any,
|
||||
templateStructure: str,
|
||||
basePrompt: str
|
||||
) -> str:
|
||||
"""Build chapter structure prompt with optional continuation context."""
|
||||
basePrompt = self._buildChapterStructurePrompt(
|
||||
userPrompt=userPrompt,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat
|
||||
)
|
||||
"""Build chapter structure prompt with continuation context. Uses unified signature.
|
||||
|
||||
if continuationContext:
|
||||
# Add continuation instructions
|
||||
deliveredSummary = continuationContext.get("delivered_summary", "")
|
||||
elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
|
||||
cutOffElement = continuationContext.get("cut_off_element", "")
|
||||
|
||||
continuationText = f"{deliveredSummary}\n\n"
|
||||
continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
|
||||
|
||||
if elementBeforeCutoff:
|
||||
continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
|
||||
continuationText += f"{elementBeforeCutoff}\n\n"
|
||||
|
||||
if cutOffElement:
|
||||
continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
|
||||
continuationText += f"{cutOffElement}\n\n"
|
||||
|
||||
continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
|
||||
continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
|
||||
continuationText += "Start directly with the next chapter that should follow.\n\n"
|
||||
|
||||
return f"""{basePrompt}
|
||||
|
||||
{continuationText}
|
||||
|
||||
Continue generating the remaining chapters now.
|
||||
"""
|
||||
Note: All initial context (userPrompt, contentParts, outputFormat, etc.) is already
|
||||
contained in basePrompt. This function only adds continuation-specific instructions.
|
||||
"""
|
||||
# Extract continuation context fields (only what's needed for continuation)
|
||||
incompletePart = continuationContext.incomplete_part
|
||||
lastRawJson = continuationContext.last_raw_json
|
||||
|
||||
# Generate both overlap context and hierarchy context using jsonContinuation
|
||||
overlapContext = ""
|
||||
unifiedContext = ""
|
||||
if lastRawJson:
|
||||
# Get contexts directly from jsonContinuation
|
||||
from modules.shared.jsonContinuation import getContexts
|
||||
contexts = getContexts(lastRawJson)
|
||||
overlapContext = contexts.overlapContext
|
||||
unifiedContext = contexts.hierarchyContextForPrompt
|
||||
elif incompletePart:
|
||||
unifiedContext = incompletePart
|
||||
else:
|
||||
return basePrompt
|
||||
unifiedContext = "Unable to extract context - response was completely broken"
|
||||
|
||||
# Build unified continuation prompt format
|
||||
continuationPrompt = f"""{basePrompt}
|
||||
|
||||
--- CONTINUATION REQUEST ---
|
||||
The previous JSON response was incomplete. Continue from where it stopped.
|
||||
|
||||
Context showing structure hierarchy with cut point:
|
||||
```
|
||||
{unifiedContext}
|
||||
```
|
||||
|
||||
Overlap Requirement:
|
||||
To ensure proper merging, your response MUST start EXACTLY with the overlap context shown below, then continue with new content.
|
||||
|
||||
Overlap context (start your response with this exact text):
|
||||
```json
|
||||
{overlapContext if overlapContext else "No overlap context available"}
|
||||
```
|
||||
|
||||
TASK:
|
||||
1. Start your response EXACTLY with the overlap context shown above (character by character)
|
||||
2. Continue seamlessly from where the overlap context ends
|
||||
3. Complete the remaining content following the JSON structure template above
|
||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||
|
||||
CRITICAL:
|
||||
- Your response MUST begin with the exact overlap context text (this enables automatic merging)
|
||||
- Continue seamlessly after the overlap context with new content
|
||||
- Your response must be valid JSON matching the structure template above"""
|
||||
return continuationPrompt
|
||||
|
||||
# Call AI with looping support
|
||||
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
|
||||
|
|
@@ -162,7 +186,8 @@ Continue generating the remaining chapters now.
|
|||
promptArgs={
|
||||
"userPrompt": userPrompt,
|
||||
"outputFormat": outputFormat,
|
||||
"services": self.services
|
||||
"templateStructure": templateStructure,
|
||||
"basePrompt": structurePrompt
|
||||
},
|
||||
useCaseId="chapter_structure", # REQUIRED: Explicit use case ID
|
||||
operationId=structureOperationId,
|
||||
|
|
@@ -275,7 +300,7 @@ Continue generating the remaining chapters now.
|
|||
userPrompt: str,
|
||||
contentParts: List[ContentPart],
|
||||
outputFormat: str
|
||||
) -> str:
|
||||
) -> tuple[str, str]:
|
||||
"""Baue Prompt für Chapter-Struktur-Generierung."""
|
||||
# Build ContentParts index - filter out empty parts
|
||||
contentPartsIndex = ""
|
||||
|
|
@@ -331,6 +356,36 @@ Continue generating the remaining chapters now.
|
|||
language = self._getUserLanguage()
|
||||
logger.debug(f"Using language from services (user intention analysis) for structure generation: {language}")
|
||||
|
||||
# Create template structure explicitly (not extracted from prompt)
|
||||
# This ensures exact identity between initial and continuation prompts
|
||||
templateStructure = f"""{{
|
||||
"metadata": {{
|
||||
"title": "Document Title",
|
||||
"language": "{language}"
|
||||
}},
|
||||
"documents": [{{
|
||||
"id": "doc_1",
|
||||
"title": "Document Title",
|
||||
"filename": "document.{outputFormat}",
|
||||
"outputFormat": "{outputFormat}",
|
||||
"language": "{language}",
|
||||
"chapters": [
|
||||
{{
|
||||
"id": "chapter_1",
|
||||
"level": 1,
|
||||
"title": "Chapter Title",
|
||||
"contentParts": {{
|
||||
"extracted_part_id": {{
|
||||
"instruction": "Use extracted content with ALL relevant details from user request"
|
||||
}}
|
||||
}},
|
||||
"generationHint": "Detailed description including ALL relevant details from user request for this chapter",
|
||||
"sections": []
|
||||
}}
|
||||
]
|
||||
}}]
|
||||
}}"""
|
||||
|
||||
prompt = f"""# TASK: Generate Chapter Structure
|
||||
|
||||
This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.
|
||||
|
|
@@ -363,13 +418,24 @@ Then chapters that generate those generic content types MUST assign the relevant
|
|||
|
||||
## CHAPTER STRUCTURE REQUIREMENTS
|
||||
- Generate chapters based on USER REQUEST - analyze what structure the user wants
|
||||
- Each chapter needs: id, level (1, 2, 3, etc.), title
|
||||
- IMPORTANT: Each chapter MUST have ALL these fields:
|
||||
- id: Unique identifier (e.g., "chapter_1")
|
||||
- level: Heading level (1, 2, 3, etc.)
|
||||
- title: Chapter title
|
||||
- contentParts: Object mapping ContentPart IDs to usage instructions
|
||||
- generationHint: Description of what content to generate
|
||||
- sections: Empty array [] (REQUIRED - sections are generated in next phase)
|
||||
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
|
||||
- The "instruction" field for each ContentPart MUST contain ALL relevant details from the USER REQUEST that apply to content extraction for this specific chapter. Include all formatting rules, data requirements, constraints, and specifications mentioned in the user request that are relevant for processing this ContentPart in this chapter.
|
||||
- generationHint: Description of what content to generate for this chapter
|
||||
The generationHint MUST contain ALL relevant details from the USER REQUEST that apply to this specific chapter. Include all formatting rules, data requirements, constraints, column specifications, validation rules, and any other specifications mentioned in the user request that are relevant for generating content for this chapter. Do NOT use generic descriptions - include specific details from the user request.
|
||||
- The number of chapters depends on the user request - create only what is requested
|
||||
|
||||
## WHAT IS A CHAPTER vs WHAT IS FORMATTING
|
||||
- A CHAPTER contains CONTENT (text, tables, lists, images, etc.)
|
||||
- FORMATTING INSTRUCTIONS (CSS styling, spacing, typography, colors, borders) are NOT separate chapters
|
||||
- If user mentions formatting topics, apply these to ALL chapters via generationHint, do NOT create a separate "Formatting" chapter
|
||||
|
||||
## DOCUMENT OUTPUT FORMAT
|
||||
For each document, determine the output format by analyzing the USER REQUEST:
|
||||
- Look for explicit format mentions
|
||||
|
|
@@ -379,6 +445,13 @@ For each document, determine the output format by analyzing the USER REQUEST:
|
|||
- Include "outputFormat" field in each document in the JSON structure
|
||||
- Multiple documents can have different formats
|
||||
|
||||
## FORMAT-APPROPRIATE CHAPTER STRUCTURE
|
||||
When determining the chapter structure, consider the document's output format and ensure chapters are structured appropriately for that format:
|
||||
- Different formats have different capabilities and constraints
|
||||
- Structure chapters to match what the format can effectively represent
|
||||
- Consider what content types work best for each format
|
||||
- Ensure the chapter structure aligns with the format's strengths and limitations
|
||||
|
||||
## DOCUMENT LANGUAGE
|
||||
For each document, determine the language by analyzing the USER REQUEST:
|
||||
- Look for explicit language mentions
|
||||
|
|
@@ -401,7 +474,7 @@ For each document, determine the language by analyzing the USER REQUEST:
|
|||
- title: Chapter title
|
||||
- contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}}
|
||||
- generationHint: Description of what content to generate
|
||||
- sections: Empty array []
|
||||
- sections: Empty array [] (MANDATORY - always include this field)
|
||||
|
||||
EXAMPLE STRUCTURE (for reference only - adapt to user request):
|
||||
{{
|
||||
|
|
@@ -451,5 +524,5 @@ For each chapter, verify:
|
|||
|
||||
OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
|
||||
"""
|
||||
return prompt
|
||||
return prompt, templateStructure
|
||||
|
||||
|
|
|
|||
|
|
@@ -13,7 +13,7 @@ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, Operati
|
|||
# Type hint for renderer parameter
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from modules.services.serviceGeneration.renderers.rendererBaseTemplate import BaseRenderer
|
||||
from modules.services.serviceGeneration.renderers.documentRendererBaseTemplate import BaseRenderer
|
||||
_RendererLike = BaseRenderer
|
||||
else:
|
||||
_RendererLike = Any
|
||||
|
|
|
|||
|
|
@@ -1,114 +0,0 @@
|
|||
# Document Generation Architecture Analysis

## Current Flow

### 1. Document Input → ContentParts (`extractAndPrepareContent`)

**Location**: `gateway/modules/services/serviceAi/subContentExtraction.py`

**Flow**:
- Regular documents → Calls `extractContent()` (NON-AI extraction) → Creates contentParts with raw extracted text
- **BUT THEN**:
  - Images with "extract" intent → Calls Vision AI (line 190) → AI extraction
  - Text with "extract" intent + extractionPrompt → Calls AI processing (line 265) → AI extraction
  - Pre-extracted JSON → Uses contentParts directly (no AI)

**Result**: ContentParts may already be AI-processed before structure generation

### 2. Structure Generation

**Location**: `gateway/modules/services/serviceAi/subStructureGeneration.py`

**Flow**:
- Uses contentParts (may already be AI-processed)
- Generates document structure (chapters, sections)

### 3. Section Generation (`_processSingleSection`)

**Location**: `gateway/modules/services/serviceAi/subStructureFilling.py`

**Flow**:
- Uses contentParts (which may already be AI-processed)
- Aggregates "extracted" contentParts with AI (line 554-682)
- Generates section content using `callAiWithLooping` with `useCaseId="section_content"`

## Issues Identified

### Issue 1: Duplicate AI Processing
- AI extraction happens in `extractAndPrepareContent` (for images/text)
- AI generation happens again in section generation
- This is redundant and inefficient

### Issue 2: Architecture Inconsistency
- Pre-extracted JSON files → contentParts directly (no AI)
- Regular documents → contentParts + AI extraction (inconsistent)
- User wants: Documents → contentParts (like pre-extracted JSON) → AI only in section generation

### Issue 3: Image Processing
- Images need Vision AI to extract text
- Currently happens in `extractAndPrepareContent`
- Question: Should this happen during section generation instead?

## Proposed Architecture

### Option A: Remove All AI from `extractAndPrepareContent`
- Documents → `extractContent()` → Raw contentParts (text, tables, etc.)
- Images → Keep as image contentParts (no Vision AI extraction)
- Section generation → Handle images with Vision AI when needed

**Pros**:
- Consistent with pre-extracted JSON flow
- Single point of AI processing (section generation)
- Clear separation of concerns

**Cons**:
- Images won't have extracted text until section generation
- May need to handle images differently in section generation

### Option B: Keep Vision AI for Images Only
- Documents → `extractContent()` → Raw contentParts
- Images → Vision AI extraction → Text contentParts
- Section generation → Uses text contentParts (no additional AI extraction)

**Pros**:
- Images get text extracted early
- Section generation can use text directly

**Cons**:
- Still has AI extraction before structure generation
- Inconsistent with user's request

## Recommendation

**Follow Option A** - Remove all AI extraction from `extractAndPrepareContent`:

1. **Documents → ContentParts** (like pre-extracted JSON):
   - Call `extractContent()` (NON-AI)
   - Create contentParts with raw extracted content
   - Images remain as image contentParts (no Vision AI)

2. **Section Generation**:
   - Handle images with Vision AI when needed
   - Aggregate all contentParts with AI
   - Single point of AI processing

**Benefits**:
- Clear architecture: Documents = raw contentParts
- Consistent with pre-extracted JSON flow
- AI processing only where needed (section generation)
- Easier to understand and maintain

## Questions to Resolve

1. **Image handling**: How should images be processed during section generation?
   - Option 1: Vision AI extraction happens automatically when image contentParts are used
   - Option 2: Images are passed to AI with Vision models during section generation
   - Option 3: Images remain as binary and are rendered directly (no text extraction)

2. **Text with extractionPrompt**: Should text contentParts with extractionPrompt be processed differently?
   - Currently: AI processing in `extractAndPrepareContent`
   - Proposed: Raw text → AI processing during section generation

3. **Performance**: Will deferring image extraction to section generation cause performance issues?
   - Need to test with multiple images
|
||||
|
|
@@ -1,77 +0,0 @@
|
|||
# Architecture Changes Summary

## Problem Identified

The architecture had AI extraction happening in TWO places:
1. **`extractAndPrepareContent`**: Vision AI for images, AI processing for text with extractionPrompt
2. **Section generation**: AI aggregation of contentParts

This was:
- Redundant (double AI processing)
- Inconsistent (pre-extracted JSON had no AI, regular documents had AI)
- Against the desired architecture (documents should become contentParts like pre-extracted JSON)

## Solution Implemented

### 1. Removed AI Extraction from `extractAndPrepareContent`

**File**: `gateway/modules/services/serviceAi/subContentExtraction.py`

**Changes**:
- **Removed**: Vision AI extraction for images (lines 186-246)
- **Removed**: AI text processing with extractionPrompt (lines 260-334)
- **Updated**: Images with extract intent are now marked with `needsVisionExtraction=True` flag
- **Updated**: Regular documents mark images with `needsVisionExtraction=True` when extract intent is present

**Result**: Documents → contentParts (raw extraction only, no AI)

### 2. Added Vision AI Extraction in Section Generation

**File**: `gateway/modules/services/serviceAi/subStructureFilling.py`

**Changes**:
- **Added**: Vision AI extraction logic before aggregation (lines 553-610)
- **Added**: Vision AI extraction logic for single-part processing (lines 1074-1115)
- **Logic** (see the sketch below):
  - Checks if `part.typeGroup == "image"` AND `needsVisionExtraction == True` AND `intent == "extract"`
  - Extracts text using Vision AI (`IMAGE_ANALYSE` operation)
  - Replaces image part with text part for further processing
  - Images with `contentFormat == "object"` (render intent) are rendered directly (no extraction)

**Result**: AI extraction happens ONLY during section generation
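A minimal sketch of that dispatch logic, assuming `ContentPart` is a Pydantic v2 model exposing `typeGroup`, `metadata`, and `data`, and that a `visionExtract` coroutine wraps the `IMAGE_ANALYSE` call (names here are illustrative, not the exact implementation):

```python
from typing import Awaitable, Callable, List

async def resolveVisionParts(
    parts: List["ContentPart"],
    visionExtract: Callable[["ContentPart"], Awaitable[str]],
) -> List["ContentPart"]:
    resolved = []
    for part in parts:
        needsVision = (
            part.typeGroup == "image"
            and part.metadata.get("needsVisionExtraction") is True
            and part.metadata.get("intent") == "extract"
        )
        if needsVision:
            # Vision AI (IMAGE_ANALYSE) turns the image into a text part
            text = await visionExtract(part)
            # model_copy assumes Pydantic v2; adjust if ContentPart differs
            resolved.append(part.model_copy(update={"typeGroup": "text", "data": text}))
        else:
            # Render-intent images (contentFormat == "object") pass through untouched
            resolved.append(part)
    return resolved
```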
## Architecture Flow (After Changes)

### Document Input → ContentParts
1. **Regular documents**: `extractContent()` (NON-AI) → Raw contentParts
   - Images with extract intent: `contentFormat="extracted"`, `needsVisionExtraction=True`
   - Images with render intent: `contentFormat="object"` (rendered directly)
   - Text: `contentFormat="extracted"` (raw text, no AI processing)

2. **Pre-extracted JSON**: Direct contentParts (no changes)

### Section Generation → AI Processing
1. **Images with extract intent**: Vision AI extraction → Text part → AI aggregation
2. **Images with render intent**: Rendered directly (no extraction)
3. **Text contentParts**: AI aggregation with extractionPrompt (if provided)

## Key Benefits

1. **Consistent Architecture**: Documents = raw contentParts (like pre-extracted JSON)
2. **Single Point of AI Processing**: Only in section generation
3. **Clear Separation**: Extraction vs Generation
4. **Intent-Based Logic**:
   - `intent == "extract"` → Vision AI extraction during section generation
   - `intent == "render"` → Direct rendering (no extraction)
   - `contentFormat == "object"` → Embedded/referenced images (no extraction)

## Testing Checklist

- [ ] Regular documents create contentParts without AI extraction
- [ ] Images with extract intent are marked with `needsVisionExtraction=True`
- [ ] Images with render intent are marked with `contentFormat="object"`
- [ ] Section generation extracts images with Vision AI when needed
- [ ] Section generation renders images with object format directly
- [ ] Text contentParts are processed with AI during section generation
- [ ] Pre-extracted JSON flow still works correctly
||||
|
||||
|
|
@@ -15,6 +15,7 @@ from typing import Dict, Any, List, Optional
|
|||
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
from modules.shared.jsonUtils import extractJsonString
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@@ -25,6 +26,7 @@ class CodeGenerationPath:
|
|||
def __init__(self, services):
|
||||
self.services = services
|
||||
|
||||
|
||||
async def generateCode(
|
||||
self,
|
||||
userPrompt: str,
|
||||
|
|
@@ -66,27 +68,67 @@ class CodeGenerationPath:
|
|||
|
||||
# Phase 2: Code content generation (with dependency handling)
|
||||
self.services.chat.progressLogUpdate(codeOperationId, 0.5, "Generating code content")
|
||||
codeFiles = await self._generateCodeContent(codeStructure, codeOperationId)
|
||||
codeFiles = await self._generateCodeContent(
|
||||
codeStructure,
|
||||
codeOperationId,
|
||||
userPrompt=userPrompt,
|
||||
contentParts=contentParts
|
||||
)
|
||||
|
||||
# Phase 3: Code formatting & validation
|
||||
self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files")
|
||||
self.services.chat.progressLogUpdate(codeOperationId, 0.8, "Formatting code files")
|
||||
formattedFiles = await self._formatAndValidateCode(codeFiles)
|
||||
|
||||
# Convert to unified document format
|
||||
documents = []
|
||||
# Phase 4: Code Rendering (Renderer-Based)
|
||||
self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Rendering code files")
|
||||
|
||||
# Group files by format
|
||||
filesByFormat = {}
|
||||
for file in formattedFiles:
|
||||
mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt"))
|
||||
content = file.get("content", "")
|
||||
if isinstance(content, str):
|
||||
contentBytes = content.encode('utf-8')
|
||||
else:
|
||||
contentBytes = content
|
||||
fileType = file.get("fileType", outputFormat or "txt")
|
||||
if fileType not in filesByFormat:
|
||||
filesByFormat[fileType] = []
|
||||
filesByFormat[fileType].append(file)
|
||||
|
||||
# Render each format group using appropriate renderer
|
||||
allRenderedDocuments = []
|
||||
for fileType, files in filesByFormat.items():
|
||||
# Get renderer for this format
|
||||
renderer = self._getCodeRenderer(fileType)
|
||||
|
||||
if renderer:
|
||||
# Use code renderer
|
||||
renderedDocs = await renderer.renderCodeFiles(
|
||||
codeFiles=files,
|
||||
metadata=codeStructure.get("metadata", {}),
|
||||
userPrompt=userPrompt
|
||||
)
|
||||
allRenderedDocuments.extend(renderedDocs)
|
||||
else:
|
||||
# Fallback: output directly (for formats without renderers)
|
||||
for file in files:
|
||||
mimeType = self._getMimeType(file.get("fileType", "txt"))
|
||||
content = file.get("content", "")
|
||||
contentBytes = content.encode('utf-8') if isinstance(content, str) else content
|
||||
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
allRenderedDocuments.append(
|
||||
RenderedDocument(
|
||||
documentData=contentBytes,
|
||||
mimeType=mimeType,
|
||||
filename=file.get("filename", "generated.txt"),
|
||||
metadata=codeStructure.get("metadata", {})
|
||||
)
|
||||
)
|
||||
|
||||
# Convert RenderedDocument to DocumentData
|
||||
documents = []
|
||||
for renderedDoc in allRenderedDocuments:
|
||||
documents.append(DocumentData(
|
||||
documentName=file.get("filename", "generated.txt"),
|
||||
documentData=contentBytes,
|
||||
mimeType=mimeType,
|
||||
sourceJson=file
|
||||
documentName=renderedDoc.filename,
|
||||
documentData=renderedDoc.documentData,
|
||||
mimeType=renderedDoc.mimeType,
|
||||
sourceJson=renderedDoc.metadata if hasattr(renderedDoc, 'metadata') else None
|
||||
))
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
|
|
@@ -94,11 +136,25 @@ class CodeGenerationPath:
|
|||
operationType=OperationTypeEnum.DATA_GENERATE.value
|
||||
)
|
||||
|
||||
# Create summary JSON for content field
|
||||
summaryContent = {
|
||||
"type": "code_generation",
|
||||
"metadata": codeStructure.get("metadata", {}),
|
||||
"files": [
|
||||
{
|
||||
"filename": doc.documentName,
|
||||
"mimeType": doc.mimeType
|
||||
}
|
||||
for doc in documents
|
||||
],
|
||||
"fileCount": len(documents)
|
||||
}
|
||||
|
||||
self.services.chat.progressLogFinish(codeOperationId, True)
|
||||
|
||||
return AiResponse(
|
||||
documents=documents,
|
||||
content=None,
|
||||
content=json.dumps(summaryContent, ensure_ascii=False),
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
|
|
@ -149,47 +205,184 @@ class CodeGenerationPath:
|
|||
) -> Dict[str, Any]:
|
||||
"""Generate code structure using looping system."""
|
||||
|
||||
# Build structure generation prompt
|
||||
structurePrompt = f"""Analyze the following code generation request and create a project structure.
|
||||
|
||||
Request: {userPrompt}
|
||||
|
||||
Language: {language}
|
||||
|
||||
Create a JSON structure with:
|
||||
1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
|
||||
2. files: Array of file structures, each with:
|
||||
- id: Unique identifier
|
||||
- filename: File name (e.g., "main.py", "utils.py")
|
||||
- fileType: File extension (e.g., "py", "js")
|
||||
- dependencies: List of file IDs this file depends on (for multi-file projects)
|
||||
- imports: List of import statements (for dependency extraction)
|
||||
- functions: Array of function signatures {{"name": "...", "signature": "..."}}
|
||||
- classes: Array of class definitions {{"name": "...", "signature": "..."}}
|
||||
|
||||
For single-file projects, return one file. For multi-file projects, break down into logical modules.
|
||||
|
||||
Return ONLY valid JSON in this format:
|
||||
{{
|
||||
# Build content parts index (similar to document generation)
|
||||
contentPartsIndex = ""
|
||||
if contentParts:
|
||||
validParts = []
|
||||
for part in contentParts:
|
||||
contentFormat = part.metadata.get("contentFormat", "unknown")
|
||||
originalFileName = part.metadata.get('originalFileName', 'N/A')
|
||||
|
||||
# Include reference parts and parts with data
|
||||
if contentFormat == "reference" or (part.data and len(str(part.data).strip()) > 0):
|
||||
validParts.append(part)
|
||||
|
||||
if validParts:
|
||||
contentPartsIndex = "\n## AVAILABLE CONTENT PARTS\n"
|
||||
for i, part in enumerate(validParts, 1):
|
||||
contentFormat = part.metadata.get("contentFormat", "unknown")
|
||||
originalFileName = part.metadata.get('originalFileName', 'N/A')
|
||||
|
||||
contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n"
|
||||
contentPartsIndex += f" Format: {contentFormat}\n"
|
||||
contentPartsIndex += f" Type: {part.typeGroup}\n"
|
||||
contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n"
|
||||
contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n"
|
||||
contentPartsIndex += f" Original file name: {originalFileName}\n"
|
||||
contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
|
||||
|
||||
if not contentPartsIndex:
|
||||
contentPartsIndex = "\n(No content parts available)"
|
||||
|
||||
# Create template structure explicitly (not extracted from prompt)
|
||||
templateStructure = f"""{{
|
||||
"metadata": {{
|
||||
"language": "{language}",
|
||||
"projectType": "single_file",
|
||||
"projectName": "generated-project"
|
||||
"projectType": "single_file|multi_file",
|
||||
"projectName": ""
|
||||
}},
|
||||
"files": [
|
||||
{{
|
||||
"id": "file_1",
|
||||
"filename": "main.py",
|
||||
"fileType": "py",
|
||||
"id": "",
|
||||
"filename": "",
|
||||
"fileType": "",
|
||||
"dependencies": [],
|
||||
"imports": [],
|
||||
"functions": [],
|
||||
"classes": []
|
||||
}}
|
||||
]
|
||||
}}
|
||||
}}"""
|
||||
|
||||
# Build structure generation prompt
|
||||
structurePrompt = f"""# TASK: Generate Code Project Structure
|
||||
|
||||
This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.
|
||||
|
||||
## USER REQUEST (for context)
|
||||
```
|
||||
{userPrompt}
|
||||
```
|
||||
{contentPartsIndex}
|
||||
|
||||
## LANGUAGE
|
||||
{language}
|
||||
|
||||
## TASK DESCRIPTION
|
||||
Analyze the USER REQUEST above and create a project structure that fulfills ALL requirements mentioned in the request.
|
||||
|
||||
IMPORTANT: If the request mentions multiple files (e.g., "3 files", "config.json and customers.json", etc.), you MUST include ALL requested files in the files array. Set projectType to "multi_file" when multiple files are requested.
|
||||
|
||||
## CONTENT PARTS USAGE (if available)
|
||||
If AVAILABLE CONTENT PARTS are listed above, use them to inform the file structure:
|
||||
|
||||
**Analyzing Content Parts:**
|
||||
- Review each ContentPart's format, type, original file name, and usage hint
|
||||
- Content parts with "reference" format = documents/images that will be processed/extracted
|
||||
- Content parts with "extracted" format = pre-processed data ready to use
|
||||
- Content parts with "object" format = images/documents to be displayed or processed
|
||||
|
||||
**Mapping Content Parts to Files:**
|
||||
- If content parts contain data (e.g., expense receipts, customer lists), create data files (JSON/CSV) that will store/represent that data
|
||||
- If content parts are documents to be processed (e.g., PDFs), you may need code files that parse/process them
|
||||
- Use the original file names and usage hints to determine appropriate filenames and file types
|
||||
|
||||
**Populating File Structure Fields:**
|
||||
- **dependencies**: List file IDs that this file depends on (e.g., if a Python script reads a JSON config file, the script depends on the config file)
|
||||
- **imports**: For code files, list imports needed based on content parts (e.g., if processing PDFs: ["import PyPDF2"], if processing CSV: ["import csv"], if processing JSON: ["import json"])
|
||||
- **functions**: For CODE files only - list function signatures if the USER REQUEST specifies functionality (e.g., {{"name": "parseReceipt", "signature": "def parseReceipt(pdf_path: str) -> dict"}})
|
||||
- **classes**: For CODE files only - list class definitions if the USER REQUEST specifies OOP structure
|
||||
- **functions/classes for DATA files**: Leave as empty arrays [] - data files (JSON/CSV/XML) don't contain executable code
|
||||
|
||||
## FILE STRUCTURE REQUIREMENTS
|
||||
Create a JSON structure with:
|
||||
1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
|
||||
- projectName: Derive from USER REQUEST or content parts (e.g., "expense-tracker", "customer-manager")
|
||||
|
||||
2. files: Array of file structures, each with:
|
||||
- id: Unique identifier (e.g., "file_1", "file_2")
|
||||
- filename: File name matching USER REQUEST requirements (e.g., "config.json", "customers.json", "expenses.csv")
|
||||
- fileType: File extension matching the requested format (e.g., "json", "py", "js", "csv", "xml")
|
||||
- dependencies: List of file IDs this file depends on (for multi-file projects where files reference each other)
|
||||
- imports: List of import statements that this file will need (e.g., ["import json", "import csv"] for Python files processing JSON/CSV)
|
||||
- functions: Array of function signatures {{"name": "...", "signature": "..."}} - ONLY if the file will contain executable code (not for pure data files like JSON/CSV)
|
||||
- classes: Array of class definitions {{"name": "...", "signature": "..."}} - ONLY if the file will contain executable code (not for pure data files like JSON/CSV)
|
||||
|
||||
IMPORTANT FOR DATA FILES (JSON, CSV, XML):
|
||||
- For pure data files (config.json, customers.json, expenses.csv), leave functions and classes as empty arrays []
|
||||
- These files contain structured data, not executable code
|
||||
- Use imports only if the file will be processed by code (e.g., a Python script that reads the CSV)
|
||||
|
||||
IMPORTANT FOR CODE FILES (Python, JavaScript, etc.):
|
||||
- Include functions/classes if the USER REQUEST specifies functionality
|
||||
- Use dependencies to indicate which data files this code file reads/processes
|
||||
- Use imports to specify what libraries/modules are needed
|
||||
|
||||
For single-file projects, return one file. For multi-file projects, include ALL requested files in the files array.
|
||||
|
||||
Return ONLY valid JSON matching the request above.
|
||||
"""
|
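For a hypothetical request such as "a Python script that reads settings from config.json", the prompt above should yield a structure along these lines (illustrative, not actual model output):

```python
exampleStructure = {
    "metadata": {"language": "python", "projectType": "multi_file", "projectName": "config-reader"},
    "files": [
        {  # pure data file: functions/classes stay empty
            "id": "file_1", "filename": "config.json", "fileType": "json",
            "dependencies": [], "imports": [], "functions": [], "classes": [],
        },
        {  # code file: depends on the data file it reads
            "id": "file_2", "filename": "main.py", "fileType": "py",
            "dependencies": ["file_1"], "imports": ["import json"],
            "functions": [{"name": "loadConfig", "signature": "def loadConfig(path: str) -> dict"}],
            "classes": [],
        },
    ],
}
```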
||||
|
||||
# Build continuation prompt builder
|
||||
async def buildCodeStructurePromptWithContinuation(
|
||||
continuationContext: Any,
|
||||
templateStructure: str,
|
||||
basePrompt: str
|
||||
) -> str:
|
||||
"""Build code structure prompt with continuation context. Uses unified signature.
|
||||
|
||||
Note: All initial context (userPrompt, contentParts, etc.) is already
|
||||
contained in basePrompt. This function only adds continuation-specific instructions.
|
||||
"""
|
||||
# Extract continuation context fields (only what's needed for continuation)
|
||||
incompletePart = continuationContext.incomplete_part
|
||||
lastRawJson = continuationContext.last_raw_json
|
||||
|
||||
# Generate both overlap context and hierarchy context using jsonContinuation
|
||||
overlapContext = ""
|
||||
unifiedContext = ""
|
||||
if lastRawJson:
|
||||
# Get contexts directly from jsonContinuation
|
||||
from modules.shared.jsonContinuation import getContexts
|
||||
contexts = getContexts(lastRawJson)
|
||||
overlapContext = contexts.overlapContext
|
||||
unifiedContext = contexts.hierarchyContextForPrompt
|
||||
elif incompletePart:
|
||||
unifiedContext = incompletePart
|
||||
else:
|
||||
unifiedContext = "Unable to extract context - response was completely broken"
|
||||
|
||||
# Build unified continuation prompt format
|
||||
continuationPrompt = f"""{basePrompt}
|
||||
|
||||
--- CONTINUATION REQUEST ---
|
||||
The previous JSON response was incomplete. Continue from where it stopped.
|
||||
|
||||
Context showing structure hierarchy with cut point:
|
||||
```
|
||||
{unifiedContext}
|
||||
```
|
||||
|
||||
Overlap Requirement:
|
||||
To ensure proper merging, your response MUST start EXACTLY with the overlap context shown below, then continue with new content.
|
||||
|
||||
Overlap context (start your response with this exact text):
|
||||
```json
|
||||
{overlapContext if overlapContext else "No overlap context available"}
|
||||
```
|
||||
|
||||
TASK:
|
||||
1. Start your response EXACTLY with the overlap context shown above (character by character)
|
||||
2. Continue seamlessly from where the overlap context ends
|
||||
3. Complete the remaining content following the JSON structure template above
|
||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||
|
||||
CRITICAL:
|
||||
- Your response MUST begin with the exact overlap context text (this enables automatic merging)
|
||||
- Continue seamlessly after the overlap context with new content
|
||||
- Your response must be valid JSON matching the structure template above"""
|
||||
return continuationPrompt
|
||||
|
||||
# Use generic looping system with code_structure use case
|
||||
options = AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_GENERATE,
|
||||
|
|
@@ -199,18 +392,29 @@ Return ONLY valid JSON in this format:
|
|||
structureJson = await self.services.ai.callAiWithLooping(
|
||||
prompt=structurePrompt,
|
||||
options=options,
|
||||
promptBuilder=buildCodeStructurePromptWithContinuation,
|
||||
promptArgs={
|
||||
"userPrompt": userPrompt,
|
||||
"contentParts": contentParts,
|
||||
"templateStructure": templateStructure,
|
||||
"basePrompt": structurePrompt
|
||||
},
|
||||
useCaseId="code_structure",
|
||||
debugPrefix="code_structure_generation",
|
||||
contentParts=contentParts
|
||||
)
|
||||
|
||||
parsed = json.loads(structureJson)
|
||||
# Extract JSON from markdown fences if present
|
||||
extractedJson = extractJsonString(structureJson)
|
||||
parsed = json.loads(extractedJson)
|
||||
return parsed
|
||||
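The project's `extractJsonString` (from `modules.shared.jsonUtils`) is not shown in this diff; as an illustration only, the fence stripping it needs to perform looks roughly like this hypothetical helper:

```python
import re

FENCE = "`" * 3  # a literal triple-backtick fence, built up to keep this snippet self-contained

def stripMarkdownFences(raw: str) -> str:
    """Return the JSON payload from a response that may be wrapped in json code fences."""
    pattern = FENCE + r"(?:json)?\s*(.*?)\s*" + FENCE
    match = re.search(pattern, raw, re.DOTALL)
    return match.group(1) if match else raw.strip()

wrapped = FENCE + "json\n" + '{"files": []}' + "\n" + FENCE
assert stripMarkdownFences(wrapped) == '{"files": []}'
```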
    async def _generateCodeContent(
        self,
        codeStructure: Dict[str, Any],
-       parentOperationId: str
        parentOperationId: str,
        userPrompt: str = None,
        contentParts: Optional[List[ContentPart]] = None
    ) -> List[Dict[str, Any]]:
        """Generate code content for each file with dependency handling."""
        files = codeStructure.get("files", [])

@@ -246,7 +450,9 @@ Return ONLY valid JSON in this format:
                fileStructure,
                fileContext=fileContext,
                allFilesStructure=orderedFiles,
-               metadata=metadata
                metadata=metadata,
                userPrompt=userPrompt,
                contentParts=contentParts
            )

            codeFiles.append(fileContent)

@@ -452,7 +658,9 @@ Return ONLY valid JSON in this format:
        fileStructure: Dict[str, Any],
        fileContext: Dict[str, Any] = None,
        allFilesStructure: List[Dict[str, Any]] = None,
-       metadata: Dict[str, Any] = None
        metadata: Dict[str, Any] = None,
        userPrompt: str = None,
        contentParts: Optional[List[ContentPart]] = None
    ) -> Dict[str, Any]:
        """Generate code content for a single file with context about other files."""

@@ -479,10 +687,68 @@ Return ONLY valid JSON in this format:
            contextInfo += ", ".join(exports)
            contextInfo += "\n"
-       contentPrompt = f"""Generate complete, executable code for the file: {filename}
        # Build content parts section if available
        contentPartsSection = ""
        if contentParts:
            relevantParts = []
            for part in contentParts:
                # Include parts that might be relevant to this file
                usageHint = part.metadata.get('usageHint', '').lower()
                originalFileName = part.metadata.get('originalFileName', '').lower()
                filenameLower = filename.lower()

                # Check if this content part is relevant to this file
                if (filenameLower in usageHint or
                    filenameLower in originalFileName or
                    part.metadata.get('contentFormat') == 'reference' or
                    (part.data and len(str(part.data).strip()) > 0)):
                    relevantParts.append(part)

            if relevantParts:
                contentPartsSection = "\n## AVAILABLE CONTENT PARTS\n"
                for i, part in enumerate(relevantParts, 1):
                    contentFormat = part.metadata.get("contentFormat", "unknown")
                    originalFileName = part.metadata.get('originalFileName', 'N/A')
                    contentPartsSection += f"\n{i}. ContentPart ID: {part.id}\n"
                    contentPartsSection += f"   Format: {contentFormat}\n"
                    contentPartsSection += f"   Type: {part.typeGroup}\n"
                    contentPartsSection += f"   Original file name: {originalFileName}\n"
                    contentPartsSection += f"   Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
                    # Include actual content if it's small enough (for data files like CSV, JSON)
                    if part.data and isinstance(part.data, str) and len(part.data) < 2000:
                        contentPartsSection += f"   Content preview: {part.data[:500]}...\n"

        # Build user request section
        userRequestSection = ""
        if userPrompt:
            userRequestSection = f"""
## ORIGINAL USER REQUEST
```
{userPrompt}
```
"""

        # Create template structure explicitly (not extracted from prompt)
        templateStructure = f"""{{
  "files": [
    {{
      "filename": "{filename}",
      "content": "// Complete code here",
      "functions": {json.dumps(functions, indent=2) if functions else '[]'},
      "classes": {json.dumps(classes, indent=2) if classes else '[]'}
    }}
  ]
}}"""

        # Build base prompt
        contentPrompt = f"""# TASK: Generate Code File Content

Generate complete, executable code for the file: {filename}
{userRequestSection}## FILE SPECIFICATIONS

File Type: {fileType}
Language: {metadata.get('language', 'python') if metadata else 'python'}
{contentPartsSection}

Required functions:
{json.dumps(functions, indent=2) if functions else 'None specified'}
@@ -501,18 +767,69 @@ Generate complete, production-ready code with:
5. Type hints where appropriate

Return ONLY valid JSON in this format:
-{{
-  "files": [
-    {{
-      "filename": "{filename}",
-      "content": "// Complete code here",
-      "functions": {json.dumps(functions, indent=2) if functions else '[]'},
-      "classes": {json.dumps(classes, indent=2) if classes else '[]'}
-    }}
-  ]
-}}
{templateStructure}
"""

        # Build continuation prompt builder
        async def buildCodeContentPromptWithContinuation(
            continuationContext: Any,
            templateStructure: str,
            basePrompt: str
        ) -> str:
            """Build code content prompt with continuation context. Uses unified signature.

            Note: All initial context (filename, fileType, functions, etc.) is already
            contained in basePrompt. This function only adds continuation-specific instructions.
            """
            # Extract continuation context fields (only what's needed for continuation)
            incompletePart = continuationContext.incomplete_part
            lastRawJson = continuationContext.last_raw_json

            # Generate both overlap context and hierarchy context using jsonContinuation
            overlapContext = ""
            unifiedContext = ""
            if lastRawJson:
                # Get contexts directly from jsonContinuation
                from modules.shared.jsonContinuation import getContexts
                contexts = getContexts(lastRawJson)
                overlapContext = contexts.overlapContext
                unifiedContext = contexts.hierarchyContextForPrompt
            elif incompletePart:
                unifiedContext = incompletePart
            else:
                unifiedContext = "Unable to extract context - response was completely broken"

            # Build unified continuation prompt format
            continuationPrompt = f"""{basePrompt}

--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Continue from where it stopped.

Context showing structure hierarchy with cut point:
```
{unifiedContext}
```

Overlap Requirement:
To ensure proper merging, your response MUST start EXACTLY with the overlap context shown below, then continue with new content.

Overlap context (start your response with this exact text):
```json
{overlapContext if overlapContext else "No overlap context available"}
```

TASK:
1. Start your response EXACTLY with the overlap context shown above (character by character)
2. Continue seamlessly from where the overlap context ends
3. Complete the remaining content following the JSON structure template above
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects

CRITICAL:
- Your response MUST begin with the exact overlap context text (this enables automatic merging)
- Continue seamlessly after the overlap context with new content
- Your response must be valid JSON matching the structure template above"""
            return continuationPrompt
        # Use generic looping system with code_content use case
        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,

@@ -522,11 +839,27 @@ Return ONLY valid JSON in this format:
        contentJson = await self.services.ai.callAiWithLooping(
            prompt=contentPrompt,
            options=options,
            promptBuilder=buildCodeContentPromptWithContinuation,
            promptArgs={
                "filename": filename,
                "fileType": fileType,
                "functions": functions,
                "classes": classes,
                "dependencies": dependencies,
                "metadata": metadata,
                "userPrompt": userPrompt,
                "contentParts": contentParts,
                "contextInfo": contextInfo,
                "templateStructure": templateStructure,
                "basePrompt": contentPrompt
            },
            useCaseId="code_content",
            debugPrefix=f"code_content_{fileStructure.get('id', 'file')}",
        )

-       parsed = json.loads(contentJson)
        # Extract JSON from markdown fences if present
        extractedJson = extractJsonString(contentJson)
        parsed = json.loads(extractedJson)

        # Extract file content and metadata
        files = parsed.get("files", [])
@@ -579,6 +912,28 @@ Return ONLY valid JSON in this format:
            "md": "text/markdown",
            "java": "text/x-java-source",
            "cpp": "text/x-c++src",
-           "c": "text/x-csrc"
            "c": "text/x-csrc",
            "csv": "text/csv",
            "xml": "application/xml"
        }
        return mimeTypes.get(fileType.lower(), "text/plain")

    def _getCodeRenderer(self, fileType: str):
        """Get code renderer for file type."""
        from modules.services.serviceGeneration.renderers.registry import getRenderer

        # Map file types to renderer formats
        formatMap = {
            'json': 'json',
            'csv': 'csv',
            'xml': 'xml'
        }

        rendererFormat = formatMap.get(fileType.lower())
        if rendererFormat:
            renderer = getRenderer(rendererFormat, self.services)
            # Check if renderer supports code rendering
            if renderer and hasattr(renderer, 'renderCodeFiles'):
                return renderer

        return None
@@ -9,6 +9,7 @@ Handles document generation using existing chapter/section model.
import json
import logging
import time
import copy
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent

@@ -153,6 +154,11 @@ class DocumentGenerationPath:
        # Use validated currentUserLanguage as global fallback (always valid infrastructure)
        language = self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage else "en"

        # IMPORTANT: Create deep copy BEFORE renderResult to preserve filledStructure with elements
        # renderResult might modify the structure, so we need to preserve the original for sourceJson
        # This ensures sourceJson contains the complete structure with elements for validation
        filledStructureForSourceJson = copy.deepcopy(filledStructure) if filledStructure else None

        renderedDocuments = await self.services.ai.renderResult(
            filledStructure,
            outputFormat,

@@ -167,11 +173,12 @@ class DocumentGenerationPath:
        for renderedDoc in renderedDocuments:
            try:
                # Create DocumentData for each rendered document
                # Use the preserved filledStructureForSourceJson (with elements) for sourceJson
                docDataObj = DocumentData(
                    documentName=renderedDoc.filename,
                    documentData=renderedDoc.documentData,
                    mimeType=renderedDoc.mimeType,
-                   sourceJson=filledStructure if len(documentDataList) == 0 else None  # Only for the first document
                    sourceJson=filledStructureForSourceJson if len(documentDataList) == 0 else None  # Only for the first document
                )
                documentDataList.append(docDataObj)
                logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
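The aliasing hazard that the deep copy guards against is easy to reproduce in isolation — a minimal sketch, assuming a renderer that strips `elements` from the structure it receives:

```python
import copy

structure = {"documents": [{"sections": [{"elements": [1, 2, 3]}]}]}

preserved = copy.deepcopy(structure)                      # snapshot taken BEFORE rendering
structure["documents"][0]["sections"][0].pop("elements")  # a renderer-style mutation

# The live structure lost its elements, but the preserved copy still has them,
# so it remains usable as sourceJson for later validation.
assert "elements" not in structure["documents"][0]["sections"][0]
assert preserved["documents"][0]["sections"][0]["elements"] == [1, 2, 3]
```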
modules/services/serviceGeneration/renderers/codeRendererBaseTemplate.py (new file)

@@ -0,0 +1,45 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Base renderer class for code format renderers.
"""

from abc import abstractmethod
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import logging

logger = logging.getLogger(__name__)

class BaseCodeRenderer(BaseRenderer):
    """Base class for code format renderers."""

    @abstractmethod
    async def renderCodeFiles(
        self,
        codeFiles: List[Dict[str, Any]],
        metadata: Dict[str, Any],
        userPrompt: str = None
    ) -> List[RenderedDocument]:
        """
        Render code files to format-specific output.

        Args:
            codeFiles: List of file dictionaries with:
                - filename: str
                - fileType: str (json, csv, xml, etc.)
                - content: str (generated code)
                - id: str (optional)
            metadata: Project metadata (language, projectType, etc.)
            userPrompt: Original user prompt

        Returns:
            List of RenderedDocument objects (can be 1..n files)
        """
        pass

    def _validateCodeFile(self, codeFile: Dict[str, Any]) -> bool:
        """Validate code file structure."""
        required = ['filename', 'fileType', 'content']
        return all(key in codeFile for key in required)
@@ -63,6 +63,27 @@ class BaseRenderer(ABC):
        """
        return 'document'  # Default to document style

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that this renderer accepts.
        This allows renderers to declare which section types they can process.

        Default implementation returns all supported section types.
        Override this method in subclasses to restrict accepted types.

        Args:
            formatName: Optional format name (e.g., 'txt', 'js', 'csv') - useful for renderers
                        that handle multiple formats with different accepted types (e.g., RendererText)

        Returns:
            List of accepted section content types (e.g., ["table", "paragraph", "heading"])
            Valid types: "table", "bullet_list", "heading", "paragraph", "code_block", "image"
        """
        # Default: accept all section types
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    @abstractmethod
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
@@ -325,9 +346,18 @@ class BaseRenderer(ABC):

        response = await aiService.callAi(request)

-       # Save styling prompt and response to debug
-       self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
-       self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
        # Save styling prompt and response to debug (fire and forget - don't block on slow file I/O)
        # The writeDebugFile calls os.listdir() which can be slow with many files
        # Run in background thread to avoid blocking rendering
        import threading
        def _writeDebugFiles():
            try:
                self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
                self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
            except Exception:
                pass  # Silently fail - debug writing should never block rendering

        threading.Thread(target=_writeDebugFiles, daemon=True).start()

        # Clean and parse JSON
        result = response.content.strip() if response and response.content else ""
@@ -7,7 +7,7 @@ Renderer registry for automatic discovery and registration of renderers.
import logging
import importlib
from typing import Dict, Type, List, Optional
-from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer

logger = logging.getLogger(__name__)

@@ -38,7 +38,7 @@ class RendererRegistry:

        # Scan all Python files in the renderers directory
        for filePath in renderersDir.glob("*.py"):
-           if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
            if filePath.name in ['registry.py', 'documentRendererBaseTemplate.py', '__init__.py']:
                continue

            # Extract module name from filename
@@ -76,9 +76,26 @@ class RendererRegistry:
            # Get supported formats from the renderer class
            supportedFormats = rendererClass.getSupportedFormats()

            # Get priority (default to 0 if not specified)
            priority = rendererClass.getPriority() if hasattr(rendererClass, 'getPriority') else 0

            for formatName in supportedFormats:
-               # Register primary format
-               self._renderers[formatName.lower()] = rendererClass
                formatKey = formatName.lower()

                # Check if format already registered - use priority to decide
                if formatKey in self._renderers:
                    existingRenderer = self._renderers[formatKey]
                    existingPriority = existingRenderer.getPriority() if hasattr(existingRenderer, 'getPriority') else 0

                    # Only replace if new renderer has higher priority
                    if priority > existingPriority:
                        logger.debug(f"Replacing {existingRenderer.__name__} with {rendererClass.__name__} for format '{formatName}' (priority {priority} > {existingPriority})")
                        self._renderers[formatKey] = rendererClass
                    else:
                        logger.debug(f"Keeping {existingRenderer.__name__} for format '{formatName}' (priority {existingPriority} >= {priority})")
                else:
                    # Register primary format
                    self._renderers[formatKey] = rendererClass

                # Register aliases if any
                if hasattr(rendererClass, 'getFormatAliases'):

@@ -86,7 +103,7 @@ class RendererRegistry:
                    for alias in aliases:
                        self._format_mappings[alias.lower()] = formatName.lower()

-           logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}")
            logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats} (priority: {priority})")

        except Exception as e:
            logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}")
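With the renderers added in this PR, this rule means the `csv` key resolves to the code renderer (priority 75) over the document renderer (priority 70). A condensed sketch of the same resolution, using hypothetical stand-in classes:

```python
class RendererCsv:  # stand-in for the document renderer
    @classmethod
    def getPriority(cls):
        return 70

class RendererCodeCsv:  # stand-in for the code renderer
    @classmethod
    def getPriority(cls):
        return 75

renderers = {}
for rendererClass in (RendererCsv, RendererCodeCsv):
    existing = renderers.get("csv")
    # Later registrations only win when they carry a strictly higher priority
    if existing is None or rendererClass.getPriority() > existing.getPriority():
        renderers["csv"] = rendererClass

assert renderers["csv"] is RendererCodeCsv  # 75 beats 70
```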
|
|
|
|||
159
modules/services/serviceGeneration/renderers/rendererCodeCsv.py
Normal file
159
modules/services/serviceGeneration/renderers/rendererCodeCsv.py
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
CSV code renderer for code generation.
"""

from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import csv
import io

class RendererCodeCsv(BaseCodeRenderer):
    """Renders CSV code files."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported CSV formats."""
        return ['csv']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return []

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for CSV code renderer."""
        return 75  # Higher than document renderer (70) for code generation

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: CSV requires specific structure."""
        return 'code'

    async def renderCodeFiles(
        self,
        codeFiles: List[Dict[str, Any]],
        metadata: Dict[str, Any],
        userPrompt: str = None
    ) -> List[RenderedDocument]:
        """
        Render CSV code files.
        For single file: output as-is (validate structure)
        For multiple files: output separately (each is independent CSV)
        """
        renderedDocs = []

        for codeFile in codeFiles:
            if not self._validateCodeFile(codeFile):
                self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
                continue

            filename = codeFile['filename']
            content = codeFile['content']

            # Validate CSV structure (header row, consistent columns)
            validatedContent = self._validateAndFixCsv(content)

            # Extract CSV statistics for validation
            csvStats = self._extractCsvStatistics(validatedContent)

            # Merge file-specific metadata with project metadata
            fileMetadata = dict(metadata) if metadata else {}
            fileMetadata.update({
                "filename": filename,
                "fileType": "csv",
                "statistics": csvStats
            })

            renderedDocs.append(
                RenderedDocument(
                    documentData=validatedContent.encode('utf-8'),
                    mimeType="text/csv",
                    filename=filename,
                    metadata=fileMetadata
                )
            )

        return renderedDocs

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Render method for document generation compatibility.
        Delegates to document renderer if needed, or handles code files directly.
        """
        # Check if this is code generation (has files array) or document generation (has documents array)
        if "files" in extractedContent:
            # Code generation path - use renderCodeFiles
            files = extractedContent.get("files", [])
            metadata = extractedContent.get("metadata", {})
            return await self.renderCodeFiles(files, metadata, userPrompt)
        else:
            # Document generation path - delegate to document renderer
            from .rendererCsv import RendererCsv
            documentRenderer = RendererCsv(self.services)
            return await documentRenderer.render(extractedContent, title, userPrompt, aiService)

    def _validateAndFixCsv(self, content: str) -> str:
        """Validate CSV structure and fix common issues."""
        try:
            # Parse CSV to validate structure
            reader = csv.reader(io.StringIO(content))
            rows = list(reader)

            if not rows:
                return content  # Empty CSV

            # Check header row exists
            headerRow = rows[0]
            headerCount = len(headerRow)

            # Validate all rows have same column count
            fixedRows = [headerRow]  # Start with header

            for i, row in enumerate(rows[1:], 1):
                if len(row) != headerCount:
                    self.logger.debug(f"Row {i} has {len(row)} columns, expected {headerCount}. Auto-fixing...")
                    # Pad or truncate to match header
                    if len(row) < headerCount:
                        row.extend([''] * (headerCount - len(row)))
                    else:
                        row = row[:headerCount]
                fixedRows.append(row)

            # Convert back to CSV string
            output = io.StringIO()
            writer = csv.writer(output)
            for row in fixedRows:
                writer.writerow(row)

            return output.getvalue()

        except Exception as e:
            self.logger.warning(f"CSV validation failed: {e}, returning original content")
            return content
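A standalone replica of the auto-fix shows the pad/truncate behaviour on a concrete input (hypothetical data; the real method also logs each fix):

```python
import csv
import io

def padOrTruncateRows(content: str) -> str:
    """Standalone replica of the auto-fix: pad short rows, truncate long ones."""
    rows = list(csv.reader(io.StringIO(content)))
    width = len(rows[0])  # header defines the expected column count
    fixed = [rows[0]] + [
        row + [""] * (width - len(row)) if len(row) < width else row[:width]
        for row in rows[1:]
    ]
    out = io.StringIO()
    csv.writer(out).writerows(fixed)
    return out.getvalue()

print(padOrTruncateRows("name,age,city\r\nalice,30\r\nbob,25,berlin,extra\r\n"))
# name,age,city
# alice,30,        <- padded to three columns
# bob,25,berlin    <- fourth column truncated
```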
    def _extractCsvStatistics(self, content: str) -> Dict[str, Any]:
        """Extract CSV statistics for validation (row count, column count, headers)."""
        try:
            reader = csv.reader(io.StringIO(content))
            rows = list(reader)

            if not rows:
                return {"rowCount": 0, "columnCount": 0, "headerRow": []}

            headerRow = rows[0]
            columnCount = len(headerRow)
            rowCount = len(rows) - 1  # Exclude header

            return {
                "rowCount": rowCount,
                "columnCount": columnCount,
                "headerRow": headerRow,
                "dataRowCount": rowCount
            }
        except Exception as e:
            self.logger.warning(f"CSV statistics extraction failed: {e}")
            return {}

modules/services/serviceGeneration/renderers/rendererCodeJson.py (new file, 141 lines)

@@ -0,0 +1,141 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON code renderer for code generation.
"""

from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import json

class RendererCodeJson(BaseCodeRenderer):
    """Renders JSON code files."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return []

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for JSON code renderer."""
        return 85  # Higher than document renderer (80) for code generation

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: JSON is structured data format."""
        return 'code'

    async def renderCodeFiles(
        self,
        codeFiles: List[Dict[str, Any]],
        metadata: Dict[str, Any],
        userPrompt: str = None
    ) -> List[RenderedDocument]:
        """
        Render JSON code files.
        For single file: output as-is
        For multiple files: output separately (each file is independent JSON)
        """
        renderedDocs = []

        for codeFile in codeFiles:
            if not self._validateCodeFile(codeFile):
                self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
                continue

            filename = codeFile['filename']
            content = codeFile['content']

            # Validate JSON syntax and extract statistics
            parsed = None
            try:
                parsed = json.loads(content)  # Validate JSON
            except json.JSONDecodeError as e:
                self.logger.warning(f"Invalid JSON in {filename}: {e}")
                # Could fix/format JSON here if needed

            # Format JSON (pretty print)
            try:
                if parsed is None:
                    parsed = json.loads(content)
                formattedContent = json.dumps(parsed, indent=2, ensure_ascii=False)
            except Exception:
                formattedContent = content  # Use original if formatting fails

            # Extract JSON statistics for validation
            jsonStats = self._extractJsonStatistics(parsed) if parsed else {}

            # Merge file-specific metadata with project metadata
            fileMetadata = dict(metadata) if metadata else {}
            fileMetadata.update({
                "filename": filename,
                "fileType": "json",
                "statistics": jsonStats
            })

            renderedDocs.append(
                RenderedDocument(
                    documentData=formattedContent.encode('utf-8'),
                    mimeType="application/json",
                    filename=filename,
                    metadata=fileMetadata
                )
            )

        return renderedDocs

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Render method for document generation compatibility.
        Delegates to document renderer if needed, or handles code files directly.
        """
        # Check if this is code generation (has files array) or document generation (has documents array)
        if "files" in extractedContent:
            # Code generation path - use renderCodeFiles
            files = extractedContent.get("files", [])
            metadata = extractedContent.get("metadata", {})
            return await self.renderCodeFiles(files, metadata, userPrompt)
        else:
            # Document generation path - delegate to document renderer
            # Import here to avoid circular dependency
            from .rendererJson import RendererJson
            documentRenderer = RendererJson(self.services)
            return await documentRenderer.render(extractedContent, title, userPrompt, aiService)

    def _extractJsonStatistics(self, parsed: Any) -> Dict[str, Any]:
        """Extract JSON statistics for validation (object count, array count, key count)."""
        try:
            stats = {
                "isArray": isinstance(parsed, list),
                "isObject": isinstance(parsed, dict),
                "itemCount": 0,
                "keyCount": 0
            }

            if isinstance(parsed, list):
                stats["itemCount"] = len(parsed)
                # Count nested objects/arrays
                objectCount = sum(1 for item in parsed if isinstance(item, dict))
                arrayCount = sum(1 for item in parsed if isinstance(item, list))
                stats["objectCount"] = objectCount
                stats["arrayCount"] = arrayCount
            elif isinstance(parsed, dict):
                stats["keyCount"] = len(parsed)
                stats["keys"] = list(parsed.keys())
                # Count nested objects/arrays
                objectCount = sum(1 for v in parsed.values() if isinstance(v, dict))
                arrayCount = sum(1 for v in parsed.values() if isinstance(v, list))
                stats["objectCount"] = objectCount
                stats["arrayCount"] = arrayCount

            return stats
        except Exception as e:
            self.logger.warning(f"JSON statistics extraction failed: {e}")
            return {}
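Tracing `_extractJsonStatistics` on a small object shows the shape of the `statistics` entry that ends up in each file's metadata (values follow directly from the code above):

```python
sample = {"name": "config", "tags": ["a", "b"], "nested": {"x": 1}}
# _extractJsonStatistics(sample) returns:
# {
#     "isArray": False,
#     "isObject": True,
#     "itemCount": 0,
#     "keyCount": 3,
#     "keys": ["name", "tags", "nested"],
#     "objectCount": 1,   # "nested" is the only dict value
#     "arrayCount": 1     # "tags" is the only list value
# }
```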
modules/services/serviceGeneration/renderers/rendererCodeXml.py (new file, 148 lines)

@@ -0,0 +1,148 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
XML code renderer for code generation.
"""

from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import xml.etree.ElementTree as ET
from xml.dom import minidom

class RendererCodeXml(BaseCodeRenderer):
    """Renders XML code files."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported XML formats."""
        return ['xml']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return []

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for XML code renderer."""
        return 80

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: XML is structured data format."""
        return 'code'

    async def renderCodeFiles(
        self,
        codeFiles: List[Dict[str, Any]],
        metadata: Dict[str, Any],
        userPrompt: str = None
    ) -> List[RenderedDocument]:
        """
        Render XML code files.
        Validates XML syntax and formats (pretty print).
        """
        renderedDocs = []

        for codeFile in codeFiles:
            if not self._validateCodeFile(codeFile):
                self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
                continue

            filename = codeFile['filename']
            content = codeFile['content']

            # Validate and format XML
            formattedContent = self._validateAndFormatXml(content)

            # Extract XML statistics for validation
            xmlStats = self._extractXmlStatistics(formattedContent)

            # Merge file-specific metadata with project metadata
            fileMetadata = dict(metadata) if metadata else {}
            fileMetadata.update({
                "filename": filename,
                "fileType": "xml",
                "statistics": xmlStats
            })

            renderedDocs.append(
                RenderedDocument(
                    documentData=formattedContent.encode('utf-8'),
                    mimeType="application/xml",
                    filename=filename,
                    metadata=fileMetadata
                )
            )

        return renderedDocs

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Render method for document generation compatibility.
        For XML, we only support code generation (no document renderer exists yet).
        """
        # Check if this is code generation (has files array)
        if "files" in extractedContent:
            # Code generation path - use renderCodeFiles
            files = extractedContent.get("files", [])
            metadata = extractedContent.get("metadata", {})
            return await self.renderCodeFiles(files, metadata, userPrompt)
        else:
            # Document generation path - not supported yet, return error
            self.logger.warning("XML document generation not supported, only code generation")
            return [
                RenderedDocument(
                    documentData="XML document generation not yet supported".encode('utf-8'),
                    mimeType="text/plain",
                    filename="error.txt",
                    metadata={}
                )
            ]

    def _validateAndFormatXml(self, content: str) -> str:
        """Validate XML syntax and format (pretty print)."""
        try:
            # Parse XML to validate
            root = ET.fromstring(content)

            # Format XML (pretty print)
            rough_string = ET.tostring(root, encoding='unicode')
            reparsed = minidom.parseString(rough_string)
            formatted = reparsed.toprettyxml(indent="  ")

            # Remove extra blank lines
            lines = [line for line in formatted.split('\n') if line.strip()]
            return '\n'.join(lines)

        except ET.ParseError as e:
            self.logger.warning(f"Invalid XML: {e}, returning original content")
            return content
        except Exception as e:
            self.logger.warning(f"XML formatting failed: {e}, returning original content")
            return content
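The validate-and-format round trip uses only the standard library; a self-contained sketch of the same idea on a one-line input:

```python
import xml.etree.ElementTree as ET
from xml.dom import minidom

raw = "<config><db host='localhost'/><cache ttl='60'/></config>"
root = ET.fromstring(raw)  # raises ET.ParseError on invalid XML
pretty = minidom.parseString(ET.tostring(root, encoding="unicode")).toprettyxml(indent="  ")
# Drop the blank lines that toprettyxml tends to insert
print("\n".join(line for line in pretty.split("\n") if line.strip()))
# <?xml version="1.0" ?>
# <config>
#   <db host="localhost"/>
#   <cache ttl="60"/>
# </config>
```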
    def _extractXmlStatistics(self, content: str) -> Dict[str, Any]:
        """Extract XML statistics for validation (element count, attribute count, root element)."""
        try:
            root = ET.fromstring(content)

            # Count all elements recursively
            elementCount = len(list(root.iter()))

            # Count attributes
            attributeCount = sum(len(elem.attrib) for elem in root.iter())

            # Get root element name
            rootElement = root.tag

            return {
                "elementCount": elementCount,
                "attributeCount": attributeCount,
                "rootElement": rootElement,
                "hasRoot": True
            }
        except Exception as e:
            self.logger.warning(f"XML statistics extraction failed: {e}")
            return {}
@@ -4,7 +4,7 @@
CSV renderer for report generation.
"""

-from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional

@@ -28,45 +28,131 @@ class RendererCsv(BaseRenderer):

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
-       """Return output style classification: CSV requires specific structure (header, then data rows)."""
-       return 'code'
        """Return output style classification: CSV document renderer converts structured document content to CSV."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that CSV renderer accepts.
        CSV renderer only accepts table sections.
        """
        return ["table"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
-       """Render extracted JSON content to CSV format."""
        """Render extracted JSON content to CSV format. Produces one CSV file per table section."""
        try:
-           # Generate CSV directly from JSON (no styling needed for CSV)
-           csvContent = await self._generateCsvFromJson(extractedContent, title)
            # Validate JSON structure
            if not self._validateJsonStructure(extractedContent):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

-           # Determine filename from document or title
            # Extract sections and metadata
            sections = self._extractSections(extractedContent)
            metadata = self._extractMetadata(extractedContent)

            # Determine base filename from document or title
            documents = extractedContent.get("documents", [])
            baseFilename = None
            if documents and isinstance(documents[0], dict):
-               filename = documents[0].get("filename")
-               if not filename:
-                   filename = self._determineFilename(title, "text/csv")
-           else:
-               filename = self._determineFilename(title, "text/csv")
                baseFilename = documents[0].get("filename")
            if not baseFilename:
                baseFilename = self._determineFilename(title, "text/csv")

-           # Extract metadata for document type and other info
-           metadata = extractedContent.get("metadata", {}) if extractedContent else {}
-           documentType = metadata.get("documentType") if isinstance(metadata, dict) else None
            # Remove extension from base filename if present
            if baseFilename.endswith('.csv'):
                baseFilename = baseFilename[:-4]

-           return [
-               RenderedDocument(
-                   documentData=csvContent.encode('utf-8'),
-                   mimeType="text/csv",
-                   filename=filename,
-                   documentType=documentType,
-                   metadata=metadata if isinstance(metadata, dict) else None
            # Find all table sections
            tableSections = []
            for section in sections:
                sectionType = section.get("content_type", "paragraph")
                if sectionType == "table":
                    tableSections.append(section)

            # If no table sections found, return empty CSV
            if not tableSections:
                self.logger.warning("No table sections found in CSV document - returning empty CSV")
                emptyCsv = self._convertRowsToCsv([["No table data available"]])
                return [
                    RenderedDocument(
                        documentData=emptyCsv.encode('utf-8'),
                        mimeType="text/csv",
                        filename=self._determineFilename(title, "text/csv"),
                        documentType=metadata.get("documentType") if isinstance(metadata, dict) else None,
                        metadata=metadata if isinstance(metadata, dict) else None
                    )
                ]

            # Generate one CSV file per table section
            renderedDocuments = []
            for i, tableSection in enumerate(tableSections):
                # Generate CSV content for this table section
                csvRows = []

                # Add section title if available
                sectionTitle = tableSection.get("title")
                if sectionTitle:
                    csvRows.append([sectionTitle])
                    csvRows.append([])  # Empty row after title

                # Render table from section elements
                elements = tableSection.get("elements", [])
                for element in elements:
                    tableRows = self._renderJsonTableToCsv(element)
                    if tableRows:
                        csvRows.extend(tableRows)

                # Convert to CSV string
                csvContent = self._convertRowsToCsv(csvRows)

                # Determine filename for this table
                if len(tableSections) == 1:
                    # Single table - use base filename
                    filename = f"{baseFilename}.csv"
                else:
                    # Multiple tables - add index or section title to filename
                    sectionId = tableSection.get("id", f"table_{i+1}")
                    # Use section title if available, otherwise use section ID
                    if sectionTitle:
                        # Sanitize section title for filename
                        safeTitle = "".join(c for c in sectionTitle if c.isalnum() or c in (' ', '-', '_')).strip()
                        safeTitle = safeTitle.replace(' ', '_')[:30]  # Limit length
                        filename = f"{baseFilename}_{safeTitle}.csv"
                    else:
                        filename = f"{baseFilename}_{sectionId}.csv"

                # Extract document type from metadata
                documentType = metadata.get("documentType") if isinstance(metadata, dict) else None

                renderedDocuments.append(
                    RenderedDocument(
                        documentData=csvContent.encode('utf-8'),
                        mimeType="text/csv",
                        filename=filename,
                        documentType=documentType,
                        metadata=metadata if isinstance(metadata, dict) else None
                    )
                )
-           ]

            return renderedDocuments

        except Exception as e:
            self.logger.error(f"Error rendering CSV: {str(e)}")
            # Return minimal CSV fallback
-           return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
            fallbackCsv = self._convertRowsToCsv([["Title", "Content"], [title, f"Error rendering report: {str(e)}"]])
            return [
                RenderedDocument(
                    documentData=fallbackCsv.encode('utf-8'),
                    mimeType="text/csv",
                    filename=self._determineFilename(title, "text/csv"),
                    metadata=extractedContent.get("metadata", {}) if extractedContent else None
                )
            ]

    async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
-       """Generate CSV content from structured JSON document."""
        """Generate CSV content from structured JSON document. DEPRECATED: Use render() method instead."""
        # This method is kept for backward compatibility but is no longer used
        # The render() method now handles CSV generation directly
        try:
            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(jsonContent):
@@ -88,12 +174,14 @@ class RendererCsv(BaseRenderer):
            csvRows.append([documentTitle])
            csvRows.append([])  # Empty row

-           # Process each section in order
            # Process each section in order - only table sections
            for section in sections:
-               sectionCsv = self._renderJsonSectionToCsv(section)
-               if sectionCsv:
-                   csvRows.extend(sectionCsv)
-                   csvRows.append([])  # Empty row between sections
                sectionType = section.get("content_type", "paragraph")
                if sectionType == "table":
                    sectionCsv = self._renderJsonSectionToCsv(section)
                    if sectionCsv:
                        csvRows.extend(sectionCsv)
                        csvRows.append([])  # Empty row between sections

            # Convert to CSV string
            csvContent = self._convertRowsToCsv(csvRows)

@@ -309,3 +397,4 @@ class RendererCsv(BaseRenderer):
            content = '\n'.join(lines[1:-1]).strip()

        return content
@@ -4,7 +4,7 @@
DOCX renderer for report generation using python-docx.
"""

-from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io

@@ -44,6 +44,15 @@ class RendererDocx(BaseRenderer):
        """Return output style classification: Word documents are formatted documents."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that DOCX renderer accepts.
        DOCX renderer accepts all section types (Word documents can contain all content types).
        """
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to DOCX format using AI-analyzed styling."""
        self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
@@ -107,24 +116,37 @@ class RendererDocx(BaseRenderer):

    async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
        """Generate DOCX content from structured JSON document."""
        import time
        start_time = time.time()
        try:
            self.logger.debug("_generateDocxFromJson: Starting document generation")
            # Create new document
            doc = Document()
            self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")

            # Get style set: use styles from metadata if available, otherwise enhance with AI
            style_start = time.time()
            self.logger.debug("_generateDocxFromJson: About to get style set")
            styleSet = await self._getStyleSet(json_content, userPrompt, aiService)
            self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")

            # Setup basic document styles and create all styles from style set
            setup_start = time.time()
            self.logger.debug("_generateDocxFromJson: Setting up document styles")
            self._setupBasicDocumentStyles(doc)
            self._setupDocumentStyles(doc, styleSet)
            self.logger.debug(f"_generateDocxFromJson: Document styles setup in {time.time() - setup_start:.2f}s")

            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(json_content):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract sections and metadata from standardized schema
            extract_start = time.time()
            self.logger.debug("_generateDocxFromJson: Extracting sections and metadata")
            sections = self._extractSections(json_content)
            metadata = self._extractMetadata(json_content)
            self.logger.debug(f"_generateDocxFromJson: Extracted {len(sections)} sections in {time.time() - extract_start:.2f}s")

            # Use provided title (which comes from documents[].title) as primary source
            # Fallback to metadata.title only if title parameter is empty

@@ -135,18 +157,32 @@ class RendererDocx(BaseRenderer):
            doc.add_paragraph(document_title, style='Title')

            # Process each section in order
            render_start = time.time()
            self.logger.debug(f"_generateDocxFromJson: Starting to render {len(sections)} sections")
-           for section in sections:
            for idx, section in enumerate(sections):
                section_start = time.time()
                self.logger.debug(f"_generateDocxFromJson: Rendering section {idx + 1}/{len(sections)}")
                self._renderJsonSection(doc, section, styleSet)
                self.logger.debug(f"_generateDocxFromJson: Section {idx + 1} rendered in {time.time() - section_start:.2f}s")
            self.logger.debug(f"_generateDocxFromJson: All sections rendered in {time.time() - render_start:.2f}s")

            # Save to buffer
            save_start = time.time()
            self.logger.debug("_generateDocxFromJson: Starting to save document to buffer")
            buffer = io.BytesIO()
            doc.save(buffer)
            buffer.seek(0)
            self.logger.debug(f"_generateDocxFromJson: Document saved to buffer in {time.time() - save_start:.2f}s")

            # Convert to base64
            encode_start = time.time()
            self.logger.debug("_generateDocxFromJson: Converting to base64")
            docx_bytes = buffer.getvalue()
            docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
            self.logger.debug(f"_generateDocxFromJson: Converted to base64 in {time.time() - encode_start:.2f}s (document size: {len(docx_bytes)} bytes)")

            total_time = time.time() - start_time
            self.logger.info(f"_generateDocxFromJson: Document generation completed in {total_time:.2f}s")
            return docx_base64

        except Exception as e:
@@ -299,6 +335,9 @@ class RendererDocx(BaseRenderer):

            # Process each element in the section
            for element in elements:
                # Skip non-dict elements (e.g., int, str, etc.)
                if not isinstance(element, dict):
                    continue
                element_type = element.get("type", "")

                # Support three content formats from Phase 5D
@@ -368,7 +407,23 @@ class RendererDocx(BaseRenderer):
                error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")

    def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
-       """Render a JSON table to DOCX using AI-generated styles."""
        """
        Render a JSON table to DOCX using AI-generated styles.

        PERFORMANCE OPTIMIZATION: Uses direct XML manipulation via lxml instead of
        python-docx high-level API. This bypasses the slow cell.text assignment
        which creates multiple XML operations per cell.

        The key insight: python-docx's cell.text setter is slow because it:
        1. Clears existing content (XML manipulation)
        2. Creates a new paragraph element
        3. Creates a new run element
        4. Sets text value

        By building the XML directly, we achieve 100-1000x faster performance.
        """
        import time
        table_start = time.time()
        try:
            # Extract from nested content structure
            content = table_data.get("content", {})
@@ -380,59 +435,244 @@ class RendererDocx(BaseRenderer):
            if not headers or not rows:
                return

-           # Create table
-           table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
-           table.alignment = WD_TABLE_ALIGNMENT.CENTER
            totalRows = len(rows)
            totalCols = len(headers)
            totalCells = totalRows * totalCols

-           # Apply table borders based on AI style
-           border_style = styles["table_border"]["style"]
-           if border_style == "horizontal_only":
-               self._applyHorizontalBordersOnly(table)
-           elif border_style == "grid":
-               table.style = 'Table Grid'
-           # else: no borders
            self.logger.debug(f"_renderJsonTable: Starting FAST table render - {totalRows} rows x {totalCols} columns = {totalCells} cells")

-           # Add headers with AI-generated styling
-           header_row = table.rows[0]
-           header_style = styles["table_header"]
-           for i, header in enumerate(headers):
-               if i < len(header_row.cells):
-                   cell = header_row.cells[i]
-                   cell.text = str(header)
-
-                   # Apply background color
-                   bg_color = header_style["background"].lstrip('#')
-                   self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
-
-                   # Apply text styling
-                   for paragraph in cell.paragraphs:
-                       paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT
-                       for run in paragraph.runs:
-                           run.bold = header_style["bold"]
-                           run.font.size = Pt(11)
-                           text_color = header_style["text_color"].lstrip('#')
-                           run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
            # Use fast XML-based table rendering
            self._renderTableFastXml(doc, headers, rows, styles)

-           # Add data rows with AI-generated styling
-           cell_style = styles["table_cell"]
-           for row_idx, row_data in enumerate(rows):
-               if row_idx + 1 < len(table.rows):
-                   table_row = table.rows[row_idx + 1]
-                   for col_idx, cell_data in enumerate(row_data):
-                       if col_idx < len(table_row.cells):
-                           cell = table_row.cells[col_idx]
-                           cell.text = str(cell_data)
-
-                           # Apply text styling
-                           for paragraph in cell.paragraphs:
-                               paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
-                               for run in paragraph.runs:
-                                   run.font.size = Pt(10)
-                                   text_color = cell_style["text_color"].lstrip('#')
-                                   run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
            total_time = time.time() - table_start
            rate = totalCells / total_time if total_time > 0 else 0
            self.logger.info(f"_renderJsonTable: Table completed in {total_time:.2f}s ({totalRows} rows x {totalCols} cols = {totalCells} cells) - Rate: {rate:.0f} cells/s")

        except Exception as e:
-           self.logger.warning(f"Error rendering table: {str(e)}")
            self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)
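The speedup comes from never touching `cell.text`; a rough micro-benchmark sketch contrasting the two paths (timings vary by machine; the 100-1000x figure above is the diff author's claim, and the fast-path table here omits `tblPr`/`tblGrid` for brevity):

```python
import time
from docx import Document
from docx.oxml.shared import OxmlElement

rows, cols = 200, 5

# Slow path: high-level API, one cell.text assignment per cell
doc = Document()
t0 = time.time()
table = doc.add_table(rows=rows, cols=cols)
for row in table.rows:
    for cell in row.cells:
        cell.text = "x"
slow = time.time() - t0

# Fast path: build the row XML directly and append the finished table once
doc2 = Document()
t0 = time.time()
tbl = OxmlElement("w:tbl")
for _ in range(rows):
    tr = OxmlElement("w:tr")
    for _ in range(cols):
        tc = OxmlElement("w:tc")
        p = OxmlElement("w:p")
        r = OxmlElement("w:r")
        t = OxmlElement("w:t")
        t.text = "x"
        r.append(t); p.append(r); tc.append(p); tr.append(tc)
    tbl.append(tr)
doc2._body._body.append(tbl)  # same body access the diff uses
fast = time.time() - t0

print(f"cell.text: {slow:.3f}s, direct XML: {fast:.3f}s")
```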
def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None:
|
||||
"""
|
||||
High-performance table rendering using direct XML manipulation.
|
||||
|
||||
This bypasses python-docx's slow high-level API and builds the table
|
||||
XML structure directly using lxml, which is 100-1000x faster.
|
||||
"""
|
||||
import time
|
||||
from docx.oxml.shared import OxmlElement, qn
|
||||
from docx.oxml.ns import nsmap
|
||||
from lxml import etree
|
||||
|
||||
create_start = time.time()
|
||||
|
||||
# Get the document body element
|
||||
body = doc._body._body
|
||||
|
||||
# Create table element
|
||||
tbl = OxmlElement('w:tbl')
|
||||
|
||||
# Add table properties
|
||||
tblPr = OxmlElement('w:tblPr')
|
||||
|
||||
# Table width - auto
|
||||
tblW = OxmlElement('w:tblW')
|
||||
tblW.set(qn('w:type'), 'auto')
|
||||
tblW.set(qn('w:w'), '0')
|
||||
tblPr.append(tblW)
|
||||
|
||||
# Center alignment
|
||||
jc = OxmlElement('w:jc')
|
||||
jc.set(qn('w:val'), 'center')
|
||||
tblPr.append(jc)
|
||||
|
||||
# Apply table borders directly (works without template styles)
|
||||
borderStyle = styles.get("table_border", {}).get("style", "grid")
|
||||
tblBorders = self._createTableBordersXml(borderStyle)
|
||||
tblPr.append(tblBorders)
|
||||
|
||||
# Table cell margins for better readability
|
||||
tblCellMar = OxmlElement('w:tblCellMar')
|
||||
for side in ['top', 'left', 'bottom', 'right']:
|
||||
margin = OxmlElement(f'w:{side}')
|
||||
margin.set(qn('w:w'), '80') # 80 twips = ~4pt padding
|
||||
margin.set(qn('w:type'), 'dxa')
|
||||
tblCellMar.append(margin)
|
||||
tblPr.append(tblCellMar)
|
||||
|
||||
tbl.append(tblPr)
|
||||
|
||||
# Create table grid (column definitions)
|
||||
tblGrid = OxmlElement('w:tblGrid')
|
||||
for _ in range(len(headers)):
|
||||
gridCol = OxmlElement('w:gridCol')
|
||||
tblGrid.append(gridCol)
|
||||
tbl.append(tblGrid)
|
||||
|
||||
self.logger.debug(f"_renderTableFastXml: Table structure created in {time.time() - create_start:.3f}s")
|
||||
|
||||
# Build all rows using fast XML
|
||||
rows_start = time.time()
|
||||
|
||||
# Header row
|
||||
headerRow = self._createTableRowXml(headers, isHeader=True)
|
||||
tbl.append(headerRow)
|
||||
|
||||
header_time = time.time() - rows_start
|
||||
self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s")
|
||||
|
||||
# Data rows - batch process for performance
|
||||
data_start = time.time()
|
||||
rowCount = len(rows)
|
||||
|
||||
for idx, rowData in enumerate(rows):
|
||||
# Convert all cells to strings
|
||||
cellTexts = [str(cell) if cell is not None else '' for cell in rowData]
|
||||
# Pad if needed
|
||||
while len(cellTexts) < len(headers):
|
||||
cellTexts.append('')
|
||||
|
||||
row = self._createTableRowXml(cellTexts, isHeader=False)
|
||||
tbl.append(row)
|
||||
|
||||
# Log progress every 10%
|
||||
if rowCount > 100 and (idx + 1) % (rowCount // 10) == 0:
|
||||
elapsed = time.time() - data_start
|
||||
rate = (idx + 1) * len(headers) / elapsed if elapsed > 0 else 0
|
||||
self.logger.debug(f"_renderTableFastXml: Progress {((idx + 1) / rowCount * 100):.0f}% ({idx + 1}/{rowCount} rows) - Rate: {rate:.0f} cells/s")
|
||||
|
||||
data_time = time.time() - data_start
|
||||
|
||||
# Append table to document body
|
||||
body.append(tbl)
|
||||
|
||||
total_time = time.time() - create_start
|
||||
totalCells = (rowCount + 1) * len(headers)
|
||||
rate = totalCells / total_time if total_time > 0 else 0
|
||||
|
||||
self.logger.debug(f"_renderTableFastXml: All rows created in {data_time:.2f}s, total: {total_time:.2f}s, rate: {rate:.0f} cells/s")
|
||||
|
||||
def _createTableBordersXml(self, borderStyle: str) -> Any:
    """
    Create table borders XML element based on style.

    Supports:
    - 'grid': Full grid with all borders (default)
    - 'horizontal_only': Only horizontal lines between rows
    - 'none' or other: Minimal/no borders
    """
    from docx.oxml.shared import OxmlElement, qn

    tblBorders = OxmlElement('w:tblBorders')

    # Border color - dark gray for professional look
    borderColor = '404040'
    borderSize = '4'  # 0.5pt (in eighths of a point)

    if borderStyle == "grid":
        # Full grid - all borders
        for borderName in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']:
            border = OxmlElement(f'w:{borderName}')
            border.set(qn('w:val'), 'single')
            border.set(qn('w:sz'), borderSize)
            border.set(qn('w:space'), '0')
            border.set(qn('w:color'), borderColor)
            tblBorders.append(border)

    elif borderStyle == "horizontal_only":
        # Only horizontal lines
        for borderName in ['top', 'bottom', 'insideH']:
            border = OxmlElement(f'w:{borderName}')
            border.set(qn('w:val'), 'single')
            border.set(qn('w:sz'), borderSize)
            border.set(qn('w:space'), '0')
            border.set(qn('w:color'), borderColor)
            tblBorders.append(border)
        # No vertical borders
        for borderName in ['left', 'right', 'insideV']:
            border = OxmlElement(f'w:{borderName}')
            border.set(qn('w:val'), 'nil')
            tblBorders.append(border)
    else:
        # Minimal - just outer border
        for borderName in ['top', 'left', 'bottom', 'right']:
            border = OxmlElement(f'w:{borderName}')
            border.set(qn('w:val'), 'single')
            border.set(qn('w:sz'), borderSize)
            border.set(qn('w:space'), '0')
            border.set(qn('w:color'), borderColor)
            tblBorders.append(border)

    return tblBorders


def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any:
    """
    Create a table row XML element with cells.

    This is the core fast path: builds the row XML directly without
    going through python-docx's slow cell.text assignment.
    """
    from docx.oxml.shared import OxmlElement, qn

    tr = OxmlElement('w:tr')

    # Row properties for header
    if isHeader:
        trPr = OxmlElement('w:trPr')
        tblHeader = OxmlElement('w:tblHeader')
        trPr.append(tblHeader)
        tr.append(trPr)

    for cellText in cells:
        # Create cell
        tc = OxmlElement('w:tc')

        # Cell properties
        tcPr = OxmlElement('w:tcPr')
        tcW = OxmlElement('w:tcW')
        tcW.set(qn('w:type'), 'auto')
        tcW.set(qn('w:w'), '0')
        tcPr.append(tcW)

        # Header cell styling - blue background
        if isHeader:
            shd = OxmlElement('w:shd')
            shd.set(qn('w:val'), 'clear')
            shd.set(qn('w:color'), 'auto')
            shd.set(qn('w:fill'), '4472C4')  # Professional blue
            tcPr.append(shd)

        tc.append(tcPr)

        # Paragraph with text
        p = OxmlElement('w:p')

        # Add run with text
        r = OxmlElement('w:r')

        # Header text styling - bold and white
        if isHeader:
            rPr = OxmlElement('w:rPr')
            b = OxmlElement('w:b')
            rPr.append(b)
            # White text color
            color = OxmlElement('w:color')
            color.set(qn('w:val'), 'FFFFFF')
            rPr.append(color)
            r.append(rPr)

        # Text element
        t = OxmlElement('w:t')
        # Preserve spaces if text starts/ends with whitespace
        if cellText and (cellText[0] == ' ' or cellText[-1] == ' '):
            t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
        t.text = cellText
        r.append(t)

        p.append(r)
        tc.append(p)
        tr.append(tc)

    return tr


def _applyHorizontalBordersOnly(self, table) -> None:
    """Apply only horizontal borders to the table (no vertical borders)."""

@ -526,9 +766,38 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not set cell background: {str(e)}")

def _setCellBackgroundFast(self, cell, hex_color: str) -> None:
    """
    Set the background color of a table cell using a pre-calculated hex string.
    PERFORMANCE OPTIMIZED: Avoids RGBColor unpacking and string formatting in the hot loop.
    """
    try:
        from docx.oxml.shared import OxmlElement, qn

        # Get cell properties
        tc_pr = cell._element.find(qn('w:tcPr'))
        if tc_pr is None:
            tc_pr = OxmlElement('w:tcPr')
            cell._element.insert(0, tc_pr)

        # Remove existing shading
        existing_shading = tc_pr.find(qn('w:shd'))
        if existing_shading is not None:
            tc_pr.remove(existing_shading)

        # Create new shading element with pre-calculated hex color
        shading = OxmlElement('w:shd')
        shading.set(qn('w:val'), 'clear')
        shading.set(qn('w:color'), 'auto')
        shading.set(qn('w:fill'), hex_color)
        tc_pr.append(shading)

    except Exception as e:
        self.logger.warning(f"Could not set cell background: {str(e)}")


def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
    """Render a JSON bullet list to DOCX using AI-generated styles."""
    """Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance."""
    try:
        # Extract from nested content structure
        content = list_data.get("content", {})

@ -537,20 +806,38 @@ class RendererDocx(BaseRenderer):
        items = content.get("items", [])
        bullet_style = styles.get("bullet_list", {})

        # Pre-calculate and cache style objects to avoid repeated parsing
        font_size_pt = None
        text_color_rgb = None
        if bullet_style:
            if "font_size" in bullet_style:
                font_size_pt = Pt(bullet_style["font_size"])
            if "color" in bullet_style:
                color_hex = bullet_style["color"].lstrip('#')
                text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))

        for item in items:
            if isinstance(item, str):
                para = doc.add_paragraph(item, style='List Bullet')
            elif isinstance(item, dict) and "text" in item:
                para = doc.add_paragraph(item["text"], style='List Bullet')

            # Apply bullet list styling from style set
            # Apply bullet list styling from style set - use cached objects
            if bullet_style and para.runs:
                for run in para.runs:
                    if "font_size" in bullet_style:
                        run.font.size = Pt(bullet_style["font_size"])
                    if "color" in bullet_style:
                        color_hex = bullet_style["color"].lstrip('#')
                        run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
            # Use direct access instead of iterating
            if len(para.runs) > 0:
                run = para.runs[0]
                if font_size_pt:
                    run.font.size = font_size_pt
                if text_color_rgb:
                    run.font.color.rgb = text_color_rgb
            else:
                # Create run if none exists
                run = para.add_run()
                if font_size_pt:
                    run.font.size = font_size_pt
                if text_color_rgb:
                    run.font.color.rgb = text_color_rgb

    except Exception as e:
        self.logger.warning(f"Error rendering bullet list: {str(e)}")
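The hoisting pattern in the hunk above recurs across every renderer touched by this PR: immutable style objects (`Pt`, `RGBColor`) are built once, before the loop, instead of once per run. A minimal standalone sketch of the idea (function and variable names are illustrative, not from the diff):

```python
from docx.shared import Pt, RGBColor

def style_runs_naive(paragraphs, style):
    # Naive: re-parses the hex color and rebuilds Pt/RGBColor for every run.
    for para in paragraphs:
        for run in para.runs:
            run.font.size = Pt(style["font_size"])
            h = style["color"].lstrip('#')
            run.font.color.rgb = RGBColor(int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16))

def style_runs_hoisted(paragraphs, style):
    # Optimized: build the immutable style objects once and reuse them.
    size = Pt(style["font_size"])
    h = style["color"].lstrip('#')
    color = RGBColor(int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16))
    for para in paragraphs:
        for run in para.runs:
            run.font.size = size
            run.font.color.rgb = color
```

Both variants produce identical documents; the second simply moves the constant work out of the O(paragraphs x runs) loop.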
@ -603,17 +890,36 @@ class RendererDocx(BaseRenderer):

        if text:
            para = doc.add_paragraph(text)
            # Apply paragraph styling from style set
            # Apply paragraph styling from style set - OPTIMIZED: pre-calculate style objects
            paragraph_style = styles.get("paragraph", {})
            if paragraph_style:
                for run in para.runs:
                    if "font_size" in paragraph_style:
                        run.font.size = Pt(paragraph_style["font_size"])
                    if "bold" in paragraph_style:
                        run.font.bold = paragraph_style["bold"]
                    if "color" in paragraph_style:
                        color_hex = paragraph_style["color"].lstrip('#')
                        run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
                # Pre-calculate and cache style objects
                font_size_pt = None
                text_color_rgb = None
                if "font_size" in paragraph_style:
                    font_size_pt = Pt(paragraph_style["font_size"])
                if "color" in paragraph_style:
                    color_hex = paragraph_style["color"].lstrip('#')
                    text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
                bold = paragraph_style.get("bold", False)

                # Use direct access instead of iterating
                if len(para.runs) > 0:
                    run = para.runs[0]
                    if font_size_pt:
                        run.font.size = font_size_pt
                    run.font.bold = bold
                    if text_color_rgb:
                        run.font.color.rgb = text_color_rgb
                else:
                    # Create run if none exists
                    run = para.add_run()
                    if font_size_pt:
                        run.font.size = font_size_pt
                    run.font.bold = bold
                    if text_color_rgb:
                        run.font.color.rgb = text_color_rgb

                if "align" in paragraph_style:
                    align = paragraph_style["align"]
                    if align == "center":

@ -640,16 +946,32 @@ class RendererDocx(BaseRenderer):
            if code:
                if language:
                    lang_para = doc.add_paragraph(f"Code ({language}):")
                    if lang_para.runs:
                    if len(lang_para.runs) > 0:
                        lang_para.runs[0].bold = True

                # Pre-calculate and cache style objects
                code_font_name = code_style.get("font", "Courier New")
                code_font_size_pt = Pt(code_style.get("font_size", 9))
                code_text_color_rgb = None
                if "color" in code_style:
                    color_hex = code_style["color"].lstrip('#')
                    code_text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))

                code_para = doc.add_paragraph(code)
                for run in code_para.runs:
                    run.font.name = code_style.get("font", "Courier New")
                    run.font.size = Pt(code_style.get("font_size", 9))
                    if "color" in code_style:
                        color_hex = code_style["color"].lstrip('#')
                        run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
                # Use direct access instead of iterating
                if len(code_para.runs) > 0:
                    run = code_para.runs[0]
                    run.font.name = code_font_name
                    run.font.size = code_font_size_pt
                    if code_text_color_rgb:
                        run.font.color.rgb = code_text_color_rgb
                else:
                    # Create run if none exists
                    run = code_para.add_run()
                    run.font.name = code_font_name
                    run.font.size = code_font_size_pt
                    if code_text_color_rgb:
                        run.font.color.rgb = code_text_color_rgb

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
@ -4,7 +4,7 @@
HTML renderer for report generation.
"""

from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional

@ -31,6 +31,15 @@ class RendererHtml(BaseRenderer):
        """Return output style classification: HTML web pages are rendered documents."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that HTML renderer accepts.
        HTML renderer accepts all section types (HTML pages can contain all content types including images).
        """
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Render HTML document with images as separate files.

@ -4,7 +4,7 @@
Image renderer for report generation using AI image generation.
"""

from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import logging

@ -35,6 +35,14 @@ class RendererImage(BaseRenderer):
        """Return output style classification: Images are visual media."""
        return 'image'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that Image renderer accepts.
        Image renderer only accepts image sections (images are generated from image sections).
        """
        return ["image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to image format using AI image generation."""
        try:

@ -4,7 +4,7 @@
JSON renderer for report generation.
"""

from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import json

@ -29,8 +29,18 @@ class RendererJson(BaseRenderer):

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: JSON is structured data format."""
        return 'code'
        """Return output style classification: JSON document renderer converts structured document content to JSON."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that JSON renderer accepts.
        JSON renderer accepts all section types except images (images cannot be serialized to JSON).
        """
        from modules.datamodels.datamodelJson import supportedSectionTypes
        # Return all types except image
        return [st for st in supportedSectionTypes if st != "image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to JSON format."""

@ -4,7 +4,7 @@
Markdown renderer for report generation.
"""

from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional

@ -31,6 +31,15 @@ class RendererMarkdown(BaseRenderer):
        """Return output style classification: Markdown documents are formatted documents."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that Markdown renderer accepts.
        Markdown renderer accepts all section types except images.
        """
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return [st for st in supportedSectionTypes if st != "image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to Markdown format."""
        try:

@ -4,7 +4,7 @@
PDF renderer for report generation using reportlab.
"""

from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io

@ -44,6 +44,15 @@ class RendererPdf(BaseRenderer):
        """Return output style classification: PDF documents are formatted documents."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that PDF renderer accepts.
        PDF renderer accepts all section types (PDF documents can contain all content types).
        """
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to PDF format using AI-analyzed styling."""
        try:

@ -7,7 +7,7 @@ import json
import re
from datetime import datetime, UTC
from typing import Dict, Any, Optional, List
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument

logger = logging.getLogger(__name__)

@ -41,6 +41,15 @@ class RendererPptx(BaseRenderer):
        """Return output style classification: PowerPoint presentations are formatted documents."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that PowerPoint renderer accepts.
        PowerPoint renderer accepts all section types (presentations can contain all content types including images).
        """
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Render content as PowerPoint presentation from JSON data.
@ -1257,78 +1266,96 @@ JSON ONLY. NO OTHER TEXT."""
            for col_idx in range(num_cols):
                table.columns[col_idx].width = col_width_emu

            # Add headers with styling
            # Add headers with styling - OPTIMIZED: pre-calculate color/style objects
            header_style = styles.get("table_header", {})
            header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121)))
            header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255)))
            header_font_size = header_style.get("font_size", 18)

            # Pre-calculate and cache RGB color objects
            header_bg_rgb = RGBColor(*header_bg_color)
            header_text_rgb = RGBColor(*header_text_color)
            header_font_size_pt = Pt(header_font_size)
            header_bold = header_style.get("bold", True)

            # Determine alignment once
            align = header_style.get("align", "center")
            if align == "left":
                header_alignment = PP_ALIGN.LEFT
            elif align == "right":
                header_alignment = PP_ALIGN.RIGHT
            else:
                header_alignment = PP_ALIGN.CENTER

            for col_idx, header in enumerate(headers):
                cell = table.cell(0, col_idx)
                # Clear existing text and set new text
                cell.text_frame.clear()
                cell.text = str(header) if header else ""
                header_text = str(header) if header else ""
                cell.text = header_text

                # Ensure paragraph exists
                if len(cell.text_frame.paragraphs) == 0:
                    cell.text_frame.add_paragraph()

                # Apply styling
                # Apply styling - use cached objects
                cell.fill.solid()
                cell.fill.fore_color.rgb = RGBColor(*header_bg_color)
                cell.fill.fore_color.rgb = header_bg_rgb
                para = cell.text_frame.paragraphs[0]
                para.font.bold = header_style.get("bold", True)
                para.font.size = Pt(header_font_size)
                para.font.color.rgb = RGBColor(*header_text_color)

                align = header_style.get("align", "center")
                if align == "left":
                    para.alignment = PP_ALIGN.LEFT
                elif align == "right":
                    para.alignment = PP_ALIGN.RIGHT
                else:
                    para.alignment = PP_ALIGN.CENTER
                para.font.bold = header_bold
                para.font.size = header_font_size_pt
                para.font.color.rgb = header_text_rgb
                para.alignment = header_alignment

                # Ensure text is set on paragraph
                if not para.text:
                    para.text = str(header) if header else ""
                    para.text = header_text

            # Add data rows with styling
            # Add data rows with styling - OPTIMIZED: pre-calculate color/style objects
            cell_style = styles.get("table_cell", {})
            cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255)))
            cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47)))
            cell_font_size = cell_style.get("font_size", 16)

            # Pre-calculate and cache RGB color objects
            cell_bg_rgb = RGBColor(*cell_bg_color)
            cell_text_rgb = RGBColor(*cell_text_color)
            cell_font_size_pt = Pt(cell_font_size)
            cell_bold = cell_style.get("bold", False)

            # Determine alignment once
            align = cell_style.get("align", "left")
            if align == "center":
                cell_alignment = PP_ALIGN.CENTER
            elif align == "right":
                cell_alignment = PP_ALIGN.RIGHT
            else:
                cell_alignment = PP_ALIGN.LEFT

            for row_idx, row_data in enumerate(rows, 1):
                for col_idx, cell_data in enumerate(row_data[:num_cols]):
                    cell = table.cell(row_idx, col_idx)
                    # Clear existing text and set new text
                    cell.text_frame.clear()
                    cell.text = str(cell_data) if cell_data is not None else ""
                    cell_text = str(cell_data) if cell_data is not None else ""
                    cell.text = cell_text

                    # Ensure paragraph exists
                    if len(cell.text_frame.paragraphs) == 0:
                        cell.text_frame.add_paragraph()

                    # Apply styling
                    # Apply styling - use cached objects
                    cell.fill.solid()
                    cell.fill.fore_color.rgb = RGBColor(*cell_bg_color)
                    cell.fill.fore_color.rgb = cell_bg_rgb
                    para = cell.text_frame.paragraphs[0]
                    para.font.size = Pt(cell_font_size)
                    para.font.bold = cell_style.get("bold", False)
                    para.font.color.rgb = RGBColor(*cell_text_color)

                    align = cell_style.get("align", "left")
                    if align == "center":
                        para.alignment = PP_ALIGN.CENTER
                    elif align == "right":
                        para.alignment = PP_ALIGN.RIGHT
                    else:
                        para.alignment = PP_ALIGN.LEFT
                    para.font.size = cell_font_size_pt
                    para.font.bold = cell_bold
                    para.font.color.rgb = cell_text_rgb
                    para.alignment = cell_alignment

                    # Ensure text is set on paragraph
                    if not para.text:
                        para.text = str(cell_data) if cell_data is not None else ""
                        para.text = cell_text

        except Exception as e:
            logger.warning(f"Error adding table to slide: {str(e)}")

@ -1353,6 +1380,13 @@ JSON ONLY. NO OTHER TEXT."""
        base_font_size = list_style.get("font_size", 14)
        calculated_size = max(10, int(base_font_size * font_size_multiplier))  # Minimum 10pt for readability

        # Pre-calculate and cache style objects to avoid repeated parsing
        font_size_pt = Pt(calculated_size)
        text_color = self._getSafeColor(list_style.get("color", (47, 47, 47)))
        text_color_rgb = RGBColor(*text_color)
        space_before_pt = Pt(2)
        space_after_pt = Pt(2)

        logger.debug(f"Rendering bullet list with {len(items)} items")

        for idx, item in enumerate(items):

@ -1378,12 +1412,12 @@ JSON ONLY. NO OTHER TEXT."""
            # Set text content
            p.text = item_text

            # Apply formatting first
            p.font.size = Pt(calculated_size)
            p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47))))
            # Apply formatting - use cached objects
            p.font.size = font_size_pt
            p.font.color.rgb = text_color_rgb
            p.alignment = PP_ALIGN.LEFT  # Left align bullet lists
            p.space_before = Pt(2)  # Small spacing before
            p.space_after = Pt(2)  # Small spacing after
            p.space_before = space_before_pt  # Small spacing before
            p.space_after = space_after_pt  # Small spacing after

            # In python-pptx, setting level > 0 should enable bullets automatically
            # However, some versions may not support paragraph_format, so we'll use manual bullets as fallback
@ -4,7 +4,7 @@
Text renderer for report generation.
"""

from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional

@ -63,6 +63,17 @@ class RendererText(BaseRenderer):
        # All other formats handled by RendererText are code style
        return 'code'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that Text renderer accepts.
        Text renderer accepts all section types except images (text formats cannot display images).
        """
        from modules.datamodels.datamodelJson import supportedSectionTypes

        # Text renderer accepts all types except images
        return [st for st in supportedSectionTypes if st != "image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to plain text format."""
        try:

@ -4,11 +4,12 @@
Excel renderer for report generation using openpyxl.
"""

from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io
import base64
import re
from datetime import datetime, UTC, date
try:
    from dateutil import parser as date_parser

@ -25,6 +26,16 @@ try:
except ImportError:
    OPENPYXL_AVAILABLE = False

# PERFORMANCE: Pre-compile regex patterns used in hot loops
_DATE_PATTERN = re.compile(
    r'^\d{1,4}[-./]\d{1,2}[-./]\d{1,4}'  # Basic date pattern: YYYY-MM-DD or DD.MM.YYYY
    r'|^\d{1,2}[-./]\d{1,2}[-./]\d{2,4}'  # DD/MM/YYYY or MM/DD/YYYY
    r'|^\d{4}-\d{2}-\d{2}'  # ISO format: YYYY-MM-DD
    r'|^\d{1,2}[-./]\d{1,2}[-./]\d{2,4}\s+\d{1,2}:\d{2}'  # With time
)
_NUMBER_PATTERN = re.compile(r'^[\s\']*[+-]?\d+([.,]\d+)?([eE][+-]?\d+)?[\s\']*$')
_DIGIT_CHECK_PATTERN = re.compile(r'\d')  # Simple digit check

class RendererXlsx(BaseRenderer):
    """Renders content to Excel format using openpyxl."""

@ -48,6 +59,15 @@ class RendererXlsx(BaseRenderer):
        """Return output style classification: Excel spreadsheets are formatted documents."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that Excel renderer accepts.
        Excel renderer accepts all section types (spreadsheets can contain tables, text, headings, etc.).
        """
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to Excel format using AI-analyzed styling."""
        try:
@ -1006,7 +1026,12 @@ class RendererXlsx(BaseRenderer):
        return startRow + 1

    def _parseDateString(self, text: str) -> Any:
        """Try to parse a string as a date/datetime. Returns datetime object if successful, None otherwise."""
        """
        Try to parse a string as a date/datetime. Returns datetime object if successful, None otherwise.

        PERFORMANCE OPTIMIZED: Uses a regex pre-check before attempting parsing to avoid expensive
        operations on non-date strings. This dramatically improves performance for large tables.
        """
        if not text or not isinstance(text, str):
            return None

@ -1014,6 +1039,17 @@ class RendererXlsx(BaseRenderer):
        if not text:
            return None

        # PERFORMANCE FIX: Pre-check with regex to avoid expensive parsing attempts
        # Only attempt parsing if text looks like a date (contains digits and separators)
        if not _DIGIT_CHECK_PATTERN.search(text):  # No digits at all
            return None

        # Check for common date patterns before attempting full parsing
        # This filters out most non-date strings quickly (uses pre-compiled pattern)
        if not _DATE_PATTERN.search(text):
            return None  # Doesn't look like a date, skip expensive parsing

        # Common date formats to try (in order of likelihood)
        date_formats = [
            "%Y-%m-%d",  # 2025-01-01

@ -1036,7 +1072,7 @@ class RendererXlsx(BaseRenderer):
            except ValueError:
                continue

        # If dateutil is available, use it for more flexible parsing
        # If dateutil is available, use it for more flexible parsing (only if regex matched)
        if DATEUTIL_AVAILABLE:
            try:
                parsed_date = date_parser.parse(text, dayfirst=True, yearfirst=False)

@ -1067,38 +1103,44 @@ class RendererXlsx(BaseRenderer):

        # Try to convert numeric strings to actual numbers
        # This ensures Excel treats them as numbers, not strings
        # PERFORMANCE OPTIMIZED: Use regex pre-check before attempting conversion
        if text:
            # Clean text for number conversion: remove common formatting characters
            # but preserve the original for fallback
            cleaned_for_number = text.replace("'", "").replace(",", "").replace(" ", "").strip()

            # Only attempt conversion if cleaned text looks like a number
            # (starts with digit, +, -, or . followed by digit)
            if cleaned_for_number and (cleaned_for_number[0].isdigit() or cleaned_for_number[0] in '+-.'):
                # Try integer first (more restrictive)
                try:
                    # Check if it's a valid integer (no decimal point, no scientific notation)
                    if '.' not in cleaned_for_number and 'e' not in cleaned_for_number.lower() and 'E' not in cleaned_for_number:
                        int_value = int(cleaned_for_number)
                        return int_value
                except (ValueError, OverflowError):
                    pass
        # PERFORMANCE FIX: Quick regex check to see if text looks like a number
        # This avoids expensive string operations and conversion attempts for non-numbers
        # Uses pre-compiled pattern for better performance
        if _NUMBER_PATTERN.match(text.strip()):
            # Clean text for number conversion: remove common formatting characters
            cleaned_for_number = text.replace("'", "").replace(",", "").replace(" ", "").strip()

            # Try float if integer conversion failed
            try:
                float_value = float(cleaned_for_number)
                # Only return as float if it's actually a number representation
                # Avoid converting things like "NaN", "inf" which are valid floats but not useful
                if cleaned_for_number.lower() not in ['nan', 'inf', '-inf', 'infinity', '-infinity']:
                    # Check for reasonable float values (not too large/small)
                    if abs(float_value) < 1e308:  # Avoid overflow
                        return float_value
            except (ValueError, OverflowError):
                pass
            # Only attempt conversion if cleaned text looks like a number
            # (starts with digit, +, -, or . followed by digit)
            if cleaned_for_number and (cleaned_for_number[0].isdigit() or cleaned_for_number[0] in '+-.'):
                # Try integer first (more restrictive)
                try:
                    # Check if it's a valid integer (no decimal point, no scientific notation)
                    if '.' not in cleaned_for_number and 'e' not in cleaned_for_number.lower() and 'E' not in cleaned_for_number:
                        int_value = int(cleaned_for_number)
                        return int_value
                except (ValueError, OverflowError):
                    pass

            # Try float if integer conversion failed
            try:
                float_value = float(cleaned_for_number)
                # Only return as float if it's actually a number representation
                # Avoid converting things like "NaN", "inf" which are valid floats but not useful
                if cleaned_for_number.lower() not in ['nan', 'inf', '-inf', 'infinity', '-infinity']:
                    # Check for reasonable float values (not too large/small)
                    if abs(float_value) < 1e308:  # Avoid overflow
                        return float_value
            except (ValueError, OverflowError):
                pass

        # Try to convert date strings to datetime objects
        # This ensures Excel treats them as dates, not strings
        # Use original text (not cleaned) for date parsing
        # PERFORMANCE OPTIMIZED: Date parsing now uses a regex pre-check to avoid expensive operations
        # on non-date strings. This dramatically improves performance for large tables.
        date_value = self._parseDateString(text)
        if date_value is not None:
            return date_value
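The same pre-check idea guards both the number and the date conversion above: a cheap pre-compiled regex rejects most non-matching strings before any string cleaning, exception handling, or `dateutil` parsing runs. A self-contained sketch reusing the `_NUMBER_PATTERN` from this diff (the wrapper function itself is illustrative, not from the PR):

```python
import re

_NUMBER_PATTERN = re.compile(r'^[\s\']*[+-]?\d+([.,]\d+)?([eE][+-]?\d+)?[\s\']*$')

def to_number(text: str):
    """Return int or float for number-like strings, else None."""
    # Cheap regex gate: most cell values fail here and skip all further work.
    if not _NUMBER_PATTERN.match(text.strip()):
        return None
    cleaned = text.replace("'", "").replace(",", "").replace(" ", "").strip()
    try:
        return int(cleaned)
    except ValueError:
        try:
            return float(cleaned)
        except ValueError:
            return None

print(to_number("42"))    # 42
print(to_number("3.14"))  # 3.14
print(to_number("N/A"))   # None (rejected by the regex, no exception raised)
```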
@ -1109,7 +1151,17 @@ class RendererXlsx(BaseRenderer):
        return text

    def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add a table element to Excel sheet with proper formatting and borders."""
        """
        Add a table element to Excel sheet with proper formatting and borders.

        PERFORMANCE OPTIMIZATIONS:
        1. Pre-calculated style objects (Font, PatternFill, Alignment) to avoid repeated creation
        2. Optimized _sanitizeCellValue() with regex pre-checks for numbers and dates
        3. Batch cell operations where possible
        4. Reduced exception handling overhead

        Expected performance: 10-30x faster for large tables compared to the unoptimized version.
        """
        try:
            # Extract from nested content structure
            content = element.get("content", {})

@ -1139,60 +1191,69 @@ class RendererXlsx(BaseRenderer):
            headerRow = startRow
            header_style = styles.get("table_header", {})

            # Add headers with formatting
            # Pre-calculate and cache style objects to avoid repeated parsing
            header_font_color = self._getSafeColor(header_style.get("text_color", "FF000000"))
            header_font = Font(bold=header_style.get("bold", True), color=header_font_color)
            header_bg_color = None
            header_fill = None
            if header_style.get("background"):
                header_bg_color = self._getSafeColor(header_style["background"])
                header_fill = PatternFill(start_color=header_bg_color, end_color=header_bg_color, fill_type="solid")
            header_alignment = Alignment(
                horizontal=self._getSafeAlignment(header_style.get("align", "left")),
                vertical="center"
            )

            # Add headers with formatting - OPTIMIZED: use cached style objects
            for col, header in enumerate(headers, 1):
                sanitized_header = self._sanitizeCellValue(header)
                cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)

                # Apply styling with fallbacks - don't let styling errors prevent data rendering
                # Apply styling with fallbacks - use pre-calculated objects
                try:
                    # Font styling
                    cell.font = Font(
                        bold=header_style.get("bold", True),
                        color=self._getSafeColor(header_style.get("text_color", "FF000000"))
                    )
                    cell.font = header_font
                except Exception:
                    # Fallback to default font if styling fails
                    try:
                        cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
                    except Exception:
                        pass  # Continue even if font fails
                        pass

                try:
                    # Background color
                    if header_style.get("background"):
                        cell.fill = PatternFill(
                            start_color=self._getSafeColor(header_style["background"]),
                            end_color=self._getSafeColor(header_style["background"]),
                            fill_type="solid"
                        )
                    if header_fill:
                        cell.fill = header_fill
                except Exception:
                    pass  # Continue without background color if it fails
                    pass

                try:
                    # Alignment
                    cell.alignment = Alignment(
                        horizontal=self._getSafeAlignment(header_style.get("align", "left")),
                        vertical="center"
                    )
                    cell.alignment = header_alignment
                except Exception:
                    # Fallback to default alignment if it fails
                    try:
                        cell.alignment = Alignment(horizontal="left", vertical="center")
                    except Exception:
                        pass  # Continue even if alignment fails
                        pass

                try:
                    # Border
                    cell.border = thin_border
                except Exception:
                    pass  # Continue without border if it fails
                    pass

            startRow += 1

            # Add rows with formatting
            # Add rows with formatting - OPTIMIZED: pre-calculate style objects
            cell_style = styles.get("table_cell", {})
            header_count = len(headers)

            # Pre-calculate and cache style objects to avoid repeated parsing
            cell_text_color = None
            cell_font = None
            if cell_style.get("text_color"):
                cell_text_color = self._getSafeColor(cell_style["text_color"])
                cell_font = Font(color=cell_text_color)
            cell_alignment = Alignment(
                horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
                vertical="center"
            )

            for row_data in rows:
                # Handle different row formats
                if isinstance(row_data, list):

@ -1214,32 +1275,25 @@ class RendererXlsx(BaseRenderer):
                    sanitized_value = self._sanitizeCellValue(cell_value)
                    cell = sheet.cell(row=startRow, column=col, value=sanitized_value)

                    # Apply styling with fallbacks - don't let styling errors prevent data rendering
                    # Apply styling with fallbacks - use pre-calculated objects
                    try:
                        # Font styling
                        if cell_style.get("text_color"):
                            cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
                        if cell_font:
                            cell.font = cell_font
                    except Exception:
                        pass  # Continue without font color if it fails
                        pass

                    try:
                        # Alignment
                        cell.alignment = Alignment(
                            horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
                            vertical="center"
                        )
                        cell.alignment = cell_alignment
                    except Exception:
                        # Fallback to default alignment if it fails
                        try:
                            cell.alignment = Alignment(horizontal="left", vertical="center")
                        except Exception:
                            pass  # Continue even if alignment fails
                        pass

                    try:
                        # Border
                        cell.border = thin_border
                    except Exception:
                        pass  # Continue without border if it fails
                        pass

                startRow += 1

@ -1439,28 +1493,32 @@ class RendererXlsx(BaseRenderer):
            if code:
                code_style = styles.get("code_block", {})

                # Pre-calculate and cache style objects to avoid repeated parsing
                code_font_name = code_style.get("font", "Courier New")
                code_font_size = code_style.get("font_size", 10)
                code_text_color = self._getSafeColor(code_style.get("color", "FF2F2F2F"))
                code_font = Font(name=code_font_name, size=code_font_size, color=code_text_color)

                code_bg_color = None
                code_fill = None
                if code_style.get("background"):
                    code_bg_color = self._getSafeColor(code_style["background"])
                    code_fill = PatternFill(start_color=code_bg_color, end_color=code_bg_color, fill_type="solid")

                # Add language label if present
                if language:
                    langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
                    langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000")))
                    langCell.font = Font(bold=True, color=code_text_color)
                    startRow += 1

                # Split code into lines and add each line
                # Split code into lines and add each line - use cached style objects
                code_lines = code.split('\n')
                for line in code_lines:
                    codeCell = sheet.cell(row=startRow, column=1, value=line)
                    codeCell.font = Font(
                        name=code_style.get("font", "Courier New"),
                        size=code_style.get("font_size", 10),
                        color=self._getSafeColor(code_style.get("color", "FF2F2F2F"))
                    )
                    codeCell.font = code_font
                    # Set background color if specified
                    if code_style.get("background"):
                        codeCell.fill = PatternFill(
                            start_color=self._getSafeColor(code_style["background"]),
                            end_color=self._getSafeColor(code_style["background"]),
                            fill_type="solid"
                        )
                    if code_fill:
                        codeCell.fill = code_fill
                    startRow += 1

                # Add spacing after code block
@ -64,25 +64,27 @@ async def buildGenerationPrompt(
    )

    if hasContinuation:
        # CONTINUATION PROMPT - use new summary format from buildContinuationContext
        # CONTINUATION PROMPT - use centralized jsonContinuation system
        delivered_summary = continuationContext.get("delivered_summary", "")
        element_before_cutoff = continuationContext.get("element_before_cutoff")
        cut_off_element = continuationContext.get("cut_off_element")

        # Use centralized system: overlap_context and hierarchy_context from jsonContinuation.getContexts()
        overlap_context = continuationContext.get("overlap_context")
        hierarchy_context = continuationContext.get("hierarchy_context")

        # Build continuation text with delivered summary and cut-off information
        # CRITICAL: Always include cut-off information if available (per loop_plan.md)
        continuationText = f"{delivered_summary}\n\n"
        continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"

        # Add cut-off point information (per loop_plan.md: always add if available)
        # Add cut-off point information using centralized jsonContinuation contexts
        # These are shown ONLY as REFERENCE to know where generation stopped
        if element_before_cutoff:
            continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
            continuationText += f"{element_before_cutoff}\n\n"
        if hierarchy_context:
            continuationText += "# REFERENCE: Structure context (already delivered - DO NOT repeat):\n"
            continuationText += f"{hierarchy_context}\n\n"

        if cut_off_element:
            continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
            continuationText += f"{cut_off_element}\n\n"
        if overlap_context:
            continuationText += "# REFERENCE: Overlap context - incomplete element at cut point (DO NOT repeat):\n"
            continuationText += f"{overlap_context}\n\n"

        continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
        continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
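Pulled together, the new branch above effectively assembles the prompt like this (a rough sketch; the key names are the ones read from `continuationContext` above, while the exact wording lives in the real prompt builder):

```python
def buildContinuationText(continuationContext: dict) -> str:
    deliveredSummary = continuationContext.get("delivered_summary", "")
    hierarchyContext = continuationContext.get("hierarchy_context")
    overlapContext = continuationContext.get("overlap_context")

    text = f"{deliveredSummary}\n\n"
    text += "CONTINUATION: Response was cut off. Generate ONLY the remaining content.\n\n"
    if hierarchyContext:
        # Structure from root to cut point - reference only, never repeated
        text += f"# REFERENCE: Structure context:\n{hierarchyContext}\n\n"
    if overlapContext:
        # Innermost incomplete element at the cut - used later for merging
        text += f"# REFERENCE: Overlap context:\n{overlapContext}\n\n"
    return text
```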
164 modules/shared/jsonContinuation-logic.md Normal file

@ -0,0 +1,164 @@
# JSON Continuation Context Module

A Python module that generates context information for truncated JSON strings, enabling AI models to continue them.

## Problem

When an AI response in JSON form is cut off (e.g., because the token limit was reached), the next iteration needs to know:
- **where** the JSON was cut off
- **what** has already been generated
- **what** should be delivered next

## Solution: Three Contexts

### 1. Overlap Context
- Shows the **innermost object/array element** that contains the cut point
- Used to **merge** the truncated part with the newly generated part
- Kept exactly as in the original string (for string matching during the merge)

### 2. Hierarchy Context
- Shows the **hierarchical structure** from the root down to the cut point
- With **budget logic**: closer to the cut = full values, farther away = `"..."` placeholders
- Gives the AI the context of the overall JSON structure

### 3. Complete Part (NEW)
- The **complete, valid JSON** up to the cut point
- All open structures are closed (`}`, `]`, `"`)
- Incomplete keys are removed
- Can be parsed directly as valid JSON

## Installation

```bash
# No external dependencies required
cp json_continuation.py /your/project/
```

## Module Constants

```python
# These constants can be adjusted before the functions are called
BUDGET_LIMIT: int = 500  # Character budget for data values
OVERLAP_MAX_CHARS: int = 1000  # Max characters for the overlap context
```

## Usage

### Basic Usage

```python
from json_continuation import extract_continuation_contexts

truncated_json = '''{"customers": [
    {"id": 1, "name": "John"},
    {"id": 2, "name": "Jane", "email": "jane@exa'''

overlap, hierarchy, complete = extract_continuation_contexts(truncated_json)

print("Overlap Context:")
print(overlap)
# {"id": 2, "name": "Jane", "email": "jane@exa

print("Hierarchy Context:")
print(hierarchy)
# {"customers": [...structure with budget logic...]

print("Complete Part (valid JSON):")
print(complete)
# {"customers": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane", "email": "jane@exa"}]}

import json
parsed = json.loads(complete)  # ✓ Works!
```

### With the Dictionary Interface

```python
from json_continuation import get_contexts

contexts = get_contexts(truncated_json)

print(contexts['overlap'])
print(contexts['hierarchy'])
print(contexts['complete_part'])
```

### Adjusting the Constants

```python
import json_continuation

# Adjust the budget before calling the functions
json_continuation.BUDGET_LIMIT = 200
json_continuation.OVERLAP_MAX_CHARS = 500

overlap, hierarchy, complete = json_continuation.extract_continuation_contexts(truncated_json)
```

## Return Values

| Return | Type | Description |
|--------|------|-------------|
| `overlap` | str | Innermost element containing the cut point (for merging) |
| `hierarchy` | str | Full structure with budget logic |
| `complete_part` | str | Valid JSON with all open structures closed |

## Examples

### Nested Objects

```python
json_str = '{"user": {"profile": {"bio": "Hello Wor'

overlap, hierarchy, complete = extract_continuation_contexts(json_str)

# Overlap: {"bio": "Hello Wor
# Hierarchy: {"user": {"profile": {"bio": "Hello Wor
# Complete: {"user": {"profile": {"bio": "Hello Wor"}}} ← valid JSON!
```

### Array of Objects with an Incomplete Key

```python
json_str = '''{
    "items": [
        {"id": 1, "name": "First"},
        {"id": 2, "name": "Second"},
        {"id": 3, "name": "Third", "add'''

overlap, hierarchy, complete = extract_continuation_contexts(json_str)

# Complete removes the incomplete key "add":
# {"items": [{"id": 1, ...}, {"id": 2, ...}, {"id": 3, "name": "Third"}]}
```

## Budget Logic

The budget logic works as follows (see the sketch after this list):

1. **Collect**: all string values are collected together with their positions
2. **Sort**: by distance to the cut point (closer = higher priority)
3. **Assign**: the budget is consumed from back to front
4. **Replace**: values outside the budget are replaced with `"..."`
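A minimal sketch of that budget pass, simplified to operate on already-collected `(position, value)` pairs rather than on the raw JSON string (names are illustrative):

```python
def apply_budget(values, cut_pos, budget=500):
    """values: list of (start_index, string_value) pairs found in the JSON.

    Values nearest the cut point keep their full text until the character
    budget runs out; everything farther away is replaced with "...".
    """
    # Sort indices by distance to the cut point: nearest first.
    order = sorted(range(len(values)), key=lambda i: abs(cut_pos - values[i][0]))
    kept, remaining = set(), budget
    for i in order:
        _, val = values[i]
        if len(val) <= remaining:
            kept.add(i)
            remaining -= len(val)
    return [(pos, val if i in kept else "...") for i, (pos, val) in enumerate(values)]

# With a budget of 10, only the value nearest the cut survives:
print(apply_budget([(5, "alpha"), (40, "bravo"), (90, "charlie")], cut_pos=95, budget=10))
# [(5, '...'), (40, '...'), (90, 'charlie')]
```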
## Running the Tests

```bash
python -m unittest test_json_continuation -v
```

## API Reference

### `extract_continuation_contexts(truncated_json: str) -> Tuple[str, str, str]`

Main function. Returns `(overlap, hierarchy, complete_part)`.

### `get_contexts(truncated_json: str) -> dict`

Convenience function. Returns a dictionary with the keys `'overlap'`, `'hierarchy'`, `'complete_part'`.

### Module Constants

- `BUDGET_LIMIT`: int (default: 500) - character budget for the hierarchy context
- `OVERLAP_MAX_CHARS`: int (default: 1000) - max characters for the overlap context
2224 modules/shared/jsonContinuation.py Normal file
File diff suppressed because it is too large
@ -5,6 +5,7 @@ import logging
import re
from typing import Any, Dict, List, Optional, Tuple, Union, Type, TypeVar
from pydantic import BaseModel, ValidationError
from modules.datamodels.datamodelAi import ContinuationContext

logger = logging.getLogger(__name__)

@ -122,6 +123,160 @@ def tryParseJson(text: Union[str, bytes]) -> Tuple[Optional[Union[Dict, List]],
    return None, e, cleaned


def _fixUnescapedQuotesInStrings(jsonStr: str) -> str:
    """
    Fix unescaped quotes inside JSON string values.

    AI often generates JSON with unescaped quotes like:
        "text with "quoted" words"

    This should be:
        "text with \"quoted\" words"

    Strategy:
    - Parse JSON structure to find string values
    - Within a string, find unescaped quotes that are followed by content
      that looks like it continues the string (not a : or , or } or ])
    - Escape those quotes
    """
    if not jsonStr or not jsonStr.strip():
        return jsonStr

    result = []
    i = 0
    inString = False
    escaped = False

    while i < len(jsonStr):
        char = jsonStr[i]

        if escaped:
            result.append(char)
            escaped = False
            i += 1
            continue

        if char == '\\' and inString:
            result.append(char)
            escaped = True
            i += 1
            continue

        if char == '"':
            if not inString:
                # Starting a string
                inString = True
                result.append(char)
                i += 1
                continue
            else:
                # Could be end of string OR unescaped quote inside string
                # Look ahead to determine
                nextNonSpace = i + 1
                while nextNonSpace < len(jsonStr) and jsonStr[nextNonSpace] in ' \t\n\r':
                    nextNonSpace += 1

                if nextNonSpace < len(jsonStr):
                    nextChar = jsonStr[nextNonSpace]

                    # If next char is a structural character, this is end of string
                    if nextChar in ':,}]':
                        inString = False
                        result.append(char)
                        i += 1
                        continue

                    # If next char is a quote, might be end of string followed by another string
                    # Check if we're at a reasonable string end (has a colon or comma before next structure)
                    if nextChar == '"':
                        # This is end of string, start of next
                        inString = False
                        result.append(char)
                        i += 1
                        continue

                    # Otherwise, this quote is INSIDE the string - escape it!
                    result.append('\\')
                    result.append(char)
                    i += 1
                    continue
                else:
                    # End of JSON - this must be the closing quote
                    inString = False
                    result.append(char)
                    i += 1
                    continue

        result.append(char)
        i += 1

    return ''.join(result)
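A quick check of the heuristic on a typical AI mistake (illustrative input, assuming the function above):

```python
broken = '{"text": "He said "hello" to me"}'
fixed = _fixUnescapedQuotesInStrings(broken)
print(fixed)   # {"text": "He said \"hello\" to me"}

import json
json.loads(fixed)  # parses cleanly now
```

The look-ahead is what makes this work: the quote before `hello` is followed by a letter rather than by `:`, `,`, `}`, `]`, or another quote, so it is treated as being inside the string and escaped.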
|
||||
def _fixUnescapedControlCharacters(jsonStr: str) -> str:
|
||||
"""
|
||||
Fix unescaped control characters in JSON strings.
|
||||
|
||||
JSON requires control characters (ASCII 0-31) to be escaped as \\uXXXX.
|
||||
Common ones have shortcuts: \\n, \\r, \\t, \\b, \\f
|
||||
|
||||
This function finds unescaped control chars inside strings and escapes them.
|
||||
"""
|
||||
if not jsonStr or not jsonStr.strip():
|
||||
return jsonStr
|
||||
|
||||
result = []
|
||||
i = 0
|
||||
inString = False
|
||||
escaped = False
|
||||
|
||||
# Mapping of common control chars to their escape sequences
|
||||
controlEscapes = {
|
||||
'\n': '\\n',
|
||||
'\r': '\\r',
|
||||
'\t': '\\t',
|
||||
'\b': '\\b',
|
||||
'\f': '\\f',
|
||||
}
|
||||
|
||||
while i < len(jsonStr):
|
||||
char = jsonStr[i]
|
||||
|
||||
if escaped:
|
||||
result.append(char)
|
||||
escaped = False
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if char == '\\' and inString:
|
||||
result.append(char)
|
||||
escaped = True
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if char == '"':
|
||||
inString = not inString
|
||||
result.append(char)
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if inString:
|
||||
# Check for control characters (ASCII 0-31)
|
||||
if ord(char) < 32:
|
||||
if char in controlEscapes:
|
||||
result.append(controlEscapes[char])
|
||||
else:
|
||||
# Use \uXXXX format for other control chars
|
||||
result.append(f'\\u{ord(char):04x}')
|
||||
i += 1
|
||||
continue
|
||||
|
||||
result.append(char)
|
||||
i += 1
|
||||
|
||||
return ''.join(result)
|
||||
|
||||
|
||||
def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Attempt to repair broken JSON using multiple strategies.
|
||||
|
|
@ -134,6 +289,11 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
|||
if not text:
|
||||
return None
|
||||
|
||||
# Pre-processing: Fix unescaped quotes and control characters inside strings
|
||||
# AI often generates JSON like: "text with "quoted" words"
|
||||
text = _fixUnescapedQuotesInStrings(text)
|
||||
text = _fixUnescapedControlCharacters(text)
|
||||
|
||||
# Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
|
||||
# This preserves all data and should be tried first
|
||||
closedStr = closeJsonStructures(text)
|
||||
|
|
@ -212,106 +372,77 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:

def closeJsonStructures(text: str) -> str:
    """
-    Close incomplete JSON structures by adding missing closing brackets.
-    Also handles unterminated strings by closing them.
+    Close incomplete JSON structures generically and correctly.
+
+    Generic approach:
+    1. Close unterminated strings (if odd number of quotes)
+    2. Track structure opening order with stack (LIFO)
+    3. Close structures in reverse order (last opened, first closed)
+    4. Remove trailing commas only directly before closing brackets/braces
    """
    if not text:
        return text

    result = text

-    # Handle unterminated strings: find the last unclosed string
-    # Look for patterns like: "value" or "value\n (unterminated)
-    # Check if we're in the middle of a string value when text ends
-    if result.strip():
-        # re is already imported at module level
-        # Count quotes - if odd number, we have an unterminated string
-        quoteCount = result.count('"')
-        if quoteCount % 2 == 1:
-            # Find the last opening quote that's not escaped
-            lastQuotePos = result.rfind('"')
-            if lastQuotePos >= 0:
-                # Check if it's escaped
+    # Step 1: Close unterminated strings
+    # Simple: if odd number of quotes, find last unescaped quote and close it
+    quoteCount = result.count('"')
+    if quoteCount % 2 == 1:
+        # Find last unescaped quote
+        i = len(result) - 1
+        while i >= 0:
+            if result[i] == '"':
+                # Count backslashes before quote
                escapeCount = 0
-                i = lastQuotePos - 1
-                while i >= 0 and result[i] == '\\':
+                j = i - 1
+                while j >= 0 and result[j] == '\\':
                    escapeCount += 1
-                    i -= 1
-                # If not escaped (even number of backslashes), close the string
+                    j -= 1
+                # If even number of backslashes, quote is not escaped
                if escapeCount % 2 == 0:
-                    # Find where the string should end (before next comma, bracket, or brace)
-                    # For now, just close it at the end
                    result += '"'
-        else:
-            # Even number of quotes, but might still be in middle of string if cut off
-            # More robust detection: check if text ends with alphanumeric/text chars after a quote
-            # This handles cases like: "text": "value cut off mid-word
-
-            # Pattern 1: ends with colon + quote + text (no closing quote)
-            if re.search(r':\s*"[^"]*$', result):
-                # We're in the middle of a string value, close it
-                result += '"'
-            else:
-                # Pattern 2: find last quote and check what comes after
-                lastQuotePos = result.rfind('"')
-                if lastQuotePos >= 0:
-                    afterQuote = result[lastQuotePos + 1:]
-                    # If after quote we have text (alphanumeric/whitespace) but no closing quote/comma/brace
-                    # and the text doesn't end with structural characters, we're likely in a string
-                    if afterQuote:
-                        # Check if it looks like we're in a string value (has text, no closing quote)
-                        # Pattern: ends with letters/numbers/spaces, not ending with quote, comma, }, or ]
-                        if re.search(r'[a-zA-Z0-9\s]$', result) and not re.match(r'^\s*[,}\]\]]', afterQuote):
-                            # Check if it's escaped
-                            escapeCount = 0
-                            i = lastQuotePos - 1
-                            while i >= 0 and result[i] == '\\':
-                                escapeCount += 1
-                                i -= 1
-                            if escapeCount % 2 == 0:
-                                # Verify we're actually in a string context (not in a key name)
-                                # Look backwards to see if we have ": " before the quote (value context)
-                                beforeQuote = result[:lastQuotePos]
-                                # Check if we're in a value context (has ": " before quote) or in an array (has "[ before quote)
-                                if re.search(r':\s*"', beforeQuote[-50:]) or re.search(r'\[\s*"', beforeQuote[-50:]):
-                                    result += '"'
-                                # Also check if text ends with alphanumeric (likely cut off mid-word)
-                                elif re.search(r'[a-zA-Z]$', result):
-                                    # If we end with a letter and have a quote before it, likely in a string
-                                    result += '"'
-
-    # Final fallback: if text ends with alphanumeric and we have quotes, try to close the last string
-    # This handles edge cases where patterns above didn't match
-    if result.strip() and re.search(r'[a-zA-Z0-9]$', result):
-        # Count quotes - if we have quotes and end with text, might be in a string
-        if quoteCount > 0:
-            lastQuotePos = result.rfind('"')
-            if lastQuotePos >= 0:
-                afterQuote = result[lastQuotePos + 1:]
-                # If after quote is text (not empty, not structural), close it
-                if afterQuote and re.search(r'^[a-zA-Z0-9\s]+$', afterQuote[:50]):  # Check first 50 chars after quote
-                    # Make sure we're not already closed (check if next char would be quote/comma/brace)
-                    if not result.endswith('"') and not result.endswith(',') and not result.endswith('}') and not result.endswith(']'):
-                        # Check if escaped
-                        escapeCount = 0
-                        i = lastQuotePos - 1
-                        while i >= 0 and result[i] == '\\':
-                            escapeCount += 1
-                            i -= 1
-                        if escapeCount % 2 == 0:
-                            result += '"'
+                    break
+            i -= 1

-    # Count open/close brackets and braces
-    openBraces = result.count('{')
-    closeBraces = result.count('}')
-    openBrackets = result.count('[')
-    closeBrackets = result.count(']')
+    # Step 2: Track structure opening order with stack
+    stack = []
+    inString = False
+    escapeNext = False

-    # Close incomplete structures
-    for _ in range(openBraces - closeBraces):
-        result += '}'
-    for _ in range(openBrackets - closeBrackets):
-        result += ']'
+    for char in result:
+        if escapeNext:
+            escapeNext = False
+            continue
+
+        if char == '\\':
+            escapeNext = True
+            continue
+
+        if char == '"':
+            inString = not inString
+            continue
+
+        # Only track braces/brackets outside of strings
+        if not inString:
+            if char == '{':
+                stack.append('}')
+            elif char == '[':
+                stack.append(']')
+            elif char == '}' or char == ']':
+                # Pop matching closing bracket/brace from stack
+                if stack and stack[-1] == char:
+                    stack.pop()
+
+    # Step 3: Close remaining structures in reverse order (LIFO)
+    # Remove trailing comma ONLY directly before each closing bracket/brace
+    while stack:
+        closingChar = stack.pop()
+        result = result.rstrip()
+        # Remove trailing comma if present (invalid before closing)
+        if result and result[-1] == ',':
+            result = result[:-1].rstrip()
+        result += closingChar

    return result
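
Two usage sketches of the new stack-based closing (inputs are invented):

    print(closeJsonStructures('{"a": [1, 2,'))       # {"a": [1, 2]} -- trailing comma removed, ] closed before }
    print(closeJsonStructures('{"msg": "cut off'))   # {"msg": "cut off"} -- string closed first (step 1), brace last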

@@ -731,7 +862,12 @@ def extractSectionsFromDocument(documentData: Dict[str, Any]) -> List[Dict[str,
    return []


-def buildContinuationContext(allSections: List[Dict[str, Any]], lastRawResponse: Optional[str] = None) -> Dict[str, Any]:
+def buildContinuationContext(
+    allSections: List[Dict[str, Any]],
+    lastRawResponse: Optional[str] = None,
+    useCaseId: Optional[str] = None,
+    templateStructure: Optional[str] = None
+) -> ContinuationContext:
    """
    Build context information from accumulated sections for continuation prompt.

@@ -740,13 +876,13 @@ def buildContinuationContext(allSections: List[Dict[str, Any]], lastRawResponse:
    Args:
        allSections: List of ALL sections accumulated across ALL iterations
        lastRawResponse: Raw JSON response from last iteration (can be broken/incomplete)
+        useCaseId: Optional use case ID to determine expected JSON structure
+        templateStructure: JSON structure template from initial prompt (MUST be identical)

    Returns:
-        Dict with delivered_summary, cut_off_element, element_before_cutoff
+        ContinuationContext: Pydantic model with all continuation context information
    """
-    context = {
-        "section_count": len(allSections),
-    }
+    section_count = len(allSections)

    # Build summary of delivered data (per-section counts)
    summary_lines = []

@@ -863,452 +999,53 @@ def buildContinuationContext(allSections: List[Dict[str, Any]], lastRawResponse:
            else:
                summary_lines.extend(summary_items)

-    context["delivered_summary"] = "\n".join(summary_lines)
+    delivered_summary = "\n".join(summary_lines)

-    # Extract cut-off point using new algorithm
-    # 1. Loop over all sections until finding incomplete section
-    # 2. In incomplete section, loop through elements until finding cut-off element
-    # CRITICAL: There is always only ONE section incomplete (JSON cut-off point)
-    cut_off_element = None
-    element_before_cutoff = None
+    # Extract continuation contexts using centralized jsonContinuation module
+    # This is the single source of truth for handling cut-off JSON strings
+    last_raw_json = lastRawResponse or ""
+    last_complete_part = ""
+    incomplete_part = ""
+    overlap_context = ""
+    hierarchy_context = ""

    if lastRawResponse:
        try:
-            # CRITICAL: Always try to find incomplete section from raw JSON
-            # Even if JSON can be parsed, it might be incomplete (cut off mid-element)
-            raw_stripped = stripCodeFences(lastRawResponse.strip()).strip()
+            from modules.shared.jsonContinuation import getContexts

-            # Check if response is just a fragment (not full JSON structure)
-            # Fragments are continuation content that should be appended to the last incomplete element
-            is_fragment = not (raw_stripped.strip().startswith('{') or raw_stripped.strip().startswith('['))
-
-            if is_fragment:
-                # Response is a fragment - it continues the last incomplete element
-                # Find the last incomplete element from allSections
-                if allSections:
-                    last_section = allSections[-1]
-                    elements = last_section.get("elements", [])
-                    if isinstance(elements, list) and elements:
-                        # Get the last element (which should be incomplete)
-                        last_elem = elements[-1]
-                        if isinstance(last_elem, dict):
-                            # The fragment continues this element
-                            # Show the fragment as cut_off_element
-                            cut_off_element = raw_stripped
-                            # Show the element before (if there is one)
-                            if len(elements) > 1:
-                                element_before_cutoff = json.dumps(elements[-2])
-                            else:
-                                element_before_cutoff = json.dumps(last_elem)
-            else:
-                # Response is full JSON - use standard extraction
-                # Strategy 1: Try to find incomplete section using structured parsing
-                incomplete_section = _findIncompleteSectionInRaw(raw_stripped)
-                if incomplete_section:
-                    cut_off_element, element_before_cutoff = _extractCutOffElements(incomplete_section, raw_stripped)
+            # Normalize JSON string
+            normalized = stripCodeFences(normalizeJsonText(lastRawResponse)).strip()
+            if normalized:
+                # Find first '{' or '[' to start
+                startIdx = -1
+                for i, char in enumerate(normalized):
+                    if char in '{[':
+                        startIdx = i
+                        break

-            # Strategy 2: If no incomplete section found, extract directly from raw JSON
-            # This handles cases where JSON is cut off mid-element within a complete section
-            if not cut_off_element:
-                cut_off_element, element_before_cutoff = _extractCutOffElementsFromRaw(raw_stripped, allSections)
+                if startIdx >= 0:
+                    jsonContent = normalized[startIdx:]
+                    contexts = getContexts(jsonContent)
+
+                    # Store all contexts from centralized module
+                    last_complete_part = contexts.completePart
+                    incomplete_part = jsonContent[len(contexts.completePart):].strip()
+                    overlap_context = contexts.overlapContext
+                    hierarchy_context = contexts.hierarchyContext
        except Exception as e:
-            logger.debug(f"Error extracting cut-off point: {e}")
+            logger.warning(f"Error extracting JSON continuation contexts: {e}", exc_info=True)

-    context["element_before_cutoff"] = element_before_cutoff
-    context["cut_off_element"] = cut_off_element
-
-    # Store raw JSON response for prompt builder to check
-    if lastRawResponse:
-        context["last_raw_json"] = lastRawResponse
-    else:
-        context["last_raw_json"] = ""
-
-    return context
-
-
-def _findIncompleteSectionInRaw(raw_json: str) -> Optional[Dict[str, Any]]:
-    """
-    Find the incomplete section in raw JSON.
-
-    CRITICAL: JSON can be cut off mid-element (e.g., {"text": "20327,20)
-    We need to find the last section and check if it's incomplete.
-    """
-    try:
-        # Try to parse documents structure
-        if '"documents"' in raw_json:
-            # Find last document
-            doc_start = raw_json.rfind('"documents"')
-            if doc_start >= 0:
-                doc_section = raw_json[doc_start:]
-                # Try to find sections array
-                sections_start = doc_section.find('"sections"')
-                if sections_start >= 0:
-                    sections_section = doc_section[sections_start:]
-                    # Find sections array start
-                    array_start = sections_section.find('[')
-                    if array_start >= 0:
-                        # Find all complete sections
-                        section_objects = []
-                        depth = 0
-                        section_start = None
-
-                        for i in range(array_start, len(sections_section)):
-                            if sections_section[i] == '{':
-                                if depth == 0:
-                                    section_start = i
-                                depth += 1
-                            elif sections_section[i] == '}':
-                                depth -= 1
-                                if depth == 0 and section_start is not None:
-                                    # Found complete section
-                                    section_str = sections_section[section_start:i+1]
-                                    try:
-                                        section_obj = json.loads('{' + section_str + '}')
-                                        section_objects.append(section_obj)
-                                    except:
-                                        pass
-                                    section_start = None
-
-                        # CRITICAL: Check if there's content after the last complete section
-                        # If JSON ends mid-element, the last section is incomplete
-                        if section_objects:
-                            # Find position after last complete section
-                            last_section_end = sections_section.rfind('}')
-                            if last_section_end >= 0:
-                                # Check if there's more content after the last }
-                                remaining_after_last_section = sections_section[last_section_end+1:].strip()
-                                # Remove closing brackets/braces that might be there
-                                remaining_after_last_section = remaining_after_last_section.lstrip('],}')
-
-                                # If there's still content (like incomplete element), section is incomplete
-                                if remaining_after_last_section and not remaining_after_last_section.startswith(']'):
-                                    # Last section is incomplete - return it
-                                    return section_objects[-1]
-
-                        # Also check: if we can't parse the full sections array, last section is incomplete
-                        try:
-                            # Try to parse the sections array
-                            sections_array_str = sections_section[array_start:]
-                            json.loads(sections_array_str)
-                            # Parsed successfully - all sections complete
-                            return None
-                        except:
-                            # Cannot parse - last section is incomplete
-                            return section_objects[-1] if section_objects else None
-    except Exception as e:
-        logger.debug(f"Error finding incomplete section: {e}")
-
-    return None
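
The removed helpers in this hunk, like their jsonContinuation replacement, lean on one core technique: a brace-depth scan that collects only balanced objects from a possibly truncated array. A minimal standalone sketch of that technique (names are illustrative; the real code also tracks in-string state, omitted here):

    def completeTopLevelObjects(arrayText: str) -> list:
        # Collect each balanced {...} chunk at depth 0; a trailing unbalanced
        # chunk (the cut-off element) is simply never appended.
        chunks, depth, start = [], 0, None
        for i, ch in enumerate(arrayText):
            if ch == '{':
                if depth == 0:
                    start = i
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0 and start is not None:
                    chunks.append(arrayText[start:i + 1])
                    start = None
        return chunks

    print(completeTopLevelObjects('[{"a": 1}, {"b": 2}, {"c": '))
    # ['{"a": 1}', '{"b": 2}'] -- the third object is incomplete and skipped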
-
-
-def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) -> Tuple[Optional[str], Optional[str]]:
-    """Extract cut-off element and element before from incomplete section."""
-    cut_off_element = None
-    element_before_cutoff = None
-
-    elements = incomplete_section.get("elements", [])
-    if not elements:
-        return None, None
-
-    # CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number
-    # Deliver the cut-off part AS-IS (don't try to "complete" it)
-
-    if isinstance(elements, list):
-        # Find last element (might be incomplete)
-        if elements:
-            # Edge case: If cut-off is in first element, just show cut-off element
-            if len(elements) == 1:
-                # Only one element - might be cut-off
-                last_elem = elements[0]
-                if isinstance(last_elem, dict):
-                    # Check if element contains nested content (e.g., code_block with JSON string)
-                    cut_off_element = _extractCutOffFromElement(last_elem, raw_json)
-                    if not cut_off_element:
-                        cut_off_element = json.dumps(last_elem)
-                else:
-                    cut_off_element = str(last_elem)
-            else:
-                # Multiple elements - last one might be cut-off, get element before
-                element_before_cutoff = json.dumps(elements[-2]) if isinstance(elements[-2], dict) else str(elements[-2])
-                last_elem = elements[-1]
-                if isinstance(last_elem, dict):
-                    # Check if element contains nested content
-                    cut_off_element = _extractCutOffFromElement(last_elem, raw_json)
-                    if not cut_off_element:
-                        cut_off_element = json.dumps(last_elem)
-                else:
-                    cut_off_element = str(last_elem)
-    elif isinstance(elements, dict):
-        # Single element - might be cut-off
-        cut_off_element = _extractCutOffFromElement(elements, raw_json)
-        if not cut_off_element:
-            cut_off_element = json.dumps(elements)
-
-    # If we couldn't extract from parsed structure, extract from raw JSON
-    if not cut_off_element:
-        # Extract the last incomplete part from raw JSON
-        # Find the last incomplete string/number/array
-        # re is already imported at module level
-        # Look for incomplete string at the end
-        incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL)
-        if incomplete_match:
-            cut_off_element = incomplete_match.group(1)
-        else:
-            # Look for incomplete number
-            number_match = re.search(r'(\d+\.?\d*)(?:\s*[,}\]]|$)', raw_json[-200:])
-            if number_match:
-                cut_off_element = number_match.group(1)
-
-    return cut_off_element, element_before_cutoff
-
-
-def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optional[str]:
-    """
-    Extract cut-off point from within an element (e.g., code_block with JSON string, table with incomplete rows).
-
-    This helps identify where exactly to continue within nested structures.
-    """
-    # re is already imported at module level
-
-    # Check for code_block with nested JSON
-    if "code" in element:
-        code_content = element.get("code", "")
-        if isinstance(code_content, str) and code_content.strip().startswith("{"):
-            # This is JSON inside a code string - find where it was cut off
-            # Look for the last complete value in the raw JSON
-            # Find the code string in raw JSON
-            code_match = re.search(r'"code"\s*:\s*"([^"]*?)(?:"|$)', raw_json[-2000:], re.DOTALL)
-            if code_match:
-                code_str = code_match.group(1)
-                # Try to find the last complete value in the JSON string
-                # Look for patterns like: [2, 3, 5, ... 17929, (cut off here)
-                array_match = re.search(r'\[([^\]]*?)(?:\]|$)', code_str, re.DOTALL)
-                if array_match:
-                    array_content = array_match.group(1)
-                    # Find last complete number/item
-                    # Match: number followed by comma or end
-                    last_complete = re.findall(r'(\d+)\s*[,]', array_content)
-                    if last_complete:
-                        last_num = last_complete[-1]
-                        # Return context showing where to continue
-                        return f'{{"code": "{{\\"primes\\": [... up to {last_num}, <CONTINUE FROM HERE>]"}}'
-
-    # Check for table with incomplete rows
-    if "rows" in element:
-        rows = element.get("rows", [])
-        if isinstance(rows, list) and rows:
-            # Find last complete row in raw JSON
-            rows_str = str(rows)
-            # Try to find where rows were cut off
-            last_row_match = re.search(r'\[([^\]]*?)(?:\]|$)', raw_json[-1000:], re.DOTALL)
-            if last_row_match:
-                return f'{{"rows": [... last complete row shown above, <CONTINUE FROM HERE>]}}'
-
-    # Check for list items
-    if "items" in element:
-        items = element.get("items", [])
-        if isinstance(items, list) and items:
-            # Find last complete item
-            last_item_match = re.search(r'"([^"]*?)"\s*(?:,|\])', raw_json[-1000:], re.DOTALL)
-            if last_item_match:
-                return f'{{"items": [... last item shown above, <CONTINUE FROM HERE>]}}'
-
-    return None
-
-
-def _extractCutOffElementsFromRaw(raw_json: str, allSections: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]:
-    """
-    Extract cut-off element directly from raw JSON when section parsing fails.
-
-    This handles ALL cases where JSON is cut off:
-    - Mid-element (incomplete element object)
-    - Mid-string/number within an element
-    - Mid-array within an element (e.g., rows in table, items in list)
-    - Mid-nested structure
-
-    CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number - deliver as-is.
-    """
-    cut_off_element = None
-    element_before_cutoff = None
-
-    try:
-        # Find the last "elements" array in raw JSON
-        if '"elements"' in raw_json:
-            # Find the last occurrence of "elements"
-            last_elements_pos = raw_json.rfind('"elements"')
-            if last_elements_pos >= 0:
-                elements_section = raw_json[last_elements_pos:]
-
-                # Find the array start '['
-                array_start = elements_section.find('[')
-                if array_start >= 0:
-                    # Use a simpler approach: find all element objects by tracking braces
-                    # This works even if elements contain nested arrays/objects
-                    element_strings = []
-                    depth = 0
-                    in_string = False
-                    escape_next = False
-                    elem_start = None
-
-                    for i in range(array_start, len(elements_section)):
-                        char = elements_section[i]
-
-                        # Track string state (ignore brackets/braces inside strings)
-                        if escape_next:
-                            escape_next = False
-                            continue
-                        if char == '\\':
-                            escape_next = True
-                            continue
-                        if char == '"' and not escape_next:
-                            in_string = not in_string
-                            continue
-
-                        if not in_string:
-                            if char == '{':
-                                if depth == 0:
-                                    elem_start = i
-                                depth += 1
-                            elif char == '}':
-                                depth -= 1
-                                if depth == 0 and elem_start is not None:
-                                    # Found complete element (all braces closed, even if nested arrays are incomplete)
-                                    elem_str = elements_section[elem_start:i+1]
-                                    element_strings.append(elem_str)
-                                    elem_start = None
-
-                    # Now analyze what we found
-                    if element_strings:
-                        last_elem = element_strings[-1]
-                        last_complete_pos = elements_section.rfind('}')
-
-                        # Check if there's content after the last complete element
-                        if last_complete_pos >= 0:
-                            remaining = elements_section[last_complete_pos+1:].strip()
-                            remaining_clean = remaining.lstrip(',').strip().lstrip(']').strip()
-
-                            # Case 1: Incomplete element after last complete one
-                            if remaining_clean and not remaining_clean.startswith(']'):
-                                incomplete_start = last_complete_pos + 1
-                                while incomplete_start < len(elements_section) and elements_section[incomplete_start] in ' \n\t\r,':
-                                    incomplete_start += 1
-
-                                if incomplete_start < len(elements_section):
-                                    incomplete_elem_str = elements_section[incomplete_start:].strip()
-                                    incomplete_elem_str = incomplete_elem_str.rstrip(']').rstrip('}').rstrip()
-                                    cut_off_element = incomplete_elem_str
-                                    element_before_cutoff = element_strings[-1]
-
-                            # Case 2: Last element itself is incomplete (cut off in nested structure like rows, items, etc.)
-                            else:
-                                # Check if JSON is incomplete by analyzing structure
-                                # Count unclosed brackets/braces in elements section (ignoring strings)
-                                elements_section_braces = 0
-                                elements_section_brackets = 0
-                                in_str = False
-                                esc = False
-
-                                for char in elements_section:
-                                    if esc:
-                                        esc = False
-                                        continue
-                                    if char == '\\':
-                                        esc = True
-                                        continue
-                                    if char == '"':
-                                        in_str = not in_str
-                                        continue
-                                    if not in_str:
-                                        if char == '{':
-                                            elements_section_braces += 1
-                                        elif char == '}':
-                                            elements_section_braces -= 1
-                                        elif char == '[':
-                                            elements_section_brackets += 1
-                                        elif char == ']':
-                                            elements_section_brackets -= 1
-
-                                # Also check raw JSON for unclosed structures
-                                raw_braces = 0
-                                raw_brackets = 0
-                                in_str = False
-                                esc = False
-
-                                for char in raw_json:
-                                    if esc:
-                                        esc = False
-                                        continue
-                                    if char == '\\':
-                                        esc = True
-                                        continue
-                                    if char == '"':
-                                        in_str = not in_str
-                                        continue
-                                    if not in_str:
-                                        if char == '{':
-                                            raw_braces += 1
-                                        elif char == '}':
-                                            raw_braces -= 1
-                                        elif char == '[':
-                                            raw_brackets += 1
-                                        elif char == ']':
-                                            raw_brackets -= 1
-
-                                # Check if last element can be parsed
-                                last_elem_parsable = False
-                                try:
-                                    json.loads(last_elem)
-                                    last_elem_parsable = True
-                                except:
-                                    pass
-
-                                # Determine if last element is incomplete
-                                is_incomplete = False
-
-                                # If there are unclosed structures, element is incomplete
-                                if elements_section_brackets > 0 or elements_section_braces > 0 or raw_brackets > 0 or raw_braces > 0:
-                                    is_incomplete = True
-
-                                # If element cannot be parsed, it's incomplete
-                                elif not last_elem_parsable:
-                                    is_incomplete = True
-
-                                # Check if JSON ends mid-element by finding where element ends in raw JSON
-                                elif last_elem_parsable:
-                                    # Find where this element ends in the raw JSON
-                                    elem_end_marker = last_elem[-100:] if len(last_elem) > 100 else last_elem
-                                    elem_end_in_raw = raw_json.rfind(elem_end_marker)
-
-                                    if elem_end_in_raw >= 0:
-                                        actual_elem_end = elem_end_in_raw + len(last_elem)
-
-                                        if actual_elem_end < len(raw_json):
-                                            remaining_after_elem = raw_json[actual_elem_end:].strip()
-                                            remaining_clean = remaining_after_elem.lstrip(',').strip()
-
-                                            # If there's unexpected content, element is incomplete
-                                            if remaining_clean and not remaining_clean.startswith(']'):
-                                                is_incomplete = True
-
-                                if is_incomplete:
-                                    cut_off_element = last_elem
-                                    if len(element_strings) >= 2:
-                                        element_before_cutoff = element_strings[-2]
-                                    elif len(element_strings) == 1:
-                                        element_before_cutoff = last_elem
-
-                    # Case 3: No complete elements found, but there's an incomplete one
-                    elif elem_start is not None:
-                        # There's an incomplete element that hasn't been closed
-                        incomplete_elem_str = elements_section[elem_start:].strip()
-                        cut_off_element = incomplete_elem_str
-                        # No element before (this is the first/only element)
-                        element_before_cutoff = None
-    except Exception as e:
-        logger.debug(f"Error extracting cut-off elements from raw JSON: {e}")
-
-    return cut_off_element, element_before_cutoff

+    # Return ContinuationContext Pydantic model
+    return ContinuationContext(
+        section_count=section_count,
+        delivered_summary=delivered_summary,
+        template_structure=templateStructure,
+        last_complete_part=last_complete_part,
+        incomplete_part=incomplete_part,
+        last_raw_json=last_raw_json,
+        overlap_context=overlap_context,
+        hierarchy_context=hierarchy_context
+    )
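
A hedged sketch of the new return shape (inputs are invented, and the exact context strings depend on modules.shared.jsonContinuation.getContexts):

    ctx = buildContinuationContext(
        allSections=[{"elements": [{"text": "intro paragraph"}]}],
        lastRawResponse='{"documents": [{"sections": [{"elements": [{"text": "cut off mid-sen',
    )
    print(ctx.section_count)      # 1
    print(ctx.incomplete_part)    # the tail after the last complete part, per getContexts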


def parseJsonWithModel(jsonString: str, modelClass: Type[T]) -> T:
    """
@@ -26,9 +26,16 @@ async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout."
        aiPrompt += " Ensure the converted document maintains the same content and information as the original."

-        return await self.process({
+        # Pass parentOperationId to maintain progress hierarchy
+        parentOperationId = parameters.get("parentOperationId")
+
+        processParams = {
            "aiPrompt": aiPrompt,
            "documentList": documentList,
            "resultType": normalizedFormat
-        })
+        }
+        if parentOperationId:
+            processParams["parentOperationId"] = parentOperationId
+
+        return await self.process(processParams)

@@ -28,10 +28,17 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        aiPrompt += f" Focus specifically on: {focus}."
        aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."

-        return await self.process({
+        # Pass parentOperationId to maintain progress hierarchy
+        parentOperationId = parameters.get("parentOperationId")
+
+        processParams = {
            "aiPrompt": aiPrompt,
            "documentList": documentList,
            "resultType": resultType,
            "generationIntent": "document"  # NEW: Explicit intent
-        })
+        }
+        if parentOperationId:
+            processParams["parentOperationId"] = parentOperationId
+
+        return await self.process(processParams)

@@ -29,6 +29,9 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        aiPrompt += " Focus on accurate translation of content."
        aiPrompt += " Maintain the same document structure, headings, and organization."

+        # Pass parentOperationId to maintain progress hierarchy
+        parentOperationId = parameters.get("parentOperationId")
+
        processParams = {
            "aiPrompt": aiPrompt,
            "documentList": documentList,

@@ -36,6 +39,8 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        }
        if resultType:
            processParams["resultType"] = resultType
+        if parentOperationId:
+            processParams["parentOperationId"] = parentOperationId

        return await self.process(processParams)

@@ -282,7 +282,7 @@ class MethodAi(MethodBase):
            ),
            "generateCode": WorkflowActionDefinition(
                actionId="ai.generateCode",
-                description="Generate code files - explicitly sets intent to 'code'. If the prompt specifies file formats to deliver, include them in the prompt",
+                description="Generate one or multiple code files in a single action - explicitly sets intent to 'code'. This action can generate multiple files (e.g., config.json, customers.json, settings.json) when the prompt requests multiple files. If the prompt specifies file formats to deliver, include them in the prompt. IMPORTANT: When the user requests multiple files (e.g., 'generate 3 JSON files'), use a SINGLE ai.generateCode action with a prompt that describes ALL requested files, rather than splitting into multiple actions.",
                dynamicMode=True,
                parameters={
                    "prompt": WorkflowActionParameter(

@@ -290,7 +290,7 @@ class MethodAi(MethodBase):
                        type="str",
                        frontendType=FrontendType.TEXTAREA,
                        required=True,
-                        description="Description of code to generate"
+                        description="Description of code to generate. If multiple files are requested, describe ALL files in this single prompt (e.g., 'Generate 3 JSON files: 1) config.json with..., 2) customers.json with..., 3) settings.json with...')."
                    ),
                    "documentList": WorkflowActionParameter(
                        name="documentList",

@@ -303,9 +303,9 @@ class MethodAi(MethodBase):
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
-                        frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
+                        frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"],
                        required=False,
-                        description="Output format (html, js, py, etc.). Optional: if omitted, formats are determined from prompt by AI. With per-document format determination, AI can determine different formats for different documents based on prompt."
+                        description="Output format (html, js, py, json, csv, xml, etc.). Optional: if omitted, formats are determined from prompt by AI. This action can return MULTIPLE files in a single call when the prompt requests multiple files. With per-document format determination, AI can determine different formats for different files based on prompt. When multiple files are requested, the action will return multiple documents (one per file)."
                    )
                },
                execute=generateCode.__get__(self, self.__class__)

@@ -80,46 +80,64 @@ class ContentValidator:

        # For tables: extract caption and statistics
        if section.get("content_type") == "table":
            # Try to extract from elements first
            if elements and isinstance(elements, list) and len(elements) > 0:
                tableElement = elements[0]
-                content = tableElement.get("content", {})
-                if isinstance(content, dict):
-                    headers = content.get("headers", [])
-                    rows = content.get("rows", [])
-                else:
-                    headers = tableElement.get("headers", [])
-                    rows = tableElement.get("rows", [])
-                if headers:
-                    sectionSummary["columnCount"] = len(headers)
-                    sectionSummary["headers"] = headers  # Include headers for context
-                if rows:
-                    sectionSummary["rowCount"] = len(rows)
-                sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
+                # Ensure tableElement is a dictionary before accessing
+                if isinstance(tableElement, dict):
+                    content = tableElement.get("content", {})
+                    if isinstance(content, dict):
+                        headers = content.get("headers", [])
+                        rows = content.get("rows", [])
+                    else:
+                        headers = tableElement.get("headers", [])
+                        rows = tableElement.get("rows", [])
+                    if headers:
+                        sectionSummary["columnCount"] = len(headers)
+                        sectionSummary["headers"] = headers  # Include headers for context
+                    if rows:
+                        sectionSummary["rowCount"] = len(rows)
+                    sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
+            else:
+                # Fallback: extract KPIs from section metadata if elements are missing
+                # This handles cases where filledStructure doesn't have elements populated
+                if "columnCount" in section:
+                    sectionSummary["columnCount"] = section.get("columnCount")
+                if "rowCount" in section:
+                    sectionSummary["rowCount"] = section.get("rowCount")
+                if "headers" in section:
+                    sectionSummary["headers"] = section.get("headers")
+                if "caption" in section:
+                    sectionSummary["caption"] = section.get("caption")

        # For lists and bullet_lists: extract item count
        elif section.get("content_type") in ["list", "bullet_list"]:
            if elements and isinstance(elements, list) and len(elements) > 0:
                listElement = elements[0]
-                content = listElement.get("content", {})
-                if isinstance(content, dict):
-                    items = content.get("items", [])
-                else:
-                    items = listElement.get("items", [])
-                if items:
-                    sectionSummary["itemCount"] = len(items)
+                # Ensure listElement is a dictionary before accessing
+                if isinstance(listElement, dict):
+                    content = listElement.get("content", {})
+                    if isinstance(content, dict):
+                        items = content.get("items", [])
+                    else:
+                        items = listElement.get("items", [])
+                    if items:
+                        sectionSummary["itemCount"] = len(items)

        # For paragraphs/headings: extract text statistics (no preview for security)
        elif section.get("content_type") in ["paragraph", "heading"]:
            if elements and isinstance(elements, list) and len(elements) > 0:
                textElement = elements[0]
-                content = textElement.get("content", {})
-                if isinstance(content, dict):
-                    text = content.get("text", "")
-                else:
-                    text = textElement.get("text", "")
-                if text:
-                    sectionSummary["textLength"] = len(text)
-                    sectionSummary["wordCount"] = len(text.split())
+                # Ensure textElement is a dictionary before accessing
+                if isinstance(textElement, dict):
+                    content = textElement.get("content", {})
+                    if isinstance(content, dict):
+                        text = content.get("text", "")
+                    else:
+                        text = textElement.get("text", "")
+                    if text:
+                        sectionSummary["textLength"] = len(text)
+                        sectionSummary["wordCount"] = len(text.split())
+            # Also check for text length if available directly in section
+            if section.get("textLength"):
+                sectionSummary["textLength"] = section.get("textLength")
@@ -153,6 +171,7 @@ class ContentValidator:
        # Include any additional fields from section (generic approach)
        # This ensures all action-specific fields are preserved
        # BUT exclude type-specific KPIs that don't belong to this content_type
+        # AND exclude internal planning fields that confuse validation
        contentType = section.get("content_type", "")
        # Define KPIs that are ONLY valid for specific types
        typeExclusiveKpis = {

@@ -165,8 +184,12 @@ class ContentValidator:
            if kpiType != contentType:
                excludedKpis.extend(kpiFields)

+        # Internal planning fields that should NOT be shown to validation AI
+        # These are implementation details, not content indicators
+        internalFields = ["generationHint", "useAiCall", "elements"]
+
        for key, value in section.items():
-            if key not in sectionSummary and key not in ["elements"] and key not in excludedKpis:
+            if key not in sectionSummary and key not in internalFields and key not in excludedKpis:
                # Don't copy type-specific KPIs if they're 0/empty and we didn't extract them ourselves
                # This prevents copying columnCount: 0, rowCount: 0, headers: [] from structure generation phase
                if key in ["columnCount", "rowCount", "headers", "itemCount"]:
@@ -198,39 +221,61 @@ class ContentValidator:
        elements = section.get("elements", [])

        if section.get("content_type") == "table":
            # Try to extract from elements first
            if elements and isinstance(elements, list) and len(elements) > 0:
                tableElement = elements[0]
-                content = tableElement.get("content", {})
-                if isinstance(content, dict):
-                    headers = content.get("headers", [])
-                    rows = content.get("rows", [])
-                else:
-                    headers = tableElement.get("headers", [])
-                    rows = tableElement.get("rows", [])
-                if headers:
-                    sectionSummary["columnCount"] = len(headers)
-                    sectionSummary["headers"] = headers
-                if rows:
-                    sectionSummary["rowCount"] = len(rows)
-                sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
+                # Ensure tableElement is a dictionary before accessing
+                if isinstance(tableElement, dict):
+                    content = tableElement.get("content", {})
+                    if isinstance(content, dict):
+                        headers = content.get("headers", [])
+                        rows = content.get("rows", [])
+                    else:
+                        headers = tableElement.get("headers", [])
+                        rows = tableElement.get("rows", [])
+                    if headers:
+                        sectionSummary["columnCount"] = len(headers)
+                        sectionSummary["headers"] = headers
+                    if rows:
+                        sectionSummary["rowCount"] = len(rows)
+                    sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
+            else:
+                # Fallback: extract KPIs from section metadata if elements are missing
+                # This handles cases where filledStructure doesn't have elements populated
+                if "columnCount" in section:
+                    sectionSummary["columnCount"] = section.get("columnCount")
+                if "rowCount" in section:
+                    sectionSummary["rowCount"] = section.get("rowCount")
+                if "headers" in section:
+                    sectionSummary["headers"] = section.get("headers")
+                if "caption" in section:
+                    sectionSummary["caption"] = section.get("caption")

        # For lists and bullet_lists: extract item count
        elif section.get("content_type") in ["list", "bullet_list"]:
            if elements and isinstance(elements, list) and len(elements) > 0:
                listElement = elements[0]
-                content = listElement.get("content", {})
-                if isinstance(content, dict):
-                    items = content.get("items", [])
-                else:
-                    items = listElement.get("items", [])
-                if items:
-                    sectionSummary["itemCount"] = len(items)
+                # Ensure listElement is a dictionary before accessing
+                if isinstance(listElement, dict):
+                    content = listElement.get("content", {})
+                    if isinstance(content, dict):
+                        items = content.get("items", [])
+                    else:
+                        items = listElement.get("items", [])
+                    if items:
+                        sectionSummary["itemCount"] = len(items)
+            else:
+                # Fallback: extract KPIs from section metadata if elements are missing
+                if "itemCount" in section:
+                    sectionSummary["itemCount"] = section.get("itemCount")

        # For paragraphs/headings: extract text statistics (no preview for security)
        elif section.get("content_type") in ["paragraph", "heading"]:
            if elements and isinstance(elements, list) and len(elements) > 0:
                textElement = elements[0]
-                content = textElement.get("content", {})
+                # Ensure textElement is a dictionary before accessing
+                if isinstance(textElement, dict):
+                    content = textElement.get("content", {})
                    if isinstance(content, dict):
                        text = content.get("text", "")
                    else:
@@ -269,6 +314,7 @@ class ContentValidator:

        # Include any additional fields from section (generic approach)
        # BUT exclude type-specific KPIs that don't belong to this content_type
+        # AND exclude internal planning fields that confuse validation
        contentType = section.get("content_type", "")
        # Define KPIs that are ONLY valid for specific types
        typeExclusiveKpis = {

@@ -281,8 +327,12 @@ class ContentValidator:
            if kpiType != contentType:
                excludedKpis.extend(kpiFields)

+        # Internal planning fields that should NOT be shown to validation AI
+        # These are implementation details, not content indicators
+        internalFields = ["generationHint", "useAiCall", "elements"]
+
        for key, value in section.items():
-            if key not in sectionSummary and key not in ["elements"] and key not in excludedKpis:
+            if key not in sectionSummary and key not in internalFields and key not in excludedKpis:
                # Don't copy type-specific KPIs if they're 0/empty and we didn't extract them ourselves
                # This prevents copying columnCount: 0, rowCount: 0, headers: [] from structure generation phase
                if key in ["columnCount", "rowCount", "headers", "itemCount"]:
@@ -341,11 +391,22 @@ class ContentValidator:
        # NOT the actual rendered content. The actual content is in documentData.
        # Include both: jsonStructure for structure metadata, and contentPreview for actual content check
        if sourceJson and isinstance(sourceJson, dict):
-            # Use source JSON for structure analysis (for rendered documents like xlsx/docx/pdf)
-            jsonSummary = self._summarizeJsonStructure(sourceJson)
-            summary["jsonStructure"] = jsonSummary
-            # Add note that this is metadata, not actual content
-            summary["note"] = "jsonStructure contains metadata about document structure. Actual rendered content is in documentData."
+            # Check if this is code generation metadata (has statistics field)
+            if "statistics" in sourceJson and "fileType" in sourceJson:
+                # Code generation format - extract statistics from metadata
+                codeStats = sourceJson.get("statistics", {})
+                jsonSummary = {
+                    "metadata": sourceJson,
+                    "sections": [],
+                    "statistics": codeStats
+                }
+                summary["jsonStructure"] = jsonSummary
+                summary["note"] = "jsonStructure contains metadata and statistics for code generation file. Actual rendered content is in documentData."
+            else:
+                # Document generation format - use standard structure analysis
+                jsonSummary = self._summarizeJsonStructure(sourceJson)
+                summary["jsonStructure"] = jsonSummary
+                summary["note"] = "jsonStructure contains metadata about document structure. Actual rendered content is in documentData."

        # For rendered documents, also check actual content
        if data is not None:
@@ -353,8 +414,19 @@ class ContentValidator:
            if contentPreview:
                summary["contentPreview"] = contentPreview
+        elif data is not None:
+            # For code generation files without sourceJson, extract statistics from content
+            if formatExt in ["csv", "json", "xml"]:
+                codeStats = self._extractCodeFileStatistics(data, formatExt, mimeType)
+                if codeStats:
+                    jsonSummary = {
+                        "metadata": {},
+                        "sections": [],
+                        "statistics": codeStats
+                    }
+                    summary["jsonStructure"] = jsonSummary
+                    summary["note"] = "jsonStructure contains statistics extracted from code file content."
            # Fallback: try to parse documentData as JSON (for non-rendered documents)
-            if isinstance(data, dict):
+            elif isinstance(data, dict):
                # Summarize JSON structure
                jsonSummary = self._summarizeJsonStructure(data)
                summary["jsonStructure"] = jsonSummary
@@ -502,6 +574,74 @@ class ContentValidator:
            logger.warning(f"Error getting content structure info: {str(e)}")
            return None

+    def _extractCodeFileStatistics(self, data: Any, formatExt: str, mimeType: str) -> Optional[Dict[str, Any]]:
+        """Extract statistics from code generation files (CSV, JSON, XML) for validation."""
+        try:
+            # Convert bytes to string if needed
+            content = None
+            if isinstance(data, bytes):
+                try:
+                    content = data.decode('utf-8')
+                except UnicodeDecodeError:
+                    return None
+            elif isinstance(data, str):
+                content = data
+            else:
+                return None
+
+            if not content:
+                return None
+
+            stats = {}
+
+            if formatExt == "csv":
+                import csv
+                import io
+                try:
+                    reader = csv.reader(io.StringIO(content))
+                    rows = list(reader)
+                    if rows:
+                        headerRow = rows[0]
+                        stats["rowCount"] = len(rows) - 1  # Exclude header
+                        stats["columnCount"] = len(headerRow)
+                        stats["headerRow"] = headerRow
+                        stats["dataRowCount"] = len(rows) - 1
+                except Exception as e:
+                    logger.debug(f"CSV statistics extraction failed: {e}")
+
+            elif formatExt == "json":
+                try:
+                    parsed = json.loads(content)
+                    stats["isArray"] = isinstance(parsed, list)
+                    stats["isObject"] = isinstance(parsed, dict)
+                    if isinstance(parsed, list):
+                        stats["itemCount"] = len(parsed)
+                        stats["objectCount"] = sum(1 for item in parsed if isinstance(item, dict))
+                        stats["arrayCount"] = sum(1 for item in parsed if isinstance(item, list))
+                    elif isinstance(parsed, dict):
+                        stats["keyCount"] = len(parsed)
+                        stats["keys"] = list(parsed.keys())
+                        stats["objectCount"] = sum(1 for v in parsed.values() if isinstance(v, dict))
+                        stats["arrayCount"] = sum(1 for v in parsed.values() if isinstance(v, list))
+                except Exception as e:
+                    logger.debug(f"JSON statistics extraction failed: {e}")
+
+            elif formatExt == "xml":
+                try:
+                    import xml.etree.ElementTree as ET
+                    root = ET.fromstring(content)
+                    stats["elementCount"] = len(list(root.iter()))
+                    stats["attributeCount"] = sum(len(elem.attrib) for elem in root.iter())
+                    stats["rootElement"] = root.tag
+                    stats["hasRoot"] = True
+                except Exception as e:
+                    logger.debug(f"XML statistics extraction failed: {e}")
+
+            return stats if stats else None
+
+        except Exception as e:
+            logger.warning(f"Error extracting code file statistics: {str(e)}")
+            return None
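
To make the CSV branch concrete, a self-contained sketch of the same statistics extraction (sample data is invented):

    import csv, io

    content = "id,name\n1,Alice\n2,Bob\n"
    rows = list(csv.reader(io.StringIO(content)))
    stats = {
        "rowCount": len(rows) - 1,      # data rows only, header excluded
        "columnCount": len(rows[0]),
        "headerRow": rows[0],
        "dataRowCount": len(rows) - 1,
    }
    print(stats)
    # {'rowCount': 2, 'columnCount': 2, 'headerRow': ['id', 'name'], 'dataRowCount': 2}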

    def _isFormatCompatible(self, deliveredFormat: str, expectedFormat: str) -> bool:
        """


556 tests/functional/test11_code_generation_formats.py (new file)

@@ -0,0 +1,556 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Code Generation Formats Test 11 - Tests code generation in JSON, CSV, and XML formats
|
||||
Tests code generation with structured data formats including validation and formatting.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import csv
|
||||
import io
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
# Add the gateway to path (go up 2 levels from tests/functional/)
|
||||
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
if _gateway_path not in sys.path:
|
||||
sys.path.insert(0, _gateway_path)
|
||||
|
||||
# Import the service initialization
|
||||
from modules.services import getInterface as getServices
|
||||
from modules.datamodels.datamodelChat import UserInputRequest, WorkflowModeEnum
|
||||
from modules.datamodels.datamodelUam import User
|
||||
from modules.features.workflow import chatStart
|
||||
import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects
|
||||
|
||||
|
||||
class CodeGenerationFormatsTester11:
|
||||
def __init__(self):
|
||||
# Use root user for testing (has full access to everything)
|
||||
from modules.interfaces.interfaceDbAppObjects import getRootInterface
|
||||
rootInterface = getRootInterface()
|
||||
self.testUser = rootInterface.currentUser
|
||||
|
||||
# Initialize services using the existing system
|
||||
self.services = getServices(self.testUser, None) # Test user, no workflow
|
||||
self.workflow = None
|
||||
self.testResults = {}
|
||||
self.generatedDocuments = {}
|
||||
|
||||
async def initialize(self):
|
||||
"""Initialize the test environment."""
|
||||
# Enable debug file logging for tests
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
APP_CONFIG.set("APP_DEBUG_CHAT_WORKFLOW_ENABLED", True)
|
||||
|
||||
# Set logging level to INFO to see workflow progress
|
||||
import logging
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
print(f"Initialized test with user: {self.testUser.id}")
|
||||
print(f"Mandate ID: {self.testUser.mandateId}")
|
||||
print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}")
|
||||
|
||||
def createTestPrompt(self, format: str) -> str:
|
||||
"""Create a test prompt for code generation in the specified format.
|
||||
|
||||
The prompt requests 3 files for each format:
|
||||
- Structured data generation appropriate for the format
|
||||
- Proper formatting and validation
|
||||
"""
|
||||
formatPrompts = {
|
||||
"json": (
|
||||
"Generate 3 JSON code files for a customer management system:\n"
|
||||
"1) Create a config.json file with:\n"
|
||||
" - Application name: 'Customer Manager'\n"
|
||||
" - Version: '1.0.0'\n"
|
||||
" - Database settings: host, port, name\n"
|
||||
" - API settings: baseUrl, timeout\n"
|
||||
"2) Create a customers.json file with an array of customer objects:\n"
|
||||
" - Each customer should have: id, name, email, phone, address\n"
|
||||
" - Include at least 3 sample customers\n"
|
||||
"3) Create a settings.json file with:\n"
|
||||
" - Theme settings: darkMode, fontSize, language\n"
|
||||
" - Notification settings: email, sms, push\n"
|
||||
" - Feature flags: enableAnalytics, enableReports\n\n"
|
||||
"Format all files as valid JSON with proper indentation."
|
||||
),
|
||||
"csv": (
|
||||
"Generate 3 CSV code files for expense tracking:\n"
|
||||
"1) Create an expenses.csv file with:\n"
|
||||
" - Header row: Documentname, Datum, Händler, Kreditkartennummer, Gesamtbetrag, Währung, MWST-Satz\n"
|
||||
" - Data rows with at least 5 expense entries\n"
|
||||
" - Use consistent date format (DD.MM.YYYY)\n"
|
||||
" - Use CHF as currency\n"
|
||||
" - Use 7.7% as VAT rate\n"
|
||||
"2) Create a categories.csv file with:\n"
|
||||
" - Header row: CategoryID, CategoryName, Description, ParentCategory\n"
|
||||
" - Data rows with at least 8 categories\n"
|
||||
"3) Create a vendors.csv file with:\n"
|
||||
" - Header row: VendorID, VendorName, ContactPerson, Email, Phone, Address\n"
|
||||
" - Data rows with at least 6 vendors\n\n"
|
||||
"Format all files as valid CSV with proper header row and consistent column count."
|
||||
),
|
||||
"xml": (
|
||||
"Generate 3 XML code files for a product catalog:\n"
|
||||
"1) Create a products.xml file with:\n"
|
||||
" - Root element: <catalog>\n"
|
||||
" - Each product as <product> element with:\n"
|
||||
" - <id>, <name>, <description>, <price>, <category>\n"
|
||||
" - Include at least 4 products\n"
|
||||
"2) Create a categories.xml file with:\n"
|
||||
" - Root element: <categories>\n"
|
||||
" - Each category as <category> element with:\n"
|
||||
" - <id>, <name>, <description>, <parentId>\n"
|
||||
" - Include at least 5 categories\n"
|
||||
"3) Create a suppliers.xml file with:\n"
|
||||
" - Root element: <suppliers>\n"
|
||||
" - Each supplier as <supplier> element with:\n"
|
||||
" - <id>, <name>, <contact>, <address>\n"
|
||||
" - Include at least 3 suppliers\n\n"
|
||||
"Format all files as valid XML with proper indentation and structure."
|
||||
)
|
||||
}
|
||||
|
||||
return formatPrompts.get(format.lower(), formatPrompts["json"])
|
||||
|
||||
async def generateCodeInFormat(self, format: str) -> Dict[str, Any]:
|
||||
"""Generate code in the specified format using workflow."""
|
||||
print("\n" + "="*80)
|
||||
print(f"GENERATING CODE IN {format.upper()} FORMAT")
|
||||
print("="*80)
|
||||
|
||||
prompt = self.createTestPrompt(format)
|
||||
print(f"Prompt: {prompt[:200]}...")
|
||||
|
||||
# Create user input request
|
||||
userInput = UserInputRequest(
|
||||
prompt=prompt,
|
||||
listFileId=[],
|
||||
userLanguage="en"
|
||||
)
|
||||
|
||||
# Start workflow
|
||||
print(f"\nStarting workflow for {format.upper()} code generation...")
|
||||
workflow = await chatStart(
|
||||
currentUser=self.testUser,
|
||||
userInput=userInput,
|
||||
workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC,
|
||||
workflowId=None
|
||||
)
|
||||
|
||||
if not workflow:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Failed to start workflow"
|
||||
}
|
||||
|
||||
self.workflow = workflow
|
||||
print(f"Workflow started: {workflow.id}")
|
||||
|
||||
# Wait for workflow completion (no timeout - wait indefinitely)
|
||||
print(f"Waiting for workflow completion...")
|
||||
completed = await self.waitForWorkflowCompletion(timeout=None)
|
||||
|
||||
if not completed:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Workflow did not complete",
|
||||
"workflowId": workflow.id,
|
||||
"status": workflow.status if workflow else "unknown"
|
||||
}
|
||||
|
||||
# Analyze results
|
||||
results = self.analyzeWorkflowResults()
|
||||
|
||||
# Extract documents for this format
|
||||
documents = results.get("documents", [])
|
||||
formatDocuments = [d for d in documents if d.get("fileName", "").endswith(f".{format.lower()}")]
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"format": format,
|
||||
"workflowId": workflow.id,
|
||||
"status": results.get("status"),
|
||||
"documentCount": len(formatDocuments),
|
||||
"documents": formatDocuments,
|
||||
"results": results
|
||||
}
|
||||
|
||||
async def waitForWorkflowCompletion(self, timeout: Optional[int] = None, checkInterval: int = 2) -> bool:
|
||||
"""Wait for workflow to complete."""
|
||||
if not self.workflow:
|
||||
return False
|
||||
|
||||
startTime = time.time()
|
||||
lastStatus = None
|
||||
|
||||
interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)
|
||||
|
||||
if timeout is None:
|
||||
print("Waiting indefinitely (no timeout)")
|
||||
|
||||
while True:
|
||||
# Check timeout only if specified
|
||||
if timeout is not None and time.time() - startTime > timeout:
|
||||
print(f"\n⏱️ Timeout after {timeout} seconds")
|
||||
return False
|
||||
|
||||
# Get current workflow status
|
||||
try:
|
||||
currentWorkflow = interfaceDbChat.getWorkflow(self.workflow.id)
|
||||
if not currentWorkflow:
|
||||
print("\n❌ Workflow not found")
|
||||
return False
|
||||
|
||||
currentStatus = currentWorkflow.status
|
||||
elapsed = int(time.time() - startTime)
|
||||
|
||||
# Print status if it changed
|
||||
if currentStatus != lastStatus:
|
||||
print(f"Workflow status: {currentStatus} (elapsed: {elapsed}s)")
|
||||
lastStatus = currentStatus
|
||||
|
||||
# Check if workflow is complete
|
||||
if currentStatus in ["completed", "stopped", "failed"]:
|
||||
self.workflow = currentWorkflow
|
||||
statusIcon = "✅" if currentStatus == "completed" else "❌"
|
||||
print(f"\n{statusIcon} Workflow finished with status: {currentStatus} (elapsed: {elapsed}s)")
|
||||
return currentStatus == "completed"
|
||||
|
||||
# Wait before next check
|
||||
await asyncio.sleep(checkInterval)
|
||||
|
||||
except Exception as e:
|
||||
print(f"\n⚠️ Error checking workflow status: {str(e)}")
|
||||
await asyncio.sleep(checkInterval)
|
||||
|
||||
    def analyzeWorkflowResults(self) -> Dict[str, Any]:
        """Analyze workflow results and extract information."""
        if not self.workflow:
            return {"error": "No workflow to analyze"}

        interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)
        workflow = interfaceDbChat.getWorkflow(self.workflow.id)

        if not workflow:
            return {"error": "Workflow not found"}

        # Get unified chat data
        chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None)

        # Count messages
        messages = chatData.get("messages", [])
        userMessages = [m for m in messages if m.get("role") == "user"]
        assistantMessages = [m for m in messages if m.get("role") == "assistant"]

        # Count documents
        documents = chatData.get("documents", [])

        # Get logs
        logs = chatData.get("logs", [])

        results = {
            "workflowId": workflow.id,
            "status": workflow.status,
            "workflowMode": str(workflow.workflowMode) if hasattr(workflow, 'workflowMode') else None,
            "currentRound": workflow.currentRound,
            "totalTasks": workflow.totalTasks,
            "totalActions": workflow.totalActions,
            "messageCount": len(messages),
            "userMessageCount": len(userMessages),
            "assistantMessageCount": len(assistantMessages),
            "documentCount": len(documents),
            "logCount": len(logs),
            "documents": documents,
            "logs": logs
        }

        print(f"\nWorkflow Results:")
        print(f"  Status: {results['status']}")
        print(f"  Tasks: {results['totalTasks']}")
        print(f"  Actions: {results['totalActions']}")
        print(f"  Messages: {results['messageCount']}")
        print(f"  Documents: {results['documentCount']}")

        # Print document details
        if documents:
            print(f"\nGenerated Documents:")
            for doc in documents:
                fileName = doc.get("fileName", "unknown")
                fileSize = doc.get("fileSize", 0)
                mimeType = doc.get("mimeType", "unknown")
                print(f"  - {fileName} ({fileSize} bytes, {mimeType})")

        return results

    def verifyCodeFormat(self, document: Dict[str, Any], expectedFormat: str) -> Dict[str, Any]:
        """Verify that a code file matches the expected format and is valid."""
        fileName = document.get("fileName", "")
        mimeType = document.get("mimeType", "")
        fileSize = document.get("fileSize", 0)

        # Expected MIME types
        expectedMimeTypes = {
            "json": ["application/json"],
            "csv": ["text/csv"],
            "xml": ["application/xml", "text/xml"]
        }

        # Expected file extensions
        expectedExtensions = {
            "json": [".json"],
            "csv": [".csv"],
            "xml": [".xml"]
        }

        formatLower = expectedFormat.lower()
        expectedMimes = expectedMimeTypes.get(formatLower, [])
        expectedExts = expectedExtensions.get(formatLower, [])

        # Check file extension
        hasCorrectExtension = any(fileName.lower().endswith(ext) for ext in expectedExts)

        # Check MIME type
        hasCorrectMimeType = any(mimeType.lower() == mime.lower() for mime in expectedMimes)

        # Check file size (should be > 0)
        hasValidSize = fileSize > 0

        # Try to read and validate content
        isValidContent = False
        validationError = None

        try:
            # Get file content from fileId
            fileId = document.get("fileId")
            if fileId and hasattr(self.services, 'interfaceDbComponent'):
                fileData = self.services.interfaceDbComponent.getFileData(fileId)
                if fileData:
                    content = fileData.decode('utf-8') if isinstance(fileData, bytes) else fileData

                    # Validate format-specific syntax
                    if formatLower == "json":
                        try:
                            json.loads(content)
                            isValidContent = True
                        except json.JSONDecodeError as e:
                            validationError = f"Invalid JSON: {str(e)}"

                    elif formatLower == "csv":
                        try:
                            reader = csv.reader(io.StringIO(content))
                            rows = list(reader)
                            if len(rows) > 0:
                                # Check header row exists
                                headerCount = len(rows[0])
                                # Check all rows have same column count
                                allRowsValid = all(len(row) == headerCount for row in rows)
                                isValidContent = allRowsValid
                                if not allRowsValid:
                                    validationError = "CSV rows have inconsistent column counts"
                            else:
                                validationError = "CSV file is empty"
                        except Exception as e:
                            validationError = f"CSV parsing error: {str(e)}"

                    elif formatLower == "xml":
                        try:
                            ET.fromstring(content)
                            isValidContent = True
                        except ET.ParseError as e:
                            validationError = f"Invalid XML: {str(e)}"
                else:
                    validationError = "Could not read file data"
            else:
                validationError = "No fileId available"

        except Exception as e:
            validationError = f"Error reading/validating file: {str(e)}"

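        # Note: "isValid" below checks metadata only (extension, size, MIME type),
        # while "isComplete" additionally requires the content to parse in the
        # expected format - a file can therefore be "valid" without being "complete".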
        verification = {
            "format": expectedFormat,
            "fileName": fileName,
            "mimeType": mimeType,
            "fileSize": fileSize,
            "hasCorrectExtension": hasCorrectExtension,
            "hasCorrectMimeType": hasCorrectMimeType,
            "hasValidSize": hasValidSize,
            "isValidContent": isValidContent,
            "validationError": validationError,
            "isValid": hasCorrectExtension and hasValidSize and hasCorrectMimeType,
            "isComplete": hasCorrectExtension and hasValidSize and hasCorrectMimeType and isValidContent
        }

        return verification

    async def testAllFormats(self) -> Dict[str, Any]:
        """Test code generation in JSON, CSV, and XML formats."""
        print("\n" + "="*80)
        print("TESTING CODE GENERATION IN ALL FORMATS")
        print("="*80)

        # Test all code formats
        formats = ["json", "csv", "xml"]
        results = {}

        for format in formats:
            try:
                print(f"\n{'='*80}")
                print(f"Testing {format.upper()} format...")
                print(f"{'='*80}")

                result = await self.generateCodeInFormat(format)
                results[format] = result

                if result.get("success"):
                    documents = result.get("documents", [])
                    if documents:
                        # Verify all documents (expecting 3 files per format)
                        verifications = []
                        for doc in documents:
                            verification = self.verifyCodeFormat(doc, format)
                            verifications.append(verification)

                        result["verifications"] = verifications

                        # Count valid documents
                        validCount = sum(1 for v in verifications if v.get("isValid"))
                        contentValidCount = sum(1 for v in verifications if v.get("isValidContent"))

                        print(f"\n✅ {format.upper()} generation successful!")
                        print(f"   Documents: {len(documents)} (expected: 3)")
                        print(f"   Valid Format: {validCount}/{len(documents)}")
                        print(f"   Valid Content: {contentValidCount}/{len(documents)}")

                        # Print details for each file
                        for i, verification in enumerate(verifications, 1):
                            statusIcon = "✅" if verification.get("isValid") else "❌"
                            contentIcon = "✅" if verification.get("isValidContent") else "❌"
                            print(f"   File {i}: {statusIcon} Format, {contentIcon} Content - {verification.get('fileName', 'unknown')}")
                            if verification.get("validationError"):
                                print(f"      Error: {verification['validationError']}")
                    else:
                        print(f"\n⚠️ {format.upper()} generation completed but no documents found")
                else:
                    error = result.get("error", "Unknown error")
                    print(f"\n❌ {format.upper()} generation failed: {error}")

                # Small delay between tests
                await asyncio.sleep(2)

            except Exception as e:
                import traceback
                print(f"\n❌ Error testing {format.upper()}: {str(e)}")
                print(traceback.format_exc())
                results[format] = {
                    "success": False,
                    "error": str(e),
                    "traceback": traceback.format_exc()
                }

        return results

    async def runTest(self):
        """Run the complete test."""
        print("\n" + "="*80)
        print("CODE GENERATION FORMATS TEST 11 - JSON, CSV, XML")
        print("="*80)

        try:
            # Initialize
            await self.initialize()

            # Test all formats
            formatResults = await self.testAllFormats()

            # Summary
            print("\n" + "="*80)
            print("TEST SUMMARY")
            print("="*80)

            # Format tests summary
            print("\nFormat Tests:")
            successCount = 0
            failCount = 0
            completeCount = 0  # Files with valid content

            for format, result in formatResults.items():
                if result.get("success"):
                    successCount += 1
                    verifications = result.get("verifications", [])
                    docCount = result.get("documentCount", 0)

                    # Count valid files
                    validCount = sum(1 for v in verifications if v.get("isValid"))
                    contentValidCount = sum(1 for v in verifications if v.get("isValidContent"))
                    completeCount += contentValidCount

                    # Overall status (all files valid)
                    allValid = len(verifications) > 0 and all(v.get("isValid") for v in verifications)
                    allContentValid = len(verifications) > 0 and all(v.get("isValidContent") for v in verifications)

                    statusIcon = "✅" if allValid else "⚠️"
                    contentIcon = "✅" if allContentValid else "❌"

                    print(f"{statusIcon} {format.upper():6s}: {'PASS' if allValid else 'PARTIAL'} - {docCount} file(s) ({validCount} valid format, {contentValidCount} valid content)")

                    # Print errors if any
                    for v in verifications:
                        if v.get("validationError"):
                            print(f"      {v.get('fileName', 'unknown')}: {v['validationError']}")
                else:
                    failCount += 1
                    error = result.get("error", "Unknown error")
                    print(f"❌ {format.upper():6s}: FAIL - {error}")

            print(f"\nFormat Tests: {successCount} passed, {failCount} failed out of {len(formatResults)} formats")
            print(f"Valid Content Files: {completeCount} total files with valid content")

            self.testResults = {
                "success": failCount == 0,
                "formatTests": {
                    "successCount": successCount,
                    "failCount": failCount,
                    "completeCount": completeCount,
                    "totalFormats": len(formatResults),
                    "results": formatResults
                },
                "totalSuccess": successCount,
                "totalFail": failCount
            }

            return self.testResults

        except Exception as e:
            import traceback
            print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
            print(f"Traceback:\n{traceback.format_exc()}")
            self.testResults = {
                "success": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }
            return self.testResults


async def main():
    """Run code generation formats test 11."""
    tester = CodeGenerationFormatsTester11()
    results = await tester.runTest()

    # Print final results as JSON for easy parsing
    print("\n" + "="*80)
    print("FINAL RESULTS (JSON)")
    print("="*80)
    print(json.dumps(results, indent=2, default=str))


if __name__ == "__main__":
    asyncio.run(main())

804 tests/functional/test12_json_split_merge.py Normal file

@@ -0,0 +1,804 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON Split and Merge Test 12 - Tests JSON splitting and merging using workflow tools
Tests random splitting of JSON files into 3 parts and merging them back using ModularJsonMerger.
"""

import asyncio
import json
import sys
import os
import time
import random
from typing import Dict, Any, List, Optional, Tuple

# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
    sys.path.insert(0, _gateway_path)

# Import JSON merger from workflow tools
from modules.services.serviceAi.subJsonMerger import ModularJsonMerger, JsonMergeLogger
from modules.shared.jsonContinuation import getContexts


class JsonSplitMergeTester12:
    def __init__(self):
        self.testResults = {}
        self.testJsonFiles = []
        self.logBuffer = []
        self.logFile = None

    def createTestJsonFiles(self) -> List[Dict[str, Any]]:
        """Create various test JSON files with different structures."""
        testFiles = [
            {
                "name": "config.json",
                "data": {
                    "application": "Customer Manager",
                    "version": "1.0.0",
                    "database": {
                        "host": "localhost",
                        "port": 5432,
                        "name": "customers_db"
                    },
                    "api": {
                        "baseUrl": "https://api.example.com",
                        "timeout": 30
                    }
                }
            },
            {
                "name": "customers.json",
                "data": {
                    "customers": [
                        {"id": 1, "name": "John Doe", "email": "john@example.com", "phone": "+1234567890", "address": "123 Main St"},
                        {"id": 2, "name": "Jane Smith", "email": "jane@example.com", "phone": "+0987654321", "address": "456 Oak Ave"},
                        {"id": 3, "name": "Bob Johnson", "email": "bob@example.com", "phone": "+1122334455", "address": "789 Pine Rd"},
                        {"id": 4, "name": "Alice Williams", "email": "alice@example.com", "phone": "+5566778899", "address": "321 Elm St"},
                        {"id": 5, "name": "Charlie Brown", "email": "charlie@example.com", "phone": "+9988776655", "address": "654 Maple Dr"}
                    ]
                }
            },
            {
                "name": "settings.json",
                "data": {
                    "theme": {
                        "darkMode": True,
                        "fontSize": 14,
                        "language": "en"
                    },
                    "notifications": {
                        "email": True,
                        "sms": False,
                        "push": True
                    },
                    "features": {
                        "enableAnalytics": True,
                        "enableReports": False
                    }
                }
            },
            {
                "name": "products.json",
                "data": {
                    "products": [
                        {"id": "P001", "name": "Product A", "price": 29.99, "category": "Electronics", "inStock": True},
                        {"id": "P002", "name": "Product B", "price": 49.99, "category": "Clothing", "inStock": True},
                        {"id": "P003", "name": "Product C", "price": 19.99, "category": "Books", "inStock": False},
                        {"id": "P004", "name": "Product D", "price": 99.99, "category": "Electronics", "inStock": True},
                        {"id": "P005", "name": "Product E", "price": 14.99, "category": "Books", "inStock": True},
                        {"id": "P006", "name": "Product F", "price": 79.99, "category": "Clothing", "inStock": True}
                    ]
                }
            },
            {
                "name": "document_structure.json",
                "data": {
                    "metadata": {
                        "title": "Test Document",
                        "author": "Test Author",
                        "date": "2025-01-05"
                    },
                    "documents": [
                        {
                            "id": "doc1",
                            "title": "Document 1",
                            "sections": [
                                {
                                    "id": "sec1",
                                    "content_type": "heading",
                                    "elements": [
                                        {"type": "heading", "content": {"text": "Introduction", "level": 1}}
                                    ]
                                },
                                {
                                    "id": "sec2",
                                    "content_type": "paragraph",
                                    "elements": [
                                        {"type": "paragraph", "content": {"text": "This is a test paragraph."}}
                                    ]
                                }
                            ]
                        }
                    ]
                }
            },
            {
                "name": "table_example.json",
                "data": self._loadTableJsonExample()
            },
            {
                "name": "complete_json.json",
                "data": {
                    "status": "complete",
                    "message": "This is a complete, valid JSON object",
                    "data": {
                        "items": [1, 2, 3, 4, 5],
                        "metadata": {
                            "version": "1.0",
                            "timestamp": "2025-01-05T12:00:00Z"
                        }
                    }
                },
                "isComplete": True  # Flag to indicate this is complete JSON (not cut)
            },
            {
                "name": "json_with_comments.json",
                "data": None,  # Will be set as string with comments
                "jsonString": '''{
    // This is a single-line comment
    "name": "Test",
    "value": 42,
    /* This is a multi-line comment
       spanning multiple lines */
    "items": [1, 2, 3],
    "nested": {
        // Another comment
        "key": "value"
    }
}''',
                "hasComments": True
            },
            {
                "name": "json_with_trailing_comma.json",
                "data": None,  # Will be set as string with trailing comma
                "jsonString": '''{
    "name": "Test",
    "value": 42,
    "items": [1, 2, 3,],
    "nested": {
        "key": "value",
    }
}''',
                "hasTrailingComma": True
            },
            {
                "name": "json_with_unquoted_keys.json",
                "data": None,  # Will be set as string with unquoted keys
                "jsonString": '''{
    name: "Test",
    value: 42,
    items: [1, 2, 3],
    nested: {
        key: "value"
    }
}''',
                "hasUnquotedKeys": True
            },
            {
                "name": "json_with_invalid_escape.json",
                "data": None,  # Will be set as string with invalid escape
                "jsonString": '''{
    "name": "Test\\xInvalid",
    "value": 42,
    "description": "This has \\u invalid escape"
}''',
                "hasInvalidEscape": True
            },
            {
                "name": "json_mixed_errors.json",
                "data": None,  # Will be set as string with multiple errors
                "jsonString": '''{
    // Comment here
    name: "Test",  // Unquoted key
    "value": 42,
    "items": [1, 2, 3,],  // Trailing comma
    "description": "Has \\x invalid escape",
    "nested": {
        key: "value",  // Unquoted key and trailing comma
    }
}''',
                "hasMixedErrors": True
            }
        ]

        return testFiles

    def _loadTableJsonExample(self) -> Dict[str, Any]:
        """Load the table JSON example from the debug prompts file."""
        try:
            # Import jsonUtils for closing incomplete JSON structures
            from modules.shared.jsonUtils import closeJsonStructures, tryParseJson

            # Path to the JSON example file
            jsonExamplePath = os.path.join(
                os.path.dirname(__file__), "..", "..", "..", "local", "debug", "prompts",
                "20260105-214826-020-chapter_1_section_section_2_response_iteration_2.txt"
            )

            # Read the file content
            with open(jsonExamplePath, 'r', encoding='utf-8') as f:
                content = f.read()

            # Remove markdown code block markers
            jsonContent = content.strip()
            if jsonContent.startswith('```json'):
                jsonContent = jsonContent[7:]  # Remove ```json
            if jsonContent.startswith('```'):
                jsonContent = jsonContent[3:]  # Remove ```
            jsonContent = jsonContent.strip()
            if jsonContent.endswith('```'):
                jsonContent = jsonContent[:-3]  # Remove trailing ```
            jsonContent = jsonContent.strip()

            # The JSON is incomplete - use closeJsonStructures to complete it
            closedJson = closeJsonStructures(jsonContent)

            # Parse the closed JSON
            parsedJson, error, _ = tryParseJson(closedJson)
            if error is None and parsedJson is not None:
                return parsedJson
            else:
                raise Exception(f"Failed to parse JSON after closing structures: {error}")
        except Exception as e:
            # If loading fails, return a minimal valid structure
            print(f"Warning: Could not load table JSON example: {e}")
            return {
                "elements": [
                    {
                        "type": "table",
                        "content": {
                            "headers": ["Spalte1", "Spalte2", "Spalte3"],
                            "rows": [
                                [36761, 36767, 36779]
                            ]
                        }
                    }
                ]
            }

    def splitJsonRandomly(self, jsonString: str, numParts: int = 3) -> List[str]:
        """
        Split JSON string randomly into specified number of parts.
        Simulates real AI response cuts - can split anywhere, even in the middle of strings/numbers/structures.
        This is the REAL scenario: AI response gets cut off randomly, not at convenient points.
        """
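        # Hypothetical illustration: a split point may land mid-token, e.g. cutting
        # '{"name": "Alice"}' into '{"name": "Al' and 'ice"}' - neither part is
        # valid JSON on its own, just like a truncated AI response.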
        if numParts < 2:
            return [jsonString]

        jsonLength = len(jsonString)

        # Generate truly random split points - can be anywhere!
        # Only ensure minimum part size to avoid empty parts
        minPartSize = max(10, jsonLength // (numParts * 3))  # Smaller minimum to allow more randomness

        splitPoints = []
        for _ in range(numParts - 1):
            # Generate random point - can be anywhere in the string
            # Only ensure we don't create parts smaller than minimum
            minPoint = len(splitPoints) * minPartSize if splitPoints else minPartSize
            maxPoint = jsonLength - (numParts - len(splitPoints) - 1) * minPartSize

            if maxPoint <= minPoint:
                # If we can't avoid minimum size, just use the boundary
                splitPoint = minPoint
            else:
                # Truly random point - can be in the middle of anything!
                splitPoint = random.randint(minPoint, maxPoint)

            splitPoints.append(splitPoint)

        splitPoints.sort()

        # Create parts - these can be cut anywhere, even mid-string, mid-number, etc.
        parts = []
        start = 0
        for splitPoint in splitPoints:
            parts.append(jsonString[start:splitPoint])
            start = splitPoint
        parts.append(jsonString[start:])  # Last part

        return parts

    def _log(self, message: str):
        """Add message to log buffer."""
        self.logBuffer.append(message)
        print(message)

    def normalizeJson(self, jsonString: str) -> Optional[Dict[str, Any]]:
        """Normalize JSON string by parsing and re-serializing. Returns None if parsing fails."""
        try:
            parsed = json.loads(jsonString)
            return parsed
        except json.JSONDecodeError:
            # Try to close incomplete JSON structures
            try:
                from modules.shared.jsonUtils import closeJsonStructures, tryParseJson
                closed = closeJsonStructures(jsonString)
                parsed, error, _ = tryParseJson(closed)
                if error is None and parsed is not None:
                    return parsed
            except Exception:
                pass
            # Return None if all parsing attempts fail
            return None

    def compareJson(self, original: Dict[str, Any], merged: Dict[str, Any]) -> Dict[str, Any]:
        """Compare original and merged JSON structures."""
        originalStr = json.dumps(original, sort_keys=True, indent=2)
        mergedStr = json.dumps(merged, sort_keys=True, indent=2)

        exactMatch = originalStr == mergedStr

        # Deep comparison
        differences = []
        self._findDifferences(original, merged, "", differences)

        return {
            "exactMatch": exactMatch,
            "differences": differences,
            "originalSize": len(originalStr),
            "mergedSize": len(mergedStr),
            "sizeMatch": len(originalStr) == len(mergedStr)
        }

    def _findDifferences(self, obj1: Any, obj2: Any, path: str, differences: List[str]):
        """Recursively find differences between two JSON objects."""
        if type(obj1) != type(obj2):
            differences.append(f"{path}: Type mismatch - {type(obj1).__name__} vs {type(obj2).__name__}")
            return

        if isinstance(obj1, dict):
            allKeys = set(obj1.keys()) | set(obj2.keys())
            for key in allKeys:
                newPath = f"{path}.{key}" if path else key
                if key not in obj1:
                    differences.append(f"{newPath}: Missing in original")
                elif key not in obj2:
                    differences.append(f"{newPath}: Missing in merged")
                else:
                    self._findDifferences(obj1[key], obj2[key], newPath, differences)
        elif isinstance(obj1, list):
            if len(obj1) != len(obj2):
                differences.append(f"{path}: Length mismatch - {len(obj1)} vs {len(obj2)}")
            else:
                for i, (item1, item2) in enumerate(zip(obj1, obj2)):
                    newPath = f"{path}[{i}]"
                    self._findDifferences(item1, item2, newPath, differences)
        else:
            if obj1 != obj2:
                differences.append(f"{path}: Value mismatch - {obj1} vs {obj2}")

    async def testJsonSplitMerge(self, jsonFile: Dict[str, Any]) -> Dict[str, Any]:
        """Test splitting and merging a single JSON file."""
        fileName = jsonFile["name"]

        # Check if this is a complete JSON test (no cut)
        isComplete = jsonFile.get("isComplete", False)

        # Check if this is a JSON string with errors (not from data dict)
        jsonString = jsonFile.get("jsonString")
        if jsonString:
            # Use the provided JSON string directly (may have errors)
            originalJsonString = jsonString
            originalData = None  # No original data for error tests
        else:
            # Convert data dict to JSON string
            originalData = jsonFile["data"]
            originalJsonString = json.dumps(originalData, indent=2, ensure_ascii=False)

        originalSize = len(originalJsonString)

        self._log("")
        self._log("="*80)
        testType = "COMPLETE JSON" if isComplete else ("JSON WITH ERRORS" if jsonString else "SPLIT JSON")
        self._log(f"TESTING {testType}: {fileName}")
        self._log("="*80)

        # Log original JSON
        self._log("")
        self._log("="*80)
        self._log("ORIGINAL JSON")
        self._log("="*80)
        self._log(f"JSON length: {originalSize} characters")
        if isComplete:
            self._log("  ⚠️ This is COMPLETE JSON (not cut) - testing overlapContext='' detection")
        if jsonString:
            errorType = []
            if jsonFile.get("hasComments"):
                errorType.append("comments")
            if jsonFile.get("hasTrailingComma"):
                errorType.append("trailing commas")
            if jsonFile.get("hasUnquotedKeys"):
                errorType.append("unquoted keys")
            if jsonFile.get("hasInvalidEscape"):
                errorType.append("invalid escapes")
            if jsonFile.get("hasMixedErrors"):
                errorType.append("mixed errors")
            if errorType:
                self._log(f"  ⚠️ This JSON has errors: {', '.join(errorType)} - testing repair function")
        self._log("")
        self._log("Full JSON content:")
        self._log("-"*80)
        jsonLines = originalJsonString.split('\n')
        if len(jsonLines) > 50:
            for line in jsonLines[:25]:
                self._log(line)
            self._log(f"... ({len(jsonLines) - 50} lines omitted) ...")
            for line in jsonLines[-25:]:
                self._log(line)
        else:
            for line in jsonLines:
                self._log(line)

        # Handle complete JSON, JSON with errors, vs split JSON
        if isComplete or jsonString:
            # For complete JSON or JSON with errors, use the full string (no cut)
            # We want to test repair on the full error-containing JSON
            partContent = originalJsonString
            cutPosition = None  # No cut
            self._log("")
            self._log("="*80)
            if isComplete:
                self._log("COMPLETE JSON TEST (NO CUT)")
                self._log("="*80)
                self._log("  Testing that getContexts() detects complete JSON and sets overlapContext=''")
            else:
                self._log("JSON WITH ERRORS TEST (NO CUT)")
                self._log("="*80)
                self._log("  Testing that getContexts() repairs the errors and produces valid JSON")
        else:
            # Split JSON at random position (simulating AI response cut)
            self._log("")
            self._log("="*80)
            self._log("SPLITTING JSON AT RANDOM POSITION (SIMULATING AI RESPONSE CUT)")
            self._log("="*80)

            # Find random cut position (not at start or end)
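            # Example: for a 1000-char JSON the cut lands somewhere in [100, 900],
            # so the truncated part keeps real structure on both sides of the cut.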
            minCutPos = max(100, originalSize // 10)  # At least 10% from start
            maxCutPos = min(originalSize - 100, originalSize * 9 // 10)  # At least 10% from end

            # Ensure valid range
            if maxCutPos <= minCutPos:
                # For small JSON, just cut in the middle
                cutPosition = originalSize // 2
            else:
                cutPosition = random.randint(minCutPos, maxCutPos)

            # Get part from start to cut
            partContent = originalJsonString[:cutPosition]

        if not isComplete:
            self._log("")
            self._log("="*80)
            self._log("PART (from start to cut):")
            self._log("="*80)
            self._log(f"Cut position: {cutPosition} characters")
            self._log(f"Part length: {len(partContent)} characters")
            self._log("")
            self._log("Part content:")
            partLines = partContent.split('\n')
            if len(partLines) > 30:
                for line in partLines[:15]:
                    self._log(f"  {line}")
                self._log(f"  ... ({len(partLines) - 30} lines omitted) ...")
                for line in partLines[-15:]:
                    self._log(f"  {line}")
            else:
                for line in partLines:
                    self._log(f"  {line}")

        # Generate contexts using getContexts()
        self._log("")
        self._log("="*80)
        self._log("GENERATING CONTINUATION CONTEXTS")
        self._log("="*80)

        contexts = getContexts(partContent)

        # Log overlap context
        self._log("")
        self._log("="*80)
        self._log("OVERLAP CONTEXT (for merging):")
        self._log("="*80)
        overlapLines = contexts.overlapContext.split('\n')
        if len(overlapLines) > 30:
            for line in overlapLines[:15]:
                self._log(f"  {line}")
            self._log(f"  ... ({len(overlapLines) - 30} lines omitted) ...")
            for line in overlapLines[-15:]:
                self._log(f"  {line}")
        else:
            for line in overlapLines:
                self._log(f"  {line}")

        # Log hierarchy context (full, without budget)
        self._log("")
        self._log("="*80)
        self._log("HIERARCHY CONTEXT (full structure, no budget):")
        self._log("="*80)
        hierarchyLines = contexts.hierarchyContext.split('\n')
        if len(hierarchyLines) > 30:
            for line in hierarchyLines[:15]:
                self._log(f"  {line}")
            self._log(f"  ... ({len(hierarchyLines) - 30} lines omitted) ...")
            for line in hierarchyLines[-15:]:
                self._log(f"  {line}")
        else:
            for line in hierarchyLines:
                self._log(f"  {line}")

        # Log hierarchy context for prompt (with budget)
        self._log("")
        self._log("="*80)
        self._log("HIERARCHY CONTEXT FOR PROMPT (with budget logic):")
        self._log("="*80)
        hierarchyPromptLines = contexts.hierarchyContextForPrompt.split('\n')
        for line in hierarchyPromptLines:
            self._log(f"  {line}")

        # Test completePart as valid JSON
        self._log("")
        self._log("="*80)
        self._log("COMPLETE PART (should be valid JSON):")
        self._log("="*80)
        completeLines = contexts.completePart.split('\n')
        if len(completeLines) > 30:
            for line in completeLines[:15]:
                self._log(f"  {line}")
            self._log(f"  ... ({len(completeLines) - 30} lines omitted) ...")
            for line in completeLines[-15:]:
                self._log(f"  {line}")
        else:
            for line in completeLines:
                self._log(f"  {line}")

        # Validate completePart as JSON and check overlapContext
        self._log("")
        self._log("="*80)
        self._log("VALIDATION RESULTS:")
        self._log("="*80)

        # Check overlapContext for complete JSON
        if isComplete:
            if contexts.overlapContext == "":
                self._log("  ✅ overlapContext is empty (correct for complete JSON)")
            else:
                self._log(f"  ❌ overlapContext is NOT empty: '{contexts.overlapContext[:50]}...'")
                self._log("     Expected empty string for complete JSON")

        # Validate completePart as JSON
        self._log("")
        self._log("VALIDATING COMPLETE PART AS JSON:")
        isValidJson = False
        parsedCompletePart = None
        jsonError = None

        try:
            parsedCompletePart = json.loads(contexts.completePart)
            isValidJson = True
            self._log("  ✅ completePart is valid JSON")
            self._log(f"  Parsed type: {type(parsedCompletePart).__name__}")

            # For error tests, verify repair worked
            if jsonString:
                self._log("  ✅ JSON repair successful - errors were fixed")

            # For split JSON, compare with truncated JSON
            if not isComplete and not jsonString:
                # Compare with truncated JSON (not original) - parse the truncated part to compare
                from modules.shared.jsonUtils import closeJsonStructures, tryParseJson

                # Try to parse the truncated JSON part (with structures closed)
                truncatedClosed = closeJsonStructures(partContent)
                truncatedParsed, truncatedError, _ = tryParseJson(truncatedClosed)

                if truncatedParsed is not None:
                    # Compare completePart with the parsed truncated JSON
                    if isinstance(parsedCompletePart, dict) and isinstance(truncatedParsed, dict):
                        comparison = self.compareJson(truncatedParsed, parsedCompletePart)
                        self._log(f"  Comparison with truncated JSON (at cut position {cutPosition}):")
                        self._log(f"    Exact match: {comparison['exactMatch']}")
                        self._log(f"    Size match: {comparison['sizeMatch']}")
                        if comparison['differences']:
                            self._log(f"    Differences found: {len(comparison['differences'])}")
                            for diff in comparison['differences'][:10]:  # Show first 10 differences
                                self._log(f"      - {diff}")
                            if len(comparison['differences']) > 10:
                                self._log(f"      ... ({len(comparison['differences']) - 10} more differences)")
                        else:
                            self._log("    No differences found - completePart matches truncated JSON structure")
                    elif isinstance(parsedCompletePart, list) and isinstance(truncatedParsed, list):
                        self._log(f"  Both are lists: truncated={len(truncatedParsed)} items, completePart={len(parsedCompletePart)} items")
                    else:
                        self._log(f"  Different types: truncated={type(truncatedParsed).__name__}, completePart={type(parsedCompletePart).__name__}")
                else:
                    self._log(f"  Could not parse truncated JSON for comparison (error: {truncatedError})")

        except json.JSONDecodeError as e:
            isValidJson = False
            jsonError = str(e)
            self._log(f"  ❌ completePart is NOT valid JSON")
            self._log(f"  Error: {jsonError}")
            self._log(f"  Error position: line {e.lineno}, column {e.colno}")
            if jsonString:
                self._log("  ❌ JSON repair FAILED - errors were not fixed")

        # Return test results
        result = {
            "success": isValidJson,
            "fileName": fileName,
            "originalSize": originalSize,
            "cutPosition": cutPosition if not isComplete else None,
            "partSize": len(partContent),
            "overlapContextSize": len(contexts.overlapContext),
            "hierarchyContextSize": len(contexts.hierarchyContext),
            "hierarchyContextForPromptSize": len(contexts.hierarchyContextForPrompt),
            "completePartSize": len(contexts.completePart),
            "isValidJson": isValidJson,
            "jsonError": jsonError,
            "parsedCompletePart": parsedCompletePart is not None,
            "jsonParsingSuccess": contexts.jsonParsingSuccess
        }

        # Add complete JSON specific checks
        if isComplete:
            result["overlapContextIsEmpty"] = contexts.overlapContext == ""
            result["isComplete"] = True
            # For complete JSON, success means overlapContext is empty AND valid JSON
            result["success"] = isValidJson and (contexts.overlapContext == "")

        # Add error test specific checks
        if jsonString:
            result["hasErrors"] = True
            result["repairSuccess"] = isValidJson

        return result

    async def testAllJsonFiles(self) -> Dict[str, Any]:
        """Test splitting and merging all test JSON files."""
        print("\n" + "="*80)
        print("TESTING JSON SPLIT AND MERGE")
        print("="*80)

        testFiles = self.createTestJsonFiles()
        results = {}

        for jsonFile in testFiles:
            try:
                result = await self.testJsonSplitMerge(jsonFile)
                results[jsonFile["name"]] = result

                # Small delay between tests
                await asyncio.sleep(0.5)

            except Exception as e:
                import traceback
                print(f"\n❌ Error testing {jsonFile['name']}: {str(e)}")
                print(traceback.format_exc())
                results[jsonFile["name"]] = {
                    "success": False,
                    "error": str(e),
                    "traceback": traceback.format_exc()
                }

        return results

    def _writeLogFile(self):
        """Write log buffer to file."""
        logDir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "debug")
        os.makedirs(logDir, exist_ok=True)
        logFilePath = os.path.join(logDir, "test12_json_split_merge_results.txt")

        with open(logFilePath, 'w', encoding='utf-8') as f:
            f.write('\n'.join(self.logBuffer))

        self.logFile = logFilePath
        print(f"\n📝 Detailed log written to: {logFilePath}")

    async def runTest(self):
        """Run the complete test."""
        self._log("="*80)
        self._log("JSON SPLIT AND MERGE TEST 12")
        self._log("="*80)

        try:
            # Test all JSON files
            results = await self.testAllJsonFiles()

            # Write log file
            self._writeLogFile()

            # Summary
            print("\n" + "="*80)
            print("TEST SUMMARY")
            print("="*80)

            successCount = 0

            for fileName, result in results.items():
                if result.get("success"):
                    successCount += 1
                    isValidJson = result.get("isValidJson", False)
                    isComplete = result.get("isComplete", False)
                    hasErrors = result.get("hasErrors", False)

                    if isComplete:
                        overlapEmpty = result.get("overlapContextIsEmpty", False)
                        if isValidJson and overlapEmpty:
                            print(f"✅ {fileName:30s}: Complete JSON - overlapContext='' and valid JSON")
                        elif not overlapEmpty:
                            print(f"⚠️ {fileName:30s}: Complete JSON but overlapContext not empty")
                        else:
                            jsonError = result.get("jsonError", "Unknown error")
                            print(f"⚠️ {fileName:30s}: Complete JSON but not valid - {jsonError}")
                    elif hasErrors:
                        repairSuccess = result.get("repairSuccess", False)
                        if repairSuccess:
                            print(f"✅ {fileName:30s}: JSON with errors - repair successful")
                        else:
                            jsonError = result.get("jsonError", "Unknown error")
                            print(f"❌ {fileName:30s}: JSON with errors - repair failed - {jsonError}")
                    else:
                        if isValidJson:
                            print(f"✅ {fileName:30s}: Valid JSON - completePart parsed successfully")
                        else:
                            jsonError = result.get("jsonError", "Unknown error")
                            print(f"⚠️ {fileName:30s}: Contexts generated but completePart is not valid JSON - {jsonError}")
                else:
                    error = result.get("error", "Unknown error")
                    print(f"❌ {fileName:30s}: FAILED - {error}")

            print(f"\nResults: {successCount}/{len(results)} successful")

            self.testResults = {
                "success": successCount == len(results),
                "totalFiles": len(results),
                "successCount": successCount,
                "results": results
            }

            return self.testResults

        except Exception as e:
            import traceback
            print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
            print(f"Traceback:\n{traceback.format_exc()}")
            self.testResults = {
                "success": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }
            return self.testResults


async def main():
    """Run JSON split and merge test 12."""
    tester = JsonSplitMergeTester12()
    results = await tester.runTest()

    # Print final results as JSON for easy parsing
    print("\n" + "="*80)
    print("FINAL RESULTS (JSON)")
    print("="*80)
    print(json.dumps(results, indent=2, default=str))


if __name__ == "__main__":
    asyncio.run(main())

307 tests/functional/test13_json_completion_cuts.py Normal file

@@ -0,0 +1,307 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON Completion Test 13 - Tests JSON completion at various cut positions
Tests a single JSON object (~300 chars) with all JSON structure types.
Cuts the JSON at every position from character 50 to the end, completes it, and validates.
"""

import asyncio
import json
import sys
import os
from typing import Dict, Any, List

# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
    sys.path.insert(0, _gateway_path)

# Import JSON continuation module
from modules.shared.jsonContinuation import getContexts


class JsonCompletionTester13:
    def __init__(self):
        self.testResults = {}
        self.logBuffer = []
        self.logFile = None

    def createTestJson(self) -> str:
        """
        Create a single JSON object (~300 chars) containing all JSON structure types:
        - Objects (nested)
        - Arrays (nested)
        - Strings
        - Numbers (integers and floats)
        - Booleans (true, false)
        - null
        """
        testData = {
            "id": 12345,
            "name": "Test Object",
            "active": True,
            "inactive": False,
            "value": None,
            "price": 99.99,
            "tags": ["tag1", "tag2", "tag3"],
            "metadata": {
                "created": "2025-01-01",
                "updated": "2025-01-02",
                "version": 1
            },
            "items": [
                {"id": 1, "name": "Item A", "count": 10},
                {"id": 2, "name": "Item B", "count": 20}
            ],
            "settings": {
                "theme": "dark",
                "notifications": True,
                "features": ["feature1", "feature2"]
            }
        }

        jsonString = json.dumps(testData, indent=2, ensure_ascii=False)

        # Ensure it's approximately 300 characters (adjust if needed)
        targetLength = 300
        if len(jsonString) < targetLength:
            # Add padding to metadata
            testData["metadata"]["description"] = "A" * (targetLength - len(jsonString) + 20)
            jsonString = json.dumps(testData, indent=2, ensure_ascii=False)

        # Trim to approximately 300 chars if too long
        if len(jsonString) > targetLength + 50:
            # Remove some content to get closer to target
            testData["metadata"].pop("description", None)
            jsonString = json.dumps(testData, indent=2, ensure_ascii=False)

        return jsonString

    def _log(self, message: str):
        """Add message to log buffer."""
        self.logBuffer.append(message)
        print(message)

    async def testJsonCompletionAtCuts(self, jsonString: str, startPos: int = 50, step: int = 5) -> Dict[str, Any]:
        """
        Test JSON completion at various cut positions.

        Args:
            jsonString: The full JSON string to test
            startPos: Starting position for cuts (default 50)
            step: Step size between cuts (default 5)

        Returns:
            Dictionary with test results for each cut position
        """
        jsonLength = len(jsonString)
        results = {}

        self._log("")
        self._log("="*80)
        self._log("TESTING JSON COMPLETION AT VARIOUS CUT POSITIONS")
        self._log("="*80)
        self._log(f"JSON length: {jsonLength} characters")
        self._log(f"Testing cuts from position {startPos} to {jsonLength} (step: {step})")
        self._log("")

        # Test at each cut position
        cutPositions = list(range(startPos, jsonLength, step))
        # Always include the last position
        if cutPositions[-1] != jsonLength - 1:
            cutPositions.append(jsonLength - 1)

        successCount = 0
        totalCuts = len(cutPositions)

        for cutPos in cutPositions:
            # Get truncated JSON
            truncatedJson = jsonString[:cutPos]

            # Generate contexts
            try:
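                # getContexts() is expected to return a contexts object whose
                # completePart closes all open structures so it parses as JSON,
                # and whose overlapContext is the innermost element containing the cut.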
                contexts = getContexts(truncatedJson)
                completePart = contexts.completePart
                overlapContext = contexts.overlapContext

                # Test if completePart is valid JSON
                isValidJson = False
                jsonError = None
                parsedData = None

                try:
                    parsedData = json.loads(completePart)
                    isValidJson = True
                except json.JSONDecodeError as e:
                    jsonError = str(e)
                    isValidJson = False

                # Store result
                result = {
                    "cutPosition": cutPos,
                    "truncatedLength": len(truncatedJson),
                    "completePartLength": len(completePart),
                    "overlapContextLength": len(overlapContext),
                    "isValidJson": isValidJson,
                    "jsonError": jsonError,
                    "truncatedJson": truncatedJson[-50:] if len(truncatedJson) > 50 else truncatedJson,  # Last 50 chars
                    "completePart": completePart[-100:] if len(completePart) > 100 else completePart,  # Last 100 chars
                    "overlapContext": overlapContext[-100:] if len(overlapContext) > 100 else overlapContext  # Last 100 chars
                }

                results[cutPos] = result

                if isValidJson:
                    successCount += 1
                    self._log(f"✅ Cut at position {cutPos:4d}: Valid JSON (completePart length: {len(completePart)}, overlap length: {len(overlapContext)})")
                    self._log(f"   Overlap: {overlapContext[-80:] if len(overlapContext) > 80 else overlapContext}")
                else:
                    self._log(f"❌ Cut at position {cutPos:4d}: Invalid JSON - {jsonError}")
                    self._log(f"   Truncated (last 50): {truncatedJson[-50:]}")
                    self._log(f"   CompletePart (last 100): {completePart[-100:]}")
                    self._log(f"   Overlap: {overlapContext[-80:] if len(overlapContext) > 80 else overlapContext}")

            except Exception as e:
                result = {
                    "cutPosition": cutPos,
                    "truncatedLength": len(truncatedJson),
                    "isValidJson": False,
                    "jsonError": f"Exception: {str(e)}",
                    "truncatedJson": truncatedJson[-50:] if len(truncatedJson) > 50 else truncatedJson
                }
                results[cutPos] = result
                self._log(f"❌ Cut at position {cutPos:4d}: Exception - {str(e)}")

        # Summary
        self._log("")
        self._log("="*80)
        self._log("CUT TEST SUMMARY")
        self._log("="*80)
        self._log(f"Total cuts tested: {totalCuts}")
        self._log(f"Successful completions: {successCount}")
        self._log(f"Failed completions: {totalCuts - successCount}")
        self._log(f"Success rate: {successCount/totalCuts*100:.1f}%")
        self._log("")

        # Detailed results for failed cuts
        failedCuts = [pos for pos, res in results.items() if not res.get("isValidJson", False)]
        if failedCuts:
            self._log("Failed cuts:")
            for pos in failedCuts[:10]:  # Show first 10 failures
                res = results[pos]
                self._log(f"  Position {pos}: {res.get('jsonError', 'Unknown error')}")
                overlap = res.get('overlapContext', 'N/A')
                if overlap != 'N/A':
                    self._log(f"    Overlap: {overlap[-80:] if len(overlap) > 80 else overlap}")
            if len(failedCuts) > 10:
                self._log(f"  ... ({len(failedCuts) - 10} more failures)")

        return {
            "totalCuts": totalCuts,
            "successCount": successCount,
            "failedCount": totalCuts - successCount,
            "successRate": successCount / totalCuts * 100 if totalCuts > 0 else 0,
            "results": results,
            "failedCuts": failedCuts
        }

    def _writeLogFile(self):
        """Write log buffer to file."""
        logDir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "debug")
        os.makedirs(logDir, exist_ok=True)
        logFilePath = os.path.join(logDir, "test13_json_completion_cuts_results.txt")

        with open(logFilePath, 'w', encoding='utf-8') as f:
            f.write('\n'.join(self.logBuffer))

        self.logFile = logFilePath
        print(f"\n📝 Detailed log written to: {logFilePath}")

    async def runTest(self):
        """Run the complete test."""
        self._log("="*80)
        self._log("JSON COMPLETION TEST 13")
        self._log("="*80)

        try:
            # Create test JSON
            jsonString = self.createTestJson()

            self._log("")
            self._log("="*80)
            self._log("TEST JSON OBJECT")
            self._log("="*80)
            self._log(f"Length: {len(jsonString)} characters")
            self._log("")
            self._log("Full JSON content:")
            self._log("-"*80)
            jsonLines = jsonString.split('\n')
            for line in jsonLines:
                self._log(line)

            # Test completion at various cuts
            results = await self.testJsonCompletionAtCuts(jsonString, startPos=50, step=5)

            # Write log file
            self._writeLogFile()

            # Final summary
            self._log("")
            self._log("="*80)
            self._log("FINAL TEST SUMMARY")
            self._log("="*80)
            self._log(f"Total cuts tested: {results['totalCuts']}")
            self._log(f"✅ Successful: {results['successCount']}")
            self._log(f"❌ Failed: {results['failedCount']}")
            self._log(f"Success rate: {results['successRate']:.1f}%")

            if results['failedCuts']:
                self._log("")
                self._log("Failed cut positions:")
                for pos in results['failedCuts']:
                    res = results['results'][pos]
                    self._log(f"  Position {pos}: {res.get('jsonError', 'Unknown error')}")
                    overlap = res.get('overlapContext', 'N/A')
                    if overlap != 'N/A':
                        self._log(f"    Overlap: {overlap[-80:] if len(overlap) > 80 else overlap}")

            self.testResults = {
                "success": results['successCount'] == results['totalCuts'],
                "totalCuts": results['totalCuts'],
                "successCount": results['successCount'],
                "failedCount": results['failedCount'],
                "successRate": results['successRate'],
                "failedCuts": results['failedCuts'],
                "results": results['results']
            }

            return self.testResults

        except Exception as e:
            import traceback
            print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
            print(f"Traceback:\n{traceback.format_exc()}")
            self.testResults = {
                "success": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }
            return self.testResults


async def main():
    """Run JSON completion test 13."""
    tester = JsonCompletionTester13()
    results = await tester.runTest()

    # Print final results as JSON for easy parsing
    print("\n" + "="*80)
    print("FINAL RESULTS (JSON)")
    print("="*80)
    print(json.dumps(results, indent=2, default=str))


if __name__ == "__main__":
    asyncio.run(main())

373 tests/functional/test14_json_continuation_context.py Normal file

@@ -0,0 +1,373 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON Continuation Context Test 14 - Tests getContexts() with a specific cut JSON from debug prompts.
Reads a real AI response that was cut and analyzes the continuation contexts.
"""

import asyncio
import json
import sys
import os
from typing import Dict, Any, Optional

# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
    sys.path.insert(0, _gateway_path)

# Import jsonContinuation
from modules.shared.jsonContinuation import getContexts


class JsonContinuationContextTester14:
    def __init__(self):
        self.testResults = {}
        self.logBuffer = []
        self.logFile = None

    def _log(self, message: str):
        """Add message to log buffer."""
        self.logBuffer.append(message)
        print(message)

    def _readDebugFile(self, fileName: str) -> Optional[str]:
        """Read a debug prompt file from local/debug/prompts/."""
        try:
            filePath = os.path.join(
                os.path.dirname(__file__), "..", "..", "..", "local", "debug", "prompts",
                fileName
            )
            with open(filePath, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            self._log(f"Error reading file {fileName}: {e}")
            return None

    def _extractJsonFromResponse(self, content: str) -> str:
        """Extract JSON from response content (remove markdown code fences if present)."""
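        # Note: this strips at most one leading and one trailing fence marker;
        # responses containing several fenced blocks pass through largely unchanged.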
        jsonContent = content.strip()

        # Remove markdown code block markers
        if jsonContent.startswith('```json'):
            jsonContent = jsonContent[7:]
        elif jsonContent.startswith('```'):
            jsonContent = jsonContent[3:]

        jsonContent = jsonContent.strip()

        if jsonContent.endswith('```'):
            jsonContent = jsonContent[:-3]

        return jsonContent.strip()

    async def testSpecificCutJson(self, fileName: str) -> Dict[str, Any]:
        """Test getContexts() with a specific cut JSON file."""
        self._log("")
        self._log("=" * 80)
        self._log(f"TESTING CUT JSON FROM: {fileName}")
        self._log("=" * 80)

        # Read the file
        content = self._readDebugFile(fileName)
        if content is None:
            return {"success": False, "error": f"Could not read file: {fileName}"}

        # Extract JSON
        jsonContent = self._extractJsonFromResponse(content)

        self._log("")
        self._log("=" * 80)
        self._log("INPUT JSON (CUT)")
        self._log("=" * 80)
        self._log(f"Total length: {len(jsonContent)} characters")
        self._log("")

        # Show first and last parts
        lines = jsonContent.split('\n')
        if len(lines) > 40:
            self._log("First 20 lines:")
            for line in lines[:20]:
                self._log(f"  {line}")
            self._log(f"  ... ({len(lines) - 40} lines omitted) ...")
            self._log("Last 20 lines:")
            for line in lines[-20:]:
                self._log(f"  {line}")
        else:
            for line in lines:
                self._log(f"  {line}")

        # Call getContexts()
        self._log("")
        self._log("=" * 80)
        self._log("CALLING getContexts()")
        self._log("=" * 80)

        try:
            contexts = getContexts(jsonContent)
        except Exception as e:
            self._log(f"ERROR calling getContexts(): {e}")
            import traceback
            self._log(traceback.format_exc())
            return {"success": False, "error": str(e)}

        # Log results
        self._log("")
        self._log("=" * 80)
        self._log("RESULTS FROM getContexts()")
        self._log("=" * 80)

        # jsonParsingSuccess
        self._log("")
        self._log(f"jsonParsingSuccess: {contexts.jsonParsingSuccess}")

        # overlapContext
        self._log("")
        self._log("=" * 80)
        self._log("overlapContext:")
        self._log("=" * 80)
        self._log(f"Length: {len(contexts.overlapContext)} characters")
        if contexts.overlapContext == "":
            self._log("  (empty - JSON is complete, no cut point)")
        else:
            overlapLines = contexts.overlapContext.split('\n')
            if len(overlapLines) > 20:
                for line in overlapLines[:10]:
                    self._log(f"  {line}")
                self._log(f"  ... ({len(overlapLines) - 20} lines omitted) ...")
                for line in overlapLines[-10:]:
                    self._log(f"  {line}")
            else:
                for line in overlapLines:
                    self._log(f"  {line}")

        # hierarchyContext
        self._log("")
        self._log("=" * 80)
        self._log("hierarchyContext (for merging - should be exact input JSON):")
        self._log("=" * 80)
        self._log(f"Length: {len(contexts.hierarchyContext)} characters")
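        # The merge step relies on hierarchyContext reproducing the input byte-for-byte;
        # any mismatch below means the context builder dropped or rewrote characters.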
# Verify hierarchyContext equals input
|
||||
if contexts.hierarchyContext == jsonContent:
|
||||
self._log(" ✅ hierarchyContext == input JSON (CORRECT)")
|
||||
else:
|
||||
self._log(" ❌ hierarchyContext != input JSON (BUG!)")
|
||||
self._log(f" Input length: {len(jsonContent)}, hierarchyContext length: {len(contexts.hierarchyContext)}")
|
||||
            # Show where the two strings first diverge
            if len(contexts.hierarchyContext) > 0 and len(jsonContent) > 0:
                minLen = min(len(contexts.hierarchyContext), len(jsonContent))
                for i in range(minLen):
                    if contexts.hierarchyContext[i] != jsonContent[i]:
                        self._log(f" First difference at position {i}")
                        self._log(f" Input: ...{repr(jsonContent[max(0,i-20):i+20])}...")
                        self._log(f" Hierarchy: ...{repr(contexts.hierarchyContext[max(0,i-20):i+20])}...")
                        break
                else:
                    # No mismatch within the shared prefix: one string is a
                    # strict prefix of the other, so they differ only in length
                    self._log(f" No character mismatch in the first {minLen} chars; the strings differ only in length")

        # hierarchyContextForPrompt
        self._log("")
        self._log("=" * 80)
        self._log("hierarchyContextForPrompt (for AI prompt with budget/placeholders):")
        self._log("=" * 80)
        self._log(f"Length: {len(contexts.hierarchyContextForPrompt)} characters")
        hierarchyPromptLines = contexts.hierarchyContextForPrompt.split('\n')
        if len(hierarchyPromptLines) > 40:
            for line in hierarchyPromptLines[:20]:
                self._log(f" {line}")
            self._log(f" ... ({len(hierarchyPromptLines) - 40} lines omitted) ...")
            for line in hierarchyPromptLines[-20:]:
                self._log(f" {line}")
        else:
            for line in hierarchyPromptLines:
                self._log(f" {line}")

        # completePart
        self._log("")
        self._log("=" * 80)
        self._log("completePart (closed JSON for parsing):")
        self._log("=" * 80)
        self._log(f"Length: {len(contexts.completePart)} characters")

        # Try to parse completePart
        try:
            parsed = json.loads(contexts.completePart)
            self._log(" ✅ completePart is valid JSON")
            self._log(f" Parsed type: {type(parsed).__name__}")
            if isinstance(parsed, dict):
                self._log(f" Keys: {list(parsed.keys())}")
            elif isinstance(parsed, list):
                self._log(f" List length: {len(parsed)}")
        except json.JSONDecodeError as e:
            self._log(f" ❌ completePart is NOT valid JSON: {e}")

        completeLines = contexts.completePart.split('\n')
        if len(completeLines) > 40:
            self._log("")
            self._log("First 20 lines:")
            for line in completeLines[:20]:
                self._log(f" {line}")
            self._log(f" ... ({len(completeLines) - 40} lines omitted) ...")
            self._log("Last 20 lines:")
            for line in completeLines[-20:]:
                self._log(f" {line}")
        else:
            for line in completeLines:
                self._log(f" {line}")

        # Summary
        self._log("")
        self._log("=" * 80)
        self._log("SUMMARY")
        self._log("=" * 80)
        self._log(f" Input JSON length: {len(jsonContent)} chars")
        self._log(f" jsonParsingSuccess: {contexts.jsonParsingSuccess}")
        self._log(f" overlapContext length: {len(contexts.overlapContext)} chars")
        self._log(f" overlapContext empty: {contexts.overlapContext == ''}")
        self._log(f" hierarchyContext length: {len(contexts.hierarchyContext)} chars")
        self._log(f" hierarchyContext == input: {contexts.hierarchyContext == jsonContent}")
        self._log(f" hierarchyContextForPrompt length: {len(contexts.hierarchyContextForPrompt)} chars")
        self._log(f" completePart length: {len(contexts.completePart)} chars")

        return {
            "success": True,
            "fileName": fileName,
            "inputLength": len(jsonContent),
            "jsonParsingSuccess": contexts.jsonParsingSuccess,
            "overlapContextLength": len(contexts.overlapContext),
            "overlapContextEmpty": contexts.overlapContext == "",
            "hierarchyContextLength": len(contexts.hierarchyContext),
            "hierarchyContextEqualsInput": contexts.hierarchyContext == jsonContent,
            "hierarchyContextForPromptLength": len(contexts.hierarchyContextForPrompt),
            "completePartLength": len(contexts.completePart),
            "contexts": {
                # overlapContext is returned in full; the larger contexts are
                # truncated to 500 chars for readability
                "overlapContext": contexts.overlapContext,
                "hierarchyContext": contexts.hierarchyContext[:500] + "..." if len(contexts.hierarchyContext) > 500 else contexts.hierarchyContext,
                "hierarchyContextForPrompt": contexts.hierarchyContextForPrompt[:500] + "..." if len(contexts.hierarchyContextForPrompt) > 500 else contexts.hierarchyContextForPrompt,
                "completePart": contexts.completePart[:500] + "..." if len(contexts.completePart) > 500 else contexts.completePart,
            }
        }

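    # Hypothetical helper (editor sketch, not part of the original test): the
    # first/last-N preview pattern above repeats for the input JSON,
    # overlapContext, hierarchyContextForPrompt, and completePart; a method
    # like this would remove that duplication.
    def _logPreview(self, text: str, head: int = 20, tail: int = 20) -> None:
        """Log the first and last lines of a long text block."""
        lines = text.split('\n')
        if len(lines) > head + tail:
            for line in lines[:head]:
                self._log(f" {line}")
            self._log(f" ... ({len(lines) - head - tail} lines omitted) ...")
            for line in lines[-tail:]:
                self._log(f" {line}")
        else:
            for line in lines:
                self._log(f" {line}")
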
    def _writeLogFile(self):
        """Write log buffer to file."""
        logDir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "debug")
        os.makedirs(logDir, exist_ok=True)
        logFilePath = os.path.join(logDir, "test14_json_continuation_context_results.txt")

        with open(logFilePath, 'w', encoding='utf-8') as f:
            f.write('\n'.join(self.logBuffer))

        self.logFile = logFilePath
        print(f"\n📝 Detailed log written to: {logFilePath}")

    async def runTest(self):
        """Run the complete test."""
        self._log("=" * 80)
        self._log("JSON CONTINUATION CONTEXT TEST 14")
        self._log("=" * 80)
        self._log("Testing getContexts() with specific cut JSON from debug prompts")

        results = {}

        # Test files to analyze
        testFiles = [
            # The first AI response (iteration 1) - this is the cut JSON
            "20260106-173342-020-chapter_1_section_section_2_response.txt",
        ]

        # Also try to find today's response files dynamically
        debugDir = os.path.join(
            os.path.dirname(__file__), "..", "..", "..", "local", "debug", "prompts"
        )
        if os.path.exists(debugDir):
            # Sort the listing so the selection below is deterministic
            # (os.listdir() returns entries in arbitrary order)
            for fileName in sorted(os.listdir(debugDir)):
                if "section_2_response" in fileName and fileName.endswith(".txt"):
                    if fileName not in testFiles:
                        testFiles.append(fileName)

        # Limit to the first 3 files
        testFiles = testFiles[:3]

        for fileName in testFiles:
            try:
                result = await self.testSpecificCutJson(fileName)
                results[fileName] = result
            except Exception as e:
                import traceback
                self._log(f"\n❌ Error testing {fileName}: {str(e)}")
                self._log(traceback.format_exc())
                results[fileName] = {
                    "success": False,
                    "error": str(e),
                    "traceback": traceback.format_exc()
                }

        # Write log file
        self._writeLogFile()

        # Summary
        print("\n" + "=" * 80)
        print("TEST SUMMARY")
        print("=" * 80)

        successCount = 0
        for fileName, result in results.items():
            if result.get("success"):
                successCount += 1
                hierarchyMatch = result.get("hierarchyContextEqualsInput", False)
                overlapEmpty = result.get("overlapContextEmpty", False)
                jsonSuccess = result.get("jsonParsingSuccess", False)

                status = "✅" if hierarchyMatch else "⚠️"
                print(f"{status} {fileName}")
                print(f" hierarchyContext == input: {hierarchyMatch}")
                print(f" overlapContext empty: {overlapEmpty}")
                print(f" jsonParsingSuccess: {jsonSuccess}")
            else:
                print(f"❌ {fileName}: {result.get('error', 'Unknown error')}")

        print(f"\nResults: {successCount}/{len(results)} successful")

        self.testResults = {
            "success": successCount == len(results),
            "totalFiles": len(results),
            "successCount": successCount,
            "results": results
        }

        return self.testResults


async def main():
    """Run JSON continuation context test 14."""
    tester = JsonContinuationContextTester14()
    results = await tester.runTest()

    # Print final results as JSON for easy parsing
    print("\n" + "=" * 80)
    print("FINAL RESULTS (JSON)")
    print("=" * 80)

    # Create a simplified version for printing (contexts are too large)
    printableResults = {
        "success": results.get("success"),
        "totalFiles": results.get("totalFiles"),
        "successCount": results.get("successCount"),
        "files": {}
    }
    for fileName, result in results.get("results", {}).items():
        printableResults["files"][fileName] = {
            "success": result.get("success"),
            "inputLength": result.get("inputLength"),
            "jsonParsingSuccess": result.get("jsonParsingSuccess"),
            "overlapContextLength": result.get("overlapContextLength"),
            "overlapContextEmpty": result.get("overlapContextEmpty"),
            "hierarchyContextEqualsInput": result.get("hierarchyContextEqualsInput"),
            "completePartLength": result.get("completePartLength"),
        }

    print(json.dumps(printableResults, indent=2, default=str))


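# Entry point: running this file directly executes runTest(); a summary is
# printed to stdout and a detailed log is written under local/debug/.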
if __name__ == "__main__":
    asyncio.run(main())