Merge pull request #81 from valueonag/feat/coding-path

Feat/coding path
This commit is contained in:
Patrick Motsch 2026-01-06 21:33:15 +01:00 committed by GitHub
commit c61255c12e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
49 changed files with 12431 additions and 4921 deletions

View file

@ -18,6 +18,9 @@ from modules.connectors.connectorDbPostgre import DatabaseConnector
logger = logging.getLogger(__name__)
# TODO TESTING: Override maxTokens for all models during testing
# Set to None to disable override, or set to an integer (e.g., 20000) to override all models
TESTING_MAX_TOKENS_OVERRIDE: Optional[int] = None # TODO TESTING: Set to None to disable
class ModelRegistry:
"""Dynamic registry for AI models from all connectors."""
@ -50,6 +53,12 @@ class ModelRegistry:
logger.error(errorMsg)
raise ValueError(errorMsg)
# TODO TESTING: Override maxTokens if testing override is enabled
if TESTING_MAX_TOKENS_OVERRIDE is not None and model.maxTokens > TESTING_MAX_TOKENS_OVERRIDE:
originalMaxTokens = model.maxTokens
model.maxTokens = TESTING_MAX_TOKENS_OVERRIDE
logger.debug(f"TESTING: Overrode maxTokens for {model.displayName}: {originalMaxTokens} -> {TESTING_MAX_TOKENS_OVERRIDE}")
# Use displayName as the key (must be unique)
self._models[model.displayName] = model
logger.debug(f"Registered model: {model.displayName} (name: {model.name}) from {connectorType}")
@ -118,6 +127,12 @@ class ModelRegistry:
logger.error(errorMsg)
raise ValueError(errorMsg)
# TODO TESTING: Override maxTokens if testing override is enabled
if TESTING_MAX_TOKENS_OVERRIDE is not None and model.maxTokens > TESTING_MAX_TOKENS_OVERRIDE:
originalMaxTokens = model.maxTokens
model.maxTokens = TESTING_MAX_TOKENS_OVERRIDE
logger.debug(f"TESTING: Overrode maxTokens for {model.displayName}: {originalMaxTokens} -> {TESTING_MAX_TOKENS_OVERRIDE}")
# Use displayName as the key (must be unique)
self._models[model.displayName] = model
except Exception as e:

View file

@ -6,8 +6,6 @@ from enum import Enum
# Import ContentPart for runtime use (needed for Pydantic model rebuilding)
from modules.datamodels.datamodelExtraction import ContentPart
# Import JSON utilities for safe conversion
from modules.shared.jsonUtils import extractJsonString, tryParseJson, repairBrokenJson
# Operation Types
class OperationTypeEnum(str, Enum):
@ -258,3 +256,70 @@ class JsonAccumulationState(BaseModel):
description="KPI definitions with current values: [{id, description, jsonPath, targetValue, currentValue}, ...]"
)
class ContinuationContext(BaseModel):
"""Pydantic model for continuation context information."""
section_count: int
delivered_summary: str
template_structure: Optional[str] = None
last_complete_part: Optional[str] = None
incomplete_part: Optional[str] = None
last_raw_json: Optional[str] = None
overlap_context: Optional[str] = None # From jsonContinuation.getContexts() - innermost element containing cut
hierarchy_context: Optional[str] = None # From jsonContinuation.getContexts() - full structure from root to cut
class JsonContinuationContexts(BaseModel):
"""
Pydantic model for JSON continuation contexts.
Contains contexts for truncated JSON strings:
- overlapContext: The innermost object/array element containing the cut point (for merging)
- hierarchyContext: Full structure from root to cut WITHOUT budget limitations (for internal use)
- hierarchyContextForPrompt: Full structure from root to cut WITH budget limitations (for prompts)
- completePart: Valid JSON with all structures properly closed
- jsonParsingSuccess: True if completePart is valid parseable JSON
"""
overlapContext: str = Field(description="The innermost object/array element containing the cut point (for merging)")
hierarchyContext: str = Field(description="Full structure from root to cut WITHOUT budget limitations (for internal use)")
hierarchyContextForPrompt: str = Field(description="Full structure from root to cut WITH budget limitations (for prompts)")
completePart: str = Field(description="Valid JSON with all structures properly closed")
jsonParsingSuccess: bool = Field(default=False, description="True if completePart is valid parseable JSON")
class SectionPromptArgs(BaseModel):
"""Type-safe arguments for section content prompt builder."""
section: Dict[str, Any]
contentParts: List[ContentPart]
userPrompt: str
generationHint: str
allSections: List[Dict[str, Any]]
sectionIndex: int
isAggregation: bool
language: str
class ChapterStructurePromptArgs(BaseModel):
"""Type-safe arguments for chapter structure prompt builder."""
userPrompt: str
contentParts: List[ContentPart] = Field(default_factory=list)
outputFormat: str
class CodeContentPromptArgs(BaseModel):
"""Type-safe arguments for code content prompt builder."""
filename: str
fileType: str
functions: List[Dict] = Field(default_factory=list)
classes: List[Dict] = Field(default_factory=list)
dependencies: List[str] = Field(default_factory=list)
metadata: Dict[str, Any] = Field(default_factory=dict)
userPrompt: str
contentParts: List[ContentPart] = Field(default_factory=list)
contextInfo: str = ""
class CodeStructurePromptArgs(BaseModel):
"""Type-safe arguments for code structure prompt builder."""
userPrompt: str
contentParts: List[ContentPart] = Field(default_factory=list)

View file

@ -89,6 +89,131 @@ async def getPermissions(
)
@router.get("/permissions/all", response_model=Dict[str, Any])
@limiter.limit("30/minute")
async def getAllPermissions(
request: Request,
context: Optional[str] = Query(None, description="Context type: UI or RESOURCE (if not provided, returns both)"),
currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
"""
Get all RBAC permissions for the current user for UI and/or RESOURCE contexts.
This endpoint is optimized for UI initialization to avoid multiple API calls.
Query Parameters:
- context: Optional context filter. If "UI", returns only UI permissions.
If "RESOURCE", returns only RESOURCE permissions.
If not provided, returns both UI and RESOURCE permissions.
Returns:
- Dictionary with structure:
{
"ui": {
"item1": UserPermissions,
"item2": UserPermissions,
...
},
"resource": {
"item1": UserPermissions,
"item2": UserPermissions,
...
}
}
If context is specified, only that context is returned.
Example:
- GET /api/rbac/permissions/all
- GET /api/rbac/permissions/all?context=UI
- GET /api/rbac/permissions/all?context=RESOURCE
"""
try:
# Get interface and RBAC permissions
interface = getInterface(currentUser)
if not interface.rbac:
raise HTTPException(
status_code=500,
detail="RBAC interface not available"
)
# Determine which contexts to fetch
contextsToFetch = []
if context:
try:
accessContext = AccessRuleContext(context.upper())
if accessContext in [AccessRuleContext.UI, AccessRuleContext.RESOURCE]:
contextsToFetch = [accessContext]
else:
raise HTTPException(
status_code=400,
detail=f"Context '{context}' must be UI or RESOURCE for this endpoint"
)
except ValueError:
raise HTTPException(
status_code=400,
detail=f"Invalid context '{context}'. Must be UI or RESOURCE"
)
else:
# Return both UI and RESOURCE if no context specified
contextsToFetch = [AccessRuleContext.UI, AccessRuleContext.RESOURCE]
result: Dict[str, Any] = {}
# Get all access rules for user's roles
roleLabels = currentUser.roleLabels or []
if not roleLabels:
# User has no roles, return empty permissions
for ctx in contextsToFetch:
result[ctx.value.lower()] = {}
return result
# Get all access rules for user's roles and requested contexts
allRules: Dict[AccessRuleContext, List[AccessRule]] = {}
for ctx in contextsToFetch:
allRules[ctx] = []
# Get all rules for user's roles in this context
for roleLabel in roleLabels:
rules = interface.getAccessRules(
roleLabel=roleLabel,
context=ctx,
pagination=None
)
allRules[ctx].extend(rules)
# Build result: for each context, collect all unique items and calculate permissions
for ctx in contextsToFetch:
result[ctx.value.lower()] = {}
# Collect all unique items from rules
items = set()
for rule in allRules[ctx]:
if rule.item:
items.add(rule.item)
# For each item, calculate user permissions
for item in sorted(items):
permissions = interface.rbac.getUserPermissions(currentUser, ctx, item)
# Only include if user has view permission
if permissions.view:
result[ctx.value.lower()][item] = {
"view": permissions.view,
"read": permissions.read.value if permissions.read else None,
"create": permissions.create.value if permissions.create else None,
"update": permissions.update.value if permissions.update else None,
"delete": permissions.delete.value if permissions.delete else None
}
return result
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting all RBAC permissions: {str(e)}")
raise HTTPException(
status_code=500,
detail=f"Failed to get all permissions: {str(e)}"
)
@router.get("/rules", response_model=PaginatedResponse)
@limiter.limit("30/minute")
async def getAccessRules(

File diff suppressed because it is too large Load diff

View file

@ -1,376 +0,0 @@
# Parallel Processing Refactoring Concept
## Current State (Sequential)
### Chapter Sections Structure Generation (`_generateChapterSectionsStructure`)
- **Current**: Processes chapters sequentially, one after another
- **Flow**:
1. Iterate through documents
2. For each document, iterate through chapters
3. For each chapter, generate sections structure using AI
4. Update progress after each chapter
### Section Content Generation (`_fillChapterSections`)
- **Current**: Processes chapters sequentially, sections within each chapter sequentially
- **Flow**:
1. Iterate through documents
2. For each document, iterate through chapters
3. For each chapter, iterate through sections
4. For each section, generate content using AI
5. Update progress after each section
## Desired State (Parallel)
### Chapter Sections Structure Generation
- **Target**: Process all chapters in parallel
- **Requirements**:
- Maintain chapter order in final result
- Each chapter can be processed independently
- Progress updates should reflect parallel processing
- Errors in one chapter should not stop others
### Section Content Generation
- **Target**: Process sections within each chapter in parallel
- **Requirements**:
- Maintain section order within each chapter
- Sections within a chapter can be processed independently
- Chapters still processed sequentially (to maintain order)
- Progress updates should reflect parallel processing
- Errors in one section should not stop others
## Implementation Strategy
### Phase 1: Chapter Sections Structure Generation Parallelization
#### Step 1.1: Extract Single Chapter Processing
- **Create**: `_generateSingleChapterSectionsStructure()` method
- **Purpose**: Process one chapter independently
- **Parameters**:
- `chapter`: Chapter dict
- `chapterIndex`: Index for ordering
- `chapterId`, `chapterLevel`, `chapterTitle`: Chapter metadata
- `generationHint`: Generation instructions
- `contentPartIds`, `contentPartInstructions`: Content part info
- `contentParts`: Full content parts list
- `userPrompt`: User's original prompt
- `language`: Language for generation
- `parentOperationId`: For progress logging
- **Returns**: None (modifies chapter dict in place)
- **Error Handling**: Logs errors, raises exception to be caught by caller
#### Step 1.2: Refactor Main Method
- **Modify**: `_generateChapterSectionsStructure()`
- **Changes**:
1. Collect all chapters with their indices
2. Create async tasks for each chapter using `_generateSingleChapterSectionsStructure`
3. Use `asyncio.gather()` to execute all tasks in parallel
4. Process results in order (using `zip` with original order)
5. Handle errors per chapter (don't fail entire operation)
6. Update progress after each chapter completes
#### Step 1.3: Progress Reporting
- **Maintain**: Overall progress tracking
- **Update**: Progress after each chapter completes (not sequentially)
- **Format**: "Chapter X/Y completed" or "Chapter X/Y error"
### Phase 2: Section Content Generation Parallelization
#### Step 2.1: Extract Single Section Processing
- **Create**: `_processSingleSection()` method
- **Purpose**: Process one section independently
- **Parameters**:
- `section`: Section dict
- `sectionIndex`: Index for ordering
- `totalSections`: Total sections in chapter
- `chapterIndex`: Chapter index
- `totalChapters`: Total chapters
- `chapterId`: Chapter ID
- `chapterOperationId`: Chapter progress operation ID
- `fillOperationId`: Overall fill operation ID
- `contentParts`: Full content parts list
- `userPrompt`: User's original prompt
- `all_sections_list`: All sections for context
- `language`: Language for generation
- `calculateOverallProgress`: Function to calculate overall progress
- **Returns**: `List[Dict[str, Any]]` (elements for the section)
- **Error Handling**: Returns error element instead of raising
#### Step 2.2: Extract Section Processing Logic
- **Create**: Helper methods for different processing paths:
- `_processSectionAggregation()`: Handle aggregation path (multiple parts)
- `_processSectionGeneration()`: Handle generation without parts (only generationHint)
- `_processSectionParts()`: Handle individual part processing
- **Purpose**: Keep logic organized and reusable
#### Step 2.3: Refactor Main Method
- **Modify**: `_fillChapterSections()`
- **Changes**:
1. Keep sequential chapter processing (maintains order)
2. For each chapter, collect all sections with indices
3. Create async tasks for each section using `_processSingleSection`
4. Use `asyncio.gather()` to execute all section tasks in parallel
5. Process results in order (using `zip` with original order)
6. Assign elements to sections in correct order
7. Update progress after each section completes
8. Handle errors per section (don't fail entire chapter)
#### Step 2.4: Progress Reporting
- **Maintain**: Hierarchical progress tracking
- **Update**:
- Section progress: After each section completes
- Chapter progress: After all sections in chapter complete
- Overall progress: After each section/chapter completes
- **Format**: "Chapter X/Y, Section A/B completed"
## Key Considerations
### Order Preservation
- **Chapters**: Must maintain document order → process chapters sequentially
- **Sections**: Must maintain chapter order → process sections sequentially within chapter
- **Solution**: Use `asyncio.gather()` with ordered task list, then `zip` results with original order
### Error Handling
- **Chapters**: Error in one chapter should not stop others
- **Sections**: Error in one section should not stop others
- **Solution**: Use `return_exceptions=True` in `asyncio.gather()`, check `isinstance(result, Exception)`
### Progress Reporting
- **Challenge**: Progress updates happen out of order
- **Solution**: Update progress when each task completes, not sequentially
- **Format**: Show completed count, not sequential position
### Shared State
- **Chapters**: Modify chapter dicts in place (safe, each chapter is independent)
- **Sections**: Return elements, assign to sections in order (safe, each section is independent)
- **Content Parts**: Read-only, passed to all tasks (safe)
### Dependencies
- **Chapters**: No dependencies between chapters
- **Sections**: No dependencies between sections (each is self-contained)
- **Solution**: All tasks can run truly in parallel
## Implementation Steps
### Step 1: Clean Current Code
1. Ensure current sequential implementation is correct
2. Fix any existing bugs
3. Verify all tests pass
### Step 2: Implement Chapter Parallelization
1. Create `_generateSingleChapterSectionsStructure()` method
2. Extract chapter processing logic
3. Refactor `_generateChapterSectionsStructure()` to use parallel processing
4. Test with single chapter
5. Test with multiple chapters
6. Verify order preservation
7. Verify error handling
### Step 3: Implement Section Parallelization
1. Create `_processSingleSection()` method
2. Extract section processing logic into helper methods
3. Refactor `_fillChapterSections()` to use parallel processing for sections
4. Test with single section
5. Test with multiple sections
6. Test with multiple chapters
7. Verify order preservation
8. Verify error handling
### Step 4: Testing & Validation
1. Test with various document structures
2. Test error scenarios
3. Verify progress reporting accuracy
4. Performance testing (compare sequential vs parallel)
5. Verify final output order matches input order
## Code Structure
### New Methods to Create
```python
async def _generateSingleChapterSectionsStructure(
self,
chapter: Dict[str, Any],
chapterIndex: int,
chapterId: str,
chapterLevel: int,
chapterTitle: str,
generationHint: str,
contentPartIds: List[str],
contentPartInstructions: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
language: str,
parentOperationId: str
) -> None:
"""Generate sections structure for a single chapter (used for parallel processing)."""
# Extract logic from current sequential loop
# Modify chapter dict in place
# Handle errors internally, raise if critical
async def _processSingleSection(
self,
section: Dict[str, Any],
sectionIndex: int,
totalSections: int,
chapterIndex: int,
totalChapters: int,
chapterId: str,
chapterOperationId: str,
fillOperationId: str,
contentParts: List[ContentPart],
userPrompt: str,
all_sections_list: List[Dict[str, Any]],
language: str,
calculateOverallProgress: Callable
) -> List[Dict[str, Any]]:
"""Process a single section and return its elements."""
# Extract logic from current sequential loop
# Return elements list
# Return error element on failure (don't raise)
async def _processSectionAggregation(
self,
section: Dict[str, Any],
sectionId: str,
sectionTitle: str,
sectionIndex: int,
totalSections: int,
chapterId: str,
chapterOperationId: str,
fillOperationId: str,
contentPartIds: List[str],
contentFormats: Dict[str, str],
contentParts: List[ContentPart],
userPrompt: str,
generationHint: str,
all_sections_list: List[Dict[str, Any]],
language: str
) -> List[Dict[str, Any]]:
"""Process section with aggregation (multiple parts together)."""
# Extract aggregation logic
# Return elements list
async def _processSectionGeneration(
self,
section: Dict[str, Any],
sectionId: str,
sectionTitle: str,
sectionIndex: int,
totalSections: int,
chapterId: str,
chapterOperationId: str,
fillOperationId: str,
contentType: str,
userPrompt: str,
generationHint: str,
all_sections_list: List[Dict[str, Any]],
language: str
) -> List[Dict[str, Any]]:
"""Process section generation without content parts (only generationHint)."""
# Extract generation logic
# Return elements list
async def _processSectionParts(
self,
section: Dict[str, Any],
sectionId: str,
sectionTitle: str,
sectionIndex: int,
totalSections: int,
chapterId: str,
chapterOperationId: str,
fillOperationId: str,
contentPartIds: List[str],
contentFormats: Dict[str, str],
contentParts: List[ContentPart],
contentType: str,
useAiCall: bool,
generationHint: str,
userPrompt: str,
all_sections_list: List[Dict[str, Any]],
language: str
) -> List[Dict[str, Any]]:
"""Process individual parts in a section."""
# Extract individual part processing logic
# Return elements list
```
### Modified Methods
```python
async def _generateChapterSectionsStructure(
self,
chapterStructure: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
parentOperationId: str
) -> Dict[str, Any]:
"""Generate sections structure for all chapters in parallel."""
# Collect chapters with indices
# Create tasks
# Execute in parallel
# Process results in order
# Update progress
async def _fillChapterSections(
self,
chapterStructure: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
fillOperationId: str
) -> Dict[str, Any]:
"""Fill sections with content, processing sections in parallel within each chapter."""
# Process chapters sequentially
# For each chapter, process sections in parallel
# Maintain order
# Update progress
```
## Testing Strategy
### Unit Tests
1. Test `_generateSingleChapterSectionsStructure` independently
2. Test `_processSingleSection` independently
3. Test helper methods independently
### Integration Tests
1. Test parallel chapter processing with multiple chapters
2. Test parallel section processing with multiple sections
3. Test error handling (one chapter/section fails)
4. Test order preservation
### Performance Tests
1. Measure sequential vs parallel execution time
2. Verify parallel processing is faster
3. Check resource usage (memory, CPU)
## Risk Mitigation
### Risks
1. **Order not preserved**: Use `zip` with original order
2. **Race conditions**: No shared mutable state between tasks
3. **Progress reporting incorrect**: Update progress when tasks complete
4. **Errors not handled**: Use `return_exceptions=True` and check results
5. **Performance degradation**: Test and measure, fallback to sequential if needed
### Safety Measures
1. Keep sequential implementation as fallback (commented out)
2. Add feature flag to enable/disable parallel processing
3. Extensive logging for debugging
4. Gradual rollout (test with small datasets first)
## Migration Path
1. **Phase 1**: Implement chapter parallelization, test thoroughly
2. **Phase 2**: Implement section parallelization, test thoroughly
3. **Phase 3**: Enable both in production with monitoring
4. **Phase 4**: Remove sequential fallback code (if stable)
## Notes
- All async methods must use `await` correctly
- Progress updates happen asynchronously (may appear out of order in logs)
- Final result order is guaranteed by processing results in order
- Error handling is per-task, not global
- No shared mutable state between parallel tasks (read-only contentParts, independent chapter/section dicts)

View file

@ -1,78 +0,0 @@
# Module Structure - serviceAi
## Übersicht
Das `mainServiceAi.py` Modul wurde in mehrere Submodule aufgeteilt, um die Übersichtlichkeit zu verbessern.
## Modulstruktur
### Hauptmodul
- **mainServiceAi.py** (~800 Zeilen)
- Initialisierung (`__init__`, `create`, `ensureAiObjectsInitialized`)
- Public API (`callAiPlanning`, `callAiContent`)
- Routing zu Submodulen
- Helper-Methoden
### Submodule
1. **subJsonResponseHandling.py** (bereits vorhanden)
- JSON Response Merging
- Section Merging
- Fragment Detection
2. **subResponseParsing.py** (~200 Zeilen)
- `ResponseParser.extractSectionsFromResponse()` - Extrahiert Sections aus AI-Responses
- `ResponseParser.shouldContinueGeneration()` - Entscheidet ob Generation fortgesetzt werden soll
- `ResponseParser._isStuckInLoop()` - Loop-Detection
- `ResponseParser.extractDocumentMetadata()` - Extrahiert Metadaten
- `ResponseParser.buildFinalResultFromSections()` - Baut finales JSON
3. **subDocumentIntents.py** (~300 Zeilen)
- `DocumentIntentAnalyzer.clarifyDocumentIntents()` - Analysiert Dokument-Intents
- `DocumentIntentAnalyzer.resolvePreExtractedDocument()` - Löst pre-extracted Dokumente auf
- `DocumentIntentAnalyzer._buildIntentAnalysisPrompt()` - Baut Intent-Analyse-Prompt
4. **subContentExtraction.py** (~600 Zeilen)
- `ContentExtractor.extractAndPrepareContent()` - Extrahiert und bereitet Content vor
- `ContentExtractor.extractTextFromImage()` - Vision AI für Bilder
- `ContentExtractor.processTextContentWithAi()` - AI-Verarbeitung von Text
- `ContentExtractor._isBinary()` - Helper für Binary-Check
5. **subStructureGeneration.py** (~200 Zeilen)
- `StructureGenerator.generateStructure()` - Generiert Dokument-Struktur
- `StructureGenerator._buildStructurePrompt()` - Baut Struktur-Prompt
6. **subStructureFilling.py** (~400 Zeilen)
- `StructureFiller.fillStructure()` - Füllt Struktur mit Content
- `StructureFiller._buildSectionGenerationPrompt()` - Baut Section-Generation-Prompt
- `StructureFiller._findContentPartById()` - Helper für ContentPart-Suche
- `StructureFiller._needsAggregation()` - Entscheidet ob Aggregation nötig
7. **subAiCallLooping.py** (~400 Zeilen)
- `AiCallLooper.callAiWithLooping()` - Haupt-Looping-Logik
- `AiCallLooper._defineKpisFromPrompt()` - KPI-Definition
## Verwendung
Alle Submodule werden über das Hauptmodul `AiService` verwendet:
```python
# Initialisierung
aiService = await AiService.create(serviceCenter)
# Submodule werden automatisch initialisiert
# aiService.responseParser
# aiService.intentAnalyzer
# aiService.contentExtractor
# etc.
```
## Migration
Die öffentliche API bleibt unverändert. Interne Methoden wurden in Submodule verschoben:
- `_extractSectionsFromResponse``responseParser.extractSectionsFromResponse`
- `_clarifyDocumentIntents``intentAnalyzer.clarifyDocumentIntents`
- `_extractAndPrepareContent``contentExtractor.extractAndPrepareContent`
- etc.

View file

@ -222,18 +222,6 @@ Respond with ONLY a JSON object in this exact format:
prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts, useCaseId
)
async def _defineKpisFromPrompt(
self,
userPrompt: str,
rawJsonString: Optional[str],
continuationContext: Dict[str, Any],
debugPrefix: str = "kpi"
) -> List[Dict[str, Any]]:
"""Delegate to AiCallLooper."""
return await self.aiCallLooper._defineKpisFromPrompt(
userPrompt, rawJsonString, continuationContext, debugPrefix
)
# JSON merging logic moved to subJsonResponseHandling.py
def _extractSectionsFromResponse(

View file

@ -0,0 +1,661 @@
================================================================================
JSON MERGE OPERATION #1
================================================================================
Timestamp: 2026-01-06T20:08:23.213372
INPUT:
Accumulated length: 33682 chars
New Fragment length: 27012 chars
Accumulated: 306 lines (showing first 5 and last 5)
{
"elements": [
{
"type": "table",
"content": {
... (296 lines omitted) ...
[" 26821", " 26833", " 26839", " 26849", " 26861", " 26863", " 26879", " 26881", " 26891", " 26893"],
[" 26903", " 26921", " 26927", " 26947", " 26951", " 26953", " 26959", " 26981", " 26987", " 26993"],
[" 27011", " 27017", " 27031", " 27043", " 27059", " 27061", " 27067", " 27073", " 27077", " 27091"],
[" 27103", " 27107", " 27109", " 27127", " 27143", " 27179", " 27191", " 27197", " 27211", " 27239"],
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
New Fragment: 248 lines (showing first 5 and last 5)
```json
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283", " 27299", " 27329", " 27337"],
[" 27361", " 27367", " 27397", " 27407", " 27409", " 27427", " 27431", " 27437", " 27449", " 27457"],
[" 27479", " 27481", " 27487", " 27509", " 27527", " 27529", " 27539", " 27541", " 27551", " 27581"],
[" 27583", " 27611", " 27617", " 27631", " 27647", " 27653", " 27673", " 27689", " 27691", " 27697"],
... (238 lines omitted) ...
}
}
]
}
```
Normalized Accumulated (33682 chars)
(showing first 5 and last 5 of 306 lines)
{
"elements": [
{
"type": "table",
"content": {
... (296 lines omitted) ...
[" 26821", " 26833", " 26839", " 26849", " 26861", " 26863", " 26879", " 26881", " 26891", " 26893"],
[" 26903", " 26921", " 26927", " 26947", " 26951", " 26953", " 26959", " 26981", " 26987", " 26993"],
[" 27011", " 27017", " 27031", " 27043", " 27059", " 27061", " 27067", " 27073", " 27077", " 27091"],
[" 27103", " 27107", " 27109", " 27127", " 27143", " 27179", " 27191", " 27197", " 27211", " 27239"],
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
Normalized New Fragment (27000 chars)
(showing first 5 and last 5 of 246 lines)
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283", " 27299", " 27329", " 27337"],
[" 27361", " 27367", " 27397", " 27407", " 27409", " 27427", " 27431", " 27437", " 27449", " 27457"],
[" 27479", " 27481", " 27487", " 27509", " 27527", " 27529", " 27539", " 27541", " 27551", " 27581"],
[" 27583", " 27611", " 27617", " 27631", " 27647", " 27653", " 27673", " 27689", " 27691", " 27697"],
[" 27701", " 27733", " 27737", " 27739", " 27743", " 27749", " 27751", " 27763", " 27767", " 27773"],
... (236 lines omitted) ...
]
}
}
]
}
STEP: PHASE 1
Description: Finding overlap between JSON strings
⏳ In progress...
Overlap Detection (string (exact)):
Overlap length: 70
✅ Found overlap of 70 chars
Accumulated suffix (COMPLETE, 70 chars):
============================================================================
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
============================================================================
Fragment prefix (70 chars, 1 lines)
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
Overlap found (70 chars):
Accumulated suffix: [" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
Fragment prefix: [" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283",
STEP: PHASE 2
Description: Merging strings (overlap: 70 chars)
⏳ In progress...
Merged String (60612 chars)
(showing first 5 and last 5 of 551 lines)
{
"elements": [
{
"type": "table",
"content": {
... (541 lines omitted) ...
]
}
}
]
}
STEP: PHASE 3
Description: Returning merged string (may be unclosed)
⏳ In progress...
Returning merged string (preserving incomplete element at end for next iteration)
================================================================================
MERGE RESULT: ✅ SUCCESS
================================================================================
Final result length: 60612 chars
Final result (COMPLETE):
================================================================================
{
"elements": [
{
"type": "table",
"content": {
"headers": ["Spalte 1", "Spalte 2", "Spalte 3", "Spalte 4", "Spalte 5", "Spalte 6", "Spalte 7", "Spalte 8", "Spalte 9", "Spalte 10"],
"rows": [
[" 2", " 3", " 5", " 7", " 11", " 13", " 17", " 19", " 23", " 29"],
[" 31", " 37", " 41", " 43", " 47", " 53", " 59", " 61", " 67", " 71"],
[" 73", " 79", " 83", " 89", " 97", " 101", " 103", " 107", " 109", " 113"],
[" 127", " 131", " 137", " 139", " 149", " 151", " 157", " 163", " 167", " 173"],
[" 179", " 181", " 191", " 193", " 197", " 199", " 211", " 223", " 227", " 229"],
[" 233", " 239", " 241", " 251", " 257", " 263", " 269", " 271", " 277", " 281"],
[" 283", " 293", " 307", " 311", " 313", " 317", " 331", " 337", " 347", " 349"],
[" 353", " 359", " 367", " 373", " 379", " 383", " 389", " 397", " 401", " 409"],
[" 419", " 421", " 431", " 433", " 439", " 443", " 449", " 457", " 461", " 463"],
[" 467", " 479", " 487", " 491", " 499", " 503", " 509", " 521", " 523", " 541"],
[" 547", " 557", " 563", " 569", " 571", " 577", " 587", " 593", " 599", " 601"],
[" 607", " 613", " 617", " 619", " 631", " 641", " 643", " 647", " 653", " 659"],
[" 661", " 673", " 677", " 683", " 691", " 701", " 709", " 719", " 727", " 733"],
[" 739", " 743", " 751", " 757", " 761", " 769", " 773", " 787", " 797", " 809"],
[" 811", " 821", " 823", " 827", " 829", " 839", " 853", " 857", " 859", " 863"],
[" 877", " 881", " 883", " 887", " 907", " 911", " 919", " 929", " 937", " 941"],
[" 947", " 953", " 967", " 971", " 977", " 983", " 991", " 997", " 1009", " 1013"],
[" 1019", " 1021", " 1031", " 1033", " 1039", " 1049", " 1051", " 1061", " 1063", " 1069"],
[" 1087", " 1091", " 1093", " 1097", " 1103", " 1109", " 1117", " 1123", " 1129", " 1151"],
[" 1153", " 1163", " 1171", " 1181", " 1187", " 1193", " 1201", " 1213", " 1217", " 1223"],
[" 1229", " 1231", " 1237", " 1249", " 1259", " 1277", " 1279", " 1283", " 1289", " 1291"],
[" 1297", " 1301", " 1303", " 1307", " 1319", " 1321", " 1327", " 1361", " 1367", " 1373"],
[" 1381", " 1399", " 1409", " 1423", " 1427", " 1429", " 1433", " 1439", " 1447", " 1451"],
[" 1453", " 1459", " 1471", " 1481", " 1483", " 1487", " 1489", " 1493", " 1499", " 1511"],
[" 1523", " 1531", " 1543", " 1549", " 1553", " 1559", " 1567", " 1571", " 1579", " 1583"],
[" 1597", " 1601", " 1607", " 1609", " 1613", " 1619", " 1621", " 1627", " 1637", " 1657"],
[" 1663", " 1667", " 1669", " 1693", " 1697", " 1699", " 1709", " 1721", " 1723", " 1733"],
[" 1741", " 1747", " 1753", " 1759", " 1777", " 1783", " 1787", " 1789", " 1801", " 1811"],
[" 1823", " 1831", " 1847", " 1861", " 1867", " 1871", " 1873", " 1877", " 1879", " 1889"],
[" 1901", " 1907", " 1913", " 1931", " 1933", " 1949", " 1951", " 1973", " 1979", " 1987"],
[" 1993", " 1997", " 1999", " 2003", " 2011", " 2017", " 2027", " 2029", " 2039", " 2053"],
[" 2063", " 2069", " 2081", " 2083", " 2087", " 2089", " 2099", " 2111", " 2113", " 2129"],
[" 2131", " 2137", " 2141", " 2143", " 2153", " 2161", " 2179", " 2203", " 2207", " 2213"],
[" 2221", " 2237", " 2239", " 2243", " 2251", " 2267", " 2269", " 2273", " 2281", " 2287"],
[" 2293", " 2297", " 2309", " 2311", " 2333", " 2339", " 2341", " 2347", " 2351", " 2357"],
[" 2371", " 2377", " 2381", " 2383", " 2389", " 2393", " 2399", " 2411", " 2417", " 2423"],
[" 2437", " 2441", " 2447", " 2459", " 2467", " 2473", " 2477", " 2503", " 2521", " 2531"],
[" 2539", " 2543", " 2549", " 2551", " 2557", " 2579", " 2591", " 2593", " 2609", " 2617"],
[" 2621", " 2633", " 2647", " 2657", " 2659", " 2663", " 2671", " 2677", " 2683", " 2687"],
[" 2689", " 2693", " 2699", " 2707", " 2711", " 2713", " 2719", " 2729", " 2731", " 2741"],
[" 2749", " 2753", " 2767", " 2777", " 2789", " 2791", " 2797", " 2801", " 2803", " 2819"],
[" 2833", " 2837", " 2843", " 2851", " 2857", " 2861", " 2879", " 2887", " 2897", " 2903"],
[" 2909", " 2917", " 2927", " 2939", " 2953", " 2957", " 2963", " 2969", " 2971", " 2999"],
[" 3001", " 3011", " 3019", " 3023", " 3037", " 3041", " 3049", " 3061", " 3067", " 3079"],
[" 3083", " 3089", " 3109", " 3119", " 3121", " 3137", " 3163", " 3167", " 3169", " 3181"],
[" 3187", " 3191", " 3203", " 3209", " 3217", " 3221", " 3229", " 3251", " 3253", " 3257"],
[" 3259", " 3271", " 3299", " 3301", " 3307", " 3313", " 3319", " 3323", " 3329", " 3331"],
[" 3343", " 3347", " 3359", " 3361", " 3371", " 3373", " 3389", " 3391", " 3407", " 3413"],
[" 3433", " 3449", " 3457", " 3461", " 3463", " 3467", " 3469", " 3491", " 3499", " 3511"],
[" 3517", " 3527", " 3529", " 3533", " 3539", " 3541", " 3547", " 3557", " 3559", " 3571"],
[" 3581", " 3583", " 3593", " 3607", " 3613", " 3617", " 3623", " 3631", " 3637", " 3643"],
[" 3659", " 3671", " 3673", " 3677", " 3691", " 3697", " 3701", " 3709", " 3719", " 3727"],
[" 3733", " 3739", " 3761", " 3767", " 3769", " 3779", " 3793", " 3797", " 3803", " 3821"],
[" 3823", " 3833", " 3847", " 3851", " 3853", " 3863", " 3877", " 3881", " 3889", " 3907"],
[" 3911", " 3917", " 3919", " 3923", " 3929", " 3931", " 3943", " 3947", " 3967", " 3989"],
[" 4001", " 4003", " 4007", " 4013", " 4019", " 4021", " 4027", " 4049", " 4051", " 4057"],
[" 4073", " 4079", " 4091", " 4093", " 4099", " 4111", " 4127", " 4129", " 4133", " 4139"],
[" 4153", " 4157", " 4159", " 4177", " 4201", " 4211", " 4217", " 4219", " 4229", " 4231"],
[" 4241", " 4243", " 4253", " 4259", " 4261", " 4271", " 4273", " 4283", " 4289", " 4297"],
[" 4327", " 4337", " 4339", " 4349", " 4357", " 4363", " 4373", " 4391", " 4397", " 4409"],
[" 4421", " 4423", " 4441", " 4447", " 4451", " 4457", " 4463", " 4481", " 4483", " 4493"],
[" 4507", " 4513", " 4517", " 4519", " 4523", " 4547", " 4549", " 4561", " 4567", " 4583"],
[" 4591", " 4597", " 4603", " 4621", " 4637", " 4639", " 4643", " 4649", " 4651", " 4657"],
[" 4663", " 4673", " 4679", " 4691", " 4703", " 4721", " 4723", " 4729", " 4733", " 4751"],
[" 4759", " 4783", " 4787", " 4789", " 4793", " 4799", " 4801", " 4813", " 4817", " 4831"],
[" 4861", " 4871", " 4877", " 4889", " 4903", " 4909", " 4919", " 4931", " 4933", " 4937"],
[" 4943", " 4951", " 4957", " 4967", " 4969", " 4973", " 4987", " 4993", " 4999", " 5003"],
[" 5009", " 5011", " 5021", " 5023", " 5039", " 5051", " 5059", " 5077", " 5081", " 5087"],
[" 5099", " 5101", " 5107", " 5113", " 5119", " 5147", " 5153", " 5167", " 5171", " 5179"],
[" 5189", " 5197", " 5209", " 5227", " 5231", " 5233", " 5237", " 5261", " 5273", " 5279"],
[" 5281", " 5297", " 5303", " 5309", " 5323", " 5333", " 5347", " 5351", " 5381", " 5387"],
[" 5393", " 5399", " 5407", " 5413", " 5417", " 5419", " 5431", " 5437", " 5441", " 5443"],
[" 5449", " 5471", " 5477", " 5479", " 5483", " 5501", " 5503", " 5507", " 5519", " 5521"],
[" 5527", " 5531", " 5557", " 5563", " 5569", " 5573", " 5581", " 5591", " 5623", " 5639"],
[" 5641", " 5647", " 5651", " 5653", " 5657", " 5659", " 5669", " 5683", " 5689", " 5693"],
[" 5701", " 5711", " 5717", " 5737", " 5741", " 5743", " 5749", " 5779", " 5783", " 5791"],
[" 5801", " 5807", " 5813", " 5821", " 5827", " 5839", " 5843", " 5849", " 5851", " 5857"],
[" 5861", " 5867", " 5869", " 5879", " 5881", " 5897", " 5903", " 5923", " 5927", " 5939"],
[" 5953", " 5981", " 5987", " 6007", " 6011", " 6029", " 6037", " 6043", " 6047", " 6053"],
[" 6067", " 6073", " 6079", " 6089", " 6091", " 6101", " 6113", " 6121", " 6131", " 6133"],
[" 6143", " 6151", " 6163", " 6173", " 6197", " 6199", " 6203", " 6211", " 6217", " 6221"],
[" 6229", " 6247", " 6257", " 6263", " 6269", " 6271", " 6277", " 6287", " 6299", " 6301"],
[" 6311", " 6317", " 6323", " 6329", " 6337", " 6343", " 6353", " 6359", " 6361", " 6367"],
[" 6373", " 6379", " 6389", " 6397", " 6421", " 6427", " 6449", " 6451", " 6469", " 6473"],
[" 6481", " 6491", " 6521", " 6529", " 6547", " 6551", " 6553", " 6563", " 6569", " 6571"],
[" 6577", " 6581", " 6599", " 6607", " 6619", " 6637", " 6653", " 6659", " 6661", " 6673"],
[" 6679", " 6689", " 6691", " 6701", " 6703", " 6709", " 6719", " 6733", " 6737", " 6761"],
[" 6763", " 6779", " 6781", " 6791", " 6793", " 6803", " 6823", " 6827", " 6829", " 6833"],
[" 6841", " 6857", " 6863", " 6869", " 6871", " 6883", " 6899", " 6907", " 6911", " 6917"],
[" 6947", " 6949", " 6959", " 6961", " 6967", " 6971", " 6977", " 6983", " 6991", " 6997"],
[" 7001", " 7013", " 7019", " 7027", " 7039", " 7043", " 7057", " 7069", " 7079", " 7103"],
[" 7109", " 7121", " 7127", " 7129", " 7151", " 7159", " 7177", " 7187", " 7193", " 7207"],
[" 7211", " 7213", " 7219", " 7229", " 7237", " 7243", " 7247", " 7253", " 7283", " 7297"],
[" 7307", " 7309", " 7321", " 7331", " 7333", " 7349", " 7351", " 7369", " 7393", " 7411"],
[" 7417", " 7433", " 7451", " 7457", " 7459", " 7477", " 7481", " 7487", " 7489", " 7499"],
[" 7507", " 7517", " 7523", " 7529", " 7537", " 7541", " 7547", " 7549", " 7559", " 7561"],
[" 7573", " 7577", " 7583", " 7589", " 7591", " 7603", " 7607", " 7621", " 7639", " 7643"],
[" 7649", " 7669", " 7673", " 7681", " 7687", " 7691", " 7699", " 7703", " 7717", " 7723"],
[" 7727", " 7741", " 7753", " 7757", " 7759", " 7789", " 7793", " 7817", " 7823", " 7829"],
[" 7841", " 7853", " 7867", " 7873", " 7877", " 7879", " 7883", " 7901", " 7907", " 7919"],
[" 7927", " 7933", " 7937", " 7949", " 7951", " 7963", " 7993", " 8009", " 8011", " 8017"],
[" 8039", " 8053", " 8059", " 8069", " 8081", " 8087", " 8089", " 8093", " 8101", " 8111"],
[" 8117", " 8123", " 8147", " 8161", " 8167", " 8171", " 8179", " 8191", " 8209", " 8219"],
[" 8221", " 8231", " 8233", " 8237", " 8243", " 8263", " 8269", " 8273", " 8287", " 8291"],
[" 8293", " 8297", " 8311", " 8317", " 8329", " 8353", " 8363", " 8369", " 8377", " 8387"],
[" 8389", " 8419", " 8423", " 8429", " 8431", " 8443", " 8447", " 8461", " 8467", " 8501"],
[" 8513", " 8521", " 8527", " 8537", " 8539", " 8543", " 8563", " 8573", " 8581", " 8597"],
[" 8599", " 8609", " 8623", " 8627", " 8629", " 8641", " 8647", " 8663", " 8669", " 8677"],
[" 8681", " 8689", " 8693", " 8699", " 8707", " 8713", " 8719", " 8731", " 8737", " 8741"],
[" 8747", " 8753", " 8761", " 8779", " 8783", " 8803", " 8807", " 8819", " 8821", " 8831"],
[" 8837", " 8839", " 8849", " 8861", " 8863", " 8867", " 8887", " 8893", " 8923", " 8929"],
[" 8933", " 8941", " 8951", " 8963", " 8969", " 8971", " 8999", " 9001", " 9007", " 9011"],
[" 9013", " 9029", " 9041", " 9043", " 9049", " 9059", " 9067", " 9091", " 9103", " 9109"],
[" 9127", " 9133", " 9137", " 9151", " 9157", " 9161", " 9173", " 9181", " 9187", " 9199"],
[" 9203", " 9209", " 9221", " 9227", " 9239", " 9241", " 9257", " 9277", " 9281", " 9283"],
[" 9293", " 9311", " 9319", " 9323", " 9337", " 9341", " 9343", " 9349", " 9371", " 9377"],
[" 9391", " 9397", " 9403", " 9413", " 9419", " 9421", " 9431", " 9433", " 9437", " 9439"],
[" 9461", " 9463", " 9467", " 9473", " 9479", " 9491", " 9497", " 9511", " 9521", " 9533"],
[" 9539", " 9547", " 9551", " 9587", " 9601", " 9613", " 9619", " 9623", " 9629", " 9631"],
[" 9643", " 9649", " 9661", " 9677", " 9679", " 9689", " 9697", " 9719", " 9721", " 9733"],
[" 9739", " 9743", " 9749", " 9767", " 9769", " 9781", " 9787", " 9791", " 9803", " 9811"],
[" 9817", " 9829", " 9833", " 9839", " 9851", " 9857", " 9859", " 9871", " 9883", " 9887"],
[" 9901", " 9907", " 9923", " 9929", " 9931", " 9941", " 9949", " 9967", " 9973", " 10007"],
[" 10009", " 10037", " 10039", " 10061", " 10067", " 10069", " 10079", " 10091", " 10093", " 10099"],
[" 10103", " 10111", " 10133", " 10139", " 10141", " 10151", " 10159", " 10163", " 10169", " 10177"],
[" 10181", " 10193", " 10211", " 10223", " 10243", " 10247", " 10253", " 10259", " 10267", " 10271"],
[" 10273", " 10289", " 10301", " 10303", " 10313", " 10321", " 10331", " 10333", " 10337", " 10343"],
[" 10357", " 10369", " 10391", " 10399", " 10427", " 10429", " 10433", " 10453", " 10457", " 10459"],
[" 10463", " 10477", " 10487", " 10499", " 10501", " 10513", " 10529", " 10531", " 10559", " 10567"],
[" 10589", " 10597", " 10601", " 10607", " 10613", " 10627", " 10631", " 10639", " 10651", " 10657"],
[" 10663", " 10667", " 10687", " 10691", " 10709", " 10711", " 10723", " 10729", " 10733", " 10739"],
[" 10753", " 10771", " 10781", " 10789", " 10799", " 10831", " 10837", " 10847", " 10853", " 10859"],
[" 10861", " 10867", " 10883", " 10889", " 10891", " 10903", " 10909", " 10937", " 10939", " 10949"],
[" 10957", " 10973", " 10979", " 10987", " 10993", " 11003", " 11027", " 11047", " 11057", " 11059"],
[" 11069", " 11071", " 11083", " 11087", " 11093", " 11113", " 11117", " 11119", " 11131", " 11149"],
[" 11159", " 11161", " 11171", " 11173", " 11177", " 11197", " 11213", " 11239", " 11243", " 11251"],
[" 11257", " 11261", " 11273", " 11279", " 11287", " 11299", " 11311", " 11317", " 11321", " 11329"],
[" 11351", " 11353", " 11369", " 11383", " 11393", " 11399", " 11411", " 11423", " 11437", " 11443"],
[" 11447", " 11467", " 11471", " 11483", " 11489", " 11491", " 11497", " 11503", " 11519", " 11527"],
[" 11549", " 11551", " 11579", " 11587", " 11593", " 11597", " 11617", " 11621", " 11633", " 11657"],
[" 11677", " 11681", " 11689", " 11699", " 11701", " 11717", " 11719", " 11731", " 11743", " 11777"],
[" 11779", " 11783", " 11789", " 11801", " 11807", " 11813", " 11821", " 11827", " 11831", " 11833"],
[" 11839", " 11863", " 11867", " 11887", " 11897", " 11903", " 11909", " 11923", " 11927", " 11933"],
[" 11939", " 11941", " 11953", " 11959", " 11969", " 11971", " 11981", " 11987", " 12007", " 12011"],
[" 12037", " 12041", " 12043", " 12049", " 12071", " 12073", " 12097", " 12101", " 12107", " 12109"],
[" 12113", " 12119", " 12143", " 12149", " 12157", " 12161", " 12163", " 12197", " 12203", " 12211"],
[" 12227", " 12239", " 12241", " 12251", " 12253", " 12263", " 12269", " 12277", " 12281", " 12289"],
[" 12301", " 12323", " 12329", " 12343", " 12347", " 12373", " 12377", " 12379", " 12391", " 12401"],
[" 12409", " 12413", " 12421", " 12433", " 12437", " 12451", " 12457", " 12473", " 12479", " 12487"],
[" 12491", " 12497", " 12503", " 12511", " 12517", " 12527", " 12539", " 12541", " 12547", " 12553"],
[" 12569", " 12577", " 12583", " 12589", " 12601", " 12611", " 12613", " 12619", " 12637", " 12641"],
[" 12647", " 12653", " 12659", " 12671", " 12689", " 12697", " 12703", " 12713", " 12721", " 12739"],
[" 12743", " 12757", " 12763", " 12781", " 12791", " 12799", " 12809", " 12821", " 12823", " 12829"],
[" 12841", " 12853", " 12889", " 12893", " 12899", " 12907", " 12911", " 12917", " 12919", " 12923"],
[" 12941", " 12953", " 12959", " 12967", " 12973", " 12979", " 12983", " 13001", " 13003", " 13007"],
[" 13009", " 13033", " 13037", " 13043", " 13049", " 13063", " 13093", " 13099", " 13103", " 13109"],
[" 13121", " 13127", " 13147", " 13151", " 13159", " 13163", " 13171", " 13177", " 13183", " 13187"],
[" 13217", " 13219", " 13229", " 13241", " 13249", " 13259", " 13267", " 13291", " 13297", " 13309"],
[" 13313", " 13327", " 13331", " 13337", " 13339", " 13367", " 13381", " 13397", " 13399", " 13411"],
[" 13417", " 13421", " 13441", " 13451", " 13457", " 13463", " 13469", " 13477", " 13487", " 13499"],
[" 13513", " 13523", " 13537", " 13553", " 13567", " 13577", " 13591", " 13597", " 13613", " 13619"],
[" 13627", " 13633", " 13649", " 13669", " 13679", " 13681", " 13687", " 13691", " 13693", " 13697"],
[" 13709", " 13711", " 13721", " 13723", " 13729", " 13751", " 13757", " 13759", " 13763", " 13781"],
[" 13789", " 13799", " 13807", " 13829", " 13831", " 13841", " 13859", " 13873", " 13877", " 13879"],
[" 13883", " 13901", " 13903", " 13907", " 13913", " 13921", " 13931", " 13933", " 13963", " 13967"],
[" 13997", " 13999", " 14009", " 14011", " 14029", " 14033", " 14051", " 14057", " 14071", " 14081"],
[" 14083", " 14087", " 14107", " 14143", " 14149", " 14153", " 14159", " 14173", " 14177", " 14197"],
[" 14207", " 14221", " 14243", " 14249", " 14251", " 14281", " 14293", " 14303", " 14321", " 14323"],
[" 14327", " 14341", " 14347", " 14369", " 14387", " 14389", " 14401", " 14407", " 14411", " 14419"],
[" 14423", " 14431", " 14437", " 14447", " 14449", " 14461", " 14479", " 14489", " 14503", " 14519"],
[" 14533", " 14537", " 14543", " 14549", " 14551", " 14557", " 14561", " 14563", " 14591", " 14593"],
[" 14621", " 14627", " 14629", " 14633", " 14639", " 14653", " 14657", " 14669", " 14683", " 14699"],
[" 14713", " 14717", " 14723", " 14731", " 14737", " 14741", " 14747", " 14753", " 14759", " 14767"],
[" 14771", " 14779", " 14783", " 14797", " 14813", " 14821", " 14827", " 14831", " 14843", " 14851"],
[" 14867", " 14869", " 14879", " 14887", " 14891", " 14897", " 14923", " 14929", " 14939", " 14947"],
[" 14951", " 14957", " 14969", " 14983", " 15013", " 15017", " 15031", " 15053", " 15061", " 15073"],
[" 15077", " 15083", " 15091", " 15101", " 15107", " 15121", " 15131", " 15137", " 15139", " 15149"],
[" 15161", " 15173", " 15187", " 15193", " 15199", " 15217", " 15227", " 15233", " 15241", " 15259"],
[" 15263", " 15269", " 15271", " 15277", " 15287", " 15289", " 15299", " 15307", " 15313", " 15319"],
[" 15329", " 15331", " 15349", " 15359", " 15361", " 15373", " 15377", " 15383", " 15391", " 15401"],
[" 15413", " 15427", " 15439", " 15443", " 15451", " 15461", " 15467", " 15473", " 15493", " 15497"],
[" 15511", " 15527", " 15541", " 15551", " 15559", " 15569", " 15581", " 15583", " 15601", " 15607"],
[" 15619", " 15629", " 15641", " 15643", " 15647", " 15649", " 15661", " 15667", " 15671", " 15679"],
[" 15683", " 15727", " 15731", " 15733", " 15737", " 15739", " 15749", " 15761", " 15767", " 15773"],
[" 15787", " 15791", " 15797", " 15803", " 15809", " 15817", " 15823", " 15859", " 15877", " 15881"],
[" 15887", " 15889", " 15901", " 15907", " 15913", " 15919", " 15923", " 15937", " 15959", " 15971"],
[" 15973", " 15991", " 16001", " 16007", " 16033", " 16057", " 16061", " 16063", " 16067", " 16069"],
[" 16073", " 16087", " 16091", " 16097", " 16103", " 16111", " 16127", " 16139", " 16141", " 16183"],
[" 16187", " 16189", " 16193", " 16217", " 16223", " 16229", " 16231", " 16249", " 16253", " 16267"],
[" 16273", " 16301", " 16319", " 16333", " 16339", " 16349", " 16361", " 16363", " 16369", " 16381"],
[" 16411", " 16417", " 16421", " 16427", " 16433", " 16447", " 16451", " 16453", " 16477", " 16481"],
[" 16487", " 16493", " 16519", " 16529", " 16547", " 16553", " 16561", " 16567", " 16573", " 16603"],
[" 16607", " 16619", " 16631", " 16633", " 16649", " 16651", " 16657", " 16661", " 16673", " 16691"],
[" 16693", " 16699", " 16703", " 16729", " 16741", " 16747", " 16759", " 16763", " 16787", " 16811"],
[" 16823", " 16829", " 16831", " 16843", " 16871", " 16879", " 16883", " 16889", " 16901", " 16903"],
[" 16921", " 16927", " 16931", " 16937", " 16943", " 16963", " 16979", " 16981", " 16987", " 16993"],
[" 17011", " 17021", " 17027", " 17029", " 17033", " 17041", " 17047", " 17053", " 17077", " 17093"],
[" 17099", " 17107", " 17117", " 17123", " 17137", " 17159", " 17167", " 17183", " 17189", " 17191"],
[" 17203", " 17207", " 17209", " 17231", " 17239", " 17257", " 17291", " 17293", " 17299", " 17317"],
[" 17321", " 17327", " 17333", " 17341", " 17351", " 17359", " 17377", " 17383", " 17387", " 17389"],
[" 17393", " 17401", " 17417", " 17419", " 17431", " 17443", " 17449", " 17467", " 17471", " 17477"],
[" 17483", " 17489", " 17491", " 17497", " 17509", " 17519", " 17539", " 17551", " 17569", " 17573"],
[" 17579", " 17581", " 17597", " 17599", " 17609", " 17623", " 17627", " 17657", " 17659", " 17669"],
[" 17681", " 17683", " 17707", " 17713", " 17729", " 17737", " 17747", " 17749", " 17761", " 17783"],
[" 17789", " 17791", " 17807", " 17827", " 17837", " 17839", " 17851", " 17863", " 17881", " 17891"],
[" 17903", " 17909", " 17911", " 17921", " 17923", " 17929", " 17939", " 17957", " 17959", " 17971"],
[" 17977", " 17981", " 17987", " 17989", " 18013", " 18041", " 18043", " 18047", " 18049", " 18059"],
[" 18061", " 18077", " 18089", " 18097", " 18119", " 18121", " 18127", " 18131", " 18133", " 18143"],
[" 18149", " 18169", " 18181", " 18191", " 18199", " 18211", " 18217", " 18223", " 18229", " 18233"],
[" 18251", " 18253", " 18257", " 18269", " 18287", " 18289", " 18301", " 18307", " 18311", " 18313"],
[" 18329", " 18341", " 18353", " 18367", " 18371", " 18379", " 18397", " 18401", " 18413", " 18427"],
[" 18433", " 18439", " 18443", " 18451", " 18457", " 18461", " 18481", " 18493", " 18503", " 18517"],
[" 18521", " 18523", " 18539", " 18541", " 18553", " 18583", " 18587", " 18593", " 18617", " 18637"],
[" 18661", " 18671", " 18679", " 18691", " 18701", " 18713", " 18719", " 18731", " 18743", " 18749"],
[" 18757", " 18773", " 18787", " 18793", " 18797", " 18803", " 18839", " 18859", " 18869", " 18899"],
[" 18911", " 18913", " 18917", " 18919", " 18947", " 18959", " 18973", " 18979", " 19001", " 19009"],
[" 19013", " 19031", " 19037", " 19051", " 19069", " 19073", " 19079", " 19081", " 19087", " 19121"],
[" 19139", " 19141", " 19157", " 19163", " 19181", " 19183", " 19207", " 19211", " 19213", " 19219"],
[" 19231", " 19237", " 19249", " 19259", " 19267", " 19273", " 19289", " 19301", " 19309", " 19319"],
[" 19333", " 19373", " 19379", " 19381", " 19387", " 19391", " 19403", " 19417", " 19421", " 19423"],
[" 19427", " 19429", " 19433", " 19441", " 19447", " 19457", " 19463", " 19469", " 19471", " 19477"],
[" 19483", " 19501", " 19507", " 19531", " 19541", " 19543", " 19553", " 19559", " 19571", " 19577"],
[" 19583", " 19597", " 19603", " 19609", " 19661", " 19681", " 19687", " 19697", " 19699", " 19709"],
[" 19717", " 19727", " 19739", " 19751", " 19753", " 19759", " 19763", " 19777", " 19793", " 19801"],
[" 19813", " 19819", " 19841", " 19843", " 19853", " 19861", " 19867", " 19889", " 19891", " 19913"],
[" 19919", " 19927", " 19937", " 19949", " 19961", " 19963", " 19973", " 19979", " 19991", " 19993"],
[" 19997", " 20011", " 20021", " 20023", " 20029", " 20047", " 20051", " 20063", " 20071", " 20089"],
[" 20101", " 20107", " 20113", " 20117", " 20123", " 20129", " 20143", " 20147", " 20149", " 20161"],
[" 20173", " 20177", " 20183", " 20201", " 20219", " 20231", " 20233", " 20249", " 20261", " 20269"],
[" 20287", " 20297", " 20323", " 20327", " 20333", " 20341", " 20347", " 20353", " 20357", " 20359"],
[" 20369", " 20389", " 20393", " 20399", " 20407", " 20411", " 20431", " 20441", " 20443", " 20477"],
[" 20479", " 20483", " 20507", " 20509", " 20521", " 20533", " 20543", " 20549", " 20551", " 20563"],
[" 20593", " 20599", " 20611", " 20627", " 20639", " 20641", " 20663", " 20681", " 20693", " 20707"],
[" 20717", " 20719", " 20731", " 20743", " 20747", " 20749", " 20753", " 20759", " 20771", " 20773"],
[" 20789", " 20807", " 20809", " 20849", " 20857", " 20873", " 20879", " 20887", " 20897", " 20899"],
[" 20903", " 20921", " 20929", " 20939", " 20947", " 20959", " 20963", " 20981", " 20983", " 21001"],
[" 21011", " 21013", " 21017", " 21019", " 21023", " 21031", " 21059", " 21061", " 21067", " 21089"],
[" 21101", " 21107", " 21121", " 21139", " 21143", " 21149", " 21157", " 21163", " 21169", " 21179"],
[" 21187", " 21191", " 21193", " 21211", " 21221", " 21227", " 21247", " 21269", " 21277", " 21283"],
[" 21313", " 21317", " 21319", " 21323", " 21341", " 21347", " 21377", " 21379", " 21383", " 21391"],
[" 21397", " 21401", " 21407", " 21419", " 21433", " 21467", " 21481", " 21487", " 21491", " 21493"],
[" 21499", " 21503", " 21517", " 21521", " 21523", " 21529", " 21557", " 21559", " 21563", " 21569"],
[" 21577", " 21587", " 21589", " 21599", " 21601", " 21611", " 21613", " 21617", " 21647", " 21649"],
[" 21661", " 21673", " 21683", " 21701", " 21713", " 21727", " 21737", " 21739", " 21751", " 21757"],
[" 21767", " 21773", " 21787", " 21799", " 21803", " 21817", " 21821", " 21839", " 21841", " 21851"],
[" 21859", " 21863", " 21871", " 21881", " 21893", " 21911", " 21929", " 21937", " 21943", " 21961"],
[" 21977", " 21991", " 21997", " 22003", " 22013", " 22027", " 22031", " 22037", " 22039", " 22051"],
[" 22063", " 22067", " 22073", " 22079", " 22091", " 22093", " 22109", " 22111", " 22123", " 22129"],
[" 22133", " 22147", " 22153", " 22157", " 22159", " 22171", " 22189", " 22193", " 22229", " 22247"],
[" 22259", " 22271", " 22273", " 22277", " 22279", " 22283", " 22291", " 22303", " 22307", " 22343"],
[" 22349", " 22367", " 22369", " 22381", " 22391", " 22397", " 22409", " 22433", " 22441", " 22447"],
[" 22453", " 22469", " 22481", " 22483", " 22501", " 22511", " 22531", " 22541", " 22543", " 22549"],
[" 22567", " 22571", " 22573", " 22613", " 22619", " 22621", " 22637", " 22639", " 22643", " 22651"],
[" 22669", " 22679", " 22691", " 22697", " 22699", " 22709", " 22717", " 22721", " 22727", " 22739"],
[" 22741", " 22751", " 22769", " 22777", " 22783", " 22787", " 22807", " 22811", " 22817", " 22853"],
[" 22859", " 22861", " 22871", " 22877", " 22901", " 22907", " 22921", " 22937", " 22943", " 22961"],
[" 22963", " 22973", " 22993", " 23003", " 23011", " 23017", " 23021", " 23027", " 23029", " 23039"],
[" 23041", " 23053", " 23057", " 23059", " 23063", " 23071", " 23081", " 23087", " 23099", " 23117"],
[" 23131", " 23143", " 23159", " 23167", " 23173", " 23189", " 23197", " 23201", " 23203", " 23209"],
[" 23227", " 23251", " 23269", " 23279", " 23291", " 23293", " 23297", " 23311", " 23321", " 23327"],
[" 23333", " 23339", " 23357", " 23369", " 23371", " 23399", " 23417", " 23431", " 23447", " 23459"],
[" 23473", " 23497", " 23509", " 23531", " 23537", " 23539", " 23549", " 23557", " 23561", " 23563"],
[" 23567", " 23581", " 23593", " 23599", " 23603", " 23609", " 23623", " 23627", " 23629", " 23633"],
[" 23663", " 23669", " 23671", " 23677", " 23687", " 23689", " 23719", " 23741", " 23743", " 23747"],
[" 23753", " 23761", " 23767", " 23773", " 23789", " 23801", " 23813", " 23819", " 23827", " 23831"],
[" 23833", " 23857", " 23869", " 23873", " 23879", " 23887", " 23893", " 23899", " 23909", " 23911"],
[" 23917", " 23929", " 23957", " 23971", " 23977", " 23981", " 23993", " 24001", " 24007", " 24019"],
[" 24023", " 24029", " 24043", " 24049", " 24061", " 24071", " 24077", " 24083", " 24091", " 24097"],
[" 24103", " 24107", " 24109", " 24113", " 24121", " 24133", " 24137", " 24151", " 24169", " 24179"],
[" 24181", " 24197", " 24203", " 24223", " 24229", " 24239", " 24247", " 24251", " 24281", " 24317"],
[" 24329", " 24337", " 24359", " 24371", " 24373", " 24379", " 24391", " 24407", " 24413", " 24419"],
[" 24421", " 24439", " 24443", " 24469", " 24473", " 24481", " 24499", " 24509", " 24517", " 24527"],
[" 24533", " 24547", " 24551", " 24571", " 24593", " 24611", " 24623", " 24631", " 24659", " 24671"],
[" 24677", " 24683", " 24691", " 24697", " 24709", " 24733", " 24749", " 24763", " 24767", " 24781"],
[" 24793", " 24799", " 24809", " 24821", " 24841", " 24847", " 24851", " 24859", " 24877", " 24889"],
[" 24907", " 24917", " 24919", " 24923", " 24943", " 24953", " 24967", " 24971", " 24977", " 24979"],
[" 24989", " 25013", " 25031", " 25033", " 25037", " 25057", " 25073", " 25087", " 25097", " 25111"],
[" 25117", " 25121", " 25127", " 25147", " 25153", " 25163", " 25169", " 25171", " 25183", " 25189"],
[" 25219", " 25229", " 25237", " 25243", " 25247", " 25253", " 25261", " 25301", " 25303", " 25307"],
[" 25309", " 25321", " 25339", " 25343", " 25349", " 25357", " 25367", " 25373", " 25391", " 25409"],
[" 25411", " 25423", " 25439", " 25447", " 25453", " 25457", " 25463", " 25469", " 25471", " 25523"],
[" 25537", " 25541", " 25561", " 25577", " 25579", " 25583", " 25589", " 25601", " 25603", " 25609"],
[" 25621", " 25633", " 25639", " 25643", " 25657", " 25667", " 25673", " 25679", " 25693", " 25703"],
[" 25717", " 25733", " 25741", " 25747", " 25759", " 25763", " 25771", " 25793", " 25799", " 25801"],
[" 25819", " 25841", " 25847", " 25849", " 25867", " 25873", " 25889", " 25903", " 25913", " 25919"],
[" 25931", " 25933", " 25939", " 25943", " 25951", " 25969", " 25981", " 25997", " 25999", " 26003"],
[" 26017", " 26021", " 26029", " 26041", " 26053", " 26083", " 26099", " 26107", " 26111", " 26113"],
[" 26119", " 26141", " 26153", " 26161", " 26171", " 26177", " 26183", " 26189", " 26203", " 26209"],
[" 26227", " 26237", " 26249", " 26251", " 26261", " 26263", " 26267", " 26293", " 26297", " 26309"],
[" 26317", " 26321", " 26339", " 26347", " 26357", " 26371", " 26387", " 26393", " 26399", " 26407"],
[" 26417", " 26423", " 26431", " 26437", " 26449", " 26459", " 26479", " 26489", " 26497", " 26501"],
[" 26513", " 26539", " 26557", " 26561", " 26573", " 26591", " 26597", " 26627", " 26633", " 26641"],
[" 26647", " 26669", " 26681", " 26683", " 26687", " 26693", " 26699", " 26701", " 26711", " 26713"],
[" 26717", " 26723", " 26729", " 26731", " 26737", " 26759", " 26777", " 26783", " 26801", " 26813"],
[" 26821", " 26833", " 26839", " 26849", " 26861", " 26863", " 26879", " 26881", " 26891", " 26893"],
[" 26903", " 26921", " 26927", " 26947", " 26951", " 26953", " 26959", " 26981", " 26987", " 26993"],
[" 27011", " 27017", " 27031", " 27043", " 27059", " 27061", " 27067", " 27073", " 27077", " 27091"],
[" 27103", " 27107", " 27109", " 27127", " 27143", " 27179", " 27191", " 27197", " 27211", " 27239"],
[" 27241", " 27253", " 27259", " 27271", " 27277", " 27281", " 27283", " 27299", " 27329", " 27337"],
[" 27361", " 27367", " 27397", " 27407", " 27409", " 27427", " 27431", " 27437", " 27449", " 27457"],
[" 27479", " 27481", " 27487", " 27509", " 27527", " 27529", " 27539", " 27541", " 27551", " 27581"],
[" 27583", " 27611", " 27617", " 27631", " 27647", " 27653", " 27673", " 27689", " 27691", " 27697"],
[" 27701", " 27733", " 27737", " 27739", " 27743", " 27749", " 27751", " 27763", " 27767", " 27773"],
[" 27779", " 27791", " 27793", " 27799", " 27803", " 27809", " 27817", " 27823", " 27827", " 27847"],
[" 27851", " 27883", " 27893", " 27901", " 27917", " 27919", " 27941", " 27943", " 27947", " 27953"],
[" 27961", " 27967", " 27983", " 27997", " 28001", " 28019", " 28027", " 28031", " 28051", " 28057"],
[" 28069", " 28081", " 28087", " 28097", " 28099", " 28109", " 28111", " 28123", " 28151", " 28163"],
[" 28181", " 28183", " 28201", " 28211", " 28219", " 28229", " 28277", " 28279", " 28283", " 28289"],
[" 28297", " 28307", " 28309", " 28319", " 28349", " 28351", " 28387", " 28393", " 28403", " 28409"],
[" 28411", " 28429", " 28433", " 28439", " 28447", " 28463", " 28477", " 28493", " 28499", " 28513"],
[" 28517", " 28537", " 28541", " 28547", " 28549", " 28559", " 28571", " 28573", " 28579", " 28591"],
[" 28597", " 28603", " 28607", " 28619", " 28621", " 28627", " 28631", " 28643", " 28649", " 28657"],
[" 28661", " 28663", " 28669", " 28687", " 28697", " 28703", " 28711", " 28723", " 28729", " 28751"],
[" 28753", " 28759", " 28771", " 28789", " 28793", " 28807", " 28813", " 28817", " 28837", " 28843"],
[" 28859", " 28867", " 28871", " 28879", " 28901", " 28909", " 28921", " 28927", " 28933", " 28949"],
[" 28961", " 28979", " 29009", " 29017", " 29021", " 29023", " 29027", " 29033", " 29059", " 29063"],
[" 29077", " 29101", " 29123", " 29129", " 29131", " 29137", " 29147", " 29153", " 29167", " 29173"],
[" 29179", " 29191", " 29201", " 29207", " 29209", " 29221", " 29231", " 29243", " 29251", " 29269"],
[" 29287", " 29297", " 29303", " 29311", " 29327", " 29333", " 29339", " 29347", " 29363", " 29383"],
[" 29387", " 29389", " 29399", " 29401", " 29411", " 29423", " 29429", " 29437", " 29443", " 29453"],
[" 29473", " 29483", " 29501", " 29527", " 29531", " 29537", " 29567", " 29569", " 29573", " 29581"],
[" 29587", " 29599", " 29611", " 29629", " 29633", " 29641", " 29663", " 29669", " 29671", " 29683"],
[" 29717", " 29723", " 29741", " 29753", " 29759", " 29761", " 29789", " 29803", " 29819", " 29833"],
[" 29837", " 29851", " 29863", " 29867", " 29873", " 29879", " 29881", " 29917", " 29921", " 29927"],
[" 29947", " 29959", " 29983", " 29989", " 30011", " 30013", " 30029", " 30047", " 30059", " 30071"],
[" 30089", " 30091", " 30097", " 30103", " 30109", " 30113", " 30119", " 30133", " 30137", " 30139"],
[" 30161", " 30169", " 30181", " 30187", " 30197", " 30203", " 30211", " 30223", " 30241", " 30253"],
[" 30259", " 30269", " 30271", " 30293", " 30307", " 30313", " 30319", " 30323", " 30341", " 30347"],
[" 30367", " 30389", " 30391", " 30403", " 30427", " 30431", " 30449", " 30467", " 30469", " 30491"],
[" 30493", " 30497", " 30509", " 30517", " 30529", " 30539", " 30553", " 30557", " 30559", " 30577"],
[" 30593", " 30631", " 30637", " 30643", " 30649", " 30661", " 30671", " 30677", " 30689", " 30697"],
[" 30703", " 30707", " 30713", " 30727", " 30757", " 30763", " 30773", " 30781", " 30803", " 30809"],
[" 30817", " 30829", " 30839", " 30841", " 30851", " 30853", " 30859", " 30869", " 30871", " 30881"],
[" 30893", " 30911", " 30931", " 30937", " 30941", " 30949", " 30971", " 30977", " 30983", " 31013"],
[" 31019", " 31033", " 31039", " 31051", " 31063", " 31069", " 31079", " 31081", " 31091", " 31121"],
[" 31123", " 31139", " 31147", " 31151", " 31153", " 31159", " 31177", " 31181", " 31183", " 31189"],
[" 31193", " 31219", " 31223", " 31231", " 31237", " 31247", " 31249", " 31253", " 31259", " 31267"],
[" 31271", " 31277", " 31307", " 31319", " 31321", " 31327", " 31333", " 31337", " 31357", " 31379"],
[" 31387", " 31391", " 31393", " 31397", " 31469", " 31477", " 31481", " 31489", " 31511", " 31513"],
[" 31517", " 31531", " 31541", " 31543", " 31547", " 31567", " 31573", " 31583", " 31601", " 31607"],
[" 31627", " 31643", " 31649", " 31657", " 31663", " 31667", " 31687", " 31699", " 31721", " 31723"],
[" 31727", " 31729", " 31741", " 31751", " 31769", " 31771", " 31793", " 31799", " 31817", " 31847"],
[" 31849", " 31859", " 31873", " 31883", " 31891", " 31907", " 31957", " 31963", " 31973", " 31981"],
[" 31991", " 32003", " 32009", " 32027", " 32029", " 32051", " 32057", " 32059", " 32063", " 32069"],
[" 32077", " 32083", " 32089", " 32099", " 32117", " 32119", " 32141", " 32143", " 32159", " 32173"],
[" 32183", " 32189", " 32191", " 32203", " 32213", " 32233", " 32237", " 32251", " 32257", " 32261"],
[" 32297", " 32299", " 32303", " 32309", " 32321", " 32323", " 32327", " 32341", " 32353", " 32359"],
[" 32363", " 32369", " 32371", " 32377", " 32381", " 32401", " 32411", " 32413", " 32423", " 32429"],
[" 32441", " 32443", " 32467", " 32479", " 32491", " 32497", " 32503", " 32507", " 32531", " 32533"],
[" 32537", " 32561", " 32563", " 32569", " 32573", " 32579", " 32587", " 32603", " 32609", " 32611"],
[" 32621", " 32633", " 32647", " 32653", " 32687", " 32693", " 32707", " 32713", " 32717", " 32719"],
[" 32749", " 32771", " 32779", " 32783", " 32789", " 32797", " 32801", " 32803", " 32831", " 32833"],
[" 32839", " 32843", " 32869", " 32887", " 32909", " 32911", " 32917", " 32933", " 32939", " 32941"],
[" 32957", " 32969", " 32971", " 32983", " 32987", " 32993", " 32999", " 33013", " 33023", " 33029"],
[" 33037", " 33049", " 33053", " 33071", " 33073", " 33083", " 33091", " 33107", " 33113", " 33119"],
[" 33149", " 33151", " 33161", " 33179", " 33181", " 33191", " 33199", " 33203", " 33211", " 33223"],
[" 33247", " 33287", " 33289", " 33301", " 33311", " 33317", " 33329", " 33331", " 33343", " 33347"],
[" 33349", " 33353", " 33359", " 33377", " 33391", " 33403", " 33409", " 33413", " 33427", " 33457"],
[" 33461", " 33469", " 33479", " 33487", " 33493", " 33503", " 33521", " 33529", " 33533", " 33547"],
[" 33563", " 33569", " 33577", " 33581", " 33587", " 33589", " 33599", " 33601", " 33613", " 33617"],
[" 33619", " 33623", " 33629", " 33637", " 33641", " 33647", " 33679", " 33703", " 33713", " 33721"],
[" 33739", " 33749", " 33751", " 33757", " 33767", " 33769", " 33773", " 33791", " 33797", " 33809"],
[" 33811", " 33827", " 33829", " 33851", " 33857", " 33863", " 33871", " 33889", " 33893", " 33911"],
[" 33923", " 33931", " 33937", " 33941", " 33961", " 33967", " 33997", " 34019", " 34031", " 34033"],
[" 34039", " 34057", " 34061", " 34123", " 34127", " 34129", " 34141", " 34147", " 34157", " 34159"],
[" 34171", " 34183", " 34211", " 34213", " 34217", " 34231", " 34253", " 34259", " 34261", " 34267"],
[" 34273", " 34283", " 34297", " 34301", " 34303", " 34313", " 34319", " 34327", " 34337", " 34351"],
[" 34361", " 34367", " 34369", " 34381", " 34403", " 34421", " 34429", " 34439", " 34457", " 34469"],
[" 34471", " 34483", " 34487", " 34499", " 34501", " 34511", " 34513", " 34519", " 34537", " 34543"],
[" 34549", " 34583", " 34589", " 34591", " 34603", " 34607", " 34613", " 34631", " 34649", " 34651"],
[" 34667", " 34673", " 34679", " 34687", " 34693", " 34703", " 34721", " 34729", " 34739", " 34747"],
[" 34757", " 34759", " 34763", " 34781", " 34807", " 34819", " 34841", " 34843", " 34847", " 34849"],
[" 34871", " 34877", " 34883", " 34897", " 34913", " 34919", " 34939", " 34949", " 34961", " 34963"],
[" 34981", " 35023", " 35027", " 35051", " 35053", " 35059", " 35069", " 35081", " 35083", " 35089"],
[" 35099", " 35107", " 35111", " 35117", " 35129", " 35141", " 35149", " 35153", " 35159", " 35171"],
[" 35201", " 35221", " 35227", " 35251", " 35257", " 35267", " 35279", " 35281", " 35291", " 35311"],
[" 35317", " 35323", " 35327", " 35339", " 35353", " 35363", " 35381", " 35393", " 35401", " 35407"],
[" 35419", " 35423", " 35437", " 35447", " 35449", " 35461", " 35491", " 35507", " 35509", " 35521"],
[" 35527", " 35531", " 35533", " 35537", " 35543", " 35569", " 35573", " 35591", " 35593", " 35597"],
[" 35603", " 35617", " 35671", " 35677", " 35729", " 35731", " 35747", " 35753", " 35759", " 35771"],
[" 35797", " 35801", " 35803", " 35809", " 35831", " 35837", " 35839", " 35851", " 35863", " 35869"],
[" 35879", " 35897", " 35899", " 35911", " 35923", " 35933", " 35951", " 35963", " 35969", " 35977"],
[" 35983", " 35993", " 35999", " 36007", " 36011", " 36013", " 36017", " 36037", " 36061", " 36067"],
[" 36073", " 36083", " 36097", " 36107", " 36109", " 36131", " 36137", " 36151", " 36161", " 36187"],
[" 36191", " 36209", " 36217", " 36229", " 36241", " 36251", " 36263", " 36269", " 36277", " 36293"],
[" 36299", " 36307", " 36313", " 36319", " 36341", " 36343", " 36353", " 36373", " 36383", " 36389"],
[" 36433", " 36451", " 36457", " 36467", " 36469", " 36473", " 36479", " 36493", " 36497", " 36523"],
[" 36527", " 36529", " 36541", " 36551", " 36559", " 36563", " 36571", " 36583", " 36587", " 36599"],
[" 36607", " 36629", " 36637", " 36643", " 36653", " 36671", " 36677", " 36683", " 36691", " 36697"],
[" 36709", " 36713", " 36721", " 36739", " 36749", " 36761", " 36767", " 36779", " 36781", " 36787"],
[" 36791", " 36793", " 36809", " 36821", " 36833", " 36847", " 36857", " 36871", " 36877", " 36887"],
[" 36899", " 36901", " 36913", " 36919", " 36923", " 36929", " 36931", " 36943", " 36947", " 36973"],
[" 36979", " 36997", " 37003", " 37013", " 37019", " 37021", " 37039", " 37049", " 37057", " 37061"],
[" 37087", " 37097", " 37117", " 37123", " 37139", " 37159", " 37171", " 37181", " 37189", " 37199"],
[" 37201", " 37217", " 37223", " 37243", " 37253", " 37273", " 37277", " 37307", " 37309", " 37313"],
[" 37321", " 37337", " 37339", " 37357", " 37361", " 37363", " 37369", " 37379", " 37397", " 37409"],
[" 37423", " 37441", " 37447", " 37463", " 37483", " 37489", " 37493", " 37501", " 37507", " 37511"],
[" 37517", " 37529", " 37537", " 37547", " 37549", " 37561", " 37567", " 37571", " 37573", " 37579"],
[" 37589", " 37591", " 37607", " 37619", " 37633", " 37643", " 37649", " 37657", " 37663", " 37691"],
[" 37693", " 37699", " 37717", " 37747", " 37781", " 37783", " 37799", " 37811", " 37813", " 37831"],
[" 37847", " 37853", " 37861", " 37871", " 37879", " 37889", " 37897", " 37907", " 37951", " 37957"],
[" 37963", " 37967", " 37987", " 37991", " 37993", " 37997", " 38011", " 38039", " 38047", " 38053"],
[" 38069", " 38083", " 38113", " 38119", " 38149", " 38153", " 38167", " 38177", " 38183", " 38189"],
[" 38197", " 38201", " 38219", " 38231", " 38237", " 38239", " 38261", " 38273", " 38281", " 38287"],
[" 38299", " 38303", " 38317", " 38321", " 38327", " 38329", " 38333", " 38351", " 38371", " 38377"],
[" 38393", " 38431", " 38447", " 38449", " 38453", " 38459", " 38461", " 38501", " 38543", " 38557"],
[" 38561", " 38567", " 38569", " 38593", " 38603", " 38609", " 38611", " 38629", " 38639", " 38651"],
[" 38653", " 38669", " 38671", " 38677", " 38693", " 38699", " 38707", " 38711", " 38713", " 38723"],
[" 38729", " 38737", " 38747", " 38749", " 38767", " 38783", " 38791", " 38803", " 38821", " 38833"],
[" 38839", " 38851", " 38861", " 38867", " 38873", " 38891", " 38903", " 38917", " 38921", " 38923"],
[" 38933", " 38953", " 38959", " 38971", " 38977", " 38993", " 39019", " 39023", " 39041", " 39043"],
[" 39047", " 39079", " 39089", " 39097", " 39103", " 39107", " 39113", " 39119", " 39133", " 39139"],
[" 39157", " 39161", " 39163", " 39181", " 39191", " 39199", " 39209", " 39217", " 39227", " 39229"],
[" 39233", " 39239", " 39241", " 39251", " 39293", " 39301", " 39313", " 39317", " 39323", " 39341"],
[" 39343", " 39359", " 39367", " 39371", " 39373", " 39383", " 39397", " 39409", " 39419", " 39439"],
[" 39443", " 39451", " 39461", " 39499", " 39503", " 39509", " 39511", " 39521", " 39541", " 39551"],
[" 39563", " 39569", " 39581", " 39607", " 39619", " 39623", " 39631", " 39659", " 39667", " 39671"],
[" 39679", " 39703", " 39709", " 39719", " 39727", " 39733", " 39749", " 39761", " 39769", " 39779"],
[" 39791", " 39799", " 39821", " 39827", " 39829", " 39839", " 39841", " 39847", " 39857", " 39863"],
[" 39869", " 39877", " 39883", " 39887", " 39901", " 39929", " 39937", " 39953", " 39971", " 39979"],
[" 39983", " 39989", " 40009", " 40013", " 40031", " 40037", " 40039", " 40063", " 40087", " 40093"],
[" 40099", " 40111", " 40123", " 40127", " 40129", " 40151", " 40153", " 40163", " 40169", " 40177"],
[" 40189", " 40193", " 40213", " 40231", " 40237", " 40241", " 40253", " 40277", " 40283", " 40289"],
[" 40343", " 40351", " 40357", " 40361", " 40387", " 40423", " 40427", " 40429", " 40433", " 40459"],
[" 40471", " 40483", " 40487", " 40493", " 40499", " 40507", " 40519", " 40529", " 40531", " 40543"],
[" 40559", " 40577", " 40583", " 40591", " 40597", " 40609", " 40627", " 40637", " 40639", " 40693"],
[" 40697", " 40699", " 40709", " 40739", " 40751", " 40759", " 40763", " 40771", " 40787", " 40801"],
[" 40813", " 40819", " 40823", " 40829", " 40841", " 40847", " 40849", " 40853", " 40867", " 40879"],
[" 40883", " 40897", " 40903", " 40927", " 40933", " 40939", " 40949", " 40961", " 40973", " 40993"],
[" 41011", " 41017", " 41023", " 41039", " 41047", " 41051", " 41057", " 41077", " 41081", " 41113"],
[" 41117", " 41131", " 41141", " 41143", " 41149", " 41161", " 41177", " 41179", " 41183", " 41189"],
[" 41201", " 41203", " 41213", " 41221", " 41227", " 41231", " 41233", " 41243", " 41257", " 41263"],
[" 41269", " 41281", " 41299", " 41333", " 41341", " 41351", " 41357", " 41381", " 41387", " 41389"],
[" 41399", " 41411", " 41413", " 41443", " 41453", " 41467", " 41479", " 41491", " 41507", " 41513"],
[" 41519", " 41521", " 41539", " 41543", " 41549", " 41579", " 41593", " 41597", " 41603", " 41609"],
[" 41611", " 41617", " 41621", " 41627", " 41641", " 41647", " 41651", " 41659", " 41669", " 41681"],
[" 41687", " 41719", " 41729", " 41737", " 41759", " 41761", " 41771", " 41777", " 41801", " 41809"],
[" 41813", " 41843", " 41849", " 41851", " 41863", " 41879", " 41887", " 41893", " 41897", " 41903"],
[" 41911", " 41927", " 41941", " 41947", " 41953", " 41957", " 41959", " 41969", " 41981", " 41983"],
[" 41999", " 42013", " 42017", " 42019", " 42023", " 42043", " 42061", " 42071", " 42073", " 42083"],
[" 42089", " 42101", " 42131", " 42139", " 42157", " 42169", " 42179", " 42181", " 42187", " 42193"],
[" 42197", " 42209", " 42221", " 42223", " 42227", " 42239", " 42257", " 42281", " 42283", " 42293"],
[" 42299", " 42307", " 42323", " 42331", " 42337", " 42349", " 42359", " 42373", " 42379", " 42391"],
[" 42397", " 42403", " 42407", " 42409", " 42433", " 42437", " 42443", " 42451", " 42457", " 42461"],
[" 42463", " 42467", " 42473", " 42487", " 42491", " 42499", " 42509", " 42533", " 42557", " 42569"],
[" 42571", " 42577", " 42589", " 42611", " 42641", " 42643", " 42649", " 42667", " 42677", " 42683"],
[" 42689", " 42697", " 42701", " 42703", " 42709", " 42719", " 42727", " 42737", " 42743", " 42751"],
[" 42767", " 42773", " 42787", " 42793", " 42797", " 42821", " 42829", " 42839", " 42841", " 42853"],
[" 42859", " 42863", " 42899", " 42901", " 42923", " 42929", " 42937", " 42943", " 42953", " 42961"],
[" 42967", " 42979", " 42989", " 43003", " 43013", " 43019", " 43037", " 43049", " 43051", " 43063"],
[" 43067", " 43093", " 43103", " 43117", " 43133", " 43151", " 43159", " 43177", " 43189", " 43201"],
[" 43207", " 43223", " 43237", " 43261", " 43271", " 43283", " 43291", " 43313", " 43319", " 43321"],
[" 43331", " 43391", " 43397", " 43399", " 43403", " 43411", " 43427", " 43441", " 43451", " 43457"],
[" 43481", " 43487", " 43499", " 43517", " 43541", " 43543", " 43573", " 43577", " 43579", " 43591"],
[" 43597", " 43607", " 43609", " 43613", " 43627", " 43633", " 43649", " 43651", " 43661", " 43669"],
[" 43691", " 43711", " 43717", " 43721", " 43753", " 43759", " 43777", " 43781", " 43783", " 43787"],
[" 43789", " 43793", " 43801", " 43853", " 43867", " 43889", " 43891", " 43913", " 43933", " 43943"],
[" 43951", " 43961", " 43963", " 43969", " 43973", " 43987", " 43991", " 43997", " 44017", " 44021"],
[" 44027", " 44029", " 44041", " 44053", " 44059", " 44071", " 44087", " 44089", " 44101", " 44111"],
[" 44119", " 44123", " 44129", " 44131", " 44159", " 44171", " 44179", " 44189", " 44201", " 44203"],
[" 44207", " 44221", " 44249", " 44257", " 44263", " 44267", " 44269", " 44273", " 44279", " 44281"],
[" 44293", " 44351", " 44357", " 44371", " 44381", " 44383", " 44389", " 44417", " 44449", " 44453"],
[" 44483", " 44491", " 44497", " 44501", " 44507", " 44519", " 44531", " 44533", " 44537", " 44543"],
[" 44549", " 44563", " 44579", " 44587", " 44617", " 44621", " 44623", " 44633", " 44641", " 44647"],
[" 44651", " 44657", " 44683", " 44687", " 44699", " 44701", " 44711", " 44729", " 44741", " 44753"],
[" 44771", " 44773", " 44777", " 44789", " 44797", " 44809", " 44819", " 44839", " 44843", " 44851"],
[" 44867", " 44879", " 44887", " 44893", " 44909", " 44917", " 44927", " 44939", " 44953", " 44959"],
[" 44963", " 44971", " 44983", " 44987", " 45007", " 45013", " 45053", " 45061", " 45077", " 45083"],
[" 45119", " 45121", " 45127", " 45131", " 45137", " 45139", " 45161", " 45179", " 45181", " 45191"],
[" 45197", " 45233", " 45247", " 45259", " 45263", " 45281", " 45289", " 45293", " 45307", " 45317"],
[" 45319", " 45329", " 45337", " 45341", " 45343", " 45361", " 45377", " 45389", " 45403", " 45413"],
[" 45427", " 45433", " 45439", " 45481", " 45491", " 45497", " 45503", " 45523", " 45533", " 45541"],
[" 45553", " 45557", " 45569", " 45587", " 45589", " 45599", " 45613", " 45631", " 45641", " 45659"],
[" 45667", " 45673", " 45677", " 45691", " 45697", " 45707", " 45737", " 45751", " 45757", " 45763"],
[" 45767", " 45779", " 45817", " 45821", " 45823", " 45827", " 45833", " 45841", " 45853", " 45863"],
[" 45869", " 45887", " 45893", " 45943", " 45949", " 45953", " 45959", " 45971", " 45979", " 45989"],
[" 46021", " 46027", " 46049", " 46051", " 46061", " 46073", " 46091", " 46093", " 46099", " 46103"],
[" 46133", " 46141", " 46147", " 46153", " 46171", " 46181", " 46183", " 46187", " 46199", " 46219"],
[" 46229", " 46237", " 46261", " 46271", " 46273", " 46279", " 46301", " 46307", " 46309", " 46327"],
[" 46337", " 46349", " 46351", " 46381", " 46399", " 46411", " 46439", " 46441", " 46447", " 46451"],
[" 46457", " 46471", " 46477", " 46489", " 46499", " 46507", " 46511", " 46523", " 46549", " 46559"],
[" 46567", " 46573", " 46589", " 46591", " 46601", " 46619", " 46633", " 46639", " 46643", " 46649"],
[" 46663", " 46679", " 46681", " 46687", " 46691", " 46703", " 46723", " 46727", " 46747", " 46751"],
[" 46757", " 46769", " 46771", " 46807", " 46811", " 46817", " 46819", " 46829", " 46831", " 46853"],
[" 46861", " 46867", " 46877", " 46889", " 46901", " 46919", " 46933", " 46957", " 46993", " 46997"],
[" 47017", " 47041", " 47051", " 47057", " 47059", " 47087", " 47093", " 47111", " 47119", " 47123"],
[" 47129", " 47137", " 47143", " 47147", " 47149", " 47161", " 47189", " 47207", " 47221", " 47237"],
[" 47251", " 47269", " 47279", " 47287", " 47293", " 47297", " 47303", " 47309", " 47317", " 47339"],
[" 47351", " 47353", " 47363", " 47381", " 47387", " 47389", " 47407", " 47417", " 47419", " 47431"],
[" 47441", " 47459", " 47491", " 47497", " 47501", " 47507", " 47513", " 47521", " 47527", " 47533"],
[" 47543", " 47563", " 47569", " 47581", " 47591", " 47599", " 47609", " 47623", " 47629", " 47639"],
[" 47653", " 47657", " 47659", " 47681", " 47699", " 47701", " 47711", " 47713", " 47717", " 47737"],
[" 47741", " 47743", " 47777", " 47779", " 47791", " 47797", " 47807", " 47809", " 47819", " 47837"],
[" 47843", " 47857", " 47869", " 47881", " 47903", " 47911", " 47917", " 47933", " 47939", " 47947"],
[" 47951", " 47963", " 47969", " 47977", " 47981", " 48017", " 48023", " 48029", " 48049", " 48073"],
[" 48079", " 48091", " 48109", " 48119", " 48121", " 48131", " 48157", " 48163", " 48179", " 48187"],
[" 48193", " 48197", " 48221", " 48239", " 48247", " 48259", " 48271", " 48281", " 48299", " 48311"],
[" 48313", " 48337", " 48341", " 48353", " 48371", " 48383", " 48397", " 48407", " 48409", " 48413"],
[" 48437", " 48449", " 48463", " 48473", " 48479", " 48481", " 48487", " 48491", " 48497", " 48523"],
[" 48527", " 48533", " 48539", " 48541", " 48563", " 48571", " 48589", " 48593", " 48611", " 48619"],
[" 48623", " 48647", " 48649", " 48661", " 48673", " 48677", " 48679", " 48731", " 48733", " 48751"],
[" 48757", " 48761", " 48767", " 48779", " 48781", " 48787", " 48799", " 48809", " 48817", " 48821"],
[" 48823", " 48847", " 48857", " 48869", " 48871", " 48883", " 48889", " 48907", " 48947", " 48953"],
[" 48973", " 48989", " 48991", " 49003", " 49009", " 49019", " 49031", " 49033", " 49037", " 49043"],
[" 49057", " 49069", " 49081", " 49103", " 49109", " 49117", " 49121", " 49123", " 49139", " 49157"],
[" 49169", " 49171", " 49177", " 49193", " 49199", " 49201", " 49207", " 49211", " 49223", " 49253"],
[" 49261", " 49277", " 49279", " 49297", " 49307", " 49331", " 49333", " 49339", " 49363", " 49367"],
[" 49369", " 49391", " 49393", " 49409", " 49411", " 49417", " 49429", " 49433", " 49451", " 49459"],
[" 49463", " 49477", " 49481", " 49499", " 49523", " 49529", " 49531", " 49537", " 49547", " 49549"],
[" 49559", " 49597", " 49603", " 49613", " 49627", " 49633", " 49639", " 49663", " 49667", " 49669"],
[" 49681", " 49697", " 49711", " 49727", " 49739", " 49741", " 49747", " 49757", " 49783", " 49787"],
[" 49789", " 49801", " 49807", " 49811", " 49823", " 49831", " 49843", " 49853", " 49871", " 49877"],
[" 49891", " 49919", " 49921", " 49927", " 49937", " 49939", " 49943", " 49957", " 49991", " 49993"],
[" 49999", " 50021", " 50023", " 50033", " 50047", " 50051", " 50053", " 50069", " 50077", " 50087"],
[" 50093", " 50101", " 50111", " 50119", " 50123", " 50129", " 50131", " 50147", " 50153", " 50159"],
[" 50177", " 50207", " 50221", " 50227", " 50231", " 50261", " 50263", " 50273", " 50287", " 50291"],
[" 50311", " 50321", " 50329", " 50333", " 50341", " 50359", " 50363", " 50377", " 50383", " 50387"],
[" 50411", " 50417", " 50423", " 50441", " 50459", " 50461", " 50497", " 50503", " 50513", " 50527"],
[" 50539", " 50543", " 50549", " 50551", " 50581", " 50587", " 50591", " 50593", " 50599", " 50627"],
[" 50647", " 50651", " 50671", " 50683", " 50707", " 50723", " 50741", " 50753", " 50767", " 50773"],
[" 50777", " 50789", " 50821", " 50833", " 50839", " 50849", " 50857", " 50867", " 50873", " 50891"],
[" 50893", " 50909", " 50923", " 50929", " 50951", " 50957", " 50969", " 50971", " 50989", " 50993"],
[" 51001", " 51031", " 51043", " 51047", " 51059", " 51061", " 51071", " 51109", " 51131", " 51133"],
[" 51137", " 51151", " 51157", " 51169", " 51193", " 51197", " 51199", " 51203", " 51217", " 51229"],
[" 51239", " 51241", " 51257", " 51263", " 51283", " 51287", " 51307", " 51329", " 51341", " 51343"],
[" 51347", " 51349", " 51361", " 51383", " 51407", " 51413", " 51419", " 51421", " 51427", " 51431"],
[" 51437", " 51439", " 51449", " 51461", " 51473", " 51479", " 51481", " 51487", " 51503", " 51511"],
[" 51517", " 51521", " 51539", " 51551", " 51563", " 51577", " 51581", " 51593", " 51599", " 51607"],
[" 51613", " 51631", " 51637", " 51647", " 51659", " 51673", " 51679", " 51683", " 51691", " 51713"],
[" 51719", " 51721", " 51749", " 51767", " 51769", " 51787", " 51797", " 51803", " 51817", " 51827"],
[" 51829", " 51839", " 51853", " 51859", " 51869", " 51871", " 51893", " 51899", " 51907", " 51913"],
[" 51929", " 51941", " 51949", " 51971", " 51973", " 51977", " 51991", " 52009", " 52021", " 52027"],
[" 52051", " 52057", " 52067", " 52069", " 52081", " 52103", " 52121", " 52127", " 52147", " 52153"],
[" 52163", " 52177", " 52181", " 52183", " 52189", " 52201", " 52223", " 52237", " 52249", " 52253"],
[" 52259", " 52267", " 52289", " 52291", " 52301", " 52313", " 52321", " 52361", " 52363", " 52369"],
[" 52379", " 52387", " 52391", " 52433", " 52453", " 52457", " 52489", " 52501", " 52511", " 52517"],
[" 52529", " 52541", " 52543", " 52553", " 52561", " 52567", " 52571", " 52579", " 52583", " 52609"],
[" 52627", " 52631", " 52639", " 52667", " 52673", " 52691", " 52697", " 52709", " 52711", " 52721"],
[" 52727", " 52733", " 52747", " 52757", " 52769", " 52783", " 52807", " 52813", " 52817"]
]
}
}
]
}
================================================================================

View file

@ -0,0 +1,239 @@
# AI Call Iteration Flow - JSON Merging System
This document describes the iteration flow for handling large JSON responses from AI that may be truncated and need to be merged across multiple iterations.
## Overview
When an AI response is too large, it may be truncated (cut) at an arbitrary point. The iteration system:
1. Detects incomplete JSON
2. Requests continuation from the AI
3. Merges the continuation with the existing JSON
4. Repeats until complete or max failures reached
---
## Key Variables
| Variable | Type | Purpose |
|----------|------|---------|
| `jsonBase` | `str \| None` | The merged JSON string (CUT version for overlap matching) |
| `candidateJson` | `str` | Temporary holder for merged result until validated |
| `lastValidCompletePart` | `str \| None` | Fallback - last successfully parsed CLOSED JSON |
| `lastOverlapContext` | `str` | Context for retry/continuation prompts |
| `lastHierarchyContextForPrompt` | `str` | Context for retry/continuation prompts |
| `mergeFailCount` | `int` | Global counter (max 3 failures) |
---
## Key Distinction: hierarchyContext vs completePart
| Field | Description | Use Case |
|-------|-------------|----------|
| `hierarchyContext` | **CUT JSON** - truncated at cut point | Used as `jsonBase` for merging with next AI fragment |
| `completePart` | **CLOSED JSON** - all structures properly closed | Used for validation, parsing, and fallback |
**Why this matters:**
- The next AI fragment starts with an **overlap** that matches the CUT point
- If we used `completePart` (closed), the overlap detection would FAIL
- We must use `hierarchyContext` (cut) so overlap matching works correctly
---
## Flow Steps
### Step 1: BUILD PROMPT
**Location:** `subAiCallLooping.py` lines 163-212
**Function:** `buildContinuationContext()` from `modules/shared/jsonUtils.py`
- **First iteration:** Use original prompt
- **Continuation:** `buildContinuationContext(allSections, lastRawResponse, ...)`
- Internally calls `getContexts(lastRawResponse)` to get overlap/hierarchy
- Builds continuation prompt with `overlapContext` + `hierarchyContextForPrompt`
### Step 2: CALL AI
**Location:** `subAiCallLooping.py` lines 214-299
**Function:** `self.aiService.callAi(request)`
- Returns `response.content` as `result`
- NOTE: Do NOT update `lastRawResponse` yet! (only after successful merge)
### Step 4: MERGE
**Location:** `subAiCallLooping.py` lines 338-396
**Function:** `JsonResponseHandler.mergeJsonStringsWithOverlap()` from `modules/services/serviceAi/subJsonResponseHandling.py`
```
IF first iteration (jsonBase is None):
→ candidateJson = result
ELSE:
→ mergedJsonString, hasOverlap = mergeJsonStringsWithOverlap(jsonBase, result)
IF hasOverlap = False (MERGE FAILED):
→ mergeFailCount++
→ If mergeFailCount >= 3: return lastValidCompletePart (fallback)
→ Else: continue (retry with unchanged jsonBase AND lastRawResponse!)
ELSE:
→ candidateJson = mergedJsonString (don't update jsonBase yet!)
→ lastRawResponse = candidateJson (ONLY after first iteration or successful merge!)
TRY DIRECT PARSE of candidateJson:
IF parse succeeds:
→ jsonBase = candidateJson (commit)
→ FINISHED! Return normalized result
ELSE:
→ Proceed to Step 5
```
### Step 5: GET CONTEXTS
**Location:** `subAiCallLooping.py` lines 420-427
**Function:** `getContexts()` from `modules/shared/jsonContinuation.py`
```python
contexts = getContexts(candidateJson)
```
Returns `JsonContinuationContexts`:
- `overlapContext`: `""` if JSON is complete (no cut point)
- `hierarchyContext`: CUT JSON (for merging with next fragment)
- `hierarchyContextForPrompt`: CUT JSON with budget limits (for prompts)
- `completePart`: CLOSED JSON (repaired if needed)
- `jsonParsingSuccess`: `True` if completePart is valid JSON
**Enhancement:** If original JSON is already complete → `overlapContext = ""`
This signals "JSON is complete, no more continuation needed"
### Step 6: DECIDE
**Location:** `subAiCallLooping.py` lines 429-528
#### Case A: `jsonParsingSuccess=true` AND `overlapContext=""`
**→ FINISHED**
- JSON is complete (no cut point)
- `jsonBase = contexts.completePart` (use CLOSED version for final result)
- Return `completePart` as result
#### Case B: `jsonParsingSuccess=true` AND `overlapContext!=""`
**→ CONTINUE to next iteration**
- JSON parseable but has cut point
- `jsonBase = contexts.hierarchyContext` ← **CUT version for next merge!**
- `lastValidCompletePart = contexts.completePart` ← **CLOSED version for fallback**
- Store contexts for next prompt
- `mergeFailCount = 0` (reset on success)
- `lastRawResponse = jsonBase`
- Continue to next iteration
#### Case C: `jsonParsingSuccess=false`
**→ RETRY with same prompt**
- Do NOT update `jsonBase` (keep previous valid state)
- `mergeFailCount++`
- If `mergeFailCount >= 3`: return `lastValidCompletePart` (fallback)
- Else: continue (retry with unchanged jsonBase/lastRawResponse)
---
## Flow Diagram
```
┌───────────────────────────────────────────────────────────────┐
│ ITERATION START │
└───────────────────────────┬───────────────────────────────────┘
┌───────────────────────────▼───────────────────────────────────┐
│ STEP 1: BUILD PROMPT │
│ - First: original prompt │
│ - Next: buildContinuationContext(lastRawResponse) │
└───────────────────────────┬───────────────────────────────────┘
┌───────────────────────────▼───────────────────────────────────┐
│ STEP 2: CALL AI → result │
└───────────────────────────┬───────────────────────────────────┘
┌───────────────────────────▼───────────────────────────────────┐
│ STEP 4: MERGE jsonBase + result → candidateJson │
└───────────────────────────┬───────────────────────────────────┘
┌────────────▼────────────┐
│ Merge OK? │
└────────────┬────────────┘
┌─────────────────────┼─────────────────────┐
│ NO │ YES │
▼ ▼ │
┌──────────────┐ ┌──────────────────┐ │
│ fails++ │ │ TRY DIRECT PARSE │ │
│ if >=3: │ │ of candidateJson │ │
│ RETURN │ └────────┬─────────┘ │
│ fallback │ │ │
│ else: RETRY │ ┌────────▼─────────┐ │
│ (continue) │ │ Parse OK? │ │
└──────────────┘ └────────┬─────────┘ │
│ │
┌─────────────────────┼─────────────────────┐
│ YES │ NO │
▼ ▼ │
┌──────────────┐ ┌──────────────────────────────┐
│ FINISHED ✓ │ │ STEP 5: getContexts() │
│ Return │ │ → jsonParsingSuccess │
│ normalized │ │ → overlapContext │
│ result │ └────────────┬─────────────────┘
└──────────────┘ │
┌────────────▼────────────────────┐
│ STEP 6: DECIDE │
└────────────┬────────────────────┘
┌────────────────────────────┼────────────────────────────┐
│ │ │
▼ ▼ ▼
┌───────────────────┐ ┌───────────────────────┐ ┌───────────────────┐
│ success=true │ │ success=true │ │ success=false │
│ overlap="" │ │ overlap!="" │ │ │
│ ───────────── │ │ ───────────────── │ │ ───────────── │
│ FINISHED ✓ │ │ CONTINUE │ │ RETRY │
│ │ │ │ │ │
│ jsonBase = │ │ jsonBase = │ │ jsonBase unchanged│
│ completePart │ │ hierarchyContext │ │ fails++ │
│ (CLOSED) │ │ (CUT for merge!) │ │ │
│ │ │ │ │ if >=3: fallback │
│ Return result │ │ fallback = │ │ else: retry │
│ │ │ completePart │ │ │
│ │ │ (CLOSED) │ │ │
│ │ │ │ │ │
│ │ │ Next iteration → │ │ │
└───────────────────┘ └───────────────────────┘ └───────────────────┘
```
---
## Files Involved
| File | Purpose |
|------|---------|
| `modules/services/serviceAi/subAiCallLooping.py` | Main iteration loop |
| `modules/shared/jsonContinuation.py` | `getContexts()` - context extraction & repair |
| `modules/shared/jsonUtils.py` | `buildContinuationContext()` - prompt building |
| `modules/services/serviceAi/subJsonResponseHandling.py` | `mergeJsonStringsWithOverlap()` |
| `modules/services/serviceAi/subJsonMerger.py` | `ModularJsonMerger` - actual merge logic |
| `modules/datamodels/datamodelAi.py` | `JsonContinuationContexts` model |
---
## Error Handling
### Merge Failures
- Max 3 consecutive failures allowed
- On failure: retry with unchanged `jsonBase` (previous valid state)
- After 3 failures: return `lastValidCompletePart` as fallback
### Parse Failures
- If `getContexts()` cannot produce valid JSON: increment fail counter
- Retry with same prompt (don't update jsonBase)
- After 3 failures: return `lastValidCompletePart` as fallback
### Fallback Strategy
- `lastValidCompletePart` stores the last successfully parsed CLOSED JSON
- Always available as fallback when things go wrong
- Ensures we return valid JSON even after multiple failures

View file

@ -7,17 +7,60 @@ Handles AI calls with looping and repair logic, including:
- Looping with JSON repair and continuation
- KPI definition and tracking
- Progress tracking and iteration management
FLOW LOGIC
VARIABLES:
- jsonBase: str (merged JSON so far, starts empty)
- lastValidCompletePart: str (fallback for failures)
- mergeFailCount: int = 0 (max 3)
FLOW:
1. BUILD PROMPT
- First: original prompt
- Next: buildContinuationContext(lastRawResponse)
2. CALL AI response fragment
4. MERGE jsonBase + response
FAILS: repeat prompt, fails++ (if >=3 return fallback)
SUCCEEDS: try parse
SUCCEEDS: FINISHED
FAILS: step 5
5. GET CONTEXTS (merge OK, parse failed)
getContexts(mergedJson)
- If no cut point: overlapContext = ""
- Store contexts for next iteration
6. DECIDE
jsonParsingSuccess=true AND overlapContext="":
FINISHED. return completePart
jsonParsingSuccess=true AND overlapContext!="":
CONTINUE, fails=0
ELSE: repeat prompt, fails++
"""
import json
import logging
from typing import Dict, Any, List, Optional, Callable
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, JsonAccumulationState
from modules.datamodels.datamodelAi import (
AiCallRequest, AiCallOptions
)
from modules.datamodels.datamodelExtraction import ContentPart
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
from modules.shared.jsonContinuation import getContexts
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
from modules.shared.jsonUtils import tryParseJson
from modules.shared.jsonUtils import closeJsonStructures
from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
logger = logging.getLogger(__name__)
@ -86,9 +129,18 @@ class AiCallLooper:
iteration = 0
allSections = [] # Accumulate all sections across iterations
lastRawResponse = None # Store last raw JSON response for continuation
documentMetadata = None # Store document metadata (title, filename) from first iteration
accumulationState = None # Track accumulation state for string accumulation
accumulatedDirectJson = [] # Accumulate JSON strings for direct return use cases (chapter_structure, code_structure)
# JSON Base Iteration System:
# - jsonBase: the merged JSON string (replaces accumulatedDirectJson array)
# - After each iteration, new response is merged with jsonBase
# - On merge success: check if complete, store contexts for next iteration
# - On merge fail: retry with same prompt, increment fails
jsonBase = None # Merged JSON string (starts None, set on first response)
# Merge fail tracking - stop after 3 consecutive merge failures
MAX_MERGE_FAILS = 3
mergeFailCount = 0 # Global counter for merge failures across entire loop
lastValidCompletePart = None # Store last successfully parsed completePart for fallback
# Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID)
parentOperationId = operationId # Use the parent's operationId directly
@ -112,29 +164,49 @@ class AiCallLooper:
# CRITICAL: Build continuation prompt if we have sections OR if we have a previous response (even if broken)
# This ensures continuation prompts are built even when JSON is so broken that no sections can be extracted
if (len(allSections) > 0 or lastRawResponse) and promptBuilder and promptArgs:
# Extract templateStructure and basePrompt from promptArgs (REQUIRED)
templateStructure = promptArgs.get("templateStructure")
if not templateStructure:
raise ValueError(
f"templateStructure is REQUIRED in promptArgs for use case '{useCaseId}'. "
"Prompt creation functions must return (prompt, templateStructure) tuple."
)
basePrompt = promptArgs.get("basePrompt")
if not basePrompt:
# Fallback: use prompt parameter (should be the same)
basePrompt = prompt
logger.warning(
f"basePrompt not found in promptArgs for use case '{useCaseId}', "
"using prompt parameter instead. This may indicate a bug."
)
# This is a continuation - build continuation context with raw JSON and rebuild prompt
continuationContext = buildContinuationContext(allSections, lastRawResponse)
continuationContext = buildContinuationContext(
allSections, lastRawResponse, useCaseId, templateStructure
)
if not lastRawResponse:
logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
# For section_content, pass all promptArgs (it uses buildSectionPromptWithContinuation which needs all args)
# For other use cases (chapter_structure, code_structure), filter to only accepted parameters
if useCaseId == "section_content":
# Pass all promptArgs plus continuationContext for section_content
iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
else:
# Filter promptArgs to only include parameters that buildGenerationPrompt accepts
# buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services
filteredPromptArgs = {
k: v for k, v in promptArgs.items()
if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services']
}
# Always include services if available
if not filteredPromptArgs.get('services') and hasattr(self, 'services'):
filteredPromptArgs['services'] = self.services
# Rebuild prompt with continuation context using the provided prompt builder
iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext)
# Store valid completePart from continuation context for fallback on merge failures
# Use getContexts to check if completePart is parseable and store it
if lastRawResponse and not lastValidCompletePart:
try:
contexts = getContexts(lastRawResponse)
if contexts.jsonParsingSuccess and contexts.completePart:
lastValidCompletePart = contexts.completePart
logger.debug(f"Iteration {iteration}: Stored initial valid completePart ({len(lastValidCompletePart)} chars)")
except Exception as e:
logger.debug(f"Iteration {iteration}: Failed to extract completePart: {e}")
# Unified prompt builder call: Continuation builders only need continuationContext, templateStructure, and basePrompt
# All initial context (section, userPrompt, etc.) is already in basePrompt, so promptArgs is not needed
# Extract templateStructure and basePrompt from promptArgs (they're explicit parameters)
iterationPrompt = await promptBuilder(
continuationContext=continuationContext,
templateStructure=templateStructure,
basePrompt=basePrompt
)
else:
# First iteration - use original prompt
iterationPrompt = prompt
@ -155,14 +227,17 @@ class AiCallLooper:
)
# Write the ACTUAL prompt sent to AI
# For section content generation: only write one prompt file (first iteration)
# For section content generation: write prompt for first iteration and continuation iterations
# For document generation: write prompt for each iteration
isSectionContent = "_section_" in debugPrefix
if iteration == 1 or not isSectionContent:
if iteration == 1:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
elif not isSectionContent:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
if iteration == 1:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
elif isSectionContent:
# Save continuation prompts for section_content debugging
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
else:
# Document generation - save all iteration prompts
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
response = await self.aiService.callAi(request)
result = response.content
@ -183,13 +258,16 @@ class AiCallLooper:
self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})")
# Write raw AI response to debug file
# For section content generation: only write one response file (first iteration)
# For section content generation: write response for first iteration and continuation iterations
# For document generation: write response for each iteration
if iteration == 1 or not isSectionContent:
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
elif not isSectionContent:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
elif isSectionContent:
# Save continuation responses for section_content debugging
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
else:
# Document generation - save all iteration responses
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
# Emit stats for this iteration (only if workflow exists and has id)
if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id:
@ -229,319 +307,230 @@ class AiCallLooper:
self.services.chat.progressLogFinish(iterationOperationId, True)
return result
# Store raw response for continuation (even if broken)
lastRawResponse = result
# Parse JSON for use case handling
parsedJsonForUseCase = None
extractedJsonForUseCase = None
try:
extractedJsonForUseCase = extractJsonString(result)
parsedJson, parseError, _ = tryParseJson(extractedJsonForUseCase)
if parseError is None and parsedJson:
parsedJsonForUseCase = parsedJson
except Exception:
pass
# NOTE: Do NOT update lastRawResponse here!
# lastRawResponse should only be updated after successful merge
# This ensures retry iterations use the correct base context
# Handle use cases that return JSON directly (no section extraction needed)
directReturnUseCases = ["section_content", "chapter_structure", "code_structure", "code_content", "image_batch"]
if useCaseId in directReturnUseCases:
# For chapter_structure, code_structure, and section_content, check completeness and support looping
loopingUseCases = ["chapter_structure", "code_structure", "section_content"]
if useCaseId in loopingUseCases:
# If parsing failed (e.g., invalid JSON with comments or truncated JSON), continue looping to get valid JSON
if not parsedJsonForUseCase:
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON parsing failed (likely incomplete/truncated), continuing iteration to complete")
# Accumulate response for merging in next iteration
accumulatedDirectJson.append(result)
# Continue to next iteration - continuation prompt builder will handle the rest
if iterationOperationId:
self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation")
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
# Check completeness if we have parsed JSON
isComplete = JsonResponseHandler.isJsonComplete(parsedJsonForUseCase)
if not isComplete:
logger.warning(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is incomplete, continuing for continuation")
# Accumulate response for merging in next iteration
accumulatedDirectJson.append(result)
# Continue to next iteration - continuation prompt builder will handle the rest
if iterationOperationId:
self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation")
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
else:
# JSON is complete - merge accumulated responses if any
if accumulatedDirectJson:
logger.info(f"Iteration {iteration}: Merging {len(accumulatedDirectJson) + 1} accumulated responses")
# Merge accumulated JSON strings with current response
mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result
for prevJson in accumulatedDirectJson[1:]:
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson)
# Finally merge with current response
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result)
# Re-parse merged JSON
try:
extractedMerged = extractJsonString(mergedJsonString)
parsedMerged, parseError, _ = tryParseJson(extractedMerged)
if parseError is None and parsedMerged:
parsedJsonForUseCase = parsedMerged
result = mergedJsonString
logger.info(f"Successfully merged and parsed {len(accumulatedDirectJson) + 1} JSON fragments")
except Exception as e:
logger.warning(f"Failed to parse merged JSON, using last response: {e}")
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is complete")
# Check if use case supports direct return (all registered use cases do)
if useCase and not useCase.requiresExtraction:
# =====================================================================
# ITERATION FLOW (Simplified)
# =====================================================================
# Step 4: MERGE jsonBase + new response
# - FAILS: repeat prompt, increment fails cont (if >=3 return fallback)
# - SUCCEEDS: try parse
# - SUCCEEDS: FINISHED
# - FAILS: proceed to Step 5
# Step 5: GET CONTEXTS (merge OK, parse failed)
# - getContexts() with repair
# - If no cut point: overlapContext = ""
# Step 6: DECIDE
# - jsonParsingSuccess=true AND overlapContext="": FINISHED
# - jsonParsingSuccess=true AND overlapContext!="": continue, fails=0
# - ELSE: repeat prompt, increment fails count
# =====================================================================
# STEP 4: MERGE jsonBase + new response
# Use candidateJson to hold merged result until we confirm it's valid
candidateJson = None
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - returning JSON directly")
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, True)
# For section_content, return raw result to allow merging of multiple JSON blocks
# The merging logic in subStructureFilling.py will handle extraction and merging
if useCaseId == "section_content":
final_json = result # Return raw response to preserve all JSON blocks
if jsonBase is None:
# First iteration - candidate is the current result
candidateJson = result
logger.debug(f"Iteration {iteration}: First response, candidateJson ({len(candidateJson)} chars)")
else:
final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
# Write final result for chapter structure and code structure (section_content skips it)
if useCaseId in ["chapter_structure", "code_structure"]:
self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
return final_json
# Extract sections from response (handles both valid and broken JSON)
# Only for document generation (JSON responses)
# CRITICAL: Pass allSections and accumulationState to enable string accumulation
extractedSections, wasJsonComplete, parsedResult, accumulationState = self.responseParser.extractSectionsFromResponse(
result, iteration, debugPrefix, allSections, accumulationState
)
# CRITICAL: Merge sections BEFORE KPI validation
# This ensures sections are preserved even if KPI validation fails
if extractedSections:
allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration)
# Define KPIs if we just entered accumulation mode (iteration 1, incomplete JSON)
if accumulationState and accumulationState.isAccumulationMode and iteration == 1 and not accumulationState.kpis:
logger.info(f"Iteration {iteration}: Defining KPIs for accumulation tracking")
continuationContext = buildContinuationContext(allSections, result)
# Pass raw response string from first iteration for KPI definition
kpiDefinitions = await self._defineKpisFromPrompt(
userPrompt or prompt,
result, # Pass raw JSON string from first iteration
continuationContext,
debugPrefix
)
# Initialize KPIs with currentValue = 0
accumulationState.kpis = [{**kpi, "currentValue": 0} for kpi in kpiDefinitions]
logger.info(f"Defined {len(accumulationState.kpis)} KPIs: {[kpi.get('id') for kpi in accumulationState.kpis]}")
# Extract and validate KPIs (if in accumulation mode with KPIs defined)
if accumulationState and accumulationState.isAccumulationMode and accumulationState.kpis:
# For KPI extraction, prefer accumulated JSON string over repaired JSON
# because repairBrokenJson may lose data (e.g., empty rows array when JSON is incomplete)
updatedKpis = []
# First try to extract from parsedResult (repaired JSON)
if parsedResult:
try:
updatedKpis = JsonResponseHandler.extractKpiValuesFromJson(
parsedResult,
accumulationState.kpis
# Merge jsonBase with new response
logger.info(f"Iteration {iteration}: Merging jsonBase ({len(jsonBase)} chars) with new response ({len(result)} chars)")
mergedJsonString, hasOverlap = JsonResponseHandler.mergeJsonStringsWithOverlap(jsonBase, result)
if not hasOverlap:
# MERGE FAILED - repeat prompt with unchanged jsonBase
mergeFailCount += 1
logger.warning(
f"Iteration {iteration}: Merge failed, no overlap found "
f"(fail {mergeFailCount}/{MAX_MERGE_FAILS})"
)
# Check if we got meaningful values (non-zero)
hasValidValues = any(kpi.get("currentValue", 0) > 0 for kpi in updatedKpis)
if not hasValidValues and accumulationState.accumulatedJsonString:
# Repaired JSON has empty values, try accumulated string
logger.debug("Repaired JSON has empty KPI values, trying accumulated JSON string")
updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson(
accumulationState.accumulatedJsonString,
accumulationState.kpis
if mergeFailCount >= MAX_MERGE_FAILS:
# Max failures reached - return last valid completePart
logger.error(
f"Iteration {iteration}: Max merge failures ({MAX_MERGE_FAILS}) reached, "
"returning last valid completePart"
)
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, False)
if lastValidCompletePart:
try:
extracted = extractJsonString(lastValidCompletePart)
parsed, parseErr, _ = tryParseJson(extracted)
if parseErr is None and parsed:
normalized = self._normalizeJsonStructure(parsed, useCase)
return json.dumps(normalized, indent=2, ensure_ascii=False)
except Exception:
pass
return lastValidCompletePart
else:
# No valid fallback - return whatever we have
return jsonBase if jsonBase else ""
# Not at max failures - retry with same prompt (jsonBase unchanged)
if iterationOperationId:
self.services.chat.progressLogUpdate(
iterationOperationId, 0.7,
f"Merge failed ({mergeFailCount}/{MAX_MERGE_FAILS}), retrying"
)
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
# MERGE SUCCEEDED - set candidate (don't update jsonBase yet!)
candidateJson = mergedJsonString
logger.debug(f"Iteration {iteration}: Merge succeeded, candidateJson ({len(candidateJson)} chars)")
# Update lastRawResponse ONLY after we have a valid candidateJson
# (first iteration or successful merge - NOT on merge failure!)
# This ensures retry iterations use the correct base context
lastRawResponse = candidateJson
# Try direct parse of candidate
try:
extracted = extractJsonString(candidateJson)
parsed, parseErr, _ = tryParseJson(extracted)
if parseErr is None and parsed:
# Direct parse succeeded - FINISHED
# Commit candidate to jsonBase
jsonBase = candidateJson
logger.info(f"Iteration {iteration}: Direct parse succeeded, JSON is complete")
normalized = self._normalizeJsonStructure(parsed, useCase)
result = json.dumps(normalized, indent=2, ensure_ascii=False)
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, True)
if not useCase.finalResultHandler:
raise ValueError(
f"Use case '{useCaseId}' is missing required 'finalResultHandler' callback."
)
return useCase.finalResultHandler(
result, normalized, extracted, debugPrefix, self.services
)
except Exception as e:
logger.debug(f"Iteration {iteration}: Direct parse failed: {e}")
# STEP 5: GET CONTEXTS (merge OK, parse failed = cut JSON)
# Use candidateJson for context extraction
contexts = getContexts(candidateJson)
overlapInfo = "(empty=complete)" if contexts.overlapContext == "" else f"({len(contexts.overlapContext)} chars)"
logger.debug(
f"Iteration {iteration}: getContexts() -> "
f"jsonParsingSuccess={contexts.jsonParsingSuccess}, "
f"overlapContext={overlapInfo}"
)
# STEP 6: DECIDE based on jsonParsingSuccess and overlapContext
if contexts.jsonParsingSuccess and contexts.overlapContext == "":
# JSON is complete (no cut point) - FINISHED
# Use completePart for final result (closed, repaired JSON)
# No more merging needed, so we don't need the cut version
jsonBase = contexts.completePart
logger.info(f"Iteration {iteration}: jsonParsingSuccess=true, overlapContext='', JSON complete")
# Store and parse completePart
lastValidCompletePart = contexts.completePart
try:
extracted = extractJsonString(contexts.completePart)
parsed, parseErr, _ = tryParseJson(extracted)
if parseErr is None and parsed:
normalized = self._normalizeJsonStructure(parsed, useCase)
result = json.dumps(normalized, indent=2, ensure_ascii=False)
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, True)
if not useCase.finalResultHandler:
raise ValueError(
f"Use case '{useCaseId}' is missing required 'finalResultHandler' callback."
)
return useCase.finalResultHandler(
result, normalized, extracted, debugPrefix, self.services
)
except Exception as e:
logger.debug(f"Error extracting KPIs from parsedResult: {e}")
updatedKpis = []
logger.warning(f"Iteration {iteration}: Failed to parse completePart: {e}")
# Fallback: return completePart as-is
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, True)
return contexts.completePart
# If no parsedResult or extraction failed, try accumulated string
if not updatedKpis and accumulationState.accumulatedJsonString:
try:
updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson(
accumulationState.accumulatedJsonString,
accumulationState.kpis
)
except Exception as e:
logger.debug(f"Error extracting KPIs from accumulated JSON string: {e}")
updatedKpis = []
if updatedKpis:
shouldProceed, reason = JsonResponseHandler.validateKpiProgression(
accumulationState,
updatedKpis
elif contexts.jsonParsingSuccess and contexts.overlapContext != "":
# JSON parseable but has cut point - CONTINUE to next iteration
# CRITICAL: Use hierarchyContext (CUT json) as jsonBase for next merge!
# - hierarchyContext = the truncated JSON at cut point (needed for overlap matching)
# - completePart = closed JSON (for validation/fallback only)
# The next AI fragment's overlap must match the CUT point, not closed structures
jsonBase = contexts.hierarchyContext
logger.info(
f"Iteration {iteration}: jsonParsingSuccess=true, overlapContext not empty, "
f"continuing iteration (jsonBase updated to hierarchyContext: {len(jsonBase)} chars)"
)
if not shouldProceed:
logger.warning(f"Iteration {iteration}: KPI validation failed: {reason}")
# Store valid completePart as fallback (different from jsonBase!)
lastValidCompletePart = contexts.completePart
# Reset fail counter on successful progress
mergeFailCount = 0
# Update lastRawResponse for continuation prompt building
# Use the CUT version for prompt context as well
lastRawResponse = jsonBase
if iterationOperationId:
self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation")
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
else:
# JSON not parseable after repair - repeat prompt, increment fails
# Do NOT update jsonBase - keep previous valid state
mergeFailCount += 1
logger.warning(
f"Iteration {iteration}: jsonParsingSuccess=false, "
f"repeat prompt (fail {mergeFailCount}/{MAX_MERGE_FAILS})"
)
if mergeFailCount >= MAX_MERGE_FAILS:
# Max failures reached - return last valid completePart
logger.error(
f"Iteration {iteration}: Max failures ({MAX_MERGE_FAILS}) reached, "
"returning last valid completePart"
)
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, False)
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.9, f"KPI validation failed: {reason} ({iteration} iterations)")
break
if lastValidCompletePart:
try:
extracted = extractJsonString(lastValidCompletePart)
parsed, parseErr, _ = tryParseJson(extracted)
if parseErr is None and parsed:
normalized = self._normalizeJsonStructure(parsed, useCase)
return json.dumps(normalized, indent=2, ensure_ascii=False)
except Exception:
pass
return lastValidCompletePart
else:
return jsonBase if jsonBase else ""
# Update KPIs in accumulation state
accumulationState.kpis = updatedKpis
logger.info(f"Iteration {iteration}: KPIs updated: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
# Check if all KPIs completed
allCompleted = True
for kpi in updatedKpis:
targetValue = kpi.get("targetValue", 0)
currentValue = kpi.get("currentValue", 0)
if currentValue < targetValue:
allCompleted = False
break
if allCompleted:
logger.info(f"Iteration {iteration}: All KPIs completed, finishing accumulation")
wasJsonComplete = True # Mark as complete to exit loop
# CRITICAL: Handle JSON fragments (continuation content)
# Fragment merging happens inside extractSectionsFromResponse
# If merge fails (returns wasJsonComplete=True), stop iterations and complete JSON
if not extractedSections and allSections:
if wasJsonComplete:
# Merge failed - stop iterations, complete JSON with available data
logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - Stopping iterations, completing JSON with available data")
# Not at max - retry with same prompt
# Do NOT update jsonBase or lastRawResponse - keep previous for retry
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, False)
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.9, f"Merge failed, completing JSON ({iteration} iterations)")
break
# Fragment was detected and merged successfully
logger.info(f"Iteration {iteration}: JSON fragment detected and merged, continuing")
# Don't break - fragment was merged, continue to get more content if needed
# Check if we should continue based on JSON completeness
shouldContinue = self.responseParser.shouldContinueGeneration(
allSections,
iteration,
wasJsonComplete,
result
)
if shouldContinue:
if iterationOperationId:
self.services.chat.progressLogUpdate(iterationOperationId, 0.8, "Fragment merged, continuing")
self.services.chat.progressLogUpdate(
iterationOperationId, 0.7,
f"Parse failed ({mergeFailCount}/{MAX_MERGE_FAILS}), retrying"
)
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
else:
# Done - fragment was merged and JSON is complete
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, True)
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, fragment merged)")
logger.info(f"Generation complete after {iteration} iterations: fragment merged")
break
# Extract document metadata from first iteration if available
if iteration == 1 and parsedResult and not documentMetadata:
documentMetadata = self.responseParser.extractDocumentMetadata(parsedResult)
# Update progress after parsing
if iterationOperationId:
if extractedSections:
self.services.chat.progressLogUpdate(iterationOperationId, 0.8, f"Extracted {len(extractedSections)} sections")
if not extractedSections:
# CRITICAL: If JSON was incomplete/broken, continue even if no sections extracted
# This allows the AI to retry and complete the broken JSON
if not wasJsonComplete:
logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt")
continue
# If JSON was complete but no sections extracted - check if it was a fragment
# Fragments are handled above, so if we get here and it's complete, it's an error
logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping")
break
# NOTE: Section merging now happens BEFORE KPI validation (see above)
# This ensures sections are preserved even if KPI validation fails
# Calculate total bytes in merged content for progress display
merged_json_str = json.dumps(allSections, indent=2, ensure_ascii=False)
totalBytesGenerated = len(merged_json_str.encode('utf-8'))
# Update main operation with byte progress
if operationId:
# Format bytes for display
if totalBytesGenerated < 1024:
bytesDisplay = f"{totalBytesGenerated}B"
elif totalBytesGenerated < 1024 * 1024:
bytesDisplay = f"{totalBytesGenerated / 1024:.1f}kB"
else:
bytesDisplay = f"{totalBytesGenerated / (1024 * 1024):.1f}MB"
# Estimate progress based on iterations (rough estimate)
estimatedProgress = min(0.9, 0.4 + (iteration * 0.1))
self.services.chat.progressLogUpdate(operationId, estimatedProgress, f"Pipeline: {bytesDisplay} (iteration {iteration})")
# Log merged sections for debugging
# For section content generation: skip merged sections debug files (only one prompt/response needed)
isSectionContent = "_section_" in debugPrefix
if not isSectionContent:
self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}")
# Check if we should continue (completion detection)
# Simple logic: JSON completeness determines continuation
shouldContinue = self.responseParser.shouldContinueGeneration(
allSections,
iteration,
wasJsonComplete,
result
)
if shouldContinue:
# Finish iteration operation (will continue with next iteration)
if iterationOperationId:
# Show byte progress in iteration completion
iterBytes = len(result.encode('utf-8')) if result else 0
if iterBytes < 1024:
iterBytesDisplay = f"{iterBytes}B"
elif iterBytes < 1024 * 1024:
iterBytesDisplay = f"{iterBytes / 1024:.1f}kB"
else:
iterBytesDisplay = f"{iterBytes / (1024 * 1024):.1f}MB"
self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Completed ({iterBytesDisplay})")
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
else:
# Done - finish iteration and update main operation
if iterationOperationId:
# Show final byte count
finalBytes = len(merged_json_str.encode('utf-8'))
if finalBytes < 1024:
finalBytesDisplay = f"{finalBytes}B"
elif finalBytes < 1024 * 1024:
finalBytesDisplay = f"{finalBytes / 1024:.1f}kB"
else:
finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB"
self.services.chat.progressLogUpdate(iterationOperationId, 0.95, f"Complete ({finalBytesDisplay})")
self.services.chat.progressLogFinish(iterationOperationId, True)
if operationId:
# Show final size in main operation
finalBytes = len(merged_json_str.encode('utf-8'))
if finalBytes < 1024:
finalBytesDisplay = f"{finalBytes}B"
elif finalBytes < 1024 * 1024:
finalBytesDisplay = f"{finalBytes / 1024:.1f}kB"
else:
finalBytesDisplay = f"{finalBytes / (1024 * 1024):.1f}MB"
self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete: {finalBytesDisplay} ({iteration} iterations, {len(allSections)} sections)")
logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections")
break
except Exception as e:
logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
@ -552,113 +541,135 @@ class AiCallLooper:
if iteration >= maxIterations:
logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")
# CRITICAL: Complete any incomplete structures in sections before building final result
# This ensures JSON is properly closed even if merge failed or iterations stopped early
allSections = JsonResponseHandler.completeIncompleteStructures(allSections)
# Build final result from accumulated sections
final_result = self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)
# Write final result to debug file
# For section content generation: skip final_result debug file (response already written)
isSectionContent = "_section_" in debugPrefix
if not isSectionContent:
self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
return final_result
# This code path should never be reached because all registered use cases
# return early when JSON is complete. This would only execute for use cases that
# require section extraction, but no such use cases are currently registered.
logger.error(f"Unexpected code path: reached end of loop without return for use case '{useCaseId}'")
return result if result else ""
async def _defineKpisFromPrompt(
self,
userPrompt: str,
rawJsonString: Optional[str],
continuationContext: Dict[str, Any],
debugPrefix: str = "kpi"
) -> List[Dict[str, Any]]:
def _isJsonStringIncomplete(self, jsonString: str) -> bool:
"""
Make separate AI call to define KPIs based on user prompt and incomplete JSON.
Check if JSON string is incomplete (truncated) BEFORE closing/parsing.
This is critical because if JSON is truncated, closing it makes it appear complete,
but we need to detect the truncation to continue iteration.
Args:
userPrompt: Original user prompt
rawJsonString: Raw JSON string from first iteration response
continuationContext: Continuation context (not used for JSON, kept for compatibility)
debugPrefix: Prefix for debug file names
jsonString: JSON string to check
Returns:
List of KPI definitions: [{"id": str, "description": str, "jsonPath": str, "targetValue": int}, ...]
True if JSON string appears incomplete/truncated, False otherwise
"""
# Use raw JSON string from first iteration response
if rawJsonString:
# Remove markdown code fences if present
from modules.shared.jsonUtils import stripCodeFences
incompleteJson = stripCodeFences(rawJsonString.strip())
else:
incompleteJson = "Not available"
if not jsonString or not jsonString.strip():
return False
# Normalize JSON string
normalized = stripCodeFences(normalizeJsonText(jsonString)).strip()
if not normalized:
return False
kpiDefinitionPrompt = f"""Analyze the user request and incomplete JSON to define KPIs (Key Performance Indicators) for tracking progress.
User Request:
{userPrompt}
Delivered JSON part:
{incompleteJson}
Task: Define which JSON items should be tracked to measure completion progress.
IMPORTANT: Analyze the Delivered JSON part structure to understand what is being tracked:
1. Identify the structure type (table with rows, list with items, etc.)
2. Determine what the jsonPath actually counts (number of rows, number of items, etc.)
3. Calculate targetValue based on what is being tracked, NOT the total quantity requested
For each trackable item, provide:
- id: Unique identifier (use descriptive name)
- description: What this KPI measures (be specific about what is counted)
- jsonPath: Path to extract value from JSON (use dot notation with array indices, e.g., "documents[0].sections[1].elements[0].rows")
- targetValue: Target value to reach (integer) - MUST match what jsonPath actually tracks (rows count, items count, etc.)
Return ONLY valid JSON in this format:
{{
"kpis": [
{{
"id": "unique_id",
"description": "Description of what is measured",
"jsonPath": "path.to.value",
"targetValue": 0
}}
]
}}
If no trackable items can be identified, return: {{"kpis": []}}
"""
# Find first '{' or '[' to start
startIdx = -1
for i, char in enumerate(normalized):
if char in '{[':
startIdx = i
break
try:
request = AiCallRequest(
prompt=kpiDefinitionPrompt,
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.SPEED,
processingMode=ProcessingModeEnum.BASIC
)
if startIdx == -1:
return False
jsonContent = normalized[startIdx:]
# Check if structures are balanced (all opened structures are closed)
braceCount = 0
bracketCount = 0
inString = False
escapeNext = False
for char in jsonContent:
if escapeNext:
escapeNext = False
continue
if char == '\\':
escapeNext = True
continue
if char == '"':
inString = not inString
continue
if not inString:
if char == '{':
braceCount += 1
elif char == '}':
braceCount -= 1
elif char == '[':
bracketCount += 1
elif char == ']':
bracketCount -= 1
# If structures are unbalanced, JSON is incomplete
if braceCount > 0 or bracketCount > 0:
return True
# Check if JSON ends with incomplete value (e.g., unclosed string, incomplete number, trailing comma)
trimmed = jsonContent.rstrip()
if not trimmed:
return False
# Check for trailing comma (might indicate incomplete)
if trimmed.endswith(','):
# Trailing comma might indicate incomplete, but could also be valid
# Check if there's a closing bracket/brace after the comma
return False # Trailing comma alone doesn't mean incomplete
# Check if ends with incomplete string (odd number of quotes)
quoteCount = jsonContent.count('"')
if quoteCount % 2 == 1:
# Odd number of quotes - string is not closed
return True
# Check if ends mid-value (e.g., ends with "417 instead of "4170. 41719"])
# Look for patterns that suggest truncation:
# - Ends with incomplete number (e.g., "417)
# - Ends with incomplete array element (e.g., ["417)
# - Ends with incomplete object property (e.g., {"key": "val)
# If JSON parses successfully without closing, it's complete
parsed, parseErr, _ = tryParseJson(jsonContent)
if parseErr is None:
# Parses successfully - it's complete
return False
# If it doesn't parse, try closing it and see if that helps
closed = closeJsonStructures(jsonContent)
parsedClosed, parseErrClosed, _ = tryParseJson(closed)
if parseErrClosed is None:
# Only parses after closing - it was incomplete
return True
# Doesn't parse even after closing - might be malformed, but assume incomplete to be safe
return True
def _normalizeJsonStructure(self, parsed: Any, useCase) -> Any:
"""
Normalize JSON structure to ensure consistent format before merging.
Handles different response formats and converts them to expected structure.
Args:
parsed: Parsed JSON object (can be dict, list, or primitive)
useCase: LoopingUseCase instance with jsonNormalizer callback
Returns:
Normalized JSON structure
"""
# Use callback to normalize JSON structure (REQUIRED - no fallback)
if not useCase or not useCase.jsonNormalizer:
raise ValueError(
f"Use case '{useCase.useCaseId if useCase else 'unknown'}' is missing required 'jsonNormalizer' callback. "
"All use cases must provide a jsonNormalizer function."
)
# Write KPI definition prompt to debug file
self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt")
checkWorkflowStopped(self.services)
response = await self.aiService.callAi(request)
# Write KPI definition response to debug file
self.services.utils.writeDebugFile(response.content, f"{debugPrefix}_kpi_definition_response")
# Parse response
extracted = extractJsonString(response.content)
kpiResponse = json.loads(extracted)
kpiDefinitions = kpiResponse.get("kpis", [])
logger.info(f"Defined {len(kpiDefinitions)} KPIs for tracking")
return kpiDefinitions
except Exception as e:
logger.warning(f"Failed to define KPIs: {e}, continuing without KPI tracking")
return []
return useCase.jsonNormalizer(parsed, useCase.useCaseId)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -12,13 +12,96 @@ from typing import Dict, Any, List, Optional, Callable
logger = logging.getLogger(__name__)
# Callback functions for use-case-specific logic
def _handleSectionContentFinalResult(result: str, parsedJsonForUseCase: Any, extractedJsonForUseCase: str,
debugPrefix: str, services: Any) -> str:
"""Handle final result for section_content: return raw result to preserve all JSON blocks."""
final_json = result # Return raw response to preserve all JSON blocks
# Write final merged result for section_content (overwrites iteration 1 response with complete merged result)
if services and hasattr(services, 'utils') and hasattr(services.utils, 'writeDebugFile'):
services.utils.writeDebugFile(final_json, f"{debugPrefix}_response")
return final_json
def _handleChapterStructureFinalResult(result: str, parsedJsonForUseCase: Any, extractedJsonForUseCase: str,
debugPrefix: str, services: Any) -> str:
"""Handle final result for chapter_structure: format JSON and write debug file."""
import json
final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
# Write final result for chapter structure
if services and hasattr(services, 'utils') and hasattr(services.utils, 'writeDebugFile'):
services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
return final_json
def _handleCodeStructureFinalResult(result: str, parsedJsonForUseCase: Any, extractedJsonForUseCase: str,
debugPrefix: str, services: Any) -> str:
"""Handle final result for code_structure: format JSON and write debug file."""
import json
final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
# Write final result for code structure
if services and hasattr(services, 'utils') and hasattr(services.utils, 'writeDebugFile'):
services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
return final_json
def _handleCodeContentFinalResult(result: str, parsedJsonForUseCase: Any, extractedJsonForUseCase: str,
debugPrefix: str, services: Any) -> str:
"""Handle final result for code_content: format JSON."""
import json
final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
return final_json
def _normalizeSectionContentJson(parsed: Any, useCaseId: str) -> Any:
"""Normalize JSON structure for section_content use case."""
# For section_content, expect {"elements": [...]} structure
if isinstance(parsed, list):
# Check if list contains strings (invalid format) or element objects
if parsed and isinstance(parsed[0], str):
# Invalid format - list of strings instead of elements
# Try to convert strings to paragraph elements as fallback
logger.debug(f"Received list of strings instead of elements array, converting to paragraph elements")
elements = []
for text in parsed:
if isinstance(text, str) and text.strip():
elements.append({
"type": "paragraph",
"content": {
"text": text.strip()
}
})
return {"elements": elements} if elements else {"elements": []}
else:
# Convert plain list of elements to elements structure
return {"elements": parsed}
elif isinstance(parsed, dict):
# If it already has "elements", return as-is
if "elements" in parsed:
return parsed
# If it has "type" and looks like an element, wrap in elements array
elif parsed.get("type"):
return {"elements": [parsed]}
# Otherwise, assume it's already in correct format
else:
return parsed
# For other use cases, return as-is (they have their own structures)
return parsed
def _normalizeDefaultJson(parsed: Any, useCaseId: str) -> Any:
"""Default normalizer: return as-is."""
return parsed
@dataclass
class LoopingUseCase:
"""Configuration for a specific looping use case."""
# Identification
useCaseId: str # "section_content", "chapter_structure", "document_structure", "code_structure", "code_content", "image_batch"
useCaseId: str # "section_content", "chapter_structure", "code_structure", "code_content"
# JSON Format Detection
jsonTemplate: Dict[str, Any] # Expected JSON structure template
@ -39,6 +122,10 @@ class LoopingUseCase:
# Result Building
resultBuilder: Optional[Callable] = None # Build final result from accumulated data
# Use-case-specific handlers (callbacks to avoid if/elif chains in generic code)
finalResultHandler: Optional[Callable] = None # Handle final result formatting and debug file writing
jsonNormalizer: Optional[Callable] = None # Normalize JSON structure for this use case
# Metadata
supportsAccumulation: bool = True # Whether this use case supports accumulation
requiresExtraction: bool = False # Whether this requires extraction (like sections)
@ -124,6 +211,8 @@ class LoopingUseCaseRegistry:
merger=None,
continuationContextBuilder=None, # Will use default continuation context
resultBuilder=None, # Return JSON directly
finalResultHandler=_handleSectionContentFinalResult,
jsonNormalizer=_normalizeSectionContentJson,
supportsAccumulation=False,
requiresExtraction=False
))
@ -141,28 +230,13 @@ class LoopingUseCaseRegistry:
merger=None,
continuationContextBuilder=None,
resultBuilder=None, # Return JSON directly
finalResultHandler=_handleChapterStructureFinalResult,
jsonNormalizer=_normalizeDefaultJson,
supportsAccumulation=False,
requiresExtraction=False
))
# Use Case 3: Document Structure Generation
# Returns JSON with "documents[0].sections" structure, requires extraction and accumulation
self.register(LoopingUseCase(
useCaseId="document_structure",
jsonTemplate={"documents": [{"sections": []}]},
detectionKeys=["sections"],
detectionPath="documents[0].sections",
initialPromptBuilder=None,
continuationPromptBuilder=None,
accumulator=None, # Will use default accumulator
merger=None, # Will use default merger
continuationContextBuilder=None,
resultBuilder=None, # Will use default result builder
supportsAccumulation=True,
requiresExtraction=True
))
# Use Case 4: Code Structure Generation (NEW)
# Use Case 3: Code Structure Generation
self.register(LoopingUseCase(
useCaseId="code_structure",
jsonTemplate={
@ -191,6 +265,8 @@ class LoopingUseCaseRegistry:
merger=None,
continuationContextBuilder=None,
resultBuilder=None,
finalResultHandler=_handleCodeStructureFinalResult,
jsonNormalizer=_normalizeDefaultJson,
supportsAccumulation=False,
requiresExtraction=False
))
@ -207,25 +283,11 @@ class LoopingUseCaseRegistry:
merger=None, # Will use default merger
continuationContextBuilder=None,
resultBuilder=None, # Will use default result builder
finalResultHandler=_handleCodeContentFinalResult,
jsonNormalizer=_normalizeDefaultJson,
supportsAccumulation=True,
requiresExtraction=False
))
# Use Case 6: Image Batch Generation (NEW)
self.register(LoopingUseCase(
useCaseId="image_batch",
jsonTemplate={"images": []},
detectionKeys=["images"],
detectionPath="images",
initialPromptBuilder=None,
continuationPromptBuilder=None,
accumulator=None, # Direct return
merger=None,
continuationContextBuilder=None,
resultBuilder=None,
supportsAccumulation=False,
requiresExtraction=False
))
logger.info(f"Registered {len(self.useCases)} default looping use cases")

View file

@ -213,15 +213,16 @@ class StructureFiller:
if not isinstance(doc["language"], str) or len(doc["language"]) != 2:
raise ValueError(f"Document {doc.get('id')} has invalid language format in filled structure: {doc['language']} - should be 2-character ISO 639-1 code")
for chapter in doc.get("chapters", []):
for section in chapter.get("sections", []):
# Validation 4.2: Section missing 'elements' field
if "elements" not in section:
section["elements"] = []
logger.info(f"Section {section.get('id')} missing 'elements' - created empty list")
# Validation 4.3: Section has empty elements list - ALLOW (intentionally empty is OK)
# No action needed - empty elements are allowed
# CRITICAL: flattenedStructure has sections, not chapters!
# After flattening, chapters are converted to sections, so we need to validate sections directly
for section in doc.get("sections", []):
# Validation 4.2: Section missing 'elements' field
if "elements" not in section:
section["elements"] = []
logger.info(f"Section {section.get('id')} missing 'elements' - created empty list")
# Validation 4.3: Section has empty elements list - ALLOW (intentionally empty is OK)
# No action needed - empty elements are allowed
# ChatLog abschließen
self.services.chat.progressLogFinish(fillOperationId, True)
@ -246,6 +247,7 @@ class StructureFiller:
contentParts: List[ContentPart],
userPrompt: str,
language: str,
outputFormat: str,
parentOperationId: str,
totalChapters: int
) -> None:
@ -271,7 +273,8 @@ class StructureFiller:
contentPartInstructions=contentPartInstructions,
contentParts=contentParts,
userPrompt=userPrompt,
language=language
language=language,
outputFormat=outputFormat
)
# AI-Call für Chapter-Struktur-Generierung
@ -372,6 +375,8 @@ class StructureFiller:
docId = doc.get("id", "unknown")
# Get language for this specific document
docLanguage = self._getDocumentLanguage(chapterStructure, docId)
# Get output format for this specific document
docFormat = doc.get("outputFormat", "txt")
for chapter in doc.get("chapters", []):
chapterIndex += 1
@ -382,7 +387,7 @@ class StructureFiller:
contentPartIds, contentPartInstructions = self._extractContentPartInfo(chapter)
# Create task for parallel processing with semaphore
async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage):
async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage, docFormat):
checkWorkflowStopped(self.services)
async with semaphore:
return await self._generateSingleChapterSectionsStructure(
@ -397,12 +402,13 @@ class StructureFiller:
contentParts=contentParts,
userPrompt=userPrompt,
language=docLanguage, # Use document-specific language
outputFormat=docFormat, # Use document-specific format
parentOperationId=parentOperationId,
totalChapters=totalChapters
)
task = processChapterWithSemaphore(
chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage
chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage, docFormat
)
chapterTasks.append((chapterIndex, chapter, task))
@ -747,7 +753,7 @@ class StructureFiller:
if processedExtractedParts:
logger.debug(f"Section {sectionId}: Aggregating {len(processedExtractedParts)} extracted parts with AI")
isAggregation = True
generationPrompt = self._buildSectionGenerationPrompt(
generationPrompt, templateStructure = self._buildSectionGenerationPrompt(
section=section,
contentParts=processedExtractedParts,
userPrompt=userPrompt,
@ -805,48 +811,8 @@ class StructureFiller:
f"{chapterId}_section_{sectionId}_response"
)
else:
async def buildSectionPromptWithContinuation(
section: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
generationHint: str,
allSections: List[Dict[str, Any]],
sectionIndex: int,
isAggregation: bool,
continuationContext: Dict[str, Any],
services: Any
) -> str:
basePrompt = self._buildSectionGenerationPrompt(
section=section,
contentParts=contentParts,
userPrompt=userPrompt,
generationHint=generationHint,
allSections=allSections,
sectionIndex=sectionIndex,
isAggregation=isAggregation,
language=language
)
continuationInfo = continuationContext.get("delivered_summary", "")
cutOffElement = continuationContext.get("cut_off_element", "")
continuationPrompt = f"""{basePrompt}
--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Please continue from where it stopped.
PREVIOUSLY DELIVERED SUMMARY:
{continuationInfo}
LAST INCOMPLETE ELEMENT:
{cutOffElement}
TASK: Continue generating the JSON elements array from where it was cut off.
Complete the incomplete element and continue with remaining elements.
Return ONLY the continuation JSON (starting from the incomplete element).
The JSON should be a fragment that can be merged with the previous response."""
return continuationPrompt
# Use consolidated class method
buildSectionPromptWithContinuation = self.buildSectionPromptWithContinuation
options = AiCallOptions(
operationType=operationType,
@ -868,7 +834,8 @@ The JSON should be a fragment that can be merged with the previous response."""
"allSections": all_sections_list,
"sectionIndex": sectionIndex,
"isAggregation": isAggregation,
"services": self.services
"templateStructure": templateStructure,
"basePrompt": generationPrompt
},
operationId=sectionOperationId,
userPrompt=userPrompt,
@ -974,7 +941,7 @@ The JSON should be a fragment that can be merged with the previous response."""
if len(contentPartIds) == 0 and useAiCall and generationHint:
# Generate content from scratch using only generationHint
logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only")
generationPrompt = self._buildSectionGenerationPrompt(
generationPrompt, templateStructure = self._buildSectionGenerationPrompt(
section=section,
contentParts=[],
userPrompt=userPrompt,
@ -1033,48 +1000,8 @@ The JSON should be a fragment that can be merged with the previous response."""
else:
isAggregation = False
async def buildSectionPromptWithContinuation(
section: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
generationHint: str,
allSections: List[Dict[str, Any]],
sectionIndex: int,
isAggregation: bool,
continuationContext: Dict[str, Any],
services: Any
) -> str:
basePrompt = self._buildSectionGenerationPrompt(
section=section,
contentParts=contentParts,
userPrompt=userPrompt,
generationHint=generationHint,
allSections=allSections,
sectionIndex=sectionIndex,
isAggregation=isAggregation,
language=language
)
continuationInfo = continuationContext.get("delivered_summary", "")
cutOffElement = continuationContext.get("cut_off_element", "")
continuationPrompt = f"""{basePrompt}
--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Please continue from where it stopped.
PREVIOUSLY DELIVERED SUMMARY:
{continuationInfo}
LAST INCOMPLETE ELEMENT:
{cutOffElement}
TASK: Continue generating the JSON elements array from where it was cut off.
Complete the incomplete element and continue with remaining elements.
Return ONLY the continuation JSON (starting from the incomplete element).
The JSON should be a fragment that can be merged with the previous response."""
return continuationPrompt
# Use consolidated class method
buildSectionPromptWithContinuation = self.buildSectionPromptWithContinuation
options = AiCallOptions(
operationType=operationType,
@ -1086,7 +1013,7 @@ The JSON should be a fragment that can be merged with the previous response."""
prompt=generationPrompt,
options=options,
debugPrefix=f"{chapterId}_section_{sectionId}",
promptBuilder=buildSectionPromptWithContinuation,
promptBuilder=self.buildSectionPromptWithContinuation,
promptArgs={
"section": section,
"contentParts": [],
@ -1095,7 +1022,9 @@ The JSON should be a fragment that can be merged with the previous response."""
"allSections": all_sections_list,
"sectionIndex": sectionIndex,
"isAggregation": isAggregation,
"services": self.services
"templateStructure": templateStructure,
"basePrompt": generationPrompt,
"language": language
},
operationId=sectionOperationId,
userPrompt=userPrompt,
@ -1277,7 +1206,7 @@ The JSON should be a fragment that can be merged with the previous response."""
if useAiCall and generationHint:
# AI-Call mit einzelnen ContentPart (now may be text part after Vision extraction)
logger.debug(f"Processing section {sectionId}: Single extracted part with AI call")
generationPrompt = self._buildSectionGenerationPrompt(
generationPrompt, templateStructure = self._buildSectionGenerationPrompt(
section=section,
contentParts=[part],
userPrompt=userPrompt,
@ -1336,48 +1265,8 @@ The JSON should be a fragment that can be merged with the previous response."""
else:
isAggregation = False
async def buildSectionPromptWithContinuation(
section: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
generationHint: str,
allSections: List[Dict[str, Any]],
sectionIndex: int,
isAggregation: bool,
continuationContext: Dict[str, Any],
services: Any
) -> str:
basePrompt = self._buildSectionGenerationPrompt(
section=section,
contentParts=contentParts,
userPrompt=userPrompt,
generationHint=generationHint,
allSections=allSections,
sectionIndex=sectionIndex,
isAggregation=isAggregation,
language=language
)
continuationInfo = continuationContext.get("delivered_summary", "")
cutOffElement = continuationContext.get("cut_off_element", "")
continuationPrompt = f"""{basePrompt}
--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Please continue from where it stopped.
PREVIOUSLY DELIVERED SUMMARY:
{continuationInfo}
LAST INCOMPLETE ELEMENT:
{cutOffElement}
TASK: Continue generating the JSON elements array from where it was cut off.
Complete the incomplete element and continue with remaining elements.
Return ONLY the continuation JSON (starting from the incomplete element).
The JSON should be a fragment that can be merged with the previous response."""
return continuationPrompt
# Use consolidated class method
buildSectionPromptWithContinuation = self.buildSectionPromptWithContinuation
options = AiCallOptions(
operationType=operationType,
@ -1389,7 +1278,7 @@ The JSON should be a fragment that can be merged with the previous response."""
prompt=generationPrompt,
options=options,
debugPrefix=f"{chapterId}_section_{sectionId}",
promptBuilder=buildSectionPromptWithContinuation,
promptBuilder=self.buildSectionPromptWithContinuation,
promptArgs={
"section": section,
"contentParts": [part],
@ -1398,7 +1287,10 @@ The JSON should be a fragment that can be merged with the previous response."""
"allSections": all_sections_list,
"sectionIndex": sectionIndex,
"isAggregation": isAggregation,
"services": self.services
"services": self.services,
"templateStructure": templateStructure,
"basePrompt": generationPrompt,
"language": language
},
operationId=sectionOperationId,
userPrompt=userPrompt,
@ -1639,104 +1531,88 @@ The JSON should be a fragment that can be merged with the previous response."""
maxConcurrent = self._getMaxConcurrentGeneration(options)
sectionSemaphore = asyncio.Semaphore(maxConcurrent)
# Helper function to calculate overall progress
def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections):
"""Calculate overall progress: 0.0 to 1.0"""
if totalChapters == 0:
return 1.0
# Progress from completed chapters (0 to chapterIndex-1)
completedChaptersProgress = chapterIndex / totalChapters
# Progress from current chapter (sectionIndex / totalSections)
currentChapterProgress = (sectionIndex / totalSections) / totalChapters if totalSections > 0 else 0
return min(1.0, completedChaptersProgress + currentChapterProgress)
# Collect ALL sections from ALL chapters for fully parallel processing
# Each task carries: (docId, chapterId, chapterTitle, sectionIndex, section, docLanguage)
allSectionTasks = []
totalSections = len(all_sections_list)
completedSections = [0] # Mutable counter for progress tracking
# Process chapters sequentially with chapter-level progress
chapterIndex = 0
for doc in chapterStructure.get("documents", []):
docId = doc.get("id", "unknown")
# Get language for this specific document
docLanguage = self._getDocumentLanguage(chapterStructure, docId)
for chapter in doc.get("chapters", []):
chapterIndex += 1
chapterId = chapter.get("id", "unknown")
chapterTitle = chapter.get("title", "Untitled Chapter")
sections = chapter.get("sections", [])
totalSections = len(sections)
chapterSectionCount = len(sections)
# Start chapter operation
chapterOperationId = f"{fillOperationId}_chapter_{chapterId}"
self.services.chat.progressLogStart(
chapterOperationId,
"Chapter Generation",
f"Chapter {chapterIndex}/{totalChapters}",
chapterTitle,
parentOperationId=fillOperationId
for sectionIndex, section in enumerate(sections):
allSectionTasks.append({
"docId": docId,
"chapterId": chapterId,
"chapterTitle": chapterTitle,
"sectionIndex": sectionIndex,
"chapterSectionCount": chapterSectionCount,
"section": section,
"docLanguage": docLanguage
})
logger.info(f"Starting FULLY PARALLEL section generation: {totalSections} sections across {totalChapters} chapters")
# Create task wrapper for each section with progress tracking
async def processSectionWithSemaphore(taskInfo):
checkWorkflowStopped(self.services)
async with sectionSemaphore:
result = await self._processSingleSection(
section=taskInfo["section"],
sectionIndex=taskInfo["sectionIndex"],
totalSections=taskInfo["chapterSectionCount"],
chapterIndex=0, # Not used for sequential logic anymore
totalChapters=totalChapters,
chapterId=taskInfo["chapterId"],
chapterOperationId=fillOperationId, # Use fillOperationId as parent (no chapter-level ops in parallel mode)
fillOperationId=fillOperationId,
contentParts=contentParts,
userPrompt=userPrompt,
all_sections_list=all_sections_list,
language=taskInfo["docLanguage"],
calculateOverallProgress=lambda *args: completedSections[0] / totalSections if totalSections > 0 else 1.0
)
# Process sections within chapter in parallel with concurrency control
sectionTasks = []
for sectionIndex, section in enumerate(sections):
# Create task wrapper with semaphore for parallel processing
async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress):
checkWorkflowStopped(self.services)
async with sectionSemaphore:
return await self._processSingleSection(
section=section,
sectionIndex=sectionIndex,
totalSections=totalSections,
chapterIndex=chapterIndex,
totalChapters=totalChapters,
chapterId=chapterId,
chapterOperationId=chapterOperationId,
fillOperationId=fillOperationId,
contentParts=contentParts,
userPrompt=userPrompt,
all_sections_list=all_sections_list,
language=docLanguage, # Use document-specific language
calculateOverallProgress=calculateOverallProgress
)
task = processSectionWithSemaphore(
section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress
)
sectionTasks.append((sectionIndex, section, task))
# Execute all section tasks in parallel with concurrency control
if sectionTasks:
# Create list of tasks (without indices for gather)
tasks = [task for _, _, task in sectionTasks]
# Execute in parallel with error handling
results = await asyncio.gather(*tasks, return_exceptions=True)
# Process results in order and assign elements to sections
for (originalIndex, originalSection, _), result in zip(sectionTasks, results):
if isinstance(result, Exception):
logger.error(f"Error processing section {originalSection.get('id')}: {str(result)}")
# Set error element
originalSection["elements"] = [{
"type": "error",
"message": f"Error processing section: {str(result)}",
"sectionId": originalSection.get("id")
}]
else:
# Assign elements to section in correct order
originalSection["elements"] = result
# Finish chapter operation after all sections processed
self.services.chat.progressLogFinish(chapterOperationId, True)
# Update overall progress after chapter completion
overallProgress = chapterIndex / totalChapters if totalChapters > 0 else 1.0
# Update progress after each section completes
completedSections[0] += 1
overallProgress = completedSections[0] / totalSections if totalSections > 0 else 1.0
sectionId = taskInfo["section"].get("id", "unknown")
self.services.chat.progressLogUpdate(
fillOperationId,
overallProgress,
f"Chapter {chapterIndex}/{totalChapters} completed: {chapterTitle}"
f"Section {completedSections[0]}/{totalSections} completed: {sectionId}"
)
return result
# Create all tasks
tasks = [processSectionWithSemaphore(taskInfo) for taskInfo in allSectionTasks]
# Execute ALL sections in parallel with concurrency control
if tasks:
results = await asyncio.gather(*tasks, return_exceptions=True)
# Assign results back to sections
for taskInfo, result in zip(allSectionTasks, results):
section = taskInfo["section"]
if isinstance(result, Exception):
logger.error(f"Error processing section {section.get('id')}: {str(result)}")
section["elements"] = [{
"type": "error",
"message": f"Error processing section: {str(result)}",
"sectionId": section.get("id")
}]
else:
section["elements"] = result if result is not None else []
logger.info(f"Completed FULLY PARALLEL section generation: {totalSections} sections")
return chapterStructure
@ -1830,7 +1706,13 @@ The JSON should be a fragment that can be merged with the previous response."""
# 2. Generierte Sections - adjust heading levels
for section in chapter.get("sections", []):
# CRITICAL: Ensure elements are preserved when flattening
# _adjustSectionHeadingLevels uses deepcopy which should preserve elements,
# but verify that elements exist in the source section
adjusted_section = self._adjustSectionHeadingLevels(section)
# Ensure elements are preserved (deepcopy should handle this, but double-check)
if "elements" in section and "elements" not in adjusted_section:
adjusted_section["elements"] = section["elements"]
flattened_doc["sections"].append(adjusted_section)
result["documents"].append(flattened_doc)
@ -1868,9 +1750,10 @@ The JSON should be a fragment that can be merged with the previous response."""
contentPartInstructions: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
language: str = "en"
language: str = "en",
outputFormat: str = "txt"
) -> str:
"""Baue Prompt für Chapter-Sections-Struktur-Generierung."""
"""Baue Prompt für Chapter-Sections-Struktur-Generierung, querying renderer for accepted section types."""
# Baue ContentParts-Index (nur IDs, keine Previews!)
contentPartsIndex = ""
for partId in contentPartIds:
@ -1904,6 +1787,9 @@ The JSON should be a fragment that can be merged with the previous response."""
if not contentPartsIndex:
contentPartsIndex = "\n(No content parts specified for this chapter)"
# Query renderer for accepted section types
acceptedSectionTypes = self._getAcceptedSectionTypesForFormat(outputFormat)
prompt = f"""TASK: Generate Chapter Sections Structure
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
@ -1936,11 +1822,24 @@ If AVAILABLE CONTENT PARTS are listed above, then EVERY section that generates c
## CONTENT TYPES
Available content types for sections: table, bullet_list, heading, paragraph, code_block, image
useAiCall RULES:
- useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed
- useAiCall: false if Format is "object" or "reference" (direct insertion)
- useAiCall: false if Format is "extracted" AND simple "include full text" instruction
- useAiCall: true if no ContentPartIds provided (content must be generated from scratch); Sections without ContentParts must have a clear, detailed generationHint explaining what content to generate
## ACCEPTED SECTION TYPES FOR THIS FORMAT
The document output format ({outputFormat}) accepts only the following section types:
{', '.join(acceptedSectionTypes) if acceptedSectionTypes else 'All section types'}
**IMPORTANT**: Only create sections with content types from the accepted list above. Do not create sections with types that are not accepted by this format.
## FORMAT-APPROPRIATE SECTION STRUCTURE
When determining which sections to create for this chapter, consider the document's output format ({outputFormat}) and ensure sections are structured appropriately for that format:
- Different formats have different capabilities and constraints
- Structure sections to match what the format can effectively represent
- Consider what content types work best for each format
- Ensure the section structure aligns with the format's strengths and limitations
- Select content types that are well-suited for the target format
- **CRITICAL**: Only use section types from the ACCEPTED SECTION TYPES list above
useAiCall RULE (simple):
- useAiCall: true Content needs AI processing (extract, transform, generate, filter, summarize)
- useAiCall: false Content can be inserted directly without changes (Format is "object" or "reference")
RETURN JSON:
{{
@ -1948,10 +1847,9 @@ RETURN JSON:
{{
"id": "section_1",
"content_type": "paragraph",
"contentPartIds": ["extracted_part_1"],
"generationHint": "Include full text",
"useAiCall": false,
"caption": "optional, only for image sections",
"contentPartIds": ["extracted_part_id"],
"generationHint": "Description of what to extract or generate",
"useAiCall": true,
"elements": []
}}
]
@ -1993,7 +1891,7 @@ Return only valid JSON. Do not include any explanatory text outside the JSON.
sectionIndex: Optional[int] = None,
isAggregation: bool = False,
language: str = "en"
) -> str:
) -> tuple[str, str]:
"""Baue Prompt für Section-Generierung mit vollständigem Kontext."""
# Filtere None-Werte
validParts = [p for p in contentParts if p is not None]
@ -2102,8 +2000,16 @@ Return only valid JSON. Do not include any explanatory text outside the JSON.
contentStructureExample = self._getContentStructureExample(contentType)
# Special handling for image content type with IMAGE_GENERATE
isImageGeneration = contentType == "image" and len(validParts) == 0
# Create template structure explicitly (not extracted from prompt)
# This ensures exact identity between initial and continuation prompts
templateStructure = f"""{{
"elements": [
{{
"type": "{contentType}",
"content": {contentStructureExample}
}}
]
}}"""
if isAggregation:
prompt = f"""# TASK: Generate Section Content (Aggregation)
@ -2126,6 +2032,8 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
5. For table: Extract all rows from the context. Return {{"headers": [...], "rows": []}} only if no data exists.
6. Format based on content_type ({contentType}).
7. No HTML/styling: Plain text only, no markup.
8. CONTINUE UNTIL COMPLETE: Extract ALL data from the provided context. Do NOT stop early because you think the response might be too long. Do NOT truncate or abbreviate. Do not impose artificial limits on yourself.
## OUTPUT FORMAT
Return a JSON object with this structure:
@ -2177,6 +2085,7 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
3. Format based on content_type ({contentType}).
4. Return only valid JSON with "elements" array.
5. No HTML/styling: Plain text only, no markup.
6. CONTINUE UNTIL COMPLETE: Extract ALL data from the provided context. Do NOT stop early because you think the response might be too long. Do NOT truncate or abbreviate. Do not impose artificial limits on yourself.
## OUTPUT FORMAT
Return a JSON object with this structure:
@ -2221,6 +2130,7 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
3. The content should be relevant to the USER REQUEST and fit the context of surrounding sections.
4. Return only valid JSON with "elements" array.
5. No HTML/styling: Plain text only, no markup.
6. CONTINUE UNTIL COMPLETE: Extract ALL data from the provided context. Do NOT stop early because you think the response might be too long. Do NOT truncate or abbreviate. Do not impose artificial limits on yourself.
## OUTPUT FORMAT
Return a JSON object with this structure:
@ -2248,7 +2158,69 @@ Output requirements:
## CONTEXT
{contextText if contextText else ""}
"""
return prompt
return prompt, templateStructure
async def buildSectionPromptWithContinuation(
self,
continuationContext: Any,
templateStructure: str,
basePrompt: str
) -> str:
"""Build section prompt with continuation context. Uses unified signature.
Single unified implementation for all section content generation contexts.
Note: All initial context (section, contentParts, userPrompt, etc.) is already
contained in basePrompt. This function only adds continuation-specific instructions.
"""
# Extract continuation context fields (only what's needed for continuation)
incompletePart = continuationContext.incomplete_part
lastRawJson = continuationContext.last_raw_json
# Generate both overlap context and hierarchy context using jsonContinuation
overlapContext = ""
unifiedContext = ""
if lastRawJson:
# Get contexts directly from jsonContinuation
from modules.shared.jsonContinuation import getContexts
contexts = getContexts(lastRawJson)
overlapContext = contexts.overlapContext
unifiedContext = contexts.hierarchyContextForPrompt
elif incompletePart:
unifiedContext = incompletePart
else:
unifiedContext = "Unable to extract context - response was completely broken"
# Build unified continuation prompt format
continuationPrompt = f"""{basePrompt}
--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Continue from where it stopped.
Context showing structure hierarchy with cut point:
```
{unifiedContext}
```
Overlap Requirement:
To ensure proper merging, your response MUST start EXACTLY with the overlap context shown below, then continue with new content.
Overlap context (start your response with this exact text):
```json
{overlapContext if overlapContext else "No overlap context available"}
```
TASK:
1. Start your response EXACTLY with the overlap context shown above (character by character)
2. Continue seamlessly from where the overlap context ends
3. Complete the remaining content following the JSON structure template above
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
CRITICAL:
- Your response MUST begin with the exact overlap context text (this enables automatic merging)
- Continue seamlessly after the overlap context with new content
- Your response must be valid JSON matching the structure template above"""
return continuationPrompt
def _extractAndMergeMultipleJsonBlocks(self, responseText: str, contentType: str, sectionId: str) -> List[Dict[str, Any]]:
"""
@ -2547,4 +2519,38 @@ Output requirements:
# (z.B. Vergleich mehrerer Dokumente)
# Standard: Keine Aggregation für paragraph
return False
def _getAcceptedSectionTypesForFormat(self, outputFormat: str) -> List[str]:
"""
Get accepted section types for a given output format by querying the renderer.
Args:
outputFormat: Format name (e.g., 'csv', 'json', 'pdf')
Returns:
List of accepted section content types (e.g., ["table", "code_block"])
"""
try:
from modules.services.serviceGeneration.renderers.registry import getRenderer
# Get renderer for this format
renderer = getRenderer(outputFormat, self.services)
if renderer and hasattr(renderer, 'getAcceptedSectionTypes'):
# Query renderer for accepted types
acceptedTypes = renderer.getAcceptedSectionTypes(outputFormat)
if acceptedTypes:
logger.debug(f"Renderer for format '{outputFormat}' accepts section types: {acceptedTypes}")
return acceptedTypes
# Fallback: if no renderer or method not found, return all types
from modules.datamodels.datamodelJson import supportedSectionTypes
logger.debug(f"No renderer found for format '{outputFormat}' or method not available, using all section types")
return list(supportedSectionTypes)
except Exception as e:
logger.warning(f"Error querying renderer for accepted section types for format '{outputFormat}': {str(e)}")
# Fallback: return all types
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)

View file

@ -107,47 +107,71 @@ class StructureGenerator:
resultFormat="json"
)
structurePrompt, templateStructure = self._buildChapterStructurePrompt(
userPrompt=userPrompt,
contentParts=contentParts,
outputFormat=outputFormat
)
# Create prompt builder for continuation support
async def buildChapterStructurePromptWithContinuation(
continuationContext: Optional[Dict[str, Any]] = None,
**kwargs
continuationContext: Any,
templateStructure: str,
basePrompt: str
) -> str:
"""Build chapter structure prompt with optional continuation context."""
basePrompt = self._buildChapterStructurePrompt(
userPrompt=userPrompt,
contentParts=contentParts,
outputFormat=outputFormat
)
"""Build chapter structure prompt with continuation context. Uses unified signature.
if continuationContext:
# Add continuation instructions
deliveredSummary = continuationContext.get("delivered_summary", "")
elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
cutOffElement = continuationContext.get("cut_off_element", "")
continuationText = f"{deliveredSummary}\n\n"
continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
if elementBeforeCutoff:
continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
continuationText += f"{elementBeforeCutoff}\n\n"
if cutOffElement:
continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
continuationText += f"{cutOffElement}\n\n"
continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
continuationText += "Start directly with the next chapter that should follow.\n\n"
return f"""{basePrompt}
{continuationText}
Continue generating the remaining chapters now.
"""
Note: All initial context (userPrompt, contentParts, outputFormat, etc.) is already
contained in basePrompt. This function only adds continuation-specific instructions.
"""
# Extract continuation context fields (only what's needed for continuation)
incompletePart = continuationContext.incomplete_part
lastRawJson = continuationContext.last_raw_json
# Generate both overlap context and hierarchy context using jsonContinuation
overlapContext = ""
unifiedContext = ""
if lastRawJson:
# Get contexts directly from jsonContinuation
from modules.shared.jsonContinuation import getContexts
contexts = getContexts(lastRawJson)
overlapContext = contexts.overlapContext
unifiedContext = contexts.hierarchyContextForPrompt
elif incompletePart:
unifiedContext = incompletePart
else:
return basePrompt
unifiedContext = "Unable to extract context - response was completely broken"
# Build unified continuation prompt format
continuationPrompt = f"""{basePrompt}
--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Continue from where it stopped.
Context showing structure hierarchy with cut point:
```
{unifiedContext}
```
Overlap Requirement:
To ensure proper merging, your response MUST start EXACTLY with the overlap context shown below, then continue with new content.
Overlap context (start your response with this exact text):
```json
{overlapContext if overlapContext else "No overlap context available"}
```
TASK:
1. Start your response EXACTLY with the overlap context shown above (character by character)
2. Continue seamlessly from where the overlap context ends
3. Complete the remaining content following the JSON structure template above
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
CRITICAL:
- Your response MUST begin with the exact overlap context text (this enables automatic merging)
- Continue seamlessly after the overlap context with new content
- Your response must be valid JSON matching the structure template above"""
return continuationPrompt
# Call AI with looping support
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
@ -162,7 +186,8 @@ Continue generating the remaining chapters now.
promptArgs={
"userPrompt": userPrompt,
"outputFormat": outputFormat,
"services": self.services
"templateStructure": templateStructure,
"basePrompt": structurePrompt
},
useCaseId="chapter_structure", # REQUIRED: Explicit use case ID
operationId=structureOperationId,
@ -275,7 +300,7 @@ Continue generating the remaining chapters now.
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: str
) -> str:
) -> tuple[str, str]:
"""Baue Prompt für Chapter-Struktur-Generierung."""
# Baue ContentParts-Index - filtere leere Parts heraus
contentPartsIndex = ""
@ -331,6 +356,36 @@ Continue generating the remaining chapters now.
language = self._getUserLanguage()
logger.debug(f"Using language from services (user intention analysis) for structure generation: {language}")
# Create template structure explicitly (not extracted from prompt)
# This ensures exact identity between initial and continuation prompts
templateStructure = f"""{{
"metadata": {{
"title": "Document Title",
"language": "{language}"
}},
"documents": [{{
"id": "doc_1",
"title": "Document Title",
"filename": "document.{outputFormat}",
"outputFormat": "{outputFormat}",
"language": "{language}",
"chapters": [
{{
"id": "chapter_1",
"level": 1,
"title": "Chapter Title",
"contentParts": {{
"extracted_part_id": {{
"instruction": "Use extracted content with ALL relevant details from user request"
}}
}},
"generationHint": "Detailed description including ALL relevant details from user request for this chapter",
"sections": []
}}
]
}}]
}}"""
prompt = f"""# TASK: Generate Chapter Structure
This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.
@ -363,13 +418,24 @@ Then chapters that generate those generic content types MUST assign the relevant
## CHAPTER STRUCTURE REQUIREMENTS
- Generate chapters based on USER REQUEST - analyze what structure the user wants
- Each chapter needs: id, level (1, 2, 3, etc.), title
- IMPORTANT: Each chapter MUST have ALL these fields:
- id: Unique identifier (e.g., "chapter_1")
- level: Heading level (1, 2, 3, etc.)
- title: Chapter title
- contentParts: Object mapping ContentPart IDs to usage instructions
- generationHint: Description of what content to generate
- sections: Empty array [] (REQUIRED - sections are generated in next phase)
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
- The "instruction" field for each ContentPart MUST contain ALL relevant details from the USER REQUEST that apply to content extraction for this specific chapter. Include all formatting rules, data requirements, constraints, and specifications mentioned in the user request that are relevant for processing this ContentPart in this chapter.
- generationHint: Description of what content to generate for this chapter
The generationHint MUST contain ALL relevant details from the USER REQUEST that apply to this specific chapter. Include all formatting rules, data requirements, constraints, column specifications, validation rules, and any other specifications mentioned in the user request that are relevant for generating content for this chapter. Do NOT use generic descriptions - include specific details from the user request.
- The number of chapters depends on the user request - create only what is requested
## WHAT IS A CHAPTER vs WHAT IS FORMATTING
- A CHAPTER contains CONTENT (text, tables, lists, images, etc.)
- FORMATTING INSTRUCTIONS (CSS styling, spacing, typography, colors, borders) are NOT separate chapters
- If user mentions formatting topics, apply these to ALL chapters via generationHint, do NOT create a separate "Formatting" chapter
## DOCUMENT OUTPUT FORMAT
For each document, determine the output format by analyzing the USER REQUEST:
- Look for explicit format mentions
@ -379,6 +445,13 @@ For each document, determine the output format by analyzing the USER REQUEST:
- Include "outputFormat" field in each document in the JSON structure
- Multiple documents can have different formats
## FORMAT-APPROPRIATE CHAPTER STRUCTURE
When determining the chapter structure, consider the document's output format and ensure chapters are structured appropriately for that format:
- Different formats have different capabilities and constraints
- Structure chapters to match what the format can effectively represent
- Consider what content types work best for each format
- Ensure the chapter structure aligns with the format's strengths and limitations
## DOCUMENT LANGUAGE
For each document, determine the language by analyzing the USER REQUEST:
- Look for explicit language mentions
@ -401,7 +474,7 @@ For each document, determine the language by analyzing the USER REQUEST:
- title: Chapter title
- contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}}
- generationHint: Description of what content to generate
- sections: Empty array []
- sections: Empty array [] (MANDATORY - always include this field)
EXAMPLE STRUCTURE (for reference only - adapt to user request):
{{
@ -451,5 +524,5 @@ For each chapter, verify:
OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
"""
return prompt
return prompt, templateStructure

View file

@ -13,7 +13,7 @@ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, Operati
# Type hint for renderer parameter
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from modules.services.serviceGeneration.renderers.rendererBaseTemplate import BaseRenderer
from modules.services.serviceGeneration.renderers.documentRendererBaseTemplate import BaseRenderer
_RendererLike = BaseRenderer
else:
_RendererLike = Any

View file

@ -1,114 +0,0 @@
# Document Generation Architecture Analysis
## Current Flow
### 1. Document Input → ContentParts (`extractAndPrepareContent`)
**Location**: `gateway/modules/services/serviceAi/subContentExtraction.py`
**Flow**:
- Regular documents → Calls `extractContent()` (NON-AI extraction) → Creates contentParts with raw extracted text
- **BUT THEN**:
- Images with "extract" intent → Calls Vision AI (line 190) → AI extraction
- Text with "extract" intent + extractionPrompt → Calls AI processing (line 265) → AI extraction
- Pre-extracted JSON → Uses contentParts directly (no AI)
**Result**: ContentParts may already be AI-processed before structure generation
### 2. Structure Generation
**Location**: `gateway/modules/services/serviceAi/subStructureGeneration.py`
**Flow**:
- Uses contentParts (may already be AI-processed)
- Generates document structure (chapters, sections)
### 3. Section Generation (`_processSingleSection`)
**Location**: `gateway/modules/services/serviceAi/subStructureFilling.py`
**Flow**:
- Uses contentParts (which may already be AI-processed)
- Aggregates "extracted" contentParts with AI (line 554-682)
- Generates section content using `callAiWithLooping` with `useCaseId="section_content"`
## Issues Identified
### Issue 1: Duplicate AI Processing
- AI extraction happens in `extractAndPrepareContent` (for images/text)
- AI generation happens again in section generation
- This is redundant and inefficient
### Issue 2: Architecture Inconsistency
- Pre-extracted JSON files → contentParts directly (no AI)
- Regular documents → contentParts + AI extraction (inconsistent)
- User wants: Documents → contentParts (like pre-extracted JSON) → AI only in section generation
### Issue 3: Image Processing
- Images need Vision AI to extract text
- Currently happens in `extractAndPrepareContent`
- Question: Should this happen during section generation instead?
## Proposed Architecture
### Option A: Remove All AI from `extractAndPrepareContent`
- Documents → `extractContent()` → Raw contentParts (text, tables, etc.)
- Images → Keep as image contentParts (no Vision AI extraction)
- Section generation → Handle images with Vision AI when needed
**Pros**:
- Consistent with pre-extracted JSON flow
- Single point of AI processing (section generation)
- Clear separation of concerns
**Cons**:
- Images won't have extracted text until section generation
- May need to handle images differently in section generation
### Option B: Keep Vision AI for Images Only
- Documents → `extractContent()` → Raw contentParts
- Images → Vision AI extraction → Text contentParts
- Section generation → Uses text contentParts (no additional AI extraction)
**Pros**:
- Images get text extracted early
- Section generation can use text directly
**Cons**:
- Still has AI extraction before structure generation
- Inconsistent with user's request
## Recommendation
**Follow Option A** - Remove all AI extraction from `extractAndPrepareContent`:
1. **Documents → ContentParts** (like pre-extracted JSON):
- Call `extractContent()` (NON-AI)
- Create contentParts with raw extracted content
- Images remain as image contentParts (no Vision AI)
2. **Section Generation**:
- Handle images with Vision AI when needed
- Aggregate all contentParts with AI
- Single point of AI processing
**Benefits**:
- Clear architecture: Documents = raw contentParts
- Consistent with pre-extracted JSON flow
- AI processing only where needed (section generation)
- Easier to understand and maintain
## Questions to Resolve
1. **Image handling**: How should images be processed during section generation?
- Option 1: Vision AI extraction happens automatically when image contentParts are used
- Option 2: Images are passed to AI with Vision models during section generation
- Option 3: Images remain as binary and are rendered directly (no text extraction)
2. **Text with extractionPrompt**: Should text contentParts with extractionPrompt be processed differently?
- Currently: AI processing in `extractAndPrepareContent`
- Proposed: Raw text → AI processing during section generation
3. **Performance**: Will deferring image extraction to section generation cause performance issues?
- Need to test with multiple images

View file

@ -1,77 +0,0 @@
# Architecture Changes Summary
## Problem Identified
The architecture had AI extraction happening in TWO places:
1. **`extractAndPrepareContent`**: Vision AI for images, AI processing for text with extractionPrompt
2. **Section generation**: AI aggregation of contentParts
This was:
- Redundant (double AI processing)
- Inconsistent (pre-extracted JSON had no AI, regular documents had AI)
- Against the desired architecture (documents should become contentParts like pre-extracted JSON)
## Solution Implemented
### 1. Removed AI Extraction from `extractAndPrepareContent`
**File**: `gateway/modules/services/serviceAi/subContentExtraction.py`
**Changes**:
- **Removed**: Vision AI extraction for images (lines 186-246)
- **Removed**: AI text processing with extractionPrompt (lines 260-334)
- **Updated**: Images with extract intent are now marked with `needsVisionExtraction=True` flag
- **Updated**: Regular documents mark images with `needsVisionExtraction=True` when extract intent is present
**Result**: Documents → contentParts (raw extraction only, no AI)
### 2. Added Vision AI Extraction in Section Generation
**File**: `gateway/modules/services/serviceAi/subStructureFilling.py`
**Changes**:
- **Added**: Vision AI extraction logic before aggregation (lines 553-610)
- **Added**: Vision AI extraction logic for single-part processing (lines 1074-1115)
- **Logic**:
- Checks if `part.typeGroup == "image"` AND `needsVisionExtraction == True` AND `intent == "extract"`
- Extracts text using Vision AI (`IMAGE_ANALYSE` operation)
- Replaces image part with text part for further processing
- Images with `contentFormat == "object"` (render intent) are rendered directly (no extraction)
**Result**: AI extraction happens ONLY during section generation
## Architecture Flow (After Changes)
### Document Input → ContentParts
1. **Regular documents**: `extractContent()` (NON-AI) → Raw contentParts
- Images with extract intent: `contentFormat="extracted"`, `needsVisionExtraction=True`
- Images with render intent: `contentFormat="object"` (rendered directly)
- Text: `contentFormat="extracted"` (raw text, no AI processing)
2. **Pre-extracted JSON**: Direct contentParts (no changes)
### Section Generation → AI Processing
1. **Images with extract intent**: Vision AI extraction → Text part → AI aggregation
2. **Images with render intent**: Rendered directly (no extraction)
3. **Text contentParts**: AI aggregation with extractionPrompt (if provided)
## Key Benefits
1. **Consistent Architecture**: Documents = raw contentParts (like pre-extracted JSON)
2. **Single Point of AI Processing**: Only in section generation
3. **Clear Separation**: Extraction vs Generation
4. **Intent-Based Logic**:
- `intent == "extract"` → Vision AI extraction during section generation
- `intent == "render"` → Direct rendering (no extraction)
- `contentFormat == "object"` → Embedded/referenced images (no extraction)
## Testing Checklist
- [ ] Regular documents create contentParts without AI extraction
- [ ] Images with extract intent are marked with `needsVisionExtraction=True`
- [ ] Images with render intent are marked with `contentFormat="object"`
- [ ] Section generation extracts images with Vision AI when needed
- [ ] Section generation renders images with object format directly
- [ ] Text contentParts are processed with AI during section generation
- [ ] Pre-extracted JSON flow still works correctly

View file

@ -15,6 +15,7 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
from modules.shared.jsonUtils import extractJsonString
logger = logging.getLogger(__name__)
@ -25,6 +26,7 @@ class CodeGenerationPath:
def __init__(self, services):
self.services = services
async def generateCode(
self,
userPrompt: str,
@ -66,27 +68,67 @@ class CodeGenerationPath:
# Phase 2: Code content generation (with dependency handling)
self.services.chat.progressLogUpdate(codeOperationId, 0.5, "Generating code content")
codeFiles = await self._generateCodeContent(codeStructure, codeOperationId)
codeFiles = await self._generateCodeContent(
codeStructure,
codeOperationId,
userPrompt=userPrompt,
contentParts=contentParts
)
# Phase 3: Code formatting & validation
self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files")
self.services.chat.progressLogUpdate(codeOperationId, 0.8, "Formatting code files")
formattedFiles = await self._formatAndValidateCode(codeFiles)
# Convert to unified document format
documents = []
# Phase 4: Code Rendering (Renderer-Based)
self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Rendering code files")
# Group files by format
filesByFormat = {}
for file in formattedFiles:
mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt"))
content = file.get("content", "")
if isinstance(content, str):
contentBytes = content.encode('utf-8')
else:
contentBytes = content
fileType = file.get("fileType", outputFormat or "txt")
if fileType not in filesByFormat:
filesByFormat[fileType] = []
filesByFormat[fileType].append(file)
# Render each format group using appropriate renderer
allRenderedDocuments = []
for fileType, files in filesByFormat.items():
# Get renderer for this format
renderer = self._getCodeRenderer(fileType)
if renderer:
# Use code renderer
renderedDocs = await renderer.renderCodeFiles(
codeFiles=files,
metadata=codeStructure.get("metadata", {}),
userPrompt=userPrompt
)
allRenderedDocuments.extend(renderedDocs)
else:
# Fallback: output directly (for formats without renderers)
for file in files:
mimeType = self._getMimeType(file.get("fileType", "txt"))
content = file.get("content", "")
contentBytes = content.encode('utf-8') if isinstance(content, str) else content
from modules.datamodels.datamodelDocument import RenderedDocument
allRenderedDocuments.append(
RenderedDocument(
documentData=contentBytes,
mimeType=mimeType,
filename=file.get("filename", "generated.txt"),
metadata=codeStructure.get("metadata", {})
)
)
# Convert RenderedDocument to DocumentData
documents = []
for renderedDoc in allRenderedDocuments:
documents.append(DocumentData(
documentName=file.get("filename", "generated.txt"),
documentData=contentBytes,
mimeType=mimeType,
sourceJson=file
documentName=renderedDoc.filename,
documentData=renderedDoc.documentData,
mimeType=renderedDoc.mimeType,
sourceJson=renderedDoc.metadata if hasattr(renderedDoc, 'metadata') else None
))
metadata = AiResponseMetadata(
@ -94,11 +136,25 @@ class CodeGenerationPath:
operationType=OperationTypeEnum.DATA_GENERATE.value
)
# Create summary JSON for content field
summaryContent = {
"type": "code_generation",
"metadata": codeStructure.get("metadata", {}),
"files": [
{
"filename": doc.documentName,
"mimeType": doc.mimeType
}
for doc in documents
],
"fileCount": len(documents)
}
self.services.chat.progressLogFinish(codeOperationId, True)
return AiResponse(
documents=documents,
content=None,
content=json.dumps(summaryContent, ensure_ascii=False),
metadata=metadata
)
@ -149,47 +205,184 @@ class CodeGenerationPath:
) -> Dict[str, Any]:
"""Generate code structure using looping system."""
# Build structure generation prompt
structurePrompt = f"""Analyze the following code generation request and create a project structure.
Request: {userPrompt}
Language: {language}
Create a JSON structure with:
1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
2. files: Array of file structures, each with:
- id: Unique identifier
- filename: File name (e.g., "main.py", "utils.py")
- fileType: File extension (e.g., "py", "js")
- dependencies: List of file IDs this file depends on (for multi-file projects)
- imports: List of import statements (for dependency extraction)
- functions: Array of function signatures {{"name": "...", "signature": "..."}}
- classes: Array of class definitions {{"name": "...", "signature": "..."}}
For single-file projects, return one file. For multi-file projects, break down into logical modules.
Return ONLY valid JSON in this format:
{{
# Build content parts index (similar to document generation)
contentPartsIndex = ""
if contentParts:
validParts = []
for part in contentParts:
contentFormat = part.metadata.get("contentFormat", "unknown")
originalFileName = part.metadata.get('originalFileName', 'N/A')
# Include reference parts and parts with data
if contentFormat == "reference" or (part.data and len(str(part.data).strip()) > 0):
validParts.append(part)
if validParts:
contentPartsIndex = "\n## AVAILABLE CONTENT PARTS\n"
for i, part in enumerate(validParts, 1):
contentFormat = part.metadata.get("contentFormat", "unknown")
originalFileName = part.metadata.get('originalFileName', 'N/A')
contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n"
contentPartsIndex += f" Format: {contentFormat}\n"
contentPartsIndex += f" Type: {part.typeGroup}\n"
contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n"
contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n"
contentPartsIndex += f" Original file name: {originalFileName}\n"
contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
if not contentPartsIndex:
contentPartsIndex = "\n(No content parts available)"
# Create template structure explicitly (not extracted from prompt)
templateStructure = f"""{{
"metadata": {{
"language": "{language}",
"projectType": "single_file",
"projectName": "generated-project"
"projectType": "single_file|multi_file",
"projectName": ""
}},
"files": [
{{
"id": "file_1",
"filename": "main.py",
"fileType": "py",
"id": "",
"filename": "",
"fileType": "",
"dependencies": [],
"imports": [],
"functions": [],
"classes": []
}}
]
}}
}}"""
# Build structure generation prompt
structurePrompt = f"""# TASK: Generate Code Project Structure
This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.
## USER REQUEST (for context)
```
{userPrompt}
```
{contentPartsIndex}
## LANGUAGE
{language}
## TASK DESCRIPTION
Analyze the USER REQUEST above and create a project structure that fulfills ALL requirements mentioned in the request.
IMPORTANT: If the request mentions multiple files (e.g., "3 files", "config.json and customers.json", etc.), you MUST include ALL requested files in the files array. Set projectType to "multi_file" when multiple files are requested.
## CONTENT PARTS USAGE (if available)
If AVAILABLE CONTENT PARTS are listed above, use them to inform the file structure:
**Analyzing Content Parts:**
- Review each ContentPart's format, type, original file name, and usage hint
- Content parts with "reference" format = documents/images that will be processed/extracted
- Content parts with "extracted" format = pre-processed data ready to use
- Content parts with "object" format = images/documents to be displayed or processed
**Mapping Content Parts to Files:**
- If content parts contain data (e.g., expense receipts, customer lists), create data files (JSON/CSV) that will store/represent that data
- If content parts are documents to be processed (e.g., PDFs), you may need code files that parse/process them
- Use the original file names and usage hints to determine appropriate filenames and file types
**Populating File Structure Fields:**
- **dependencies**: List file IDs that this file depends on (e.g., if a Python script reads a JSON config file, the script depends on the config file)
- **imports**: For code files, list imports needed based on content parts (e.g., if processing PDFs: ["import PyPDF2"], if processing CSV: ["import csv"], if processing JSON: ["import json"])
- **functions**: For CODE files only - list function signatures if the USER REQUEST specifies functionality (e.g., {{"name": "parseReceipt", "signature": "def parseReceipt(pdf_path: str) -> dict"}})
- **classes**: For CODE files only - list class definitions if the USER REQUEST specifies OOP structure
- **functions/classes for DATA files**: Leave as empty arrays [] - data files (JSON/CSV/XML) don't contain executable code
## FILE STRUCTURE REQUIREMENTS
Create a JSON structure with:
1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
- projectName: Derive from USER REQUEST or content parts (e.g., "expense-tracker", "customer-manager")
2. files: Array of file structures, each with:
- id: Unique identifier (e.g., "file_1", "file_2")
- filename: File name matching USER REQUEST requirements (e.g., "config.json", "customers.json", "expenses.csv")
- fileType: File extension matching the requested format (e.g., "json", "py", "js", "csv", "xml")
- dependencies: List of file IDs this file depends on (for multi-file projects where files reference each other)
- imports: List of import statements that this file will need (e.g., ["import json", "import csv"] for Python files processing JSON/CSV)
- functions: Array of function signatures {{"name": "...", "signature": "..."}} - ONLY if the file will contain executable code (not for pure data files like JSON/CSV)
- classes: Array of class definitions {{"name": "...", "signature": "..."}} - ONLY if the file will contain executable code (not for pure data files like JSON/CSV)
IMPORTANT FOR DATA FILES (JSON, CSV, XML):
- For pure data files (config.json, customers.json, expenses.csv), leave functions and classes as empty arrays []
- These files contain structured data, not executable code
- Use imports only if the file will be processed by code (e.g., a Python script that reads the CSV)
IMPORTANT FOR CODE FILES (Python, JavaScript, etc.):
- Include functions/classes if the USER REQUEST specifies functionality
- Use dependencies to indicate which data files this code file reads/processes
- Use imports to specify what libraries/modules are needed
For single-file projects, return one file. For multi-file projects, include ALL requested files in the files array.
Return ONLY valid JSON matching the request above.
"""
# Build continuation prompt builder
async def buildCodeStructurePromptWithContinuation(
continuationContext: Any,
templateStructure: str,
basePrompt: str
) -> str:
"""Build code structure prompt with continuation context. Uses unified signature.
Note: All initial context (userPrompt, contentParts, etc.) is already
contained in basePrompt. This function only adds continuation-specific instructions.
"""
# Extract continuation context fields (only what's needed for continuation)
incompletePart = continuationContext.incomplete_part
lastRawJson = continuationContext.last_raw_json
# Generate both overlap context and hierarchy context using jsonContinuation
overlapContext = ""
unifiedContext = ""
if lastRawJson:
# Get contexts directly from jsonContinuation
from modules.shared.jsonContinuation import getContexts
contexts = getContexts(lastRawJson)
overlapContext = contexts.overlapContext
unifiedContext = contexts.hierarchyContextForPrompt
elif incompletePart:
unifiedContext = incompletePart
else:
unifiedContext = "Unable to extract context - response was completely broken"
# Build unified continuation prompt format
continuationPrompt = f"""{basePrompt}
--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Continue from where it stopped.
Context showing structure hierarchy with cut point:
```
{unifiedContext}
```
Overlap Requirement:
To ensure proper merging, your response MUST start EXACTLY with the overlap context shown below, then continue with new content.
Overlap context (start your response with this exact text):
```json
{overlapContext if overlapContext else "No overlap context available"}
```
TASK:
1. Start your response EXACTLY with the overlap context shown above (character by character)
2. Continue seamlessly from where the overlap context ends
3. Complete the remaining content following the JSON structure template above
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
CRITICAL:
- Your response MUST begin with the exact overlap context text (this enables automatic merging)
- Continue seamlessly after the overlap context with new content
- Your response must be valid JSON matching the structure template above"""
return continuationPrompt
# Use generic looping system with code_structure use case
options = AiCallOptions(
operationType=OperationTypeEnum.DATA_GENERATE,
@ -199,18 +392,29 @@ Return ONLY valid JSON in this format:
structureJson = await self.services.ai.callAiWithLooping(
prompt=structurePrompt,
options=options,
promptBuilder=buildCodeStructurePromptWithContinuation,
promptArgs={
"userPrompt": userPrompt,
"contentParts": contentParts,
"templateStructure": templateStructure,
"basePrompt": structurePrompt
},
useCaseId="code_structure",
debugPrefix="code_structure_generation",
contentParts=contentParts
)
parsed = json.loads(structureJson)
# Extract JSON from markdown fences if present
extractedJson = extractJsonString(structureJson)
parsed = json.loads(extractedJson)
return parsed
async def _generateCodeContent(
self,
codeStructure: Dict[str, Any],
parentOperationId: str
parentOperationId: str,
userPrompt: str = None,
contentParts: Optional[List[ContentPart]] = None
) -> List[Dict[str, Any]]:
"""Generate code content for each file with dependency handling."""
files = codeStructure.get("files", [])
@ -246,7 +450,9 @@ Return ONLY valid JSON in this format:
fileStructure,
fileContext=fileContext,
allFilesStructure=orderedFiles,
metadata=metadata
metadata=metadata,
userPrompt=userPrompt,
contentParts=contentParts
)
codeFiles.append(fileContent)
@ -452,7 +658,9 @@ Return ONLY valid JSON in this format:
fileStructure: Dict[str, Any],
fileContext: Dict[str, Any] = None,
allFilesStructure: List[Dict[str, Any]] = None,
metadata: Dict[str, Any] = None
metadata: Dict[str, Any] = None,
userPrompt: str = None,
contentParts: Optional[List[ContentPart]] = None
) -> Dict[str, Any]:
"""Generate code content for a single file with context about other files."""
@ -479,10 +687,68 @@ Return ONLY valid JSON in this format:
contextInfo += ", ".join(exports)
contextInfo += "\n"
contentPrompt = f"""Generate complete, executable code for the file: {filename}
# Build content parts section if available
contentPartsSection = ""
if contentParts:
relevantParts = []
for part in contentParts:
# Include parts that might be relevant to this file
usageHint = part.metadata.get('usageHint', '').lower()
originalFileName = part.metadata.get('originalFileName', '').lower()
filenameLower = filename.lower()
# Check if this content part is relevant to this file
if (filenameLower in usageHint or
filenameLower in originalFileName or
part.metadata.get('contentFormat') == 'reference' or
(part.data and len(str(part.data).strip()) > 0)):
relevantParts.append(part)
if relevantParts:
contentPartsSection = "\n## AVAILABLE CONTENT PARTS\n"
for i, part in enumerate(relevantParts, 1):
contentFormat = part.metadata.get("contentFormat", "unknown")
originalFileName = part.metadata.get('originalFileName', 'N/A')
contentPartsSection += f"\n{i}. ContentPart ID: {part.id}\n"
contentPartsSection += f" Format: {contentFormat}\n"
contentPartsSection += f" Type: {part.typeGroup}\n"
contentPartsSection += f" Original file name: {originalFileName}\n"
contentPartsSection += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
# Include actual content if it's small enough (for data files like CSV, JSON)
if part.data and isinstance(part.data, str) and len(part.data) < 2000:
contentPartsSection += f" Content preview: {part.data[:500]}...\n"
# Build user request section
userRequestSection = ""
if userPrompt:
userRequestSection = f"""
## ORIGINAL USER REQUEST
```
{userPrompt}
```
"""
# Create template structure explicitly (not extracted from prompt)
templateStructure = f"""{{
"files": [
{{
"filename": "{filename}",
"content": "// Complete code here",
"functions": {json.dumps(functions, indent=2) if functions else '[]'},
"classes": {json.dumps(classes, indent=2) if classes else '[]'}
}}
]
}}"""
# Build base prompt
contentPrompt = f"""# TASK: Generate Code File Content
Generate complete, executable code for the file: {filename}
{userRequestSection}## FILE SPECIFICATIONS
File Type: {fileType}
Language: {metadata.get('language', 'python') if metadata else 'python'}
{contentPartsSection}
Required functions:
{json.dumps(functions, indent=2) if functions else 'None specified'}
@ -501,18 +767,69 @@ Generate complete, production-ready code with:
5. Type hints where appropriate
Return ONLY valid JSON in this format:
{{
"files": [
{{
"filename": "{filename}",
"content": "// Complete code here",
"functions": {json.dumps(functions, indent=2) if functions else '[]'},
"classes": {json.dumps(classes, indent=2) if classes else '[]'}
}}
]
}}
{templateStructure}
"""
# Build continuation prompt builder
async def buildCodeContentPromptWithContinuation(
continuationContext: Any,
templateStructure: str,
basePrompt: str
) -> str:
"""Build code content prompt with continuation context. Uses unified signature.
Note: All initial context (filename, fileType, functions, etc.) is already
contained in basePrompt. This function only adds continuation-specific instructions.
"""
# Extract continuation context fields (only what's needed for continuation)
incompletePart = continuationContext.incomplete_part
lastRawJson = continuationContext.last_raw_json
# Generate both overlap context and hierarchy context using jsonContinuation
overlapContext = ""
unifiedContext = ""
if lastRawJson:
# Get contexts directly from jsonContinuation
from modules.shared.jsonContinuation import getContexts
contexts = getContexts(lastRawJson)
overlapContext = contexts.overlapContext
unifiedContext = contexts.hierarchyContextForPrompt
elif incompletePart:
unifiedContext = incompletePart
else:
unifiedContext = "Unable to extract context - response was completely broken"
# Build unified continuation prompt format
continuationPrompt = f"""{basePrompt}
--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Continue from where it stopped.
Context showing structure hierarchy with cut point:
```
{unifiedContext}
```
Overlap Requirement:
To ensure proper merging, your response MUST start EXACTLY with the overlap context shown below, then continue with new content.
Overlap context (start your response with this exact text):
```json
{overlapContext if overlapContext else "No overlap context available"}
```
TASK:
1. Start your response EXACTLY with the overlap context shown above (character by character)
2. Continue seamlessly from where the overlap context ends
3. Complete the remaining content following the JSON structure template above
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
CRITICAL:
- Your response MUST begin with the exact overlap context text (this enables automatic merging)
- Continue seamlessly after the overlap context with new content
- Your response must be valid JSON matching the structure template above"""
return continuationPrompt
# Use generic looping system with code_content use case
options = AiCallOptions(
operationType=OperationTypeEnum.DATA_GENERATE,
@ -522,11 +839,27 @@ Return ONLY valid JSON in this format:
contentJson = await self.services.ai.callAiWithLooping(
prompt=contentPrompt,
options=options,
promptBuilder=buildCodeContentPromptWithContinuation,
promptArgs={
"filename": filename,
"fileType": fileType,
"functions": functions,
"classes": classes,
"dependencies": dependencies,
"metadata": metadata,
"userPrompt": userPrompt,
"contentParts": contentParts,
"contextInfo": contextInfo,
"templateStructure": templateStructure,
"basePrompt": contentPrompt
},
useCaseId="code_content",
debugPrefix=f"code_content_{fileStructure.get('id', 'file')}",
)
parsed = json.loads(contentJson)
# Extract JSON from markdown fences if present
extractedJson = extractJsonString(contentJson)
parsed = json.loads(extractedJson)
# Extract file content and metadata
files = parsed.get("files", [])
@ -579,6 +912,28 @@ Return ONLY valid JSON in this format:
"md": "text/markdown",
"java": "text/x-java-source",
"cpp": "text/x-c++src",
"c": "text/x-csrc"
"c": "text/x-csrc",
"csv": "text/csv",
"xml": "application/xml"
}
return mimeTypes.get(fileType.lower(), "text/plain")
def _getCodeRenderer(self, fileType: str):
"""Get code renderer for file type."""
from modules.services.serviceGeneration.renderers.registry import getRenderer
# Map file types to renderer formats
formatMap = {
'json': 'json',
'csv': 'csv',
'xml': 'xml'
}
rendererFormat = formatMap.get(fileType.lower())
if rendererFormat:
renderer = getRenderer(rendererFormat, self.services)
# Check if renderer supports code rendering
if renderer and hasattr(renderer, 'renderCodeFiles'):
return renderer
return None

View file

@ -9,6 +9,7 @@ Handles document generation using existing chapter/section model.
import json
import logging
import time
import copy
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
@ -153,6 +154,11 @@ class DocumentGenerationPath:
# Use validated currentUserLanguage as global fallback (always valid infrastructure)
language = self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage else "en"
# IMPORTANT: Create deep copy BEFORE renderResult to preserve filledStructure with elements
# renderResult might modify the structure, so we need to preserve the original for sourceJson
# This ensures sourceJson contains the complete structure with elements for validation
filledStructureForSourceJson = copy.deepcopy(filledStructure) if filledStructure else None
renderedDocuments = await self.services.ai.renderResult(
filledStructure,
outputFormat,
@ -167,11 +173,12 @@ class DocumentGenerationPath:
for renderedDoc in renderedDocuments:
try:
# Erstelle DocumentData für jedes gerenderte Dokument
# Use the preserved filledStructureForSourceJson (with elements) for sourceJson
docDataObj = DocumentData(
documentName=renderedDoc.filename,
documentData=renderedDoc.documentData,
mimeType=renderedDoc.mimeType,
sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
sourceJson=filledStructureForSourceJson if len(documentDataList) == 0 else None # Nur für erstes Dokument
)
documentDataList.append(docDataObj)
logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")

View file

@ -0,0 +1,45 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Base renderer class for code format renderers.
"""
from abc import abstractmethod
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import logging
logger = logging.getLogger(__name__)
class BaseCodeRenderer(BaseRenderer):
"""Base class for code format renderers."""
@abstractmethod
async def renderCodeFiles(
self,
codeFiles: List[Dict[str, Any]],
metadata: Dict[str, Any],
userPrompt: str = None
) -> List[RenderedDocument]:
"""
Render code files to format-specific output.
Args:
codeFiles: List of file dictionaries with:
- filename: str
- fileType: str (json, csv, xml, etc.)
- content: str (generated code)
- id: str (optional)
metadata: Project metadata (language, projectType, etc.)
userPrompt: Original user prompt
Returns:
List of RenderedDocument objects (can be 1..n files)
"""
pass
def _validateCodeFile(self, codeFile: Dict[str, Any]) -> bool:
"""Validate code file structure."""
required = ['filename', 'fileType', 'content']
return all(key in codeFile for key in required)

View file

@ -63,6 +63,27 @@ class BaseRenderer(ABC):
"""
return 'document' # Default to document style
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that this renderer accepts.
This allows renderers to declare which section types they can process.
Default implementation returns all supported section types.
Override this method in subclasses to restrict accepted types.
Args:
formatName: Optional format name (e.g., 'txt', 'js', 'csv') - useful for renderers
that handle multiple formats with different accepted types (e.g., RendererText)
Returns:
List of accepted section content types (e.g., ["table", "paragraph", "heading"])
Valid types: "table", "bullet_list", "heading", "paragraph", "code_block", "image"
"""
# Default: accept all section types
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
@abstractmethod
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
@ -325,9 +346,18 @@ class BaseRenderer(ABC):
response = await aiService.callAi(request)
# Save styling prompt and response to debug
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
# Save styling prompt and response to debug (fire and forget - don't block on slow file I/O)
# The writeDebugFile calls os.listdir() which can be slow with many files
# Run in background thread to avoid blocking rendering
import threading
def _writeDebugFiles():
try:
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
except Exception:
pass # Silently fail - debug writing should never block rendering
threading.Thread(target=_writeDebugFiles, daemon=True).start()
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""

View file

@ -7,7 +7,7 @@ Renderer registry for automatic discovery and registration of renderers.
import logging
import importlib
from typing import Dict, Type, List, Optional
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
logger = logging.getLogger(__name__)
@ -38,7 +38,7 @@ class RendererRegistry:
# Scan all Python files in the renderers directory
for filePath in renderersDir.glob("*.py"):
if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
if filePath.name in ['registry.py', 'documentRendererBaseTemplate.py', '__init__.py']:
continue
# Extract module name from filename
@ -76,9 +76,26 @@ class RendererRegistry:
# Get supported formats from the renderer class
supportedFormats = rendererClass.getSupportedFormats()
# Get priority (default to 0 if not specified)
priority = rendererClass.getPriority() if hasattr(rendererClass, 'getPriority') else 0
for formatName in supportedFormats:
# Register primary format
self._renderers[formatName.lower()] = rendererClass
formatKey = formatName.lower()
# Check if format already registered - use priority to decide
if formatKey in self._renderers:
existingRenderer = self._renderers[formatKey]
existingPriority = existingRenderer.getPriority() if hasattr(existingRenderer, 'getPriority') else 0
# Only replace if new renderer has higher priority
if priority > existingPriority:
logger.debug(f"Replacing {existingRenderer.__name__} with {rendererClass.__name__} for format '{formatName}' (priority {priority} > {existingPriority})")
self._renderers[formatKey] = rendererClass
else:
logger.debug(f"Keeping {existingRenderer.__name__} for format '{formatName}' (priority {existingPriority} >= {priority})")
else:
# Register primary format
self._renderers[formatKey] = rendererClass
# Register aliases if any
if hasattr(rendererClass, 'getFormatAliases'):
@ -86,7 +103,7 @@ class RendererRegistry:
for alias in aliases:
self._format_mappings[alias.lower()] = formatName.lower()
logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}")
logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats} (priority: {priority})")
except Exception as e:
logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}")

View file

@ -0,0 +1,159 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
CSV code renderer for code generation.
"""
from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import csv
import io
class RendererCodeCsv(BaseCodeRenderer):
"""Renders CSV code files."""
@classmethod
def getSupportedFormats(cls) -> List[str]:
"""Return supported CSV formats."""
return ['csv']
@classmethod
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return []
@classmethod
def getPriority(cls) -> int:
"""Return priority for CSV code renderer."""
return 75 # Higher than document renderer (70) for code generation
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
"""Return output style classification: CSV requires specific structure."""
return 'code'
async def renderCodeFiles(
self,
codeFiles: List[Dict[str, Any]],
metadata: Dict[str, Any],
userPrompt: str = None
) -> List[RenderedDocument]:
"""
Render CSV code files.
For single file: output as-is (validate structure)
For multiple files: output separately (each is independent CSV)
"""
renderedDocs = []
for codeFile in codeFiles:
if not self._validateCodeFile(codeFile):
self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
continue
filename = codeFile['filename']
content = codeFile['content']
# Validate CSV structure (header row, consistent columns)
validatedContent = self._validateAndFixCsv(content)
# Extract CSV statistics for validation
csvStats = self._extractCsvStatistics(validatedContent)
# Merge file-specific metadata with project metadata
fileMetadata = dict(metadata) if metadata else {}
fileMetadata.update({
"filename": filename,
"fileType": "csv",
"statistics": csvStats
})
renderedDocs.append(
RenderedDocument(
documentData=validatedContent.encode('utf-8'),
mimeType="text/csv",
filename=filename,
metadata=fileMetadata
)
)
return renderedDocs
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render method for document generation compatibility.
Delegates to document renderer if needed, or handles code files directly.
"""
# Check if this is code generation (has files array) or document generation (has documents array)
if "files" in extractedContent:
# Code generation path - use renderCodeFiles
files = extractedContent.get("files", [])
metadata = extractedContent.get("metadata", {})
return await self.renderCodeFiles(files, metadata, userPrompt)
else:
# Document generation path - delegate to document renderer
from .rendererCsv import RendererCsv
documentRenderer = RendererCsv(self.services)
return await documentRenderer.render(extractedContent, title, userPrompt, aiService)
def _validateAndFixCsv(self, content: str) -> str:
"""Validate CSV structure and fix common issues."""
try:
# Parse CSV to validate structure
reader = csv.reader(io.StringIO(content))
rows = list(reader)
if not rows:
return content # Empty CSV
# Check header row exists
headerRow = rows[0]
headerCount = len(headerRow)
# Validate all rows have same column count
fixedRows = [headerRow] # Start with header
for i, row in enumerate(rows[1:], 1):
if len(row) != headerCount:
self.logger.debug(f"Row {i} has {len(row)} columns, expected {headerCount}. Auto-fixing...")
# Pad or truncate to match header
if len(row) < headerCount:
row.extend([''] * (headerCount - len(row)))
else:
row = row[:headerCount]
fixedRows.append(row)
# Convert back to CSV string
output = io.StringIO()
writer = csv.writer(output)
for row in fixedRows:
writer.writerow(row)
return output.getvalue()
except Exception as e:
self.logger.warning(f"CSV validation failed: {e}, returning original content")
return content
def _extractCsvStatistics(self, content: str) -> Dict[str, Any]:
"""Extract CSV statistics for validation (row count, column count, headers)."""
try:
reader = csv.reader(io.StringIO(content))
rows = list(reader)
if not rows:
return {"rowCount": 0, "columnCount": 0, "headerRow": []}
headerRow = rows[0]
columnCount = len(headerRow)
rowCount = len(rows) - 1 # Exclude header
return {
"rowCount": rowCount,
"columnCount": columnCount,
"headerRow": headerRow,
"dataRowCount": rowCount
}
except Exception as e:
self.logger.warning(f"CSV statistics extraction failed: {e}")
return {}

View file

@ -0,0 +1,141 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON code renderer for code generation.
"""
from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import json
class RendererCodeJson(BaseCodeRenderer):
"""Renders JSON code files."""
@classmethod
def getSupportedFormats(cls) -> List[str]:
"""Return supported JSON formats."""
return ['json']
@classmethod
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return []
@classmethod
def getPriority(cls) -> int:
"""Return priority for JSON code renderer."""
return 85 # Higher than document renderer (80) for code generation
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
"""Return output style classification: JSON is structured data format."""
return 'code'
async def renderCodeFiles(
self,
codeFiles: List[Dict[str, Any]],
metadata: Dict[str, Any],
userPrompt: str = None
) -> List[RenderedDocument]:
"""
Render JSON code files.
For single file: output as-is
For multiple files: output separately (each file is independent JSON)
"""
renderedDocs = []
for codeFile in codeFiles:
if not self._validateCodeFile(codeFile):
self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
continue
filename = codeFile['filename']
content = codeFile['content']
# Validate JSON syntax and extract statistics
parsed = None
try:
parsed = json.loads(content) # Validate JSON
except json.JSONDecodeError as e:
self.logger.warning(f"Invalid JSON in {filename}: {e}")
# Could fix/format JSON here if needed
# Format JSON (pretty print)
try:
if parsed is None:
parsed = json.loads(content)
formattedContent = json.dumps(parsed, indent=2, ensure_ascii=False)
except Exception:
formattedContent = content # Use original if formatting fails
# Extract JSON statistics for validation
jsonStats = self._extractJsonStatistics(parsed) if parsed else {}
# Merge file-specific metadata with project metadata
fileMetadata = dict(metadata) if metadata else {}
fileMetadata.update({
"filename": filename,
"fileType": "json",
"statistics": jsonStats
})
renderedDocs.append(
RenderedDocument(
documentData=formattedContent.encode('utf-8'),
mimeType="application/json",
filename=filename,
metadata=fileMetadata
)
)
return renderedDocs
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render method for document generation compatibility.
Delegates to document renderer if needed, or handles code files directly.
"""
# Check if this is code generation (has files array) or document generation (has documents array)
if "files" in extractedContent:
# Code generation path - use renderCodeFiles
files = extractedContent.get("files", [])
metadata = extractedContent.get("metadata", {})
return await self.renderCodeFiles(files, metadata, userPrompt)
else:
# Document generation path - delegate to document renderer
# Import here to avoid circular dependency
from .rendererJson import RendererJson
documentRenderer = RendererJson(self.services)
return await documentRenderer.render(extractedContent, title, userPrompt, aiService)
def _extractJsonStatistics(self, parsed: Any) -> Dict[str, Any]:
"""Extract JSON statistics for validation (object count, array count, key count)."""
try:
stats = {
"isArray": isinstance(parsed, list),
"isObject": isinstance(parsed, dict),
"itemCount": 0,
"keyCount": 0
}
if isinstance(parsed, list):
stats["itemCount"] = len(parsed)
# Count nested objects/arrays
objectCount = sum(1 for item in parsed if isinstance(item, dict))
arrayCount = sum(1 for item in parsed if isinstance(item, list))
stats["objectCount"] = objectCount
stats["arrayCount"] = arrayCount
elif isinstance(parsed, dict):
stats["keyCount"] = len(parsed)
stats["keys"] = list(parsed.keys())
# Count nested objects/arrays
objectCount = sum(1 for v in parsed.values() if isinstance(v, dict))
arrayCount = sum(1 for v in parsed.values() if isinstance(v, list))
stats["objectCount"] = objectCount
stats["arrayCount"] = arrayCount
return stats
except Exception as e:
self.logger.warning(f"JSON statistics extraction failed: {e}")
return {}

View file

@ -0,0 +1,148 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
XML code renderer for code generation.
"""
from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import xml.etree.ElementTree as ET
from xml.dom import minidom
class RendererCodeXml(BaseCodeRenderer):
"""Renders XML code files."""
@classmethod
def getSupportedFormats(cls) -> List[str]:
"""Return supported XML formats."""
return ['xml']
@classmethod
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return []
@classmethod
def getPriority(cls) -> int:
"""Return priority for XML code renderer."""
return 80
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
"""Return output style classification: XML is structured data format."""
return 'code'
async def renderCodeFiles(
self,
codeFiles: List[Dict[str, Any]],
metadata: Dict[str, Any],
userPrompt: str = None
) -> List[RenderedDocument]:
"""
Render XML code files.
Validates XML syntax and formats (pretty print).
"""
renderedDocs = []
for codeFile in codeFiles:
if not self._validateCodeFile(codeFile):
self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
continue
filename = codeFile['filename']
content = codeFile['content']
# Validate and format XML
formattedContent = self._validateAndFormatXml(content)
# Extract XML statistics for validation
xmlStats = self._extractXmlStatistics(formattedContent)
# Merge file-specific metadata with project metadata
fileMetadata = dict(metadata) if metadata else {}
fileMetadata.update({
"filename": filename,
"fileType": "xml",
"statistics": xmlStats
})
renderedDocs.append(
RenderedDocument(
documentData=formattedContent.encode('utf-8'),
mimeType="application/xml",
filename=filename,
metadata=fileMetadata
)
)
return renderedDocs
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render method for document generation compatibility.
For XML, we only support code generation (no document renderer exists yet).
"""
# Check if this is code generation (has files array)
if "files" in extractedContent:
# Code generation path - use renderCodeFiles
files = extractedContent.get("files", [])
metadata = extractedContent.get("metadata", {})
return await self.renderCodeFiles(files, metadata, userPrompt)
else:
# Document generation path - not supported yet, return error
self.logger.warning("XML document generation not supported, only code generation")
return [
RenderedDocument(
documentData=f"XML document generation not yet supported".encode('utf-8'),
mimeType="text/plain",
filename="error.txt",
metadata={}
)
]
def _validateAndFormatXml(self, content: str) -> str:
"""Validate XML syntax and format (pretty print)."""
try:
# Parse XML to validate
root = ET.fromstring(content)
# Format XML (pretty print)
rough_string = ET.tostring(root, encoding='unicode')
reparsed = minidom.parseString(rough_string)
formatted = reparsed.toprettyxml(indent=" ")
# Remove extra blank lines
lines = [line for line in formatted.split('\n') if line.strip()]
return '\n'.join(lines)
except ET.ParseError as e:
self.logger.warning(f"Invalid XML: {e}, returning original content")
return content
except Exception as e:
self.logger.warning(f"XML formatting failed: {e}, returning original content")
return content
def _extractXmlStatistics(self, content: str) -> Dict[str, Any]:
"""Extract XML statistics for validation (element count, attribute count, root element)."""
try:
root = ET.fromstring(content)
# Count all elements recursively
elementCount = len(list(root.iter()))
# Count attributes
attributeCount = sum(len(elem.attrib) for elem in root.iter())
# Get root element name
rootElement = root.tag
return {
"elementCount": elementCount,
"attributeCount": attributeCount,
"rootElement": rootElement,
"hasRoot": True
}
except Exception as e:
self.logger.warning(f"XML statistics extraction failed: {e}")
return {}

View file

@ -4,7 +4,7 @@
CSV renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
@ -28,45 +28,131 @@ class RendererCsv(BaseRenderer):
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
"""Return output style classification: CSV requires specific structure (header, then data rows)."""
return 'code'
"""Return output style classification: CSV document renderer converts structured document content to CSV."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that CSV renderer accepts.
CSV renderer only accepts table sections.
"""
return ["table"]
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to CSV format."""
"""Render extracted JSON content to CSV format. Produces one CSV file per table section."""
try:
# Generate CSV directly from JSON (no styling needed for CSV)
csvContent = await self._generateCsvFromJson(extractedContent, title)
# Validate JSON structure
if not self._validateJsonStructure(extractedContent):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
# Determine filename from document or title
# Extract sections and metadata
sections = self._extractSections(extractedContent)
metadata = self._extractMetadata(extractedContent)
# Determine base filename from document or title
documents = extractedContent.get("documents", [])
baseFilename = None
if documents and isinstance(documents[0], dict):
filename = documents[0].get("filename")
if not filename:
filename = self._determineFilename(title, "text/csv")
else:
filename = self._determineFilename(title, "text/csv")
baseFilename = documents[0].get("filename")
if not baseFilename:
baseFilename = self._determineFilename(title, "text/csv")
# Extract metadata for document type and other info
metadata = extractedContent.get("metadata", {}) if extractedContent else {}
documentType = metadata.get("documentType") if isinstance(metadata, dict) else None
# Remove extension from base filename if present
if baseFilename.endswith('.csv'):
baseFilename = baseFilename[:-4]
return [
RenderedDocument(
documentData=csvContent.encode('utf-8'),
mimeType="text/csv",
filename=filename,
documentType=documentType,
metadata=metadata if isinstance(metadata, dict) else None
# Find all table sections
tableSections = []
for section in sections:
sectionType = section.get("content_type", "paragraph")
if sectionType == "table":
tableSections.append(section)
# If no table sections found, return empty CSV
if not tableSections:
self.logger.warning("No table sections found in CSV document - returning empty CSV")
emptyCsv = self._convertRowsToCsv([["No table data available"]])
return [
RenderedDocument(
documentData=emptyCsv.encode('utf-8'),
mimeType="text/csv",
filename=self._determineFilename(title, "text/csv"),
documentType=metadata.get("documentType") if isinstance(metadata, dict) else None,
metadata=metadata if isinstance(metadata, dict) else None
)
]
# Generate one CSV file per table section
renderedDocuments = []
for i, tableSection in enumerate(tableSections):
# Generate CSV content for this table section
csvRows = []
# Add section title if available
sectionTitle = tableSection.get("title")
if sectionTitle:
csvRows.append([sectionTitle])
csvRows.append([]) # Empty row after title
# Render table from section elements
elements = tableSection.get("elements", [])
for element in elements:
tableRows = self._renderJsonTableToCsv(element)
if tableRows:
csvRows.extend(tableRows)
# Convert to CSV string
csvContent = self._convertRowsToCsv(csvRows)
# Determine filename for this table
if len(tableSections) == 1:
# Single table - use base filename
filename = f"{baseFilename}.csv"
else:
# Multiple tables - add index or section title to filename
sectionId = tableSection.get("id", f"table_{i+1}")
# Use section title if available, otherwise use section ID
if sectionTitle:
# Sanitize section title for filename
safeTitle = "".join(c for c in sectionTitle if c.isalnum() or c in (' ', '-', '_')).strip()
safeTitle = safeTitle.replace(' ', '_')[:30] # Limit length
filename = f"{baseFilename}_{safeTitle}.csv"
else:
filename = f"{baseFilename}_{sectionId}.csv"
# Extract document type from metadata
documentType = metadata.get("documentType") if isinstance(metadata, dict) else None
renderedDocuments.append(
RenderedDocument(
documentData=csvContent.encode('utf-8'),
mimeType="text/csv",
filename=filename,
documentType=documentType,
metadata=metadata if isinstance(metadata, dict) else None
)
)
]
return renderedDocuments
except Exception as e:
self.logger.error(f"Error rendering CSV: {str(e)}")
# Return minimal CSV fallback
return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
fallbackCsv = self._convertRowsToCsv([["Title", "Content"], [title, f"Error rendering report: {str(e)}"]])
return [
RenderedDocument(
documentData=fallbackCsv.encode('utf-8'),
mimeType="text/csv",
filename=self._determineFilename(title, "text/csv"),
metadata=extractedContent.get("metadata", {}) if extractedContent else None
)
]
async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate CSV content from structured JSON document."""
"""Generate CSV content from structured JSON document. DEPRECATED: Use render() method instead."""
# This method is kept for backward compatibility but is no longer used
# The render() method now handles CSV generation directly
try:
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
if not self._validateJsonStructure(jsonContent):
@ -88,12 +174,14 @@ class RendererCsv(BaseRenderer):
csvRows.append([documentTitle])
csvRows.append([]) # Empty row
# Process each section in order
# Process each section in order - only table sections
for section in sections:
sectionCsv = self._renderJsonSectionToCsv(section)
if sectionCsv:
csvRows.extend(sectionCsv)
csvRows.append([]) # Empty row between sections
sectionType = section.get("content_type", "paragraph")
if sectionType == "table":
sectionCsv = self._renderJsonSectionToCsv(section)
if sectionCsv:
csvRows.extend(sectionCsv)
csvRows.append([]) # Empty row between sections
# Convert to CSV string
csvContent = self._convertRowsToCsv(csvRows)
@ -309,3 +397,4 @@ class RendererCsv(BaseRenderer):
content = '\n'.join(lines[1:-1]).strip()
return content

View file

@ -4,7 +4,7 @@
DOCX renderer for report generation using python-docx.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io
@ -44,6 +44,15 @@ class RendererDocx(BaseRenderer):
"""Return output style classification: Word documents are formatted documents."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that DOCX renderer accepts.
DOCX renderer accepts all section types (Word documents can contain all content types).
"""
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
@ -107,24 +116,37 @@ class RendererDocx(BaseRenderer):
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate DOCX content from structured JSON document."""
import time
start_time = time.time()
try:
self.logger.debug("_generateDocxFromJson: Starting document generation")
# Create new document
doc = Document()
self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")
# Get style set: use styles from metadata if available, otherwise enhance with AI
style_start = time.time()
self.logger.debug("_generateDocxFromJson: About to get style set")
styleSet = await self._getStyleSet(json_content, userPrompt, aiService)
self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")
# Setup basic document styles and create all styles from style set
setup_start = time.time()
self.logger.debug("_generateDocxFromJson: Setting up document styles")
self._setupBasicDocumentStyles(doc)
self._setupDocumentStyles(doc, styleSet)
self.logger.debug(f"_generateDocxFromJson: Document styles setup in {time.time() - setup_start:.2f}s")
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
if not self._validateJsonStructure(json_content):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
# Extract sections and metadata from standardized schema
extract_start = time.time()
self.logger.debug("_generateDocxFromJson: Extracting sections and metadata")
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
self.logger.debug(f"_generateDocxFromJson: Extracted {len(sections)} sections in {time.time() - extract_start:.2f}s")
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
@ -135,18 +157,32 @@ class RendererDocx(BaseRenderer):
doc.add_paragraph(document_title, style='Title')
# Process each section in order
for section in sections:
render_start = time.time()
self.logger.debug(f"_generateDocxFromJson: Starting to render {len(sections)} sections")
for idx, section in enumerate(sections):
section_start = time.time()
self.logger.debug(f"_generateDocxFromJson: Rendering section {idx + 1}/{len(sections)}")
self._renderJsonSection(doc, section, styleSet)
self.logger.debug(f"_generateDocxFromJson: Section {idx + 1} rendered in {time.time() - section_start:.2f}s")
self.logger.debug(f"_generateDocxFromJson: All sections rendered in {time.time() - render_start:.2f}s")
# Save to buffer
save_start = time.time()
self.logger.debug("_generateDocxFromJson: Starting to save document to buffer")
buffer = io.BytesIO()
doc.save(buffer)
buffer.seek(0)
self.logger.debug(f"_generateDocxFromJson: Document saved to buffer in {time.time() - save_start:.2f}s")
# Convert to base64
encode_start = time.time()
self.logger.debug("_generateDocxFromJson: Converting to base64")
docx_bytes = buffer.getvalue()
docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
self.logger.debug(f"_generateDocxFromJson: Converted to base64 in {time.time() - encode_start:.2f}s (document size: {len(docx_bytes)} bytes)")
total_time = time.time() - start_time
self.logger.info(f"_generateDocxFromJson: Document generation completed in {total_time:.2f}s")
return docx_base64
except Exception as e:
@ -299,6 +335,9 @@ class RendererDocx(BaseRenderer):
# Process each element in the section
for element in elements:
# Skip non-dict elements (e.g., int, str, etc.)
if not isinstance(element, dict):
continue
element_type = element.get("type", "")
# Support three content formats from Phase 5D
@ -368,7 +407,23 @@ class RendererDocx(BaseRenderer):
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON table to DOCX using AI-generated styles."""
"""
Render a JSON table to DOCX using AI-generated styles.
PERFORMANCE OPTIMIZATION: Uses direct XML manipulation via lxml instead of
python-docx high-level API. This bypasses the slow cell.text assignment
which creates multiple XML operations per cell.
The key insight: python-docx's cell.text setter is slow because it:
1. Clears existing content (XML manipulation)
2. Creates a new paragraph element
3. Creates a new run element
4. Sets text value
By building the XML directly, we achieve 100-1000x faster performance.
"""
import time
table_start = time.time()
try:
# Extract from nested content structure
content = table_data.get("content", {})
@ -380,59 +435,244 @@ class RendererDocx(BaseRenderer):
if not headers or not rows:
return
# Create table
table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
table.alignment = WD_TABLE_ALIGNMENT.CENTER
totalRows = len(rows)
totalCols = len(headers)
totalCells = totalRows * totalCols
# Apply table borders based on AI style
border_style = styles["table_border"]["style"]
if border_style == "horizontal_only":
self._applyHorizontalBordersOnly(table)
elif border_style == "grid":
table.style = 'Table Grid'
# else: no borders
self.logger.debug(f"_renderJsonTable: Starting FAST table render - {totalRows} rows x {totalCols} columns = {totalCells} cells")
# Add headers with AI-generated styling
header_row = table.rows[0]
header_style = styles["table_header"]
for i, header in enumerate(headers):
if i < len(header_row.cells):
cell = header_row.cells[i]
cell.text = str(header)
# Apply background color
bg_color = header_style["background"].lstrip('#')
self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
# Apply text styling
for paragraph in cell.paragraphs:
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT
for run in paragraph.runs:
run.bold = header_style["bold"]
run.font.size = Pt(11)
text_color = header_style["text_color"].lstrip('#')
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
# Use fast XML-based table rendering
self._renderTableFastXml(doc, headers, rows, styles)
# Add data rows with AI-generated styling
cell_style = styles["table_cell"]
for row_idx, row_data in enumerate(rows):
if row_idx + 1 < len(table.rows):
table_row = table.rows[row_idx + 1]
for col_idx, cell_data in enumerate(row_data):
if col_idx < len(table_row.cells):
cell = table_row.cells[col_idx]
cell.text = str(cell_data)
# Apply text styling
for paragraph in cell.paragraphs:
paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
for run in paragraph.runs:
run.font.size = Pt(10)
text_color = cell_style["text_color"].lstrip('#')
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
total_time = time.time() - table_start
rate = totalCells / total_time if total_time > 0 else 0
self.logger.info(f"_renderJsonTable: Table completed in {total_time:.2f}s ({totalRows} rows x {totalCols} cols = {totalCells} cells) - Rate: {rate:.0f} cells/s")
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)
def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None:
"""
High-performance table rendering using direct XML manipulation.
This bypasses python-docx's slow high-level API and builds the table
XML structure directly using lxml, which is 100-1000x faster.
"""
import time
from docx.oxml.shared import OxmlElement, qn
from docx.oxml.ns import nsmap
from lxml import etree
create_start = time.time()
# Get the document body element
body = doc._body._body
# Create table element
tbl = OxmlElement('w:tbl')
# Add table properties
tblPr = OxmlElement('w:tblPr')
# Table width - auto
tblW = OxmlElement('w:tblW')
tblW.set(qn('w:type'), 'auto')
tblW.set(qn('w:w'), '0')
tblPr.append(tblW)
# Center alignment
jc = OxmlElement('w:jc')
jc.set(qn('w:val'), 'center')
tblPr.append(jc)
# Apply table borders directly (works without template styles)
borderStyle = styles.get("table_border", {}).get("style", "grid")
tblBorders = self._createTableBordersXml(borderStyle)
tblPr.append(tblBorders)
# Table cell margins for better readability
tblCellMar = OxmlElement('w:tblCellMar')
for side in ['top', 'left', 'bottom', 'right']:
margin = OxmlElement(f'w:{side}')
margin.set(qn('w:w'), '80') # 80 twips = ~4pt padding
margin.set(qn('w:type'), 'dxa')
tblCellMar.append(margin)
tblPr.append(tblCellMar)
tbl.append(tblPr)
# Create table grid (column definitions)
tblGrid = OxmlElement('w:tblGrid')
for _ in range(len(headers)):
gridCol = OxmlElement('w:gridCol')
tblGrid.append(gridCol)
tbl.append(tblGrid)
self.logger.debug(f"_renderTableFastXml: Table structure created in {time.time() - create_start:.3f}s")
# Build all rows using fast XML
rows_start = time.time()
# Header row
headerRow = self._createTableRowXml(headers, isHeader=True)
tbl.append(headerRow)
header_time = time.time() - rows_start
self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s")
# Data rows - batch process for performance
data_start = time.time()
rowCount = len(rows)
for idx, rowData in enumerate(rows):
# Convert all cells to strings
cellTexts = [str(cell) if cell is not None else '' for cell in rowData]
# Pad if needed
while len(cellTexts) < len(headers):
cellTexts.append('')
row = self._createTableRowXml(cellTexts, isHeader=False)
tbl.append(row)
# Log progress every 10%
if rowCount > 100 and (idx + 1) % (rowCount // 10) == 0:
elapsed = time.time() - data_start
rate = (idx + 1) * len(headers) / elapsed if elapsed > 0 else 0
self.logger.debug(f"_renderTableFastXml: Progress {((idx + 1) / rowCount * 100):.0f}% ({idx + 1}/{rowCount} rows) - Rate: {rate:.0f} cells/s")
data_time = time.time() - data_start
# Append table to document body
body.append(tbl)
total_time = time.time() - create_start
totalCells = (rowCount + 1) * len(headers)
rate = totalCells / total_time if total_time > 0 else 0
self.logger.debug(f"_renderTableFastXml: All rows created in {data_time:.2f}s, total: {total_time:.2f}s, rate: {rate:.0f} cells/s")
def _createTableBordersXml(self, borderStyle: str) -> Any:
"""
Create table borders XML element based on style.
Supports:
- 'grid': Full grid with all borders (default)
- 'horizontal_only': Only horizontal lines between rows
- 'none' or other: Minimal/no borders
"""
from docx.oxml.shared import OxmlElement, qn
tblBorders = OxmlElement('w:tblBorders')
# Border color - dark gray for professional look
borderColor = '404040'
borderSize = '4' # 0.5pt (in eighths of a point)
if borderStyle == "grid":
# Full grid - all borders
for borderName in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']:
border = OxmlElement(f'w:{borderName}')
border.set(qn('w:val'), 'single')
border.set(qn('w:sz'), borderSize)
border.set(qn('w:space'), '0')
border.set(qn('w:color'), borderColor)
tblBorders.append(border)
elif borderStyle == "horizontal_only":
# Only horizontal lines
for borderName in ['top', 'bottom', 'insideH']:
border = OxmlElement(f'w:{borderName}')
border.set(qn('w:val'), 'single')
border.set(qn('w:sz'), borderSize)
border.set(qn('w:space'), '0')
border.set(qn('w:color'), borderColor)
tblBorders.append(border)
# No vertical borders
for borderName in ['left', 'right', 'insideV']:
border = OxmlElement(f'w:{borderName}')
border.set(qn('w:val'), 'nil')
tblBorders.append(border)
else:
# Minimal - just outer border
for borderName in ['top', 'left', 'bottom', 'right']:
border = OxmlElement(f'w:{borderName}')
border.set(qn('w:val'), 'single')
border.set(qn('w:sz'), borderSize)
border.set(qn('w:space'), '0')
border.set(qn('w:color'), borderColor)
tblBorders.append(border)
return tblBorders
def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any:
"""
Create a table row XML element with cells.
This is the core fast-path: builds the row XML directly without
going through python-docx's slow cell.text assignment.
"""
from docx.oxml.shared import OxmlElement, qn
tr = OxmlElement('w:tr')
# Row properties for header
if isHeader:
trPr = OxmlElement('w:trPr')
tblHeader = OxmlElement('w:tblHeader')
trPr.append(tblHeader)
tr.append(trPr)
for cellText in cells:
# Create cell
tc = OxmlElement('w:tc')
# Cell properties
tcPr = OxmlElement('w:tcPr')
tcW = OxmlElement('w:tcW')
tcW.set(qn('w:type'), 'auto')
tcW.set(qn('w:w'), '0')
tcPr.append(tcW)
# Header cell styling - light blue background
if isHeader:
shd = OxmlElement('w:shd')
shd.set(qn('w:val'), 'clear')
shd.set(qn('w:color'), 'auto')
shd.set(qn('w:fill'), '4472C4') # Professional blue
tcPr.append(shd)
tc.append(tcPr)
# Paragraph with text
p = OxmlElement('w:p')
# Add run with text
r = OxmlElement('w:r')
# Header text styling - bold and white
if isHeader:
rPr = OxmlElement('w:rPr')
b = OxmlElement('w:b')
rPr.append(b)
# White text color
color = OxmlElement('w:color')
color.set(qn('w:val'), 'FFFFFF')
rPr.append(color)
r.append(rPr)
# Text element
t = OxmlElement('w:t')
# Preserve spaces if text starts/ends with whitespace
if cellText and (cellText[0] == ' ' or cellText[-1] == ' '):
t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
t.text = cellText
r.append(t)
p.append(r)
tc.append(p)
tr.append(tc)
return tr
def _applyHorizontalBordersOnly(self, table) -> None:
"""Apply only horizontal borders to the table (no vertical borders)."""
@ -526,9 +766,38 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not set cell background: {str(e)}")
def _setCellBackgroundFast(self, cell, hex_color: str) -> None:
"""
Set the background color of a table cell using pre-calculated hex string.
PERFORMANCE OPTIMIZED: Avoids RGBColor unpacking and string formatting in hot loop.
"""
try:
from docx.oxml.shared import OxmlElement, qn
# Get cell properties
tc_pr = cell._element.find(qn('w:tcPr'))
if tc_pr is None:
tc_pr = OxmlElement('w:tcPr')
cell._element.insert(0, tc_pr)
# Remove existing shading
existing_shading = tc_pr.find(qn('w:shd'))
if existing_shading is not None:
tc_pr.remove(existing_shading)
# Create new shading element with pre-calculated hex color
shading = OxmlElement('w:shd')
shading.set(qn('w:val'), 'clear')
shading.set(qn('w:color'), 'auto')
shading.set(qn('w:fill'), hex_color)
tc_pr.append(shading)
except Exception as e:
self.logger.warning(f"Could not set cell background: {str(e)}")
def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles."""
"""Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance."""
try:
# Extract from nested content structure
content = list_data.get("content", {})
@ -537,20 +806,38 @@ class RendererDocx(BaseRenderer):
items = content.get("items", [])
bullet_style = styles.get("bullet_list", {})
# Pre-calculate and cache style objects to avoid repeated parsing
font_size_pt = None
text_color_rgb = None
if bullet_style:
if "font_size" in bullet_style:
font_size_pt = Pt(bullet_style["font_size"])
if "color" in bullet_style:
color_hex = bullet_style["color"].lstrip('#')
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
for item in items:
if isinstance(item, str):
para = doc.add_paragraph(item, style='List Bullet')
elif isinstance(item, dict) and "text" in item:
para = doc.add_paragraph(item["text"], style='List Bullet')
# Apply bullet list styling from style set
# Apply bullet list styling from style set - use cached objects
if bullet_style and para.runs:
for run in para.runs:
if "font_size" in bullet_style:
run.font.size = Pt(bullet_style["font_size"])
if "color" in bullet_style:
color_hex = bullet_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
# Use direct access instead of iterating
if len(para.runs) > 0:
run = para.runs[0]
if font_size_pt:
run.font.size = font_size_pt
if text_color_rgb:
run.font.color.rgb = text_color_rgb
else:
# Create run if none exists
run = para.add_run()
if font_size_pt:
run.font.size = font_size_pt
if text_color_rgb:
run.font.color.rgb = text_color_rgb
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
@ -603,17 +890,36 @@ class RendererDocx(BaseRenderer):
if text:
para = doc.add_paragraph(text)
# Apply paragraph styling from style set
# Apply paragraph styling from style set - OPTIMIZED: pre-calculate style objects
paragraph_style = styles.get("paragraph", {})
if paragraph_style:
for run in para.runs:
if "font_size" in paragraph_style:
run.font.size = Pt(paragraph_style["font_size"])
if "bold" in paragraph_style:
run.font.bold = paragraph_style["bold"]
if "color" in paragraph_style:
color_hex = paragraph_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
# Pre-calculate and cache style objects
font_size_pt = None
text_color_rgb = None
if "font_size" in paragraph_style:
font_size_pt = Pt(paragraph_style["font_size"])
if "color" in paragraph_style:
color_hex = paragraph_style["color"].lstrip('#')
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
bold = paragraph_style.get("bold", False)
# Use direct access instead of iterating
if len(para.runs) > 0:
run = para.runs[0]
if font_size_pt:
run.font.size = font_size_pt
run.font.bold = bold
if text_color_rgb:
run.font.color.rgb = text_color_rgb
else:
# Create run if none exists
run = para.add_run()
if font_size_pt:
run.font.size = font_size_pt
run.font.bold = bold
if text_color_rgb:
run.font.color.rgb = text_color_rgb
if "align" in paragraph_style:
align = paragraph_style["align"]
if align == "center":
@ -640,16 +946,32 @@ class RendererDocx(BaseRenderer):
if code:
if language:
lang_para = doc.add_paragraph(f"Code ({language}):")
if lang_para.runs:
if len(lang_para.runs) > 0:
lang_para.runs[0].bold = True
# Pre-calculate and cache style objects
code_font_name = code_style.get("font", "Courier New")
code_font_size_pt = Pt(code_style.get("font_size", 9))
code_text_color_rgb = None
if "color" in code_style:
color_hex = code_style["color"].lstrip('#')
code_text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
code_para = doc.add_paragraph(code)
for run in code_para.runs:
run.font.name = code_style.get("font", "Courier New")
run.font.size = Pt(code_style.get("font_size", 9))
if "color" in code_style:
color_hex = code_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
# Use direct access instead of iterating
if len(code_para.runs) > 0:
run = code_para.runs[0]
run.font.name = code_font_name
run.font.size = code_font_size_pt
if code_text_color_rgb:
run.font.color.rgb = code_text_color_rgb
else:
# Create run if none exists
run = code_para.add_run()
run.font.name = code_font_name
run.font.size = code_font_size_pt
if code_text_color_rgb:
run.font.color.rgb = code_text_color_rgb
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")

View file

@ -4,7 +4,7 @@
HTML renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
@ -31,6 +31,15 @@ class RendererHtml(BaseRenderer):
"""Return output style classification: HTML web pages are rendered documents."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that HTML renderer accepts.
HTML renderer accepts all section types (HTML pages can contain all content types including images).
"""
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render HTML document with images as separate files.

View file

@ -4,7 +4,7 @@
Image renderer for report generation using AI image generation.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import logging
@ -35,6 +35,14 @@ class RendererImage(BaseRenderer):
"""Return output style classification: Images are visual media."""
return 'image'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that Image renderer accepts.
Image renderer only accepts image sections (images are generated from image sections).
"""
return ["image"]
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to image format using AI image generation."""
try:

View file

@ -4,7 +4,7 @@
JSON renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import json
@ -29,8 +29,18 @@ class RendererJson(BaseRenderer):
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
"""Return output style classification: JSON is structured data format."""
return 'code'
"""Return output style classification: JSON document renderer converts structured document content to JSON."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that JSON renderer accepts.
JSON renderer accepts all section types except images (images cannot be serialized to JSON).
"""
from modules.datamodels.datamodelJson import supportedSectionTypes
# Return all types except image
return [st for st in supportedSectionTypes if st != "image"]
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to JSON format."""

View file

@ -4,7 +4,7 @@
Markdown renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
@ -31,6 +31,15 @@ class RendererMarkdown(BaseRenderer):
"""Return output style classification: Markdown documents are formatted documents."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that Markdown renderer accepts.
Markdown renderer accepts all section types except images.
"""
from modules.datamodels.datamodelJson import supportedSectionTypes
return [st for st in supportedSectionTypes if st != "image"]
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Markdown format."""
try:

View file

@ -4,7 +4,7 @@
PDF renderer for report generation using reportlab.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io
@ -44,6 +44,15 @@ class RendererPdf(BaseRenderer):
"""Return output style classification: PDF documents are formatted documents."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that PDF renderer accepts.
PDF renderer accepts all section types (PDF documents can contain all content types).
"""
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:

View file

@ -7,7 +7,7 @@ import json
import re
from datetime import datetime, UTC
from typing import Dict, Any, Optional, List
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
logger = logging.getLogger(__name__)
@ -41,6 +41,15 @@ class RendererPptx(BaseRenderer):
"""Return output style classification: PowerPoint presentations are formatted documents."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that PowerPoint renderer accepts.
PowerPoint renderer accepts all section types (presentations can contain all content types including images).
"""
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render content as PowerPoint presentation from JSON data.
@ -1257,78 +1266,96 @@ JSON ONLY. NO OTHER TEXT."""
for col_idx in range(num_cols):
table.columns[col_idx].width = col_width_emu
# Add headers with styling
# Add headers with styling - OPTIMIZED: pre-calculate color/style objects
header_style = styles.get("table_header", {})
header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121)))
header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255)))
header_font_size = header_style.get("font_size", 18)
# Pre-calculate and cache RGB color objects
header_bg_rgb = RGBColor(*header_bg_color)
header_text_rgb = RGBColor(*header_text_color)
header_font_size_pt = Pt(header_font_size)
header_bold = header_style.get("bold", True)
# Determine alignment once
align = header_style.get("align", "center")
if align == "left":
header_alignment = PP_ALIGN.LEFT
elif align == "right":
header_alignment = PP_ALIGN.RIGHT
else:
header_alignment = PP_ALIGN.CENTER
for col_idx, header in enumerate(headers):
cell = table.cell(0, col_idx)
# Clear existing text and set new text
cell.text_frame.clear()
cell.text = str(header) if header else ""
header_text = str(header) if header else ""
cell.text = header_text
# Ensure paragraph exists
if len(cell.text_frame.paragraphs) == 0:
cell.text_frame.add_paragraph()
# Apply styling
# Apply styling - use cached objects
cell.fill.solid()
cell.fill.fore_color.rgb = RGBColor(*header_bg_color)
cell.fill.fore_color.rgb = header_bg_rgb
para = cell.text_frame.paragraphs[0]
para.font.bold = header_style.get("bold", True)
para.font.size = Pt(header_font_size)
para.font.color.rgb = RGBColor(*header_text_color)
align = header_style.get("align", "center")
if align == "left":
para.alignment = PP_ALIGN.LEFT
elif align == "right":
para.alignment = PP_ALIGN.RIGHT
else:
para.alignment = PP_ALIGN.CENTER
para.font.bold = header_bold
para.font.size = header_font_size_pt
para.font.color.rgb = header_text_rgb
para.alignment = header_alignment
# Ensure text is set on paragraph
if not para.text:
para.text = str(header) if header else ""
para.text = header_text
# Add data rows with styling
# Add data rows with styling - OPTIMIZED: pre-calculate color/style objects
cell_style = styles.get("table_cell", {})
cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255)))
cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47)))
cell_font_size = cell_style.get("font_size", 16)
# Pre-calculate and cache RGB color objects
cell_bg_rgb = RGBColor(*cell_bg_color)
cell_text_rgb = RGBColor(*cell_text_color)
cell_font_size_pt = Pt(cell_font_size)
cell_bold = cell_style.get("bold", False)
# Determine alignment once
align = cell_style.get("align", "left")
if align == "center":
cell_alignment = PP_ALIGN.CENTER
elif align == "right":
cell_alignment = PP_ALIGN.RIGHT
else:
cell_alignment = PP_ALIGN.LEFT
for row_idx, row_data in enumerate(rows, 1):
for col_idx, cell_data in enumerate(row_data[:num_cols]):
cell = table.cell(row_idx, col_idx)
# Clear existing text and set new text
cell.text_frame.clear()
cell.text = str(cell_data) if cell_data is not None else ""
cell_text = str(cell_data) if cell_data is not None else ""
cell.text = cell_text
# Ensure paragraph exists
if len(cell.text_frame.paragraphs) == 0:
cell.text_frame.add_paragraph()
# Apply styling
# Apply styling - use cached objects
cell.fill.solid()
cell.fill.fore_color.rgb = RGBColor(*cell_bg_color)
cell.fill.fore_color.rgb = cell_bg_rgb
para = cell.text_frame.paragraphs[0]
para.font.size = Pt(cell_font_size)
para.font.bold = cell_style.get("bold", False)
para.font.color.rgb = RGBColor(*cell_text_color)
align = cell_style.get("align", "left")
if align == "center":
para.alignment = PP_ALIGN.CENTER
elif align == "right":
para.alignment = PP_ALIGN.RIGHT
else:
para.alignment = PP_ALIGN.LEFT
para.font.size = cell_font_size_pt
para.font.bold = cell_bold
para.font.color.rgb = cell_text_rgb
para.alignment = cell_alignment
# Ensure text is set on paragraph
if not para.text:
para.text = str(cell_data) if cell_data is not None else ""
para.text = cell_text
except Exception as e:
logger.warning(f"Error adding table to slide: {str(e)}")
@ -1353,6 +1380,13 @@ JSON ONLY. NO OTHER TEXT."""
base_font_size = list_style.get("font_size", 14)
calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability
# Pre-calculate and cache style objects to avoid repeated parsing
font_size_pt = Pt(calculated_size)
text_color = self._getSafeColor(list_style.get("color", (47, 47, 47)))
text_color_rgb = RGBColor(*text_color)
space_before_pt = Pt(2)
space_after_pt = Pt(2)
logger.debug(f"Rendering bullet list with {len(items)} items")
for idx, item in enumerate(items):
@ -1378,12 +1412,12 @@ JSON ONLY. NO OTHER TEXT."""
# Set text content
p.text = item_text
# Apply formatting first
p.font.size = Pt(calculated_size)
p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47))))
# Apply formatting - use cached objects
p.font.size = font_size_pt
p.font.color.rgb = text_color_rgb
p.alignment = PP_ALIGN.LEFT # Left align bullet lists
p.space_before = Pt(2) # Small spacing before
p.space_after = Pt(2) # Small spacing after
p.space_before = space_before_pt # Small spacing before
p.space_after = space_after_pt # Small spacing after
# In python-pptx, setting level > 0 should enable bullets automatically
# However, some versions may not support paragraph_format, so we'll use manual bullets as fallback

View file

@ -4,7 +4,7 @@
Text renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
@ -63,6 +63,17 @@ class RendererText(BaseRenderer):
# All other formats handled by RendererText are code style
return 'code'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that Text renderer accepts.
Text renderer accepts all section types except images (text formats cannot display images).
"""
from modules.datamodels.datamodelJson import supportedSectionTypes
# Text renderer accepts all types except images
return [st for st in supportedSectionTypes if st != "image"]
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to plain text format."""
try:

View file

@ -4,11 +4,12 @@
Excel renderer for report generation using openpyxl.
"""
from .rendererBaseTemplate import BaseRenderer
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io
import base64
import re
from datetime import datetime, UTC, date
try:
from dateutil import parser as date_parser
@ -25,6 +26,16 @@ try:
except ImportError:
OPENPYXL_AVAILABLE = False
# PERFORMANCE: Pre-compile regex patterns used in hot loops
_DATE_PATTERN = re.compile(
r'^\d{1,4}[-./]\d{1,2}[-./]\d{1,4}' # Basic date pattern: YYYY-MM-DD or DD.MM.YYYY
r'|^\d{1,2}[-./]\d{1,2}[-./]\d{2,4}' # DD/MM/YYYY or MM/DD/YYYY
r'|^\d{4}-\d{2}-\d{2}' # ISO format: YYYY-MM-DD
r'|^\d{1,2}[-./]\d{1,2}[-./]\d{2,4}\s+\d{1,2}:\d{2}' # With time
)
_NUMBER_PATTERN = re.compile(r'^[\s\']*[+-]?\d+([.,]\d+)?([eE][+-]?\d+)?[\s\']*$')
_DIGIT_CHECK_PATTERN = re.compile(r'\d') # Simple digit check
class RendererXlsx(BaseRenderer):
"""Renders content to Excel format using openpyxl."""
@ -48,6 +59,15 @@ class RendererXlsx(BaseRenderer):
"""Return output style classification: Excel spreadsheets are formatted documents."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
"""
Return list of section content types that Excel renderer accepts.
Excel renderer accepts all section types (spreadsheets can contain tables, text, headings, etc.).
"""
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
try:
@ -1006,7 +1026,12 @@ class RendererXlsx(BaseRenderer):
return startRow + 1
def _parseDateString(self, text: str) -> Any:
"""Try to parse a string as a date/datetime. Returns datetime object if successful, None otherwise."""
"""
Try to parse a string as a date/datetime. Returns datetime object if successful, None otherwise.
PERFORMANCE OPTIMIZED: Uses regex pre-check before attempting parsing to avoid expensive
operations on non-date strings. This dramatically improves performance for large tables.
"""
if not text or not isinstance(text, str):
return None
@ -1014,6 +1039,17 @@ class RendererXlsx(BaseRenderer):
if not text:
return None
# PERFORMANCE FIX: Pre-check with regex to avoid expensive parsing attempts
# Only attempt parsing if text looks like a date (contains digits and separators)
# Quick check: does it look like a date? (contains digits and date separators)
if not _DIGIT_CHECK_PATTERN.search(text): # No digits at all
return None
# Check for common date patterns before attempting full parsing
# This filters out most non-date strings quickly (uses pre-compiled pattern)
if not _DATE_PATTERN.search(text):
return None # Doesn't look like a date, skip expensive parsing
# Common date formats to try (in order of likelihood)
date_formats = [
"%Y-%m-%d", # 2025-01-01
@ -1036,7 +1072,7 @@ class RendererXlsx(BaseRenderer):
except ValueError:
continue
# If dateutil is available, use it for more flexible parsing
# If dateutil is available, use it for more flexible parsing (only if regex matched)
if DATEUTIL_AVAILABLE:
try:
parsed_date = date_parser.parse(text, dayfirst=True, yearfirst=False)
@ -1067,38 +1103,44 @@ class RendererXlsx(BaseRenderer):
# Try to convert numeric strings to actual numbers
# This ensures Excel treats them as numbers, not strings
# PERFORMANCE OPTIMIZED: Use regex pre-check before attempting conversion
if text:
# Clean text for number conversion: remove common formatting characters
# but preserve the original for fallback
cleaned_for_number = text.replace("'", "").replace(",", "").replace(" ", "").strip()
# Only attempt conversion if cleaned text looks like a number
# (starts with digit, +, -, or . followed by digit)
if cleaned_for_number and (cleaned_for_number[0].isdigit() or cleaned_for_number[0] in '+-.'):
# Try integer first (more restrictive)
try:
# Check if it's a valid integer (no decimal point, no scientific notation)
if '.' not in cleaned_for_number and 'e' not in cleaned_for_number.lower() and 'E' not in cleaned_for_number:
int_value = int(cleaned_for_number)
return int_value
except (ValueError, OverflowError):
pass
# PERFORMANCE FIX: Quick regex check to see if text looks like a number
# This avoids expensive string operations and conversion attempts for non-numbers
# Uses pre-compiled pattern for better performance
if _NUMBER_PATTERN.match(text.strip()):
# Clean text for number conversion: remove common formatting characters
cleaned_for_number = text.replace("'", "").replace(",", "").replace(" ", "").strip()
# Try float if integer conversion failed
try:
float_value = float(cleaned_for_number)
# Only return as float if it's actually a number representation
# Avoid converting things like "NaN", "inf" which are valid floats but not useful
if cleaned_for_number.lower() not in ['nan', 'inf', '-inf', 'infinity', '-infinity']:
# Check for reasonable float values (not too large/small)
if abs(float_value) < 1e308: # Avoid overflow
return float_value
except (ValueError, OverflowError):
pass
# Only attempt conversion if cleaned text looks like a number
# (starts with digit, +, -, or . followed by digit)
if cleaned_for_number and (cleaned_for_number[0].isdigit() or cleaned_for_number[0] in '+-.'):
# Try integer first (more restrictive)
try:
# Check if it's a valid integer (no decimal point, no scientific notation)
if '.' not in cleaned_for_number and 'e' not in cleaned_for_number.lower() and 'E' not in cleaned_for_number:
int_value = int(cleaned_for_number)
return int_value
except (ValueError, OverflowError):
pass
# Try float if integer conversion failed
try:
float_value = float(cleaned_for_number)
# Only return as float if it's actually a number representation
# Avoid converting things like "NaN", "inf" which are valid floats but not useful
if cleaned_for_number.lower() not in ['nan', 'inf', '-inf', 'infinity', '-infinity']:
# Check for reasonable float values (not too large/small)
if abs(float_value) < 1e308: # Avoid overflow
return float_value
except (ValueError, OverflowError):
pass
# Try to convert date strings to datetime objects
# This ensures Excel treats them as dates, not strings
# Use original text (not cleaned) for date parsing
# PERFORMANCE OPTIMIZED: Date parsing now uses regex pre-check to avoid expensive operations
# on non-date strings. This dramatically improves performance for large tables.
date_value = self._parseDateString(text)
if date_value is not None:
return date_value
@ -1109,7 +1151,17 @@ class RendererXlsx(BaseRenderer):
return text
def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a table element to Excel sheet with proper formatting and borders."""
"""
Add a table element to Excel sheet with proper formatting and borders.
PERFORMANCE OPTIMIZATIONS:
1. Pre-calculated style objects (Font, PatternFill, Alignment) to avoid repeated creation
2. Optimized _sanitizeCellValue() with regex pre-checks for numbers and dates
3. Batch cell operations where possible
4. Reduced exception handling overhead
Expected performance: 10-30x faster for large tables compared to unoptimized version.
"""
try:
# Extract from nested content structure
content = element.get("content", {})
@ -1139,60 +1191,69 @@ class RendererXlsx(BaseRenderer):
headerRow = startRow
header_style = styles.get("table_header", {})
# Add headers with formatting
# Pre-calculate and cache style objects to avoid repeated parsing
header_font_color = self._getSafeColor(header_style.get("text_color", "FF000000"))
header_font = Font(bold=header_style.get("bold", True), color=header_font_color)
header_bg_color = None
header_fill = None
if header_style.get("background"):
header_bg_color = self._getSafeColor(header_style["background"])
header_fill = PatternFill(start_color=header_bg_color, end_color=header_bg_color, fill_type="solid")
header_alignment = Alignment(
horizontal=self._getSafeAlignment(header_style.get("align", "left")),
vertical="center"
)
# Add headers with formatting - OPTIMIZED: use cached style objects
for col, header in enumerate(headers, 1):
sanitized_header = self._sanitizeCellValue(header)
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
# Apply styling with fallbacks - don't let styling errors prevent data rendering
# Apply styling with fallbacks - use pre-calculated objects
try:
# Font styling
cell.font = Font(
bold=header_style.get("bold", True),
color=self._getSafeColor(header_style.get("text_color", "FF000000"))
)
cell.font = header_font
except Exception:
# Fallback to default font if styling fails
try:
cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
except Exception:
pass # Continue even if font fails
pass
try:
# Background color
if header_style.get("background"):
cell.fill = PatternFill(
start_color=self._getSafeColor(header_style["background"]),
end_color=self._getSafeColor(header_style["background"]),
fill_type="solid"
)
if header_fill:
cell.fill = header_fill
except Exception:
pass # Continue without background color if it fails
pass
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(header_style.get("align", "left")),
vertical="center"
)
cell.alignment = header_alignment
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
pass
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
pass
startRow += 1
# Add rows with formatting
# Add rows with formatting - OPTIMIZED: pre-calculate style objects
cell_style = styles.get("table_cell", {})
header_count = len(headers)
# Pre-calculate and cache style objects to avoid repeated parsing
cell_text_color = None
cell_font = None
if cell_style.get("text_color"):
cell_text_color = self._getSafeColor(cell_style["text_color"])
cell_font = Font(color=cell_text_color)
cell_alignment = Alignment(
horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
vertical="center"
)
for row_data in rows:
# Handle different row formats
if isinstance(row_data, list):
@ -1214,32 +1275,25 @@ class RendererXlsx(BaseRenderer):
sanitized_value = self._sanitizeCellValue(cell_value)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Apply styling with fallbacks - don't let styling errors prevent data rendering
# Apply styling with fallbacks - use pre-calculated objects
try:
# Font styling
if cell_style.get("text_color"):
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
if cell_font:
cell.font = cell_font
except Exception:
pass # Continue without font color if it fails
pass
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
vertical="center"
)
cell.alignment = cell_alignment
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
pass
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
pass
startRow += 1
@ -1439,28 +1493,32 @@ class RendererXlsx(BaseRenderer):
if code:
code_style = styles.get("code_block", {})
# Pre-calculate and cache style objects to avoid repeated parsing
code_font_name = code_style.get("font", "Courier New")
code_font_size = code_style.get("font_size", 10)
code_text_color = self._getSafeColor(code_style.get("color", "FF2F2F2F"))
code_font = Font(name=code_font_name, size=code_font_size, color=code_text_color)
code_bg_color = None
code_fill = None
if code_style.get("background"):
code_bg_color = self._getSafeColor(code_style["background"])
code_fill = PatternFill(start_color=code_bg_color, end_color=code_bg_color, fill_type="solid")
# Add language label if present
if language:
langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000")))
langCell.font = Font(bold=True, color=code_text_color)
startRow += 1
# Split code into lines and add each line
# Split code into lines and add each line - use cached style objects
code_lines = code.split('\n')
for line in code_lines:
codeCell = sheet.cell(row=startRow, column=1, value=line)
codeCell.font = Font(
name=code_style.get("font", "Courier New"),
size=code_style.get("font_size", 10),
color=self._getSafeColor(code_style.get("color", "FF2F2F2F"))
)
codeCell.font = code_font
# Set background color if specified
if code_style.get("background"):
codeCell.fill = PatternFill(
start_color=self._getSafeColor(code_style["background"]),
end_color=self._getSafeColor(code_style["background"]),
fill_type="solid"
)
if code_fill:
codeCell.fill = code_fill
startRow += 1
# Add spacing after code block

View file

@ -64,25 +64,27 @@ async def buildGenerationPrompt(
)
if hasContinuation:
# CONTINUATION PROMPT - use new summary format from buildContinuationContext
# CONTINUATION PROMPT - use centralized jsonContinuation system
delivered_summary = continuationContext.get("delivered_summary", "")
element_before_cutoff = continuationContext.get("element_before_cutoff")
cut_off_element = continuationContext.get("cut_off_element")
# Use centralized system: overlap_context and hierarchy_context from jsonContinuation.getContexts()
overlap_context = continuationContext.get("overlap_context")
hierarchy_context = continuationContext.get("hierarchy_context")
# Build continuation text with delivered summary and cut-off information
# CRITICAL: Always include cut-off information if available (per loop_plan.md)
continuationText = f"{delivered_summary}\n\n"
continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
# Add cut-off point information (per loop_plan.md: always add if available)
# Add cut-off point information using centralized jsonContinuation contexts
# These are shown ONLY as REFERENCE to know where generation stopped
if element_before_cutoff:
continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
continuationText += f"{element_before_cutoff}\n\n"
if hierarchy_context:
continuationText += "# REFERENCE: Structure context (already delivered - DO NOT repeat):\n"
continuationText += f"{hierarchy_context}\n\n"
if cut_off_element:
continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
continuationText += f"{cut_off_element}\n\n"
if overlap_context:
continuationText += "# REFERENCE: Overlap context - incomplete element at cut point (DO NOT repeat):\n"
continuationText += f"{overlap_context}\n\n"
continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"

View file

@ -0,0 +1,164 @@
# JSON Continuation Context Module
Ein Python-Modul zur Generierung von Kontextinformationen für abgeschnittene JSON-Strings, um AI-Modellen die Fortsetzung zu ermöglichen.
## Problem
Wenn eine AI-Antwort als JSON abgeschnitten wird (z.B. Token-Limit erreicht), muss die nächste Iteration wissen:
- **Wo** der JSON abgeschnitten wurde
- **Was** bereits generiert wurde
- **Was** als nächstes geliefert werden soll
## Lösung: Drei Kontexte
### 1. Overlap Context
- Zeigt das **innerste Objekt/Array-Element**, das den Cut-Punkt enthält
- Wird verwendet, um den abgeschnittenen Teil mit dem neuen Teil zu **mergen**
- Exakt so wie im Original-String (für String-Matching beim Merge)
### 2. Hierarchy Context
- Zeigt die **hierarchische Struktur** vom Root bis zum Cut-Punkt
- Mit **Budget-Logik**: Näher am Cut = vollständige Werte, weiter weg = `"..."` Platzhalter
- Gibt der AI den Kontext der gesamten JSON-Struktur
### 3. Complete Part (NEU)
- Der **vollständige, valide JSON** bis zum Cut-Punkt
- Alle offenen Strukturen werden geschlossen (`}`, `]`, `"`)
- Unvollständige Keys werden entfernt
- Kann direkt als valides JSON geparst werden
## Installation
```bash
# Keine externen Abhängigkeiten erforderlich
cp json_continuation.py /your/project/
```
## Modulkonstanten
```python
# Diese Konstanten können vor dem Import angepasst werden
BUDGET_LIMIT: int = 500 # Zeichen-Budget für Datenwerte
OVERLAP_MAX_CHARS: int = 1000 # Max Zeichen für Overlap Context
```
## Verwendung
### Grundlegende Verwendung
```python
from json_continuation import extract_continuation_contexts
truncated_json = '''{"customers": [
{"id": 1, "name": "John"},
{"id": 2, "name": "Jane", "email": "jane@exa'''
overlap, hierarchy, complete = extract_continuation_contexts(truncated_json)
print("Overlap Context:")
print(overlap)
# {"id": 2, "name": "Jane", "email": "jane@exa
print("Hierarchy Context:")
print(hierarchy)
# {"customers": [...structure with budget logic...]
print("Complete Part (valid JSON):")
print(complete)
# {"customers": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane", "email": "jane@exa"}]}
import json
parsed = json.loads(complete) # ✓ Funktioniert!
```
### Mit Dictionary-Interface
```python
from json_continuation import get_contexts
contexts = get_contexts(truncated_json)
print(contexts['overlap'])
print(contexts['hierarchy'])
print(contexts['complete_part'])
```
### Konstanten anpassen
```python
import json_continuation
# Budget anpassen bevor Funktionen aufgerufen werden
json_continuation.BUDGET_LIMIT = 200
json_continuation.OVERLAP_MAX_CHARS = 500
overlap, hierarchy, complete = json_continuation.extract_continuation_contexts(truncated_json)
```
## Rückgabewerte
| Rückgabe | Typ | Beschreibung |
|----------|-----|--------------|
| `overlap` | str | Innerstes Element mit Cut-Punkt (für Merge) |
| `hierarchy` | str | Volle Struktur mit Budget-Logik |
| `complete_part` | str | Valides JSON mit geschlossenen Strukturen |
## Beispiele
### Verschachtelte Objekte
```python
json_str = '{"user": {"profile": {"bio": "Hello Wor'
overlap, hierarchy, complete = extract_continuation_contexts(json_str)
# Overlap: {"bio": "Hello Wor
# Hierarchy: {"user": {"profile": {"bio": "Hello Wor
# Complete: {"user": {"profile": {"bio": "Hello Wor"}}} ← Valides JSON!
```
### Array von Objekten mit unvollständigem Key
```python
json_str = '''{
"items": [
{"id": 1, "name": "First"},
{"id": 2, "name": "Second"},
{"id": 3, "name": "Third", "add'''
overlap, hierarchy, complete = extract_continuation_contexts(json_str)
# Complete entfernt den unvollständigen Key "add":
# {"items": [{"id": 1, ...}, {"id": 2, ...}, {"id": 3, "name": "Third"}]}
```
## Budget-Logik
Die Budget-Logik funktioniert wie folgt:
1. **Sammeln**: Alle String-Werte werden mit ihrer Position gesammelt
2. **Sortieren**: Nach Entfernung zum Cut-Punkt (näher = höhere Priorität)
3. **Zuweisen**: Budget wird von hinten nach vorne aufgebraucht
4. **Ersetzen**: Werte außerhalb des Budgets werden durch `"..."` ersetzt
## Tests ausführen
```bash
python -m unittest test_json_continuation -v
```
## API Referenz
### `extract_continuation_contexts(truncated_json: str) -> Tuple[str, str, str]`
Hauptfunktion. Gibt `(overlap, hierarchy, complete_part)` zurück.
### `get_contexts(truncated_json: str) -> dict`
Convenience-Funktion. Gibt Dictionary mit Keys `'overlap'`, `'hierarchy'`, `'complete_part'` zurück.
### Modulkonstanten
- `BUDGET_LIMIT`: int (default: 500) - Zeichen-Budget für Hierarchy-Context
- `OVERLAP_MAX_CHARS`: int (default: 1000) - Max Zeichen für Overlap-Context

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@ import logging
import re
from typing import Any, Dict, List, Optional, Tuple, Union, Type, TypeVar
from pydantic import BaseModel, ValidationError
from modules.datamodels.datamodelAi import ContinuationContext
logger = logging.getLogger(__name__)
@ -122,6 +123,160 @@ def tryParseJson(text: Union[str, bytes]) -> Tuple[Optional[Union[Dict, List]],
return None, e, cleaned
def _fixUnescapedQuotesInStrings(jsonStr: str) -> str:
"""
Fix unescaped quotes inside JSON string values.
AI often generates JSON with unescaped quotes like:
"text with "quoted" words"
This should be:
"text with \"quoted\" words"
Strategy:
- Parse JSON structure to find string values
- Within a string, find unescaped quotes that are followed by content
that looks like it continues the string (not a : or , or } or ])
- Escape those quotes
"""
if not jsonStr or not jsonStr.strip():
return jsonStr
result = []
i = 0
inString = False
escaped = False
while i < len(jsonStr):
char = jsonStr[i]
if escaped:
result.append(char)
escaped = False
i += 1
continue
if char == '\\' and inString:
result.append(char)
escaped = True
i += 1
continue
if char == '"':
if not inString:
# Starting a string
inString = True
result.append(char)
i += 1
continue
else:
# Could be end of string OR unescaped quote inside string
# Look ahead to determine
nextNonSpace = i + 1
while nextNonSpace < len(jsonStr) and jsonStr[nextNonSpace] in ' \t\n\r':
nextNonSpace += 1
if nextNonSpace < len(jsonStr):
nextChar = jsonStr[nextNonSpace]
# If next char is a structural character, this is end of string
if nextChar in ':,}]':
inString = False
result.append(char)
i += 1
continue
# If next char is a quote, might be end of string followed by another string
# Check if we're at a reasonable string end (has a colon or comma before next structure)
if nextChar == '"':
# This is end of string, start of next
inString = False
result.append(char)
i += 1
continue
# Otherwise, this quote is INSIDE the string - escape it!
result.append('\\')
result.append(char)
i += 1
continue
else:
# End of JSON - this must be closing quote
inString = False
result.append(char)
i += 1
continue
result.append(char)
i += 1
return ''.join(result)
def _fixUnescapedControlCharacters(jsonStr: str) -> str:
"""
Fix unescaped control characters in JSON strings.
JSON requires control characters (ASCII 0-31) to be escaped as \\uXXXX.
Common ones have shortcuts: \\n, \\r, \\t, \\b, \\f
This function finds unescaped control chars inside strings and escapes them.
"""
if not jsonStr or not jsonStr.strip():
return jsonStr
result = []
i = 0
inString = False
escaped = False
# Mapping of common control chars to their escape sequences
controlEscapes = {
'\n': '\\n',
'\r': '\\r',
'\t': '\\t',
'\b': '\\b',
'\f': '\\f',
}
while i < len(jsonStr):
char = jsonStr[i]
if escaped:
result.append(char)
escaped = False
i += 1
continue
if char == '\\' and inString:
result.append(char)
escaped = True
i += 1
continue
if char == '"':
inString = not inString
result.append(char)
i += 1
continue
if inString:
# Check for control characters (ASCII 0-31)
if ord(char) < 32:
if char in controlEscapes:
result.append(controlEscapes[char])
else:
# Use \uXXXX format for other control chars
result.append(f'\\u{ord(char):04x}')
i += 1
continue
result.append(char)
i += 1
return ''.join(result)
def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
"""
Attempt to repair broken JSON using multiple strategies.
@ -134,6 +289,11 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
if not text:
return None
# Pre-processing: Fix unescaped quotes and control characters inside strings
# AI often generates JSON like: "text with "quoted" words"
text = _fixUnescapedQuotesInStrings(text)
text = _fixUnescapedControlCharacters(text)
# Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
# This preserves all data and should be tried first
closedStr = closeJsonStructures(text)
@ -212,106 +372,77 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
def closeJsonStructures(text: str) -> str:
"""
Close incomplete JSON structures by adding missing closing brackets.
Also handles unterminated strings by closing them.
Close incomplete JSON structures generically and correctly.
Generic approach:
1. Close unterminated strings (if odd number of quotes)
2. Track structure opening order with stack (LIFO)
3. Close structures in reverse order (last opened, first closed)
4. Remove trailing commas only directly before closing brackets/braces
"""
if not text:
return text
result = text
# Handle unterminated strings: find the last unclosed string
# Look for patterns like: "value" or "value\n (unterminated)
# Check if we're in the middle of a string value when text ends
if result.strip():
# re is already imported at module level
# Count quotes - if odd number, we have an unterminated string
quoteCount = result.count('"')
if quoteCount % 2 == 1:
# Find the last opening quote that's not escaped
lastQuotePos = result.rfind('"')
if lastQuotePos >= 0:
# Check if it's escaped
# Step 1: Close unterminated strings
# Simple: if odd number of quotes, find last unescaped quote and close it
quoteCount = result.count('"')
if quoteCount % 2 == 1:
# Find last unescaped quote
i = len(result) - 1
while i >= 0:
if result[i] == '"':
# Count backslashes before quote
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
j = i - 1
while j >= 0 and result[j] == '\\':
escapeCount += 1
i -= 1
# If not escaped (even number of backslashes), close the string
j -= 1
# If even number of backslashes, quote is not escaped
if escapeCount % 2 == 0:
# Find where the string should end (before next comma, bracket, or brace)
# For now, just close it at the end
result += '"'
else:
# Even number of quotes, but might still be in middle of string if cut off
# More robust detection: check if text ends with alphanumeric/text chars after a quote
# This handles cases like: "text": "value cut off mid-word
# Pattern 1: ends with colon + quote + text (no closing quote)
if re.search(r':\s*"[^"]*$', result):
# We're in the middle of a string value, close it
result += '"'
else:
# Pattern 2: find last quote and check what comes after
lastQuotePos = result.rfind('"')
if lastQuotePos >= 0:
afterQuote = result[lastQuotePos + 1:]
# If after quote we have text (alphanumeric/whitespace) but no closing quote/comma/brace
# and the text doesn't end with structural characters, we're likely in a string
if afterQuote:
# Check if it looks like we're in a string value (has text, no closing quote)
# Pattern: ends with letters/numbers/spaces, not ending with quote, comma, }, or ]
if re.search(r'[a-zA-Z0-9\s]$', result) and not re.match(r'^\s*[,}\]\]]', afterQuote):
# Check if it's escaped
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
escapeCount += 1
i -= 1
if escapeCount % 2 == 0:
# Verify we're actually in a string context (not in a key name)
# Look backwards to see if we have ": " before the quote (value context)
beforeQuote = result[:lastQuotePos]
# Check if we're in a value context (has ": " before quote) or in an array (has "[ before quote)
if re.search(r':\s*"', beforeQuote[-50:]) or re.search(r'\[\s*"', beforeQuote[-50:]):
result += '"'
# Also check if text ends with alphanumeric (likely cut off mid-word)
elif re.search(r'[a-zA-Z]$', result):
# If we end with a letter and have a quote before it, likely in a string
result += '"'
# Final fallback: if text ends with alphanumeric and we have quotes, try to close the last string
# This handles edge cases where patterns above didn't match
if result.strip() and re.search(r'[a-zA-Z0-9]$', result):
# Count quotes - if we have quotes and end with text, might be in a string
if quoteCount > 0:
lastQuotePos = result.rfind('"')
if lastQuotePos >= 0:
afterQuote = result[lastQuotePos + 1:]
# If after quote is text (not empty, not structural), close it
if afterQuote and re.search(r'^[a-zA-Z0-9\s]+$', afterQuote[:50]): # Check first 50 chars after quote
# Make sure we're not already closed (check if next char would be quote/comma/brace)
if not result.endswith('"') and not result.endswith(',') and not result.endswith('}') and not result.endswith(']'):
# Check if escaped
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
escapeCount += 1
i -= 1
if escapeCount % 2 == 0:
result += '"'
break
i -= 1
# Count open/close brackets and braces
openBraces = result.count('{')
closeBraces = result.count('}')
openBrackets = result.count('[')
closeBrackets = result.count(']')
# Step 2: Track structure opening order with stack
stack = []
inString = False
escapeNext = False
# Close incomplete structures
for _ in range(openBraces - closeBraces):
result += '}'
for _ in range(openBrackets - closeBrackets):
result += ']'
for char in result:
if escapeNext:
escapeNext = False
continue
if char == '\\':
escapeNext = True
continue
if char == '"':
inString = not inString
continue
# Only track braces/brackets outside of strings
if not inString:
if char == '{':
stack.append('}')
elif char == '[':
stack.append(']')
elif char == '}' or char == ']':
# Pop matching closing bracket/brace from stack
if stack and stack[-1] == char:
stack.pop()
# Step 3: Close remaining structures in reverse order (LIFO)
# Remove trailing comma ONLY directly before each closing bracket/brace
while stack:
closingChar = stack.pop()
result = result.rstrip()
# Remove trailing comma if present (invalid before closing)
if result and result[-1] == ',':
result = result[:-1].rstrip()
result += closingChar
return result
@ -731,7 +862,12 @@ def extractSectionsFromDocument(documentData: Dict[str, Any]) -> List[Dict[str,
return []
def buildContinuationContext(allSections: List[Dict[str, Any]], lastRawResponse: Optional[str] = None) -> Dict[str, Any]:
def buildContinuationContext(
allSections: List[Dict[str, Any]],
lastRawResponse: Optional[str] = None,
useCaseId: Optional[str] = None,
templateStructure: Optional[str] = None
) -> ContinuationContext:
"""
Build context information from accumulated sections for continuation prompt.
@ -740,13 +876,13 @@ def buildContinuationContext(allSections: List[Dict[str, Any]], lastRawResponse:
Args:
allSections: List of ALL sections accumulated across ALL iterations
lastRawResponse: Raw JSON response from last iteration (can be broken/incomplete)
useCaseId: Optional use case ID to determine expected JSON structure
templateStructure: JSON structure template from initial prompt (MUST be identical)
Returns:
Dict with delivered_summary, cut_off_element, element_before_cutoff
ContinuationContext: Pydantic model with all continuation context information
"""
context = {
"section_count": len(allSections),
}
section_count = len(allSections)
# Build summary of delivered data (per-section counts)
summary_lines = []
@ -863,452 +999,53 @@ def buildContinuationContext(allSections: List[Dict[str, Any]], lastRawResponse:
else:
summary_lines.extend(summary_items)
context["delivered_summary"] = "\n".join(summary_lines)
delivered_summary = "\n".join(summary_lines)
# Extract cut-off point using new algorithm
# 1. Loop over all sections until finding incomplete section
# 2. In incomplete section, loop through elements until finding cut-off element
# CRITICAL: There is always only ONE section incomplete (JSON cut-off point)
cut_off_element = None
element_before_cutoff = None
# Extract continuation contexts using centralized jsonContinuation module
# This is the single source of truth for handling cut-off JSON strings
last_raw_json = lastRawResponse or ""
last_complete_part = ""
incomplete_part = ""
overlap_context = ""
hierarchy_context = ""
if lastRawResponse:
try:
# CRITICAL: Always try to find incomplete section from raw JSON
# Even if JSON can be parsed, it might be incomplete (cut off mid-element)
raw_stripped = stripCodeFences(lastRawResponse.strip()).strip()
from modules.shared.jsonContinuation import getContexts
# Check if response is just a fragment (not full JSON structure)
# Fragments are continuation content that should be appended to the last incomplete element
is_fragment = not (raw_stripped.strip().startswith('{') or raw_stripped.strip().startswith('['))
if is_fragment:
# Response is a fragment - it continues the last incomplete element
# Find the last incomplete element from allSections
if allSections:
last_section = allSections[-1]
elements = last_section.get("elements", [])
if isinstance(elements, list) and elements:
# Get the last element (which should be incomplete)
last_elem = elements[-1]
if isinstance(last_elem, dict):
# The fragment continues this element
# Show the fragment as cut_off_element
cut_off_element = raw_stripped
# Show the element before (if there is one)
if len(elements) > 1:
element_before_cutoff = json.dumps(elements[-2])
else:
element_before_cutoff = json.dumps(last_elem)
else:
# Response is full JSON - use standard extraction
# Strategy 1: Try to find incomplete section using structured parsing
incomplete_section = _findIncompleteSectionInRaw(raw_stripped)
if incomplete_section:
cut_off_element, element_before_cutoff = _extractCutOffElements(incomplete_section, raw_stripped)
# Normalize JSON string
normalized = stripCodeFences(normalizeJsonText(lastRawResponse)).strip()
if normalized:
# Find first '{' or '[' to start
startIdx = -1
for i, char in enumerate(normalized):
if char in '{[':
startIdx = i
break
# Strategy 2: If no incomplete section found, extract directly from raw JSON
# This handles cases where JSON is cut off mid-element within a complete section
if not cut_off_element:
cut_off_element, element_before_cutoff = _extractCutOffElementsFromRaw(raw_stripped, allSections)
if startIdx >= 0:
jsonContent = normalized[startIdx:]
contexts = getContexts(jsonContent)
# Store all contexts from centralized module
last_complete_part = contexts.completePart
incomplete_part = jsonContent[len(contexts.completePart):].strip()
overlap_context = contexts.overlapContext
hierarchy_context = contexts.hierarchyContext
except Exception as e:
logger.debug(f"Error extracting cut-off point: {e}")
logger.warning(f"Error extracting JSON continuation contexts: {e}", exc_info=True)
context["element_before_cutoff"] = element_before_cutoff
context["cut_off_element"] = cut_off_element
# Store raw JSON response for prompt builder to check
if lastRawResponse:
context["last_raw_json"] = lastRawResponse
else:
context["last_raw_json"] = ""
return context
def _findIncompleteSectionInRaw(raw_json: str) -> Optional[Dict[str, Any]]:
"""
Find the incomplete section in raw JSON.
CRITICAL: JSON can be cut off mid-element (e.g., {"text": "20327,20)
We need to find the last section and check if it's incomplete.
"""
try:
# Try to parse documents structure
if '"documents"' in raw_json:
# Find last document
doc_start = raw_json.rfind('"documents"')
if doc_start >= 0:
doc_section = raw_json[doc_start:]
# Try to find sections array
sections_start = doc_section.find('"sections"')
if sections_start >= 0:
sections_section = doc_section[sections_start:]
# Find sections array start
array_start = sections_section.find('[')
if array_start >= 0:
# Find all complete sections
section_objects = []
depth = 0
section_start = None
for i in range(array_start, len(sections_section)):
if sections_section[i] == '{':
if depth == 0:
section_start = i
depth += 1
elif sections_section[i] == '}':
depth -= 1
if depth == 0 and section_start is not None:
# Found complete section
section_str = sections_section[section_start:i+1]
try:
section_obj = json.loads('{' + section_str + '}')
section_objects.append(section_obj)
except:
pass
section_start = None
# CRITICAL: Check if there's content after the last complete section
# If JSON ends mid-element, the last section is incomplete
if section_objects:
# Find position after last complete section
last_section_end = sections_section.rfind('}')
if last_section_end >= 0:
# Check if there's more content after the last }
remaining_after_last_section = sections_section[last_section_end+1:].strip()
# Remove closing brackets/braces that might be there
remaining_after_last_section = remaining_after_last_section.lstrip('],}')
# If there's still content (like incomplete element), section is incomplete
if remaining_after_last_section and not remaining_after_last_section.startswith(']'):
# Last section is incomplete - return it
return section_objects[-1]
# Also check: if we can't parse the full sections array, last section is incomplete
try:
# Try to parse the sections array
sections_array_str = sections_section[array_start:]
json.loads(sections_array_str)
# Parsed successfully - all sections complete
return None
except:
# Cannot parse - last section is incomplete
return section_objects[-1] if section_objects else None
except Exception as e:
logger.debug(f"Error finding incomplete section: {e}")
return None
def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) -> Tuple[Optional[str], Optional[str]]:
"""Extract cut-off element and element before from incomplete section."""
cut_off_element = None
element_before_cutoff = None
elements = incomplete_section.get("elements", [])
if not elements:
return None, None
# CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number
# Deliver the cut-off part AS-IS (don't try to "complete" it)
if isinstance(elements, list):
# Find last element (might be incomplete)
if elements:
# Edge case: If cut-off is in first element, just show cut-off element
if len(elements) == 1:
# Only one element - might be cut-off
last_elem = elements[0]
if isinstance(last_elem, dict):
# Check if element contains nested content (e.g., code_block with JSON string)
cut_off_element = _extractCutOffFromElement(last_elem, raw_json)
if not cut_off_element:
cut_off_element = json.dumps(last_elem)
else:
cut_off_element = str(last_elem)
else:
# Multiple elements - last one might be cut-off, get element before
element_before_cutoff = json.dumps(elements[-2]) if isinstance(elements[-2], dict) else str(elements[-2])
last_elem = elements[-1]
if isinstance(last_elem, dict):
# Check if element contains nested content
cut_off_element = _extractCutOffFromElement(last_elem, raw_json)
if not cut_off_element:
cut_off_element = json.dumps(last_elem)
else:
cut_off_element = str(last_elem)
elif isinstance(elements, dict):
# Single element - might be cut-off
cut_off_element = _extractCutOffFromElement(elements, raw_json)
if not cut_off_element:
cut_off_element = json.dumps(elements)
# If we couldn't extract from parsed structure, extract from raw JSON
if not cut_off_element:
# Extract the last incomplete part from raw JSON
# Find the last incomplete string/number/array
# re is already imported at module level
# Look for incomplete string at the end
incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL)
if incomplete_match:
cut_off_element = incomplete_match.group(1)
else:
# Look for incomplete number
number_match = re.search(r'(\d+\.?\d*)(?:\s*[,}\]]|$)', raw_json[-200:])
if number_match:
cut_off_element = number_match.group(1)
return cut_off_element, element_before_cutoff
def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optional[str]:
"""
Extract cut-off point from within an element (e.g., code_block with JSON string, table with incomplete rows).
This helps identify where exactly to continue within nested structures.
"""
# re is already imported at module level
# Check for code_block with nested JSON
if "code" in element:
code_content = element.get("code", "")
if isinstance(code_content, str) and code_content.strip().startswith("{"):
# This is JSON inside a code string - find where it was cut off
# Look for the last complete value in the raw JSON
# Find the code string in raw JSON
code_match = re.search(r'"code"\s*:\s*"([^"]*?)(?:"|$)', raw_json[-2000:], re.DOTALL)
if code_match:
code_str = code_match.group(1)
# Try to find the last complete value in the JSON string
# Look for patterns like: [2, 3, 5, ... 17929, (cut off here)
array_match = re.search(r'\[([^\]]*?)(?:\]|$)', code_str, re.DOTALL)
if array_match:
array_content = array_match.group(1)
# Find last complete number/item
# Match: number followed by comma or end
last_complete = re.findall(r'(\d+)\s*[,]', array_content)
if last_complete:
last_num = last_complete[-1]
# Return context showing where to continue
return f'{{"code": "{{\\"primes\\": [... up to {last_num}, <CONTINUE FROM HERE>]"}}'
# Check for table with incomplete rows
if "rows" in element:
rows = element.get("rows", [])
if isinstance(rows, list) and rows:
# Find last complete row in raw JSON
rows_str = str(rows)
# Try to find where rows were cut off
last_row_match = re.search(r'\[([^\]]*?)(?:\]|$)', raw_json[-1000:], re.DOTALL)
if last_row_match:
return f'{{"rows": [... last complete row shown above, <CONTINUE FROM HERE>]}}'
# Check for list items
if "items" in element:
items = element.get("items", [])
if isinstance(items, list) and items:
# Find last complete item
last_item_match = re.search(r'"([^"]*?)"\s*(?:,|\])', raw_json[-1000:], re.DOTALL)
if last_item_match:
return f'{{"items": [... last item shown above, <CONTINUE FROM HERE>]}}'
return None
def _extractCutOffElementsFromRaw(raw_json: str, allSections: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]:
"""
Extract cut-off element directly from raw JSON when section parsing fails.
This handles ALL cases where JSON is cut off:
- Mid-element (incomplete element object)
- Mid-string/number within an element
- Mid-array within an element (e.g., rows in table, items in list)
- Mid-nested structure
CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number - deliver as-is.
"""
cut_off_element = None
element_before_cutoff = None
try:
# Find the last "elements" array in raw JSON
if '"elements"' in raw_json:
# Find the last occurrence of "elements"
last_elements_pos = raw_json.rfind('"elements"')
if last_elements_pos >= 0:
elements_section = raw_json[last_elements_pos:]
# Find the array start '['
array_start = elements_section.find('[')
if array_start >= 0:
# Use a simpler approach: find all element objects by tracking braces
# This works even if elements contain nested arrays/objects
element_strings = []
depth = 0
in_string = False
escape_next = False
elem_start = None
for i in range(array_start, len(elements_section)):
char = elements_section[i]
# Track string state (ignore brackets/braces inside strings)
if escape_next:
escape_next = False
continue
if char == '\\':
escape_next = True
continue
if char == '"' and not escape_next:
in_string = not in_string
continue
if not in_string:
if char == '{':
if depth == 0:
elem_start = i
depth += 1
elif char == '}':
depth -= 1
if depth == 0 and elem_start is not None:
# Found complete element (all braces closed, even if nested arrays are incomplete)
elem_str = elements_section[elem_start:i+1]
element_strings.append(elem_str)
elem_start = None
# Now analyze what we found
if element_strings:
last_elem = element_strings[-1]
last_complete_pos = elements_section.rfind('}')
# Check if there's content after the last complete element
if last_complete_pos >= 0:
remaining = elements_section[last_complete_pos+1:].strip()
remaining_clean = remaining.lstrip(',').strip().lstrip(']').strip()
# Case 1: Incomplete element after last complete one
if remaining_clean and not remaining_clean.startswith(']'):
incomplete_start = last_complete_pos + 1
while incomplete_start < len(elements_section) and elements_section[incomplete_start] in ' \n\t\r,':
incomplete_start += 1
if incomplete_start < len(elements_section):
incomplete_elem_str = elements_section[incomplete_start:].strip()
incomplete_elem_str = incomplete_elem_str.rstrip(']').rstrip('}').rstrip()
cut_off_element = incomplete_elem_str
element_before_cutoff = element_strings[-1]
# Case 2: Last element itself is incomplete (cut off in nested structure like rows, items, etc.)
else:
# Check if JSON is incomplete by analyzing structure
# Count unclosed brackets/braces in elements section (ignoring strings)
elements_section_braces = 0
elements_section_brackets = 0
in_str = False
esc = False
for char in elements_section:
if esc:
esc = False
continue
if char == '\\':
esc = True
continue
if char == '"':
in_str = not in_str
continue
if not in_str:
if char == '{':
elements_section_braces += 1
elif char == '}':
elements_section_braces -= 1
elif char == '[':
elements_section_brackets += 1
elif char == ']':
elements_section_brackets -= 1
# Also check raw JSON for unclosed structures
raw_braces = 0
raw_brackets = 0
in_str = False
esc = False
for char in raw_json:
if esc:
esc = False
continue
if char == '\\':
esc = True
continue
if char == '"':
in_str = not in_str
continue
if not in_str:
if char == '{':
raw_braces += 1
elif char == '}':
raw_braces -= 1
elif char == '[':
raw_brackets += 1
elif char == ']':
raw_brackets -= 1
# Check if last element can be parsed
last_elem_parsable = False
try:
json.loads(last_elem)
last_elem_parsable = True
except:
pass
# Determine if last element is incomplete
is_incomplete = False
# If there are unclosed structures, element is incomplete
if elements_section_brackets > 0 or elements_section_braces > 0 or raw_brackets > 0 or raw_braces > 0:
is_incomplete = True
# If element cannot be parsed, it's incomplete
elif not last_elem_parsable:
is_incomplete = True
# Check if JSON ends mid-element by finding where element ends in raw JSON
elif last_elem_parsable:
# Find where this element ends in the raw JSON
elem_end_marker = last_elem[-100:] if len(last_elem) > 100 else last_elem
elem_end_in_raw = raw_json.rfind(elem_end_marker)
if elem_end_in_raw >= 0:
actual_elem_end = elem_end_in_raw + len(last_elem)
if actual_elem_end < len(raw_json):
remaining_after_elem = raw_json[actual_elem_end:].strip()
remaining_clean = remaining_after_elem.lstrip(',').strip()
# If there's unexpected content, element is incomplete
if remaining_clean and not remaining_clean.startswith(']'):
is_incomplete = True
if is_incomplete:
cut_off_element = last_elem
if len(element_strings) >= 2:
element_before_cutoff = element_strings[-2]
elif len(element_strings) == 1:
element_before_cutoff = last_elem
# Case 3: No complete elements found, but there's an incomplete one
elif elem_start is not None:
# There's an incomplete element that hasn't been closed
incomplete_elem_str = elements_section[elem_start:].strip()
cut_off_element = incomplete_elem_str
# No element before (this is the first/only element)
element_before_cutoff = None
except Exception as e:
logger.debug(f"Error extracting cut-off elements from raw JSON: {e}")
return cut_off_element, element_before_cutoff
# Return ContinuationContext Pydantic model
return ContinuationContext(
section_count=section_count,
delivered_summary=delivered_summary,
template_structure=templateStructure,
last_complete_part=last_complete_part,
incomplete_part=incomplete_part,
last_raw_json=last_raw_json,
overlap_context=overlap_context,
hierarchy_context=hierarchy_context
)
def parseJsonWithModel(jsonString: str, modelClass: Type[T]) -> T:
"""

View file

@ -26,9 +26,16 @@ async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout."
aiPrompt += " Ensure the converted document maintains the same content and information as the original."
return await self.process({
# Pass parentOperationId to maintain progress hierarchy
parentOperationId = parameters.get("parentOperationId")
processParams = {
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": normalizedFormat
})
}
if parentOperationId:
processParams["parentOperationId"] = parentOperationId
return await self.process(processParams)

View file

@ -28,10 +28,17 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
aiPrompt += f" Focus specifically on: {focus}."
aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."
return await self.process({
# Pass parentOperationId to maintain progress hierarchy
parentOperationId = parameters.get("parentOperationId")
processParams = {
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType,
"generationIntent": "document" # NEW: Explicit intent
})
}
if parentOperationId:
processParams["parentOperationId"] = parentOperationId
return await self.process(processParams)

View file

@ -29,6 +29,9 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
aiPrompt += " Focus on accurate translation of content."
aiPrompt += " Maintain the same document structure, headings, and organization."
# Pass parentOperationId to maintain progress hierarchy
parentOperationId = parameters.get("parentOperationId")
processParams = {
"aiPrompt": aiPrompt,
"documentList": documentList,
@ -36,6 +39,8 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
}
if resultType:
processParams["resultType"] = resultType
if parentOperationId:
processParams["parentOperationId"] = parentOperationId
return await self.process(processParams)

View file

@ -282,7 +282,7 @@ class MethodAi(MethodBase):
),
"generateCode": WorkflowActionDefinition(
actionId="ai.generateCode",
description="Generate code files - explicitly sets intent to 'code'. If the prompt specifies file formats to deliver, include them in the prompt",
description="Generate one or multiple code files in a single action - explicitly sets intent to 'code'. This action can generate multiple files (e.g., config.json, customers.json, settings.json) when the prompt requests multiple files. If the prompt specifies file formats to deliver, include them in the prompt. IMPORTANT: When the user requests multiple files (e.g., 'generate 3 JSON files'), use a SINGLE ai.generateCode action with a prompt that describes ALL requested files, rather than splitting into multiple actions.",
dynamicMode=True,
parameters={
"prompt": WorkflowActionParameter(
@ -290,7 +290,7 @@ class MethodAi(MethodBase):
type="str",
frontendType=FrontendType.TEXTAREA,
required=True,
description="Description of code to generate"
description="Description of code to generate. If multiple files are requested, describe ALL files in this single prompt (e.g., 'Generate 3 JSON files: 1) config.json with..., 2) customers.json with..., 3) settings.json with...')."
),
"documentList": WorkflowActionParameter(
name="documentList",
@ -303,9 +303,9 @@ class MethodAi(MethodBase):
name="resultType",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"],
required=False,
description="Output format (html, js, py, etc.). Optional: if omitted, formats are determined from prompt by AI. With per-document format determination, AI can determine different formats for different documents based on prompt."
description="Output format (html, js, py, json, csv, xml, etc.). Optional: if omitted, formats are determined from prompt by AI. This action can return MULTIPLE files in a single call when the prompt requests multiple files. With per-document format determination, AI can determine different formats for different files based on prompt. When multiple files are requested, the action will return multiple documents (one per file)."
)
},
execute=generateCode.__get__(self, self.__class__)

View file

@ -80,46 +80,64 @@ class ContentValidator:
# For tables: extract caption and statistics
if section.get("content_type") == "table":
# Try to extract from elements first
if elements and isinstance(elements, list) and len(elements) > 0:
tableElement = elements[0]
content = tableElement.get("content", {})
if isinstance(content, dict):
headers = content.get("headers", [])
rows = content.get("rows", [])
else:
headers = tableElement.get("headers", [])
rows = tableElement.get("rows", [])
if headers:
sectionSummary["columnCount"] = len(headers)
sectionSummary["headers"] = headers # Include headers for context
if rows:
sectionSummary["rowCount"] = len(rows)
sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
# Ensure tableElement is a dictionary before accessing
if isinstance(tableElement, dict):
content = tableElement.get("content", {})
if isinstance(content, dict):
headers = content.get("headers", [])
rows = content.get("rows", [])
else:
headers = tableElement.get("headers", [])
rows = tableElement.get("rows", [])
if headers:
sectionSummary["columnCount"] = len(headers)
sectionSummary["headers"] = headers # Include headers for context
if rows:
sectionSummary["rowCount"] = len(rows)
sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
else:
# Fallback: extract KPIs from section metadata if elements are missing
# This handles cases where filledStructure doesn't have elements populated
if "columnCount" in section:
sectionSummary["columnCount"] = section.get("columnCount")
if "rowCount" in section:
sectionSummary["rowCount"] = section.get("rowCount")
if "headers" in section:
sectionSummary["headers"] = section.get("headers")
if "caption" in section:
sectionSummary["caption"] = section.get("caption")
# For lists and bullet_lists: extract item count
elif section.get("content_type") in ["list", "bullet_list"]:
if elements and isinstance(elements, list) and len(elements) > 0:
listElement = elements[0]
content = listElement.get("content", {})
if isinstance(content, dict):
items = content.get("items", [])
else:
items = listElement.get("items", [])
if items:
sectionSummary["itemCount"] = len(items)
# Ensure listElement is a dictionary before accessing
if isinstance(listElement, dict):
content = listElement.get("content", {})
if isinstance(content, dict):
items = content.get("items", [])
else:
items = listElement.get("items", [])
if items:
sectionSummary["itemCount"] = len(items)
# For paragraphs/headings: extract text statistics (no preview for security)
elif section.get("content_type") in ["paragraph", "heading"]:
if elements and isinstance(elements, list) and len(elements) > 0:
textElement = elements[0]
content = textElement.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
else:
text = textElement.get("text", "")
if text:
sectionSummary["textLength"] = len(text)
sectionSummary["wordCount"] = len(text.split())
# Ensure textElement is a dictionary before accessing
if isinstance(textElement, dict):
content = textElement.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
else:
text = textElement.get("text", "")
if text:
sectionSummary["textLength"] = len(text)
sectionSummary["wordCount"] = len(text.split())
# Also check for text length if available directly in section
if section.get("textLength"):
sectionSummary["textLength"] = section.get("textLength")
@ -153,6 +171,7 @@ class ContentValidator:
# Include any additional fields from section (generic approach)
# This ensures all action-specific fields are preserved
# BUT exclude type-specific KPIs that don't belong to this content_type
# AND exclude internal planning fields that confuse validation
contentType = section.get("content_type", "")
# Define KPIs that are ONLY valid for specific types
typeExclusiveKpis = {
@ -165,8 +184,12 @@ class ContentValidator:
if kpiType != contentType:
excludedKpis.extend(kpiFields)
# Internal planning fields that should NOT be shown to validation AI
# These are implementation details, not content indicators
internalFields = ["generationHint", "useAiCall", "elements"]
for key, value in section.items():
if key not in sectionSummary and key not in ["elements"] and key not in excludedKpis:
if key not in sectionSummary and key not in internalFields and key not in excludedKpis:
# Don't copy type-specific KPIs if they're 0/empty and we didn't extract them ourselves
# This prevents copying columnCount: 0, rowCount: 0, headers: [] from structure generation phase
if key in ["columnCount", "rowCount", "headers", "itemCount"]:
@ -198,39 +221,61 @@ class ContentValidator:
elements = section.get("elements", [])
if section.get("content_type") == "table":
# Try to extract from elements first
if elements and isinstance(elements, list) and len(elements) > 0:
tableElement = elements[0]
content = tableElement.get("content", {})
if isinstance(content, dict):
headers = content.get("headers", [])
rows = content.get("rows", [])
else:
headers = tableElement.get("headers", [])
rows = tableElement.get("rows", [])
if headers:
sectionSummary["columnCount"] = len(headers)
sectionSummary["headers"] = headers
if rows:
sectionSummary["rowCount"] = len(rows)
sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
# Ensure tableElement is a dictionary before accessing
if isinstance(tableElement, dict):
content = tableElement.get("content", {})
if isinstance(content, dict):
headers = content.get("headers", [])
rows = content.get("rows", [])
else:
headers = tableElement.get("headers", [])
rows = tableElement.get("rows", [])
if headers:
sectionSummary["columnCount"] = len(headers)
sectionSummary["headers"] = headers
if rows:
sectionSummary["rowCount"] = len(rows)
sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
else:
# Fallback: extract KPIs from section metadata if elements are missing
# This handles cases where filledStructure doesn't have elements populated
if "columnCount" in section:
sectionSummary["columnCount"] = section.get("columnCount")
if "rowCount" in section:
sectionSummary["rowCount"] = section.get("rowCount")
if "headers" in section:
sectionSummary["headers"] = section.get("headers")
if "caption" in section:
sectionSummary["caption"] = section.get("caption")
# For lists and bullet_lists: extract item count
elif section.get("content_type") in ["list", "bullet_list"]:
if elements and isinstance(elements, list) and len(elements) > 0:
listElement = elements[0]
content = listElement.get("content", {})
if isinstance(content, dict):
items = content.get("items", [])
else:
items = listElement.get("items", [])
if items:
sectionSummary["itemCount"] = len(items)
# Ensure listElement is a dictionary before accessing
if isinstance(listElement, dict):
content = listElement.get("content", {})
if isinstance(content, dict):
items = content.get("items", [])
else:
items = listElement.get("items", [])
if items:
sectionSummary["itemCount"] = len(items)
else:
# Fallback: extract KPIs from section metadata if elements are missing
if "itemCount" in section:
sectionSummary["itemCount"] = section.get("itemCount")
# For paragraphs/headings: extract text statistics (no preview for security)
elif section.get("content_type") in ["paragraph", "heading"]:
if elements and isinstance(elements, list) and len(elements) > 0:
textElement = elements[0]
content = textElement.get("content", {})
# Ensure textElement is a dictionary before accessing
if isinstance(textElement, dict):
content = textElement.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
else:
@ -269,6 +314,7 @@ class ContentValidator:
# Include any additional fields from section (generic approach)
# BUT exclude type-specific KPIs that don't belong to this content_type
# AND exclude internal planning fields that confuse validation
contentType = section.get("content_type", "")
# Define KPIs that are ONLY valid for specific types
typeExclusiveKpis = {
@ -281,8 +327,12 @@ class ContentValidator:
if kpiType != contentType:
excludedKpis.extend(kpiFields)
# Internal planning fields that should NOT be shown to validation AI
# These are implementation details, not content indicators
internalFields = ["generationHint", "useAiCall", "elements"]
for key, value in section.items():
if key not in sectionSummary and key not in ["elements"] and key not in excludedKpis:
if key not in sectionSummary and key not in internalFields and key not in excludedKpis:
# Don't copy type-specific KPIs if they're 0/empty and we didn't extract them ourselves
# This prevents copying columnCount: 0, rowCount: 0, headers: [] from structure generation phase
if key in ["columnCount", "rowCount", "headers", "itemCount"]:
@ -341,11 +391,22 @@ class ContentValidator:
# NOT the actual rendered content. The actual content is in documentData.
# Include both: jsonStructure for structure metadata, and contentPreview for actual content check
if sourceJson and isinstance(sourceJson, dict):
# Use source JSON for structure analysis (for rendered documents like xlsx/docx/pdf)
jsonSummary = self._summarizeJsonStructure(sourceJson)
summary["jsonStructure"] = jsonSummary
# Add note that this is metadata, not actual content
summary["note"] = "jsonStructure contains metadata about document structure. Actual rendered content is in documentData."
# Check if this is code generation metadata (has statistics field)
if "statistics" in sourceJson and "fileType" in sourceJson:
# Code generation format - extract statistics from metadata
codeStats = sourceJson.get("statistics", {})
jsonSummary = {
"metadata": sourceJson,
"sections": [],
"statistics": codeStats
}
summary["jsonStructure"] = jsonSummary
summary["note"] = "jsonStructure contains metadata and statistics for code generation file. Actual rendered content is in documentData."
else:
# Document generation format - use standard structure analysis
jsonSummary = self._summarizeJsonStructure(sourceJson)
summary["jsonStructure"] = jsonSummary
summary["note"] = "jsonStructure contains metadata about document structure. Actual rendered content is in documentData."
# For rendered documents, also check actual content
if data is not None:
@ -353,8 +414,19 @@ class ContentValidator:
if contentPreview:
summary["contentPreview"] = contentPreview
elif data is not None:
# For code generation files without sourceJson, extract statistics from content
if formatExt in ["csv", "json", "xml"]:
codeStats = self._extractCodeFileStatistics(data, formatExt, mimeType)
if codeStats:
jsonSummary = {
"metadata": {},
"sections": [],
"statistics": codeStats
}
summary["jsonStructure"] = jsonSummary
summary["note"] = "jsonStructure contains statistics extracted from code file content."
# Fallback: try to parse documentData as JSON (for non-rendered documents)
if isinstance(data, dict):
elif isinstance(data, dict):
# Summarize JSON structure
jsonSummary = self._summarizeJsonStructure(data)
summary["jsonStructure"] = jsonSummary
@ -502,6 +574,74 @@ class ContentValidator:
logger.warning(f"Error getting content structure info: {str(e)}")
return None
def _extractCodeFileStatistics(self, data: Any, formatExt: str, mimeType: str) -> Optional[Dict[str, Any]]:
"""Extract statistics from code generation files (CSV, JSON, XML) for validation."""
try:
# Convert bytes to string if needed
content = None
if isinstance(data, bytes):
try:
content = data.decode('utf-8')
except UnicodeDecodeError:
return None
elif isinstance(data, str):
content = data
else:
return None
if not content:
return None
stats = {}
if formatExt == "csv":
import csv
import io
try:
reader = csv.reader(io.StringIO(content))
rows = list(reader)
if rows:
headerRow = rows[0]
stats["rowCount"] = len(rows) - 1 # Exclude header
stats["columnCount"] = len(headerRow)
stats["headerRow"] = headerRow
stats["dataRowCount"] = len(rows) - 1
except Exception as e:
logger.debug(f"CSV statistics extraction failed: {e}")
elif formatExt == "json":
try:
parsed = json.loads(content)
stats["isArray"] = isinstance(parsed, list)
stats["isObject"] = isinstance(parsed, dict)
if isinstance(parsed, list):
stats["itemCount"] = len(parsed)
stats["objectCount"] = sum(1 for item in parsed if isinstance(item, dict))
stats["arrayCount"] = sum(1 for item in parsed if isinstance(item, list))
elif isinstance(parsed, dict):
stats["keyCount"] = len(parsed)
stats["keys"] = list(parsed.keys())
stats["objectCount"] = sum(1 for v in parsed.values() if isinstance(v, dict))
stats["arrayCount"] = sum(1 for v in parsed.values() if isinstance(v, list))
except Exception as e:
logger.debug(f"JSON statistics extraction failed: {e}")
elif formatExt == "xml":
try:
import xml.etree.ElementTree as ET
root = ET.fromstring(content)
stats["elementCount"] = len(list(root.iter()))
stats["attributeCount"] = sum(len(elem.attrib) for elem in root.iter())
stats["rootElement"] = root.tag
stats["hasRoot"] = True
except Exception as e:
logger.debug(f"XML statistics extraction failed: {e}")
return stats if stats else None
except Exception as e:
logger.warning(f"Error extracting code file statistics: {str(e)}")
return None
def _isFormatCompatible(self, deliveredFormat: str, expectedFormat: str) -> bool:
"""

View file

@ -0,0 +1,556 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Code Generation Formats Test 11 - Tests code generation in JSON, CSV, and XML formats
Tests code generation with structured data formats including validation and formatting.
"""
import asyncio
import json
import sys
import os
import time
import csv
import io
import xml.etree.ElementTree as ET
from typing import Dict, Any, List, Optional
# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
sys.path.insert(0, _gateway_path)
# Import the service initialization
from modules.services import getInterface as getServices
from modules.datamodels.datamodelChat import UserInputRequest, WorkflowModeEnum
from modules.datamodels.datamodelUam import User
from modules.features.workflow import chatStart
import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects
class CodeGenerationFormatsTester11:
def __init__(self):
# Use root user for testing (has full access to everything)
from modules.interfaces.interfaceDbAppObjects import getRootInterface
rootInterface = getRootInterface()
self.testUser = rootInterface.currentUser
# Initialize services using the existing system
self.services = getServices(self.testUser, None) # Test user, no workflow
self.workflow = None
self.testResults = {}
self.generatedDocuments = {}
async def initialize(self):
"""Initialize the test environment."""
# Enable debug file logging for tests
from modules.shared.configuration import APP_CONFIG
APP_CONFIG.set("APP_DEBUG_CHAT_WORKFLOW_ENABLED", True)
# Set logging level to INFO to see workflow progress
import logging
logging.getLogger().setLevel(logging.INFO)
print(f"Initialized test with user: {self.testUser.id}")
print(f"Mandate ID: {self.testUser.mandateId}")
print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}")
def createTestPrompt(self, format: str) -> str:
"""Create a test prompt for code generation in the specified format.
The prompt requests 3 files for each format:
- Structured data generation appropriate for the format
- Proper formatting and validation
"""
formatPrompts = {
"json": (
"Generate 3 JSON code files for a customer management system:\n"
"1) Create a config.json file with:\n"
" - Application name: 'Customer Manager'\n"
" - Version: '1.0.0'\n"
" - Database settings: host, port, name\n"
" - API settings: baseUrl, timeout\n"
"2) Create a customers.json file with an array of customer objects:\n"
" - Each customer should have: id, name, email, phone, address\n"
" - Include at least 3 sample customers\n"
"3) Create a settings.json file with:\n"
" - Theme settings: darkMode, fontSize, language\n"
" - Notification settings: email, sms, push\n"
" - Feature flags: enableAnalytics, enableReports\n\n"
"Format all files as valid JSON with proper indentation."
),
"csv": (
"Generate 3 CSV code files for expense tracking:\n"
"1) Create an expenses.csv file with:\n"
" - Header row: Documentname, Datum, Händler, Kreditkartennummer, Gesamtbetrag, Währung, MWST-Satz\n"
" - Data rows with at least 5 expense entries\n"
" - Use consistent date format (DD.MM.YYYY)\n"
" - Use CHF as currency\n"
" - Use 7.7% as VAT rate\n"
"2) Create a categories.csv file with:\n"
" - Header row: CategoryID, CategoryName, Description, ParentCategory\n"
" - Data rows with at least 8 categories\n"
"3) Create a vendors.csv file with:\n"
" - Header row: VendorID, VendorName, ContactPerson, Email, Phone, Address\n"
" - Data rows with at least 6 vendors\n\n"
"Format all files as valid CSV with proper header row and consistent column count."
),
"xml": (
"Generate 3 XML code files for a product catalog:\n"
"1) Create a products.xml file with:\n"
" - Root element: <catalog>\n"
" - Each product as <product> element with:\n"
" - <id>, <name>, <description>, <price>, <category>\n"
" - Include at least 4 products\n"
"2) Create a categories.xml file with:\n"
" - Root element: <categories>\n"
" - Each category as <category> element with:\n"
" - <id>, <name>, <description>, <parentId>\n"
" - Include at least 5 categories\n"
"3) Create a suppliers.xml file with:\n"
" - Root element: <suppliers>\n"
" - Each supplier as <supplier> element with:\n"
" - <id>, <name>, <contact>, <address>\n"
" - Include at least 3 suppliers\n\n"
"Format all files as valid XML with proper indentation and structure."
)
}
return formatPrompts.get(format.lower(), formatPrompts["json"])
async def generateCodeInFormat(self, format: str) -> Dict[str, Any]:
"""Generate code in the specified format using workflow."""
print("\n" + "="*80)
print(f"GENERATING CODE IN {format.upper()} FORMAT")
print("="*80)
prompt = self.createTestPrompt(format)
print(f"Prompt: {prompt[:200]}...")
# Create user input request
userInput = UserInputRequest(
prompt=prompt,
listFileId=[],
userLanguage="en"
)
# Start workflow
print(f"\nStarting workflow for {format.upper()} code generation...")
workflow = await chatStart(
currentUser=self.testUser,
userInput=userInput,
workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC,
workflowId=None
)
if not workflow:
return {
"success": False,
"error": "Failed to start workflow"
}
self.workflow = workflow
print(f"Workflow started: {workflow.id}")
# Wait for workflow completion (no timeout - wait indefinitely)
print(f"Waiting for workflow completion...")
completed = await self.waitForWorkflowCompletion(timeout=None)
if not completed:
return {
"success": False,
"error": "Workflow did not complete",
"workflowId": workflow.id,
"status": workflow.status if workflow else "unknown"
}
# Analyze results
results = self.analyzeWorkflowResults()
# Extract documents for this format
documents = results.get("documents", [])
formatDocuments = [d for d in documents if d.get("fileName", "").endswith(f".{format.lower()}")]
return {
"success": True,
"format": format,
"workflowId": workflow.id,
"status": results.get("status"),
"documentCount": len(formatDocuments),
"documents": formatDocuments,
"results": results
}
async def waitForWorkflowCompletion(self, timeout: Optional[int] = None, checkInterval: int = 2) -> bool:
"""Wait for workflow to complete."""
if not self.workflow:
return False
startTime = time.time()
lastStatus = None
interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)
if timeout is None:
print("Waiting indefinitely (no timeout)")
while True:
# Check timeout only if specified
if timeout is not None and time.time() - startTime > timeout:
print(f"\n⏱️ Timeout after {timeout} seconds")
return False
# Get current workflow status
try:
currentWorkflow = interfaceDbChat.getWorkflow(self.workflow.id)
if not currentWorkflow:
print("\n❌ Workflow not found")
return False
currentStatus = currentWorkflow.status
elapsed = int(time.time() - startTime)
# Print status if it changed
if currentStatus != lastStatus:
print(f"Workflow status: {currentStatus} (elapsed: {elapsed}s)")
lastStatus = currentStatus
# Check if workflow is complete
if currentStatus in ["completed", "stopped", "failed"]:
self.workflow = currentWorkflow
statusIcon = "" if currentStatus == "completed" else ""
print(f"\n{statusIcon} Workflow finished with status: {currentStatus} (elapsed: {elapsed}s)")
return currentStatus == "completed"
# Wait before next check
await asyncio.sleep(checkInterval)
except Exception as e:
print(f"\n⚠️ Error checking workflow status: {str(e)}")
await asyncio.sleep(checkInterval)
def analyzeWorkflowResults(self) -> Dict[str, Any]:
"""Analyze workflow results and extract information."""
if not self.workflow:
return {"error": "No workflow to analyze"}
interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)
workflow = interfaceDbChat.getWorkflow(self.workflow.id)
if not workflow:
return {"error": "Workflow not found"}
# Get unified chat data
chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None)
# Count messages
messages = chatData.get("messages", [])
userMessages = [m for m in messages if m.get("role") == "user"]
assistantMessages = [m for m in messages if m.get("role") == "assistant"]
# Count documents
documents = chatData.get("documents", [])
# Get logs
logs = chatData.get("logs", [])
results = {
"workflowId": workflow.id,
"status": workflow.status,
"workflowMode": str(workflow.workflowMode) if hasattr(workflow, 'workflowMode') else None,
"currentRound": workflow.currentRound,
"totalTasks": workflow.totalTasks,
"totalActions": workflow.totalActions,
"messageCount": len(messages),
"userMessageCount": len(userMessages),
"assistantMessageCount": len(assistantMessages),
"documentCount": len(documents),
"logCount": len(logs),
"documents": documents,
"logs": logs
}
print(f"\nWorkflow Results:")
print(f" Status: {results['status']}")
print(f" Tasks: {results['totalTasks']}")
print(f" Actions: {results['totalActions']}")
print(f" Messages: {results['messageCount']}")
print(f" Documents: {results['documentCount']}")
# Print document details
if documents:
print(f"\nGenerated Documents:")
for doc in documents:
fileName = doc.get("fileName", "unknown")
fileSize = doc.get("fileSize", 0)
mimeType = doc.get("mimeType", "unknown")
print(f" - {fileName} ({fileSize} bytes, {mimeType})")
return results
def verifyCodeFormat(self, document: Dict[str, Any], expectedFormat: str) -> Dict[str, Any]:
"""Verify that a code file matches the expected format and is valid."""
fileName = document.get("fileName", "")
mimeType = document.get("mimeType", "")
fileSize = document.get("fileSize", 0)
# Expected MIME types
expectedMimeTypes = {
"json": ["application/json"],
"csv": ["text/csv"],
"xml": ["application/xml", "text/xml"]
}
# Expected file extensions
expectedExtensions = {
"json": [".json"],
"csv": [".csv"],
"xml": [".xml"]
}
formatLower = expectedFormat.lower()
expectedMimes = expectedMimeTypes.get(formatLower, [])
expectedExts = expectedExtensions.get(formatLower, [])
# Check file extension
hasCorrectExtension = any(fileName.lower().endswith(ext) for ext in expectedExts)
# Check MIME type
hasCorrectMimeType = any(mimeType.lower() == mime.lower() for mime in expectedMimes)
# Check file size (should be > 0)
hasValidSize = fileSize > 0
# Try to read and validate content
isValidContent = False
validationError = None
try:
# Get file content from fileId
fileId = document.get("fileId")
if fileId and hasattr(self.services, 'interfaceDbComponent'):
fileData = self.services.interfaceDbComponent.getFileData(fileId)
if fileData:
content = fileData.decode('utf-8') if isinstance(fileData, bytes) else fileData
# Validate format-specific syntax
if formatLower == "json":
try:
json.loads(content)
isValidContent = True
except json.JSONDecodeError as e:
validationError = f"Invalid JSON: {str(e)}"
elif formatLower == "csv":
try:
reader = csv.reader(io.StringIO(content))
rows = list(reader)
if len(rows) > 0:
# Check header row exists
headerCount = len(rows[0])
# Check all rows have same column count
allRowsValid = all(len(row) == headerCount for row in rows)
isValidContent = allRowsValid
if not allRowsValid:
validationError = "CSV rows have inconsistent column counts"
else:
validationError = "CSV file is empty"
except Exception as e:
validationError = f"CSV parsing error: {str(e)}"
elif formatLower == "xml":
try:
ET.fromstring(content)
isValidContent = True
except ET.ParseError as e:
validationError = f"Invalid XML: {str(e)}"
else:
validationError = "Could not read file data"
else:
validationError = "No fileId available"
except Exception as e:
validationError = f"Error reading/validating file: {str(e)}"
verification = {
"format": expectedFormat,
"fileName": fileName,
"mimeType": mimeType,
"fileSize": fileSize,
"hasCorrectExtension": hasCorrectExtension,
"hasCorrectMimeType": hasCorrectMimeType,
"hasValidSize": hasValidSize,
"isValidContent": isValidContent,
"validationError": validationError,
"isValid": hasCorrectExtension and hasValidSize and hasCorrectMimeType,
"isComplete": hasCorrectExtension and hasValidSize and hasCorrectMimeType and isValidContent
}
return verification
async def testAllFormats(self) -> Dict[str, Any]:
"""Test code generation in JSON, CSV, and XML formats."""
print("\n" + "="*80)
print("TESTING CODE GENERATION IN ALL FORMATS")
print("="*80)
# Test all code formats
formats = ["json", "csv", "xml"]
results = {}
for format in formats:
try:
print(f"\n{'='*80}")
print(f"Testing {format.upper()} format...")
print(f"{'='*80}")
result = await self.generateCodeInFormat(format)
results[format] = result
if result.get("success"):
documents = result.get("documents", [])
if documents:
# Verify all documents (expecting 3 files per format)
verifications = []
for doc in documents:
verification = self.verifyCodeFormat(doc, format)
verifications.append(verification)
result["verifications"] = verifications
# Count valid documents
validCount = sum(1 for v in verifications if v.get("isValid"))
contentValidCount = sum(1 for v in verifications if v.get("isValidContent"))
print(f"\n{format.upper()} generation successful!")
print(f" Documents: {len(documents)} (expected: 3)")
print(f" Valid Format: {validCount}/{len(documents)}")
print(f" Valid Content: {contentValidCount}/{len(documents)}")
# Print details for each file
for i, verification in enumerate(verifications, 1):
statusIcon = "" if verification.get("isValid") else ""
contentIcon = "" if verification.get("isValidContent") else ""
print(f" File {i}: {statusIcon} Format, {contentIcon} Content - {verification.get('fileName', 'unknown')}")
if verification.get("validationError"):
print(f" Error: {verification['validationError']}")
else:
print(f"\n⚠️ {format.upper()} generation completed but no documents found")
else:
error = result.get("error", "Unknown error")
print(f"\n{format.upper()} generation failed: {error}")
# Small delay between tests
await asyncio.sleep(2)
except Exception as e:
import traceback
print(f"\n❌ Error testing {format.upper()}: {str(e)}")
print(traceback.format_exc())
results[format] = {
"success": False,
"error": str(e),
"traceback": traceback.format_exc()
}
return results
async def runTest(self):
"""Run the complete test."""
print("\n" + "="*80)
print("CODE GENERATION FORMATS TEST 11 - JSON, CSV, XML")
print("="*80)
try:
# Initialize
await self.initialize()
# Test all formats
formatResults = await self.testAllFormats()
# Summary
print("\n" + "="*80)
print("TEST SUMMARY")
print("="*80)
# Format tests summary
print("\nFormat Tests:")
successCount = 0
failCount = 0
completeCount = 0 # Files with valid content
for format, result in formatResults.items():
if result.get("success"):
successCount += 1
verifications = result.get("verifications", [])
docCount = result.get("documentCount", 0)
# Count valid files
validCount = sum(1 for v in verifications if v.get("isValid"))
contentValidCount = sum(1 for v in verifications if v.get("isValidContent"))
completeCount += contentValidCount
# Overall status (all files valid)
allValid = len(verifications) > 0 and all(v.get("isValid") for v in verifications)
allContentValid = len(verifications) > 0 and all(v.get("isValidContent") for v in verifications)
statusIcon = "" if allValid else "⚠️"
contentIcon = "" if allContentValid else ""
print(f"{statusIcon} {format.upper():6s}: {'PASS' if allValid else 'PARTIAL'} - {docCount} file(s) ({validCount} valid format, {contentValidCount} valid content)")
# Print errors if any
for v in verifications:
if v.get("validationError"):
print(f" {v.get('fileName', 'unknown')}: {v['validationError']}")
else:
failCount += 1
error = result.get("error", "Unknown error")
print(f"{format.upper():6s}: FAIL - {error}")
print(f"\nFormat Tests: {successCount} passed, {failCount} failed out of {len(formatResults)} formats")
print(f"Valid Content Files: {completeCount} total files with valid content")
self.testResults = {
"success": failCount == 0,
"formatTests": {
"successCount": successCount,
"failCount": failCount,
"completeCount": completeCount,
"totalFormats": len(formatResults),
"results": formatResults
},
"totalSuccess": successCount,
"totalFail": failCount
}
return self.testResults
except Exception as e:
import traceback
print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
print(f"Traceback:\n{traceback.format_exc()}")
self.testResults = {
"success": False,
"error": str(e),
"traceback": traceback.format_exc()
}
return self.testResults
async def main():
"""Run code generation formats test 11."""
tester = CodeGenerationFormatsTester11()
results = await tester.runTest()
# Print final results as JSON for easy parsing
print("\n" + "="*80)
print("FINAL RESULTS (JSON)")
print("="*80)
print(json.dumps(results, indent=2, default=str))
if __name__ == "__main__":
asyncio.run(main())

View file

@ -0,0 +1,804 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON Split and Merge Test 12 - Tests JSON splitting and merging using workflow tools
Tests random splitting of JSON files into 3 parts and merging them back using ModularJsonMerger.
"""
import asyncio
import json
import sys
import os
import time
import random
from typing import Dict, Any, List, Optional, Tuple
# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
sys.path.insert(0, _gateway_path)
# Import JSON merger from workflow tools
from modules.services.serviceAi.subJsonMerger import ModularJsonMerger, JsonMergeLogger
from modules.shared.jsonContinuation import getContexts
class JsonSplitMergeTester12:
def __init__(self):
self.testResults = {}
self.testJsonFiles = []
self.logBuffer = []
self.logFile = None
def createTestJsonFiles(self) -> List[Dict[str, Any]]:
"""Create various test JSON files with different structures."""
testFiles = [
{
"name": "config.json",
"data": {
"application": "Customer Manager",
"version": "1.0.0",
"database": {
"host": "localhost",
"port": 5432,
"name": "customers_db"
},
"api": {
"baseUrl": "https://api.example.com",
"timeout": 30
}
}
},
{
"name": "customers.json",
"data": {
"customers": [
{"id": 1, "name": "John Doe", "email": "john@example.com", "phone": "+1234567890", "address": "123 Main St"},
{"id": 2, "name": "Jane Smith", "email": "jane@example.com", "phone": "+0987654321", "address": "456 Oak Ave"},
{"id": 3, "name": "Bob Johnson", "email": "bob@example.com", "phone": "+1122334455", "address": "789 Pine Rd"},
{"id": 4, "name": "Alice Williams", "email": "alice@example.com", "phone": "+5566778899", "address": "321 Elm St"},
{"id": 5, "name": "Charlie Brown", "email": "charlie@example.com", "phone": "+9988776655", "address": "654 Maple Dr"}
]
}
},
{
"name": "settings.json",
"data": {
"theme": {
"darkMode": True,
"fontSize": 14,
"language": "en"
},
"notifications": {
"email": True,
"sms": False,
"push": True
},
"features": {
"enableAnalytics": True,
"enableReports": False
}
}
},
{
"name": "products.json",
"data": {
"products": [
{"id": "P001", "name": "Product A", "price": 29.99, "category": "Electronics", "inStock": True},
{"id": "P002", "name": "Product B", "price": 49.99, "category": "Clothing", "inStock": True},
{"id": "P003", "name": "Product C", "price": 19.99, "category": "Books", "inStock": False},
{"id": "P004", "name": "Product D", "price": 99.99, "category": "Electronics", "inStock": True},
{"id": "P005", "name": "Product E", "price": 14.99, "category": "Books", "inStock": True},
{"id": "P006", "name": "Product F", "price": 79.99, "category": "Clothing", "inStock": True}
]
}
},
{
"name": "document_structure.json",
"data": {
"metadata": {
"title": "Test Document",
"author": "Test Author",
"date": "2025-01-05"
},
"documents": [
{
"id": "doc1",
"title": "Document 1",
"sections": [
{
"id": "sec1",
"content_type": "heading",
"elements": [
{"type": "heading", "content": {"text": "Introduction", "level": 1}}
]
},
{
"id": "sec2",
"content_type": "paragraph",
"elements": [
{"type": "paragraph", "content": {"text": "This is a test paragraph."}}
]
}
]
}
]
}
},
{
"name": "table_example.json",
"data": self._loadTableJsonExample()
},
{
"name": "complete_json.json",
"data": {
"status": "complete",
"message": "This is a complete, valid JSON object",
"data": {
"items": [1, 2, 3, 4, 5],
"metadata": {
"version": "1.0",
"timestamp": "2025-01-05T12:00:00Z"
}
}
},
"isComplete": True # Flag to indicate this is complete JSON (not cut)
},
{
"name": "json_with_comments.json",
"data": None, # Will be set as string with comments
"jsonString": '''{
// This is a single-line comment
"name": "Test",
"value": 42,
/* This is a multi-line comment
spanning multiple lines */
"items": [1, 2, 3],
"nested": {
// Another comment
"key": "value"
}
}''',
"hasComments": True
},
{
"name": "json_with_trailing_comma.json",
"data": None, # Will be set as string with trailing comma
"jsonString": '''{
"name": "Test",
"value": 42,
"items": [1, 2, 3,],
"nested": {
"key": "value",
}
}''',
"hasTrailingComma": True
},
{
"name": "json_with_unquoted_keys.json",
"data": None, # Will be set as string with unquoted keys
"jsonString": '''{
name: "Test",
value: 42,
items: [1, 2, 3],
nested: {
key: "value"
}
}''',
"hasUnquotedKeys": True
},
{
"name": "json_with_invalid_escape.json",
"data": None, # Will be set as string with invalid escape
"jsonString": '''{
"name": "Test\\xInvalid",
"value": 42,
"description": "This has \\u invalid escape"
}''',
"hasInvalidEscape": True
},
{
"name": "json_mixed_errors.json",
"data": None, # Will be set as string with multiple errors
"jsonString": '''{
// Comment here
name: "Test", // Unquoted key
"value": 42,
"items": [1, 2, 3,], // Trailing comma
"description": "Has \\x invalid escape",
"nested": {
key: "value", // Unquoted key and trailing comma
}
}''',
"hasMixedErrors": True
}
]
return testFiles
def _loadTableJsonExample(self) -> Dict[str, Any]:
"""Load the table JSON example from the debug prompts file."""
try:
# Import jsonUtils for closing incomplete JSON structures
from modules.shared.jsonUtils import closeJsonStructures, tryParseJson
# Path to the JSON example file
jsonExamplePath = os.path.join(
os.path.dirname(__file__), "..", "..", "..", "local", "debug", "prompts",
"20260105-214826-020-chapter_1_section_section_2_response_iteration_2.txt"
)
# Read the file content
with open(jsonExamplePath, 'r', encoding='utf-8') as f:
content = f.read()
# Remove markdown code block markers
jsonContent = content.strip()
if jsonContent.startswith('```json'):
jsonContent = jsonContent[7:] # Remove ```json
if jsonContent.startswith('```'):
jsonContent = jsonContent[3:] # Remove ```
jsonContent = jsonContent.strip()
if jsonContent.endswith('```'):
jsonContent = jsonContent[:-3] # Remove trailing ```
jsonContent = jsonContent.strip()
# The JSON is incomplete - use closeJsonStructures to complete it
closedJson = closeJsonStructures(jsonContent)
# Parse the closed JSON
parsedJson, error, _ = tryParseJson(closedJson)
if error is None and parsedJson is not None:
return parsedJson
else:
raise Exception(f"Failed to parse JSON after closing structures: {error}")
except Exception as e:
# If loading fails, return a minimal valid structure
print(f"Warning: Could not load table JSON example: {e}")
return {
"elements": [
{
"type": "table",
"content": {
"headers": ["Spalte1", "Spalte2", "Spalte3"],
"rows": [
[36761, 36767, 36779]
]
}
}
]
}
def splitJsonRandomly(self, jsonString: str, numParts: int = 3) -> List[str]:
"""
Split JSON string randomly into specified number of parts.
Simulates real AI response cuts - can split anywhere, even in the middle of strings/numbers/structures.
This is the REAL scenario: AI response gets cut off randomly, not at convenient points.
"""
if numParts < 2:
return [jsonString]
jsonLength = len(jsonString)
# Generate truly random split points - can be anywhere!
# Only ensure minimum part size to avoid empty parts
minPartSize = max(10, jsonLength // (numParts * 3)) # Smaller minimum to allow more randomness
splitPoints = []
for _ in range(numParts - 1):
# Generate random point - can be anywhere in the string
# Only ensure we don't create parts smaller than minimum
minPoint = len(splitPoints) * minPartSize if splitPoints else minPartSize
maxPoint = jsonLength - (numParts - len(splitPoints) - 1) * minPartSize
if maxPoint <= minPoint:
# If we can't avoid minimum size, just use the boundary
splitPoint = minPoint
else:
# Truly random point - can be in the middle of anything!
splitPoint = random.randint(minPoint, maxPoint)
splitPoints.append(splitPoint)
splitPoints.sort()
# Create parts - these can be cut anywhere, even mid-string, mid-number, etc.
parts = []
start = 0
for splitPoint in splitPoints:
parts.append(jsonString[start:splitPoint])
start = splitPoint
parts.append(jsonString[start:]) # Last part
return parts
def _log(self, message: str):
"""Add message to log buffer."""
self.logBuffer.append(message)
print(message)
def normalizeJson(self, jsonString: str) -> Optional[Dict[str, Any]]:
"""Normalize JSON string by parsing and re-serializing. Returns None if parsing fails."""
try:
parsed = json.loads(jsonString)
return parsed
except json.JSONDecodeError:
# Try to close incomplete JSON structures
try:
from modules.shared.jsonUtils import closeJsonStructures, tryParseJson
closed = closeJsonStructures(jsonString)
parsed, error, _ = tryParseJson(closed)
if error is None and parsed is not None:
return parsed
except Exception:
pass
# Return None if all parsing attempts fail
return None
def compareJson(self, original: Dict[str, Any], merged: Dict[str, Any]) -> Dict[str, Any]:
"""Compare original and merged JSON structures."""
originalStr = json.dumps(original, sort_keys=True, indent=2)
mergedStr = json.dumps(merged, sort_keys=True, indent=2)
exactMatch = originalStr == mergedStr
# Deep comparison
differences = []
self._findDifferences(original, merged, "", differences)
return {
"exactMatch": exactMatch,
"differences": differences,
"originalSize": len(originalStr),
"mergedSize": len(mergedStr),
"sizeMatch": len(originalStr) == len(mergedStr)
}
def _findDifferences(self, obj1: Any, obj2: Any, path: str, differences: List[str]):
"""Recursively find differences between two JSON objects."""
if type(obj1) != type(obj2):
differences.append(f"{path}: Type mismatch - {type(obj1).__name__} vs {type(obj2).__name__}")
return
if isinstance(obj1, dict):
allKeys = set(obj1.keys()) | set(obj2.keys())
for key in allKeys:
newPath = f"{path}.{key}" if path else key
if key not in obj1:
differences.append(f"{newPath}: Missing in original")
elif key not in obj2:
differences.append(f"{newPath}: Missing in merged")
else:
self._findDifferences(obj1[key], obj2[key], newPath, differences)
elif isinstance(obj1, list):
if len(obj1) != len(obj2):
differences.append(f"{path}: Length mismatch - {len(obj1)} vs {len(obj2)}")
else:
for i, (item1, item2) in enumerate(zip(obj1, obj2)):
newPath = f"{path}[{i}]"
self._findDifferences(item1, item2, newPath, differences)
else:
if obj1 != obj2:
differences.append(f"{path}: Value mismatch - {obj1} vs {obj2}")
async def testJsonSplitMerge(self, jsonFile: Dict[str, Any]) -> Dict[str, Any]:
"""Test splitting and merging a single JSON file."""
fileName = jsonFile["name"]
# Check if this is a complete JSON test (no cut)
isComplete = jsonFile.get("isComplete", False)
# Check if this is a JSON string with errors (not from data dict)
jsonString = jsonFile.get("jsonString")
if jsonString:
# Use the provided JSON string directly (may have errors)
originalJsonString = jsonString
originalData = None # No original data for error tests
else:
# Convert data dict to JSON string
originalData = jsonFile["data"]
originalJsonString = json.dumps(originalData, indent=2, ensure_ascii=False)
originalSize = len(originalJsonString)
self._log("")
self._log("="*80)
testType = "COMPLETE JSON" if isComplete else ("JSON WITH ERRORS" if jsonString else "SPLIT JSON")
self._log(f"TESTING {testType}: {fileName}")
self._log("="*80)
# Log original JSON
self._log("")
self._log("="*80)
self._log("ORIGINAL JSON")
self._log("="*80)
self._log(f"JSON length: {originalSize} characters")
if isComplete:
self._log(" ⚠️ This is COMPLETE JSON (not cut) - testing overlapContext='' detection")
if jsonString:
errorType = []
if jsonFile.get("hasComments"):
errorType.append("comments")
if jsonFile.get("hasTrailingComma"):
errorType.append("trailing commas")
if jsonFile.get("hasUnquotedKeys"):
errorType.append("unquoted keys")
if jsonFile.get("hasInvalidEscape"):
errorType.append("invalid escapes")
if jsonFile.get("hasMixedErrors"):
errorType.append("mixed errors")
if errorType:
self._log(f" ⚠️ This JSON has errors: {', '.join(errorType)} - testing repair function")
self._log("")
self._log("Full JSON content:")
self._log("-"*80)
jsonLines = originalJsonString.split('\n')
if len(jsonLines) > 50:
for line in jsonLines[:25]:
self._log(line)
self._log(f"... ({len(jsonLines) - 50} lines omitted) ...")
for line in jsonLines[-25:]:
self._log(line)
else:
for line in jsonLines:
self._log(line)
# Handle complete JSON, JSON with errors, vs split JSON
if isComplete or jsonString:
# For complete JSON or JSON with errors, use the full string (no cut)
# We want to test repair on the full error-containing JSON
partContent = originalJsonString
cutPosition = None # No cut
self._log("")
self._log("="*80)
if isComplete:
self._log("COMPLETE JSON TEST (NO CUT)")
self._log("="*80)
self._log(" Testing that getContexts() detects complete JSON and sets overlapContext=''")
else:
self._log("JSON WITH ERRORS TEST (NO CUT)")
self._log("="*80)
self._log(" Testing that getContexts() repairs the errors and produces valid JSON")
else:
# Split JSON at random position (simulating AI response cut)
self._log("")
self._log("="*80)
self._log("SPLITTING JSON AT RANDOM POSITION (SIMULATING AI RESPONSE CUT)")
self._log("="*80)
# Find random cut position (not at start or end)
import random
minCutPos = max(100, originalSize // 10) # At least 10% from start
maxCutPos = min(originalSize - 100, originalSize * 9 // 10) # At least 10% from end
# Ensure valid range
if maxCutPos <= minCutPos:
# For small JSON, just cut in the middle
cutPosition = originalSize // 2
else:
cutPosition = random.randint(minCutPos, maxCutPos)
# Get part from start to cut
partContent = originalJsonString[:cutPosition]
if not isComplete:
self._log("")
self._log("="*80)
self._log("PART (from start to cut):")
self._log("="*80)
self._log(f"Cut position: {cutPosition} characters")
self._log(f"Part length: {len(partContent)} characters")
self._log("")
self._log("Part content:")
partLines = partContent.split('\n')
if len(partLines) > 30:
for line in partLines[:15]:
self._log(f" {line}")
self._log(f" ... ({len(partLines) - 30} lines omitted) ...")
for line in partLines[-15:]:
self._log(f" {line}")
else:
for line in partLines:
self._log(f" {line}")
# Generate contexts using getContexts()
self._log("")
self._log("="*80)
self._log("GENERATING CONTINUATION CONTEXTS")
self._log("="*80)
contexts = getContexts(partContent)
# Log overlap context
self._log("")
self._log("="*80)
self._log("OVERLAP CONTEXT (for merging):")
self._log("="*80)
overlapLines = contexts.overlapContext.split('\n')
if len(overlapLines) > 30:
for line in overlapLines[:15]:
self._log(f" {line}")
self._log(f" ... ({len(overlapLines) - 30} lines omitted) ...")
for line in overlapLines[-15:]:
self._log(f" {line}")
else:
for line in overlapLines:
self._log(f" {line}")
# Log hierarchy context (full, without budget)
self._log("")
self._log("="*80)
self._log("HIERARCHY CONTEXT (full structure, no budget):")
self._log("="*80)
hierarchyLines = contexts.hierarchyContext.split('\n')
if len(hierarchyLines) > 30:
for line in hierarchyLines[:15]:
self._log(f" {line}")
self._log(f" ... ({len(hierarchyLines) - 30} lines omitted) ...")
for line in hierarchyLines[-15:]:
self._log(f" {line}")
else:
for line in hierarchyLines:
self._log(f" {line}")
# Log hierarchy context for prompt (with budget)
self._log("")
self._log("="*80)
self._log("HIERARCHY CONTEXT FOR PROMPT (with budget logic):")
self._log("="*80)
hierarchyPromptLines = contexts.hierarchyContextForPrompt.split('\n')
for line in hierarchyPromptLines:
self._log(f" {line}")
# Test completePart as valid JSON
self._log("")
self._log("="*80)
self._log("COMPLETE PART (should be valid JSON):")
self._log("="*80)
completeLines = contexts.completePart.split('\n')
if len(completeLines) > 30:
for line in completeLines[:15]:
self._log(f" {line}")
self._log(f" ... ({len(completeLines) - 30} lines omitted) ...")
for line in completeLines[-15:]:
self._log(f" {line}")
else:
for line in completeLines:
self._log(f" {line}")
# Validate completePart as JSON and check overlapContext
self._log("")
self._log("="*80)
self._log("VALIDATION RESULTS:")
self._log("="*80)
# Check overlapContext for complete JSON
if isComplete:
if contexts.overlapContext == "":
self._log(" ✅ overlapContext is empty (correct for complete JSON)")
else:
self._log(f" ❌ overlapContext is NOT empty: '{contexts.overlapContext[:50]}...'")
self._log(" Expected empty string for complete JSON")
# Validate completePart as JSON
self._log("")
self._log("VALIDATING COMPLETE PART AS JSON:")
isValidJson = False
parsedCompletePart = None
jsonError = None
try:
parsedCompletePart = json.loads(contexts.completePart)
isValidJson = True
self._log(" ✅ completePart is valid JSON")
self._log(f" Parsed type: {type(parsedCompletePart).__name__}")
# For error tests, verify repair worked
if jsonString:
self._log(" ✅ JSON repair successful - errors were fixed")
# For split JSON, compare with truncated JSON
if not isComplete and not jsonString:
# Compare with truncated JSON (not original) - parse the truncated part to compare
from modules.shared.jsonUtils import closeJsonStructures, tryParseJson
# Try to parse the truncated JSON part (with structures closed)
truncatedClosed = closeJsonStructures(partContent)
truncatedParsed, truncatedError, _ = tryParseJson(truncatedClosed)
if truncatedParsed is not None:
# Compare completePart with the parsed truncated JSON
if isinstance(parsedCompletePart, dict) and isinstance(truncatedParsed, dict):
comparison = self.compareJson(truncatedParsed, parsedCompletePart)
self._log(f" Comparison with truncated JSON (at cut position {cutPosition}):")
self._log(f" Exact match: {comparison['exactMatch']}")
self._log(f" Size match: {comparison['sizeMatch']}")
if comparison['differences']:
self._log(f" Differences found: {len(comparison['differences'])}")
for diff in comparison['differences'][:10]: # Show first 10 differences
self._log(f" - {diff}")
if len(comparison['differences']) > 10:
self._log(f" ... ({len(comparison['differences']) - 10} more differences)")
else:
self._log(" No differences found - completePart matches truncated JSON structure")
elif isinstance(parsedCompletePart, list) and isinstance(truncatedParsed, list):
self._log(f" Both are lists: truncated={len(truncatedParsed)} items, completePart={len(parsedCompletePart)} items")
else:
self._log(f" Different types: truncated={type(truncatedParsed).__name__}, completePart={type(parsedCompletePart).__name__}")
else:
self._log(f" Could not parse truncated JSON for comparison (error: {truncatedError})")
except json.JSONDecodeError as e:
isValidJson = False
jsonError = str(e)
self._log(f" ❌ completePart is NOT valid JSON")
self._log(f" Error: {jsonError}")
self._log(f" Error position: line {e.lineno}, column {e.colno}")
if jsonString:
self._log(" ❌ JSON repair FAILED - errors were not fixed")
# Return test results
result = {
"success": isValidJson,
"fileName": fileName,
"originalSize": originalSize,
"cutPosition": cutPosition if not isComplete else None,
"partSize": len(partContent),
"overlapContextSize": len(contexts.overlapContext),
"hierarchyContextSize": len(contexts.hierarchyContext),
"hierarchyContextForPromptSize": len(contexts.hierarchyContextForPrompt),
"completePartSize": len(contexts.completePart),
"isValidJson": isValidJson,
"jsonError": jsonError,
"parsedCompletePart": parsedCompletePart is not None,
"jsonParsingSuccess": contexts.jsonParsingSuccess
}
# Add complete JSON specific checks
if isComplete:
result["overlapContextIsEmpty"] = contexts.overlapContext == ""
result["isComplete"] = True
# For complete JSON, success means overlapContext is empty AND valid JSON
result["success"] = isValidJson and (contexts.overlapContext == "")
# Add error test specific checks
if jsonString:
result["hasErrors"] = True
result["repairSuccess"] = isValidJson
return result
async def testAllJsonFiles(self) -> Dict[str, Any]:
"""Test splitting and merging all test JSON files."""
print("\n" + "="*80)
print("TESTING JSON SPLIT AND MERGE")
print("="*80)
testFiles = self.createTestJsonFiles()
results = {}
for jsonFile in testFiles:
try:
result = await self.testJsonSplitMerge(jsonFile)
results[jsonFile["name"]] = result
# Small delay between tests
await asyncio.sleep(0.5)
except Exception as e:
import traceback
print(f"\n❌ Error testing {jsonFile['name']}: {str(e)}")
print(traceback.format_exc())
results[jsonFile["name"]] = {
"success": False,
"error": str(e),
"traceback": traceback.format_exc()
}
return results
def _writeLogFile(self):
"""Write log buffer to file."""
logDir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "debug")
os.makedirs(logDir, exist_ok=True)
logFilePath = os.path.join(logDir, "test12_json_split_merge_results.txt")
with open(logFilePath, 'w', encoding='utf-8') as f:
f.write('\n'.join(self.logBuffer))
self.logFile = logFilePath
print(f"\n📝 Detailed log written to: {logFilePath}")
async def runTest(self):
"""Run the complete test."""
self._log("="*80)
self._log("JSON SPLIT AND MERGE TEST 12")
self._log("="*80)
try:
# Test all JSON files
results = await self.testAllJsonFiles()
# Write log file
self._writeLogFile()
# Summary
print("\n" + "="*80)
print("TEST SUMMARY")
print("="*80)
successCount = 0
for fileName, result in results.items():
if result.get("success"):
successCount += 1
isValidJson = result.get("isValidJson", False)
isComplete = result.get("isComplete", False)
hasErrors = result.get("hasErrors", False)
if isComplete:
overlapEmpty = result.get("overlapContextIsEmpty", False)
if isValidJson and overlapEmpty:
print(f"{fileName:30s}: Complete JSON - overlapContext='' and valid JSON")
elif not overlapEmpty:
print(f"⚠️ {fileName:30s}: Complete JSON but overlapContext not empty")
else:
jsonError = result.get("jsonError", "Unknown error")
print(f"⚠️ {fileName:30s}: Complete JSON but not valid - {jsonError}")
elif hasErrors:
repairSuccess = result.get("repairSuccess", False)
if repairSuccess:
print(f"{fileName:30s}: JSON with errors - repair successful")
else:
jsonError = result.get("jsonError", "Unknown error")
print(f"{fileName:30s}: JSON with errors - repair failed - {jsonError}")
else:
if isValidJson:
print(f"{fileName:30s}: Valid JSON - completePart parsed successfully")
else:
jsonError = result.get("jsonError", "Unknown error")
print(f"⚠️ {fileName:30s}: Contexts generated but completePart is not valid JSON - {jsonError}")
else:
error = result.get("error", "Unknown error")
print(f"{fileName:30s}: FAILED - {error}")
print(f"\nResults: {successCount}/{len(results)} successful")
self.testResults = {
"success": successCount == len(results),
"totalFiles": len(results),
"successCount": successCount,
"results": results
}
return self.testResults
except Exception as e:
import traceback
print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
print(f"Traceback:\n{traceback.format_exc()}")
self.testResults = {
"success": False,
"error": str(e),
"traceback": traceback.format_exc()
}
return self.testResults
async def main():
"""Run JSON split and merge test 12."""
tester = JsonSplitMergeTester12()
results = await tester.runTest()
# Print final results as JSON for easy parsing
print("\n" + "="*80)
print("FINAL RESULTS (JSON)")
print("="*80)
print(json.dumps(results, indent=2, default=str))
if __name__ == "__main__":
asyncio.run(main())

View file

@ -0,0 +1,307 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON Completion Test 13 - Tests JSON completion at various cut positions
Tests a single JSON object (~300 chars) with all JSON structure types.
Cuts the JSON at every position from character 50 to the end, completes it, and validates.
"""
import asyncio
import json
import sys
import os
from typing import Dict, Any, List
# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
sys.path.insert(0, _gateway_path)
# Import JSON continuation module
from modules.shared.jsonContinuation import getContexts
class JsonCompletionTester13:
def __init__(self):
self.testResults = {}
self.logBuffer = []
self.logFile = None
def createTestJson(self) -> str:
"""
Create a single JSON object (~300 chars) containing all JSON structure types:
- Objects (nested)
- Arrays (nested)
- Strings
- Numbers (integers and floats)
- Booleans (true, false)
- null
"""
testData = {
"id": 12345,
"name": "Test Object",
"active": True,
"inactive": False,
"value": None,
"price": 99.99,
"tags": ["tag1", "tag2", "tag3"],
"metadata": {
"created": "2025-01-01",
"updated": "2025-01-02",
"version": 1
},
"items": [
{"id": 1, "name": "Item A", "count": 10},
{"id": 2, "name": "Item B", "count": 20}
],
"settings": {
"theme": "dark",
"notifications": True,
"features": ["feature1", "feature2"]
}
}
jsonString = json.dumps(testData, indent=2, ensure_ascii=False)
# Ensure it's approximately 300 characters (adjust if needed)
targetLength = 300
if len(jsonString) < targetLength:
# Add padding to metadata
testData["metadata"]["description"] = "A" * (targetLength - len(jsonString) + 20)
jsonString = json.dumps(testData, indent=2, ensure_ascii=False)
# Trim to approximately 300 chars if too long
if len(jsonString) > targetLength + 50:
# Remove some content to get closer to target
testData["metadata"].pop("description", None)
jsonString = json.dumps(testData, indent=2, ensure_ascii=False)
return jsonString
def _log(self, message: str):
"""Add message to log buffer."""
self.logBuffer.append(message)
print(message)
async def testJsonCompletionAtCuts(self, jsonString: str, startPos: int = 50, step: int = 5) -> Dict[str, Any]:
"""
Test JSON completion at various cut positions.
Args:
jsonString: The full JSON string to test
startPos: Starting position for cuts (default 50)
step: Step size between cuts (default 5)
Returns:
Dictionary with test results for each cut position
"""
jsonLength = len(jsonString)
results = {}
self._log("")
self._log("="*80)
self._log("TESTING JSON COMPLETION AT VARIOUS CUT POSITIONS")
self._log("="*80)
self._log(f"JSON length: {jsonLength} characters")
self._log(f"Testing cuts from position {startPos} to {jsonLength} (step: {step})")
self._log("")
# Test at each cut position
cutPositions = list(range(startPos, jsonLength, step))
# Always include the last position
if cutPositions[-1] != jsonLength - 1:
cutPositions.append(jsonLength - 1)
successCount = 0
totalCuts = len(cutPositions)
for cutPos in cutPositions:
# Get truncated JSON
truncatedJson = jsonString[:cutPos]
# Generate contexts
try:
contexts = getContexts(truncatedJson)
completePart = contexts.completePart
overlapContext = contexts.overlapContext
# Test if completePart is valid JSON
isValidJson = False
jsonError = None
parsedData = None
try:
parsedData = json.loads(completePart)
isValidJson = True
except json.JSONDecodeError as e:
jsonError = str(e)
isValidJson = False
# Store result
result = {
"cutPosition": cutPos,
"truncatedLength": len(truncatedJson),
"completePartLength": len(completePart),
"overlapContextLength": len(overlapContext),
"isValidJson": isValidJson,
"jsonError": jsonError,
"truncatedJson": truncatedJson[-50:] if len(truncatedJson) > 50 else truncatedJson, # Last 50 chars
"completePart": completePart[-100:] if len(completePart) > 100 else completePart, # Last 100 chars
"overlapContext": overlapContext[-100:] if len(overlapContext) > 100 else overlapContext # Last 100 chars
}
results[cutPos] = result
if isValidJson:
successCount += 1
self._log(f"✅ Cut at position {cutPos:4d}: Valid JSON (completePart length: {len(completePart)}, overlap length: {len(overlapContext)})")
self._log(f" Overlap: {overlapContext[-80:] if len(overlapContext) > 80 else overlapContext}")
else:
self._log(f"❌ Cut at position {cutPos:4d}: Invalid JSON - {jsonError}")
self._log(f" Truncated (last 50): {truncatedJson[-50:]}")
self._log(f" CompletePart (last 100): {completePart[-100:]}")
self._log(f" Overlap: {overlapContext[-80:] if len(overlapContext) > 80 else overlapContext}")
except Exception as e:
result = {
"cutPosition": cutPos,
"truncatedLength": len(truncatedJson),
"isValidJson": False,
"jsonError": f"Exception: {str(e)}",
"truncatedJson": truncatedJson[-50:] if len(truncatedJson) > 50 else truncatedJson
}
results[cutPos] = result
self._log(f"❌ Cut at position {cutPos:4d}: Exception - {str(e)}")
# Summary
self._log("")
self._log("="*80)
self._log("CUT TEST SUMMARY")
self._log("="*80)
self._log(f"Total cuts tested: {totalCuts}")
self._log(f"Successful completions: {successCount}")
self._log(f"Failed completions: {totalCuts - successCount}")
self._log(f"Success rate: {successCount/totalCuts*100:.1f}%")
self._log("")
# Detailed results for failed cuts
failedCuts = [pos for pos, res in results.items() if not res.get("isValidJson", False)]
if failedCuts:
self._log("Failed cuts:")
for pos in failedCuts[:10]: # Show first 10 failures
res = results[pos]
self._log(f" Position {pos}: {res.get('jsonError', 'Unknown error')}")
overlap = res.get('overlapContext', 'N/A')
if overlap != 'N/A':
self._log(f" Overlap: {overlap[-80:] if len(overlap) > 80 else overlap}")
if len(failedCuts) > 10:
self._log(f" ... ({len(failedCuts) - 10} more failures)")
return {
"totalCuts": totalCuts,
"successCount": successCount,
"failedCount": totalCuts - successCount,
"successRate": successCount / totalCuts * 100 if totalCuts > 0 else 0,
"results": results,
"failedCuts": failedCuts
}
def _writeLogFile(self):
"""Write log buffer to file."""
logDir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "debug")
os.makedirs(logDir, exist_ok=True)
logFilePath = os.path.join(logDir, "test13_json_completion_cuts_results.txt")
with open(logFilePath, 'w', encoding='utf-8') as f:
f.write('\n'.join(self.logBuffer))
self.logFile = logFilePath
print(f"\n📝 Detailed log written to: {logFilePath}")
async def runTest(self):
"""Run the complete test."""
self._log("="*80)
self._log("JSON COMPLETION TEST 13")
self._log("="*80)
try:
# Create test JSON
jsonString = self.createTestJson()
self._log("")
self._log("="*80)
self._log("TEST JSON OBJECT")
self._log("="*80)
self._log(f"Length: {len(jsonString)} characters")
self._log("")
self._log("Full JSON content:")
self._log("-"*80)
jsonLines = jsonString.split('\n')
for line in jsonLines:
self._log(line)
# Test completion at various cuts
results = await self.testJsonCompletionAtCuts(jsonString, startPos=50, step=5)
# Write log file
self._writeLogFile()
# Final summary
self._log("")
self._log("="*80)
self._log("FINAL TEST SUMMARY")
self._log("="*80)
self._log(f"Total cuts tested: {results['totalCuts']}")
self._log(f"✅ Successful: {results['successCount']}")
self._log(f"❌ Failed: {results['failedCount']}")
self._log(f"Success rate: {results['successRate']:.1f}%")
if results['failedCuts']:
self._log("")
self._log("Failed cut positions:")
for pos in results['failedCuts']:
res = results['results'][pos]
self._log(f" Position {pos}: {res.get('jsonError', 'Unknown error')}")
overlap = res.get('overlapContext', 'N/A')
if overlap != 'N/A':
self._log(f" Overlap: {overlap[-80:] if len(overlap) > 80 else overlap}")
self.testResults = {
"success": results['successCount'] == results['totalCuts'],
"totalCuts": results['totalCuts'],
"successCount": results['successCount'],
"failedCount": results['failedCount'],
"successRate": results['successRate'],
"failedCuts": results['failedCuts'],
"results": results['results']
}
return self.testResults
except Exception as e:
import traceback
print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
print(f"Traceback:\n{traceback.format_exc()}")
self.testResults = {
"success": False,
"error": str(e),
"traceback": traceback.format_exc()
}
return self.testResults
async def main():
"""Run JSON completion test 13."""
tester = JsonCompletionTester13()
results = await tester.runTest()
# Print final results as JSON for easy parsing
print("\n" + "="*80)
print("FINAL RESULTS (JSON)")
print("="*80)
print(json.dumps(results, indent=2, default=str))
if __name__ == "__main__":
asyncio.run(main())

View file

@ -0,0 +1,373 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON Continuation Context Test 14 - Tests getContexts() with a specific cut JSON from debug prompts.
Reads a real AI response that was cut and analyzes the continuation contexts.
"""
import asyncio
import json
import sys
import os
from typing import Dict, Any, Optional
# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
sys.path.insert(0, _gateway_path)
# Import jsonContinuation
from modules.shared.jsonContinuation import getContexts
class JsonContinuationContextTester14:
def __init__(self):
self.testResults = {}
self.logBuffer = []
self.logFile = None
def _log(self, message: str):
"""Add message to log buffer."""
self.logBuffer.append(message)
print(message)
def _readDebugFile(self, fileName: str) -> Optional[str]:
"""Read a debug prompt file from local/debug/prompts/."""
try:
filePath = os.path.join(
os.path.dirname(__file__), "..", "..", "..", "local", "debug", "prompts",
fileName
)
with open(filePath, 'r', encoding='utf-8') as f:
return f.read()
except Exception as e:
self._log(f"Error reading file {fileName}: {e}")
return None
def _extractJsonFromResponse(self, content: str) -> str:
"""Extract JSON from response content (remove markdown code fences if present)."""
jsonContent = content.strip()
# Remove markdown code block markers
if jsonContent.startswith('```json'):
jsonContent = jsonContent[7:]
elif jsonContent.startswith('```'):
jsonContent = jsonContent[3:]
jsonContent = jsonContent.strip()
if jsonContent.endswith('```'):
jsonContent = jsonContent[:-3]
return jsonContent.strip()
async def testSpecificCutJson(self, fileName: str) -> Dict[str, Any]:
"""Test getContexts() with a specific cut JSON file."""
self._log("")
self._log("=" * 80)
self._log(f"TESTING CUT JSON FROM: {fileName}")
self._log("=" * 80)
# Read the file
content = self._readDebugFile(fileName)
if content is None:
return {"success": False, "error": f"Could not read file: {fileName}"}
# Extract JSON
jsonContent = self._extractJsonFromResponse(content)
self._log("")
self._log("=" * 80)
self._log("INPUT JSON (CUT)")
self._log("=" * 80)
self._log(f"Total length: {len(jsonContent)} characters")
self._log("")
# Show first and last parts
lines = jsonContent.split('\n')
if len(lines) > 40:
self._log("First 20 lines:")
for line in lines[:20]:
self._log(f" {line}")
self._log(f" ... ({len(lines) - 40} lines omitted) ...")
self._log("Last 20 lines:")
for line in lines[-20:]:
self._log(f" {line}")
else:
for line in lines:
self._log(f" {line}")
# Call getContexts()
self._log("")
self._log("=" * 80)
self._log("CALLING getContexts()")
self._log("=" * 80)
try:
contexts = getContexts(jsonContent)
except Exception as e:
self._log(f"ERROR calling getContexts(): {e}")
import traceback
self._log(traceback.format_exc())
return {"success": False, "error": str(e)}
# Log results
self._log("")
self._log("=" * 80)
self._log("RESULTS FROM getContexts()")
self._log("=" * 80)
# jsonParsingSuccess
self._log("")
self._log(f"jsonParsingSuccess: {contexts.jsonParsingSuccess}")
# overlapContext
self._log("")
self._log("=" * 80)
self._log("overlapContext:")
self._log("=" * 80)
self._log(f"Length: {len(contexts.overlapContext)} characters")
if contexts.overlapContext == "":
self._log(" (empty - JSON is complete, no cut point)")
else:
overlapLines = contexts.overlapContext.split('\n')
if len(overlapLines) > 20:
for line in overlapLines[:10]:
self._log(f" {line}")
self._log(f" ... ({len(overlapLines) - 20} lines omitted) ...")
for line in overlapLines[-10:]:
self._log(f" {line}")
else:
for line in overlapLines:
self._log(f" {line}")
# hierarchyContext
self._log("")
self._log("=" * 80)
self._log("hierarchyContext (for merging - should be exact input JSON):")
self._log("=" * 80)
self._log(f"Length: {len(contexts.hierarchyContext)} characters")
# Verify hierarchyContext equals input
if contexts.hierarchyContext == jsonContent:
self._log(" ✅ hierarchyContext == input JSON (CORRECT)")
else:
self._log(" ❌ hierarchyContext != input JSON (BUG!)")
self._log(f" Input length: {len(jsonContent)}, hierarchyContext length: {len(contexts.hierarchyContext)}")
# Show difference at the end
if len(contexts.hierarchyContext) > 0 and len(jsonContent) > 0:
minLen = min(len(contexts.hierarchyContext), len(jsonContent))
for i in range(minLen):
if contexts.hierarchyContext[i] != jsonContent[i]:
self._log(f" First difference at position {i}")
self._log(f" Input: ...{repr(jsonContent[max(0,i-20):i+20])}...")
self._log(f" Hierarchy: ...{repr(contexts.hierarchyContext[max(0,i-20):i+20])}...")
break
# hierarchyContextForPrompt
self._log("")
self._log("=" * 80)
self._log("hierarchyContextForPrompt (for AI prompt with budget/placeholders):")
self._log("=" * 80)
self._log(f"Length: {len(contexts.hierarchyContextForPrompt)} characters")
hierarchyPromptLines = contexts.hierarchyContextForPrompt.split('\n')
if len(hierarchyPromptLines) > 40:
for line in hierarchyPromptLines[:20]:
self._log(f" {line}")
self._log(f" ... ({len(hierarchyPromptLines) - 40} lines omitted) ...")
for line in hierarchyPromptLines[-20:]:
self._log(f" {line}")
else:
for line in hierarchyPromptLines:
self._log(f" {line}")
# completePart
self._log("")
self._log("=" * 80)
self._log("completePart (closed JSON for parsing):")
self._log("=" * 80)
self._log(f"Length: {len(contexts.completePart)} characters")
# Try to parse completePart
try:
parsed = json.loads(contexts.completePart)
self._log(" ✅ completePart is valid JSON")
self._log(f" Parsed type: {type(parsed).__name__}")
if isinstance(parsed, dict):
self._log(f" Keys: {list(parsed.keys())}")
elif isinstance(parsed, list):
self._log(f" List length: {len(parsed)}")
except json.JSONDecodeError as e:
self._log(f" ❌ completePart is NOT valid JSON: {e}")
completeLines = contexts.completePart.split('\n')
if len(completeLines) > 40:
self._log("")
self._log("First 20 lines:")
for line in completeLines[:20]:
self._log(f" {line}")
self._log(f" ... ({len(completeLines) - 40} lines omitted) ...")
self._log("Last 20 lines:")
for line in completeLines[-20:]:
self._log(f" {line}")
else:
for line in completeLines:
self._log(f" {line}")
# Summary
self._log("")
self._log("=" * 80)
self._log("SUMMARY")
self._log("=" * 80)
self._log(f" Input JSON length: {len(jsonContent)} chars")
self._log(f" jsonParsingSuccess: {contexts.jsonParsingSuccess}")
self._log(f" overlapContext length: {len(contexts.overlapContext)} chars")
self._log(f" overlapContext empty: {contexts.overlapContext == ''}")
self._log(f" hierarchyContext length: {len(contexts.hierarchyContext)} chars")
self._log(f" hierarchyContext == input: {contexts.hierarchyContext == jsonContent}")
self._log(f" hierarchyContextForPrompt length: {len(contexts.hierarchyContextForPrompt)} chars")
self._log(f" completePart length: {len(contexts.completePart)} chars")
return {
"success": True,
"fileName": fileName,
"inputLength": len(jsonContent),
"jsonParsingSuccess": contexts.jsonParsingSuccess,
"overlapContextLength": len(contexts.overlapContext),
"overlapContextEmpty": contexts.overlapContext == "",
"hierarchyContextLength": len(contexts.hierarchyContext),
"hierarchyContextEqualsInput": contexts.hierarchyContext == jsonContent,
"hierarchyContextForPromptLength": len(contexts.hierarchyContextForPrompt),
"completePartLength": len(contexts.completePart),
"contexts": {
"overlapContext": contexts.overlapContext,
"hierarchyContext": contexts.hierarchyContext[:500] + "..." if len(contexts.hierarchyContext) > 500 else contexts.hierarchyContext,
"hierarchyContextForPrompt": contexts.hierarchyContextForPrompt[:500] + "..." if len(contexts.hierarchyContextForPrompt) > 500 else contexts.hierarchyContextForPrompt,
"completePart": contexts.completePart[:500] + "..." if len(contexts.completePart) > 500 else contexts.completePart,
}
}
def _writeLogFile(self):
"""Write log buffer to file."""
logDir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "debug")
os.makedirs(logDir, exist_ok=True)
logFilePath = os.path.join(logDir, "test14_json_continuation_context_results.txt")
with open(logFilePath, 'w', encoding='utf-8') as f:
f.write('\n'.join(self.logBuffer))
self.logFile = logFilePath
print(f"\n📝 Detailed log written to: {logFilePath}")
async def runTest(self):
"""Run the complete test."""
self._log("=" * 80)
self._log("JSON CONTINUATION CONTEXT TEST 14")
self._log("=" * 80)
self._log("Testing getContexts() with specific cut JSON from debug prompts")
results = {}
# Test files to analyze
testFiles = [
# The first AI response (iteration 1) - this is the cut JSON
"20260106-173342-020-chapter_1_section_section_2_response.txt",
]
# Also try to find today's response files dynamically
debugDir = os.path.join(
os.path.dirname(__file__), "..", "..", "..", "local", "debug", "prompts"
)
if os.path.exists(debugDir):
for fileName in os.listdir(debugDir):
if "section_2_response" in fileName and fileName.endswith(".txt"):
if fileName not in testFiles:
testFiles.append(fileName)
# Limit to first 3 files
testFiles = testFiles[:3]
for fileName in testFiles:
try:
result = await self.testSpecificCutJson(fileName)
results[fileName] = result
except Exception as e:
import traceback
self._log(f"\n❌ Error testing {fileName}: {str(e)}")
self._log(traceback.format_exc())
results[fileName] = {
"success": False,
"error": str(e),
"traceback": traceback.format_exc()
}
# Write log file
self._writeLogFile()
# Summary
print("\n" + "=" * 80)
print("TEST SUMMARY")
print("=" * 80)
successCount = 0
for fileName, result in results.items():
if result.get("success"):
successCount += 1
hierarchyMatch = result.get("hierarchyContextEqualsInput", False)
overlapEmpty = result.get("overlapContextEmpty", False)
jsonSuccess = result.get("jsonParsingSuccess", False)
status = "" if hierarchyMatch else "⚠️"
print(f"{status} {fileName}")
print(f" hierarchyContext == input: {hierarchyMatch}")
print(f" overlapContext empty: {overlapEmpty}")
print(f" jsonParsingSuccess: {jsonSuccess}")
else:
print(f"{fileName}: {result.get('error', 'Unknown error')}")
print(f"\nResults: {successCount}/{len(results)} successful")
self.testResults = {
"success": successCount == len(results),
"totalFiles": len(results),
"successCount": successCount,
"results": results
}
return self.testResults
async def main():
"""Run JSON continuation context test 14."""
tester = JsonContinuationContextTester14()
results = await tester.runTest()
# Print final results as JSON for easy parsing
print("\n" + "=" * 80)
print("FINAL RESULTS (JSON)")
print("=" * 80)
# Create a simplified version for printing (contexts are too large)
printableResults = {
"success": results.get("success"),
"totalFiles": results.get("totalFiles"),
"successCount": results.get("successCount"),
"files": {}
}
for fileName, result in results.get("results", {}).items():
printableResults["files"][fileName] = {
"success": result.get("success"),
"inputLength": result.get("inputLength"),
"jsonParsingSuccess": result.get("jsonParsingSuccess"),
"overlapContextLength": result.get("overlapContextLength"),
"overlapContextEmpty": result.get("overlapContextEmpty"),
"hierarchyContextEqualsInput": result.get("hierarchyContextEqualsInput"),
"completePartLength": result.get("completePartLength"),
}
print(json.dumps(printableResults, indent=2, default=str))
if __name__ == "__main__":
asyncio.run(main())