extraction and generation engine enhanced for doc, code, image
This commit is contained in:
parent
f6540d6b5c
commit
909ee9528f
51 changed files with 2137 additions and 7955 deletions
|
|
@ -14,10 +14,6 @@ from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata,
|
|||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||
from modules.shared.jsonUtils import (
|
||||
extractJsonString,
|
||||
repairBrokenJson,
|
||||
extractSectionsFromDocument,
|
||||
buildContinuationContext,
|
||||
parseJsonWithModel
|
||||
)
|
||||
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
||||
|
|
@ -209,7 +205,7 @@ Respond with ONLY a JSON object in this exact format:
|
|||
processingMode=ProcessingModeEnum.BASIC
|
||||
)
|
||||
|
||||
async def _callAiWithLooping(
|
||||
async def callAiWithLooping(
|
||||
self,
|
||||
prompt: str,
|
||||
options: AiCallOptions,
|
||||
|
|
@ -218,11 +214,12 @@ Respond with ONLY a JSON object in this exact format:
|
|||
promptArgs: Optional[Dict[str, Any]] = None,
|
||||
operationId: Optional[str] = None,
|
||||
userPrompt: Optional[str] = None,
|
||||
contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content
|
||||
contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content
|
||||
useCaseId: Optional[str] = None # REQUIRED: Explicit use case ID for generic looping system
|
||||
) -> str:
|
||||
"""Delegate to AiCallLooper."""
|
||||
"""Public method: Delegate to AiCallLooper for AI calls with looping support."""
|
||||
return await self.aiCallLooper.callAiWithLooping(
|
||||
prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts
|
||||
prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts, useCaseId
|
||||
)
|
||||
|
||||
async def _defineKpisFromPrompt(
|
||||
|
|
@ -341,49 +338,21 @@ Respond with ONLY a JSON object in this exact format:
|
|||
prompt: str,
|
||||
options: AiCallOptions,
|
||||
title: Optional[str],
|
||||
aiOperationId: str
|
||||
parentOperationId: Optional[str]
|
||||
) -> AiResponse:
|
||||
"""Handle IMAGE_GENERATE operation type."""
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
|
||||
"""Handle IMAGE_GENERATE operation type using image generation path."""
|
||||
from modules.services.serviceGeneration.paths.imagePath import ImageGenerationPath
|
||||
|
||||
request = AiCallRequest(
|
||||
prompt=prompt,
|
||||
context="",
|
||||
options=options
|
||||
)
|
||||
imagePath = ImageGenerationPath(self.services)
|
||||
|
||||
response = await self.callAi(request)
|
||||
# Extract format from options
|
||||
format = options.resultFormat or "png"
|
||||
|
||||
if not response.content:
|
||||
errorMsg = f"No image data returned: {response.content}"
|
||||
logger.error(f"Error in AI image generation: {errorMsg}")
|
||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise ValueError(errorMsg)
|
||||
|
||||
imageDoc = DocumentData(
|
||||
documentName="generated_image.png",
|
||||
documentData=response.content,
|
||||
mimeType="image/png"
|
||||
)
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
title=title or "Generated Image",
|
||||
operationType=options.operationType.value
|
||||
)
|
||||
|
||||
self.services.chat.storeWorkflowStat(
|
||||
self.services.workflow,
|
||||
response,
|
||||
"ai.generate.image"
|
||||
)
|
||||
|
||||
self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated")
|
||||
self.services.chat.progressLogFinish(aiOperationId, True)
|
||||
|
||||
return AiResponse(
|
||||
content=response.content,
|
||||
metadata=metadata,
|
||||
documents=[imageDoc]
|
||||
return await imagePath.generateImages(
|
||||
userPrompt=prompt,
|
||||
format=format,
|
||||
title=title,
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
async def _handleWebOperation(
|
||||
|
|
@ -441,54 +410,54 @@ Respond with ONLY a JSON object in this exact format:
|
|||
return intent
|
||||
return None
|
||||
|
||||
async def _clarifyDocumentIntents(
|
||||
async def clarifyDocumentIntents(
|
||||
self,
|
||||
documents: List[ChatDocument],
|
||||
userPrompt: str,
|
||||
actionParameters: Dict[str, Any],
|
||||
parentOperationId: str
|
||||
) -> List[DocumentIntent]:
|
||||
"""Delegate to DocumentIntentAnalyzer."""
|
||||
"""Public method: Delegate to DocumentIntentAnalyzer."""
|
||||
return await self.intentAnalyzer.clarifyDocumentIntents(
|
||||
documents, userPrompt, actionParameters, parentOperationId
|
||||
)
|
||||
|
||||
async def _extractAndPrepareContent(
|
||||
async def extractAndPrepareContent(
|
||||
self,
|
||||
documents: List[ChatDocument],
|
||||
documentIntents: List[DocumentIntent],
|
||||
parentOperationId: str
|
||||
) -> List[ContentPart]:
|
||||
"""Delegate to ContentExtractor."""
|
||||
"""Public method: Delegate to ContentExtractor."""
|
||||
return await self.contentExtractor.extractAndPrepareContent(
|
||||
documents, documentIntents, parentOperationId, self._getIntentForDocument
|
||||
)
|
||||
|
||||
async def _generateStructure(
|
||||
async def generateStructure(
|
||||
self,
|
||||
userPrompt: str,
|
||||
contentParts: List[ContentPart],
|
||||
outputFormat: str,
|
||||
parentOperationId: str
|
||||
) -> Dict[str, Any]:
|
||||
"""Delegate to StructureGenerator."""
|
||||
"""Public method: Delegate to StructureGenerator."""
|
||||
return await self.structureGenerator.generateStructure(
|
||||
userPrompt, contentParts, outputFormat, parentOperationId
|
||||
)
|
||||
|
||||
async def _fillStructure(
|
||||
async def fillStructure(
|
||||
self,
|
||||
structure: Dict[str, Any],
|
||||
contentParts: List[ContentPart],
|
||||
userPrompt: str,
|
||||
parentOperationId: str
|
||||
) -> Dict[str, Any]:
|
||||
"""Delegate to StructureFiller."""
|
||||
"""Public method: Delegate to StructureFiller."""
|
||||
return await self.structureFiller.fillStructure(
|
||||
structure, contentParts, userPrompt, parentOperationId
|
||||
)
|
||||
|
||||
async def _renderResult(
|
||||
async def renderResult(
|
||||
self,
|
||||
filledStructure: Dict[str, Any],
|
||||
outputFormat: str,
|
||||
|
|
@ -577,13 +546,14 @@ Respond with ONLY a JSON object in this exact format:
|
|||
documentIntents: Optional[List[DocumentIntent]] = None,
|
||||
outputFormat: Optional[str] = None,
|
||||
title: Optional[str] = None,
|
||||
parentOperationId: Optional[str] = None
|
||||
parentOperationId: Optional[str] = None,
|
||||
generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection)
|
||||
) -> AiResponse:
|
||||
"""
|
||||
Einheitliche AI-Content-Verarbeitung - Single Entry Point für alle AI-Actions.
|
||||
Unified AI content generation with explicit intent requirement.
|
||||
|
||||
Alle AI-Actions (ai.process, ai.generateDocument, etc.) routen hier durch.
|
||||
Sie unterscheiden sich nur in Parametern, nicht in Logik.
|
||||
All AI-Actions (ai.process, ai.generateDocument, etc.) route through here.
|
||||
They differ only in parameters, not in logic.
|
||||
|
||||
Args:
|
||||
prompt: The main prompt for the AI call
|
||||
|
|
@ -594,6 +564,8 @@ Respond with ONLY a JSON object in this exact format:
|
|||
outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx')
|
||||
title: Optional title for generated documents
|
||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||
generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action.
|
||||
NO auto-detection - actions must explicitly specify intent.
|
||||
|
||||
Returns:
|
||||
AiResponse with content, metadata, and optional documents
|
||||
|
|
@ -625,111 +597,73 @@ Respond with ONLY a JSON object in this exact format:
|
|||
|
||||
# Route zu Operation-spezifischen Handlern
|
||||
if opType == OperationTypeEnum.IMAGE_GENERATE:
|
||||
return await self._handleImageGeneration(prompt, options, title, aiOperationId)
|
||||
# Image generation - route to image path
|
||||
return await self._handleImageGeneration(prompt, options, title, parentOperationId)
|
||||
|
||||
if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL:
|
||||
return await self._handleWebOperation(prompt, options, opType, aiOperationId)
|
||||
|
||||
# Dokument-Generierungs-Pfad
|
||||
options.compressPrompt = False
|
||||
options.compressContext = False
|
||||
|
||||
# Schritt 5A: Kläre Dokument-Intents
|
||||
documents = []
|
||||
if documentList:
|
||||
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
|
||||
if not documentIntents and documents:
|
||||
documentIntents = await self._clarifyDocumentIntents(
|
||||
documents,
|
||||
prompt,
|
||||
{"outputFormat": outputFormat},
|
||||
aiOperationId
|
||||
)
|
||||
|
||||
# Schritt 5B: Extrahiere und bereite Content vor
|
||||
if documents:
|
||||
preparedContentParts = await self._extractAndPrepareContent(
|
||||
documents,
|
||||
documentIntents or [],
|
||||
aiOperationId
|
||||
)
|
||||
|
||||
# Merge mit bereitgestellten contentParts (falls vorhanden)
|
||||
if contentParts:
|
||||
# Prüfe auf pre-extracted Content
|
||||
for part in contentParts:
|
||||
if part.metadata.get("skipExtraction", False):
|
||||
# Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig
|
||||
part.metadata.setdefault("contentFormat", "extracted")
|
||||
part.metadata.setdefault("isPreExtracted", True)
|
||||
preparedContentParts.extend(contentParts)
|
||||
|
||||
contentParts = preparedContentParts
|
||||
|
||||
# Schritt 5C: Generiere Struktur
|
||||
structure = await self._generateStructure(
|
||||
prompt,
|
||||
contentParts or [],
|
||||
outputFormat,
|
||||
aiOperationId
|
||||
)
|
||||
|
||||
# Schritt 5D: Fülle Struktur
|
||||
# Language will be extracted from services (user intention analysis) in fillStructure
|
||||
filledStructure = await self._fillStructure(
|
||||
structure,
|
||||
contentParts or [],
|
||||
prompt,
|
||||
aiOperationId
|
||||
)
|
||||
|
||||
# Schritt 5E: Rendere Resultat
|
||||
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
|
||||
renderedDocuments = await self._renderResult(
|
||||
filledStructure,
|
||||
outputFormat,
|
||||
title or "Generated Document",
|
||||
prompt,
|
||||
aiOperationId
|
||||
)
|
||||
|
||||
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
|
||||
documentDataList = []
|
||||
for renderedDoc in renderedDocuments:
|
||||
try:
|
||||
# Erstelle DocumentData für jedes gerenderte Dokument
|
||||
docDataObj = DocumentData(
|
||||
documentName=renderedDoc.filename,
|
||||
documentData=renderedDoc.documentData,
|
||||
mimeType=renderedDoc.mimeType,
|
||||
sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
|
||||
# Data generation - REQUIRES explicit generationIntent
|
||||
if opType == OperationTypeEnum.DATA_GENERATE:
|
||||
if not generationIntent:
|
||||
errorMsg = (
|
||||
"generationIntent is required for DATA_GENERATE operation. "
|
||||
"Actions must explicitly specify 'document' or 'code' intent. "
|
||||
"No auto-detection - use qualified actions (ai.generateDocument, ai.generateCode)."
|
||||
)
|
||||
logger.error(errorMsg)
|
||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise ValueError(errorMsg)
|
||||
|
||||
# Route based on explicit intent (no auto-detection, no fallback)
|
||||
if generationIntent == "code":
|
||||
# Route to code generation path
|
||||
return await self._handleCodeGeneration(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat,
|
||||
title=title,
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
else:
|
||||
# Route to document generation path (existing behavior)
|
||||
return await self._handleDocumentGeneration(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
documentList=documentList,
|
||||
documentIntents=documentIntents,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat,
|
||||
title=title,
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
documentDataList.append(docDataObj)
|
||||
logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
|
||||
|
||||
if not documentDataList:
|
||||
raise ValueError("No documents were rendered")
|
||||
# DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
|
||||
if opType == OperationTypeEnum.DATA_EXTRACT:
|
||||
return await self._handleDataExtraction(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
documentList=documentList,
|
||||
documentIntents=documentIntents,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat,
|
||||
title=title,
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
|
||||
operationType=opType.value
|
||||
)
|
||||
|
||||
# Debug-Log (harmonisiert)
|
||||
self.services.utils.writeDebugFile(
|
||||
json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
|
||||
"document_generation_response"
|
||||
)
|
||||
|
||||
self.services.chat.progressLogFinish(aiOperationId, True)
|
||||
|
||||
return AiResponse(
|
||||
content=json.dumps(filledStructure),
|
||||
metadata=metadata,
|
||||
documents=documentDataList
|
||||
# Other operation types (DATA_ANALYSE, etc.) - existing logic
|
||||
# Fallback to document generation for backward compatibility (should not happen)
|
||||
logger.warning(f"Unhandled operation type: {opType}, falling back to document generation")
|
||||
return await self._handleDocumentGeneration(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
documentList=documentList,
|
||||
documentIntents=documentIntents,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat,
|
||||
title=title,
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -737,6 +671,166 @@ Respond with ONLY a JSON object in this exact format:
|
|||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise
|
||||
|
||||
async def _handleDataExtraction(
|
||||
self,
|
||||
prompt: str,
|
||||
options: AiCallOptions,
|
||||
documentList: Optional[Any],
|
||||
documentIntents: Optional[List[DocumentIntent]],
|
||||
contentParts: Optional[List[ContentPart]],
|
||||
outputFormat: str,
|
||||
title: str,
|
||||
parentOperationId: Optional[str]
|
||||
) -> AiResponse:
|
||||
"""
|
||||
Handle DATA_EXTRACT: Extract content from documents (no AI), then process with AI.
|
||||
This is the original flow: extract all documents first, then process contentParts with AI.
|
||||
"""
|
||||
import time
|
||||
|
||||
# Create operation ID
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
extractOperationId = f"data_extract_{workflowId}_{int(time.time())}"
|
||||
|
||||
# Start progress tracking
|
||||
self.services.chat.progressLogStart(
|
||||
extractOperationId,
|
||||
"Data Extraction",
|
||||
"Extraction",
|
||||
f"Format: {outputFormat}",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
try:
|
||||
# Step 1: Get documents from documentList
|
||||
documents = []
|
||||
if documentList:
|
||||
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
|
||||
# Step 2: Clarify document intents (if not provided) - REQUIRED for all documents
|
||||
if not documentIntents and documents:
|
||||
documentIntents = await self.clarifyDocumentIntents(
|
||||
documents,
|
||||
prompt,
|
||||
{"outputFormat": outputFormat},
|
||||
extractOperationId
|
||||
)
|
||||
|
||||
# Step 3: Extract and prepare content (NO AI - pure extraction) - REQUIRED for all documents
|
||||
if documents:
|
||||
preparedContentParts = await self.extractAndPrepareContent(
|
||||
documents,
|
||||
documentIntents or [],
|
||||
extractOperationId
|
||||
)
|
||||
|
||||
# Merge with provided contentParts (if any)
|
||||
if contentParts:
|
||||
for part in contentParts:
|
||||
if part.metadata.get("skipExtraction", False):
|
||||
part.metadata.setdefault("contentFormat", "extracted")
|
||||
part.metadata.setdefault("isPreExtracted", True)
|
||||
preparedContentParts.extend(contentParts)
|
||||
|
||||
contentParts = preparedContentParts
|
||||
|
||||
# Step 4: Process extracted contentParts with AI (simple text processing, no structure generation)
|
||||
if not contentParts:
|
||||
raise ValueError("No content extracted from documents")
|
||||
|
||||
# Use simple AI call to process extracted content
|
||||
# Prepare content for AI processing
|
||||
contentText = "\n\n".join([
|
||||
f"[Document: {part.metadata.get('documentName', 'Unknown')}]\n{part.data}"
|
||||
for part in contentParts
|
||||
if part.data
|
||||
])
|
||||
|
||||
# Call AI with extracted content
|
||||
aiRequest = AiCallRequest(
|
||||
prompt=f"{prompt}\n\nExtracted Content:\n{contentText}",
|
||||
context="",
|
||||
options=options
|
||||
)
|
||||
|
||||
aiResponse = await self.callAi(aiRequest)
|
||||
|
||||
# Create response document
|
||||
resultDocument = DocumentData(
|
||||
documentName=f"{title or 'extracted_data'}.{outputFormat}",
|
||||
documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
|
||||
mimeType=f"text/{outputFormat}" if outputFormat in ["txt", "json", "csv"] else "application/octet-stream"
|
||||
)
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
title=title or "Extracted Data",
|
||||
operationType=OperationTypeEnum.DATA_EXTRACT.value
|
||||
)
|
||||
|
||||
self.services.chat.progressLogFinish(extractOperationId, True)
|
||||
|
||||
return AiResponse(
|
||||
content=aiResponse.content if isinstance(aiResponse.content, str) else aiResponse.content.decode('utf-8', errors='replace'),
|
||||
metadata=metadata,
|
||||
documents=[resultDocument]
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in data extraction: {str(e)}")
|
||||
self.services.chat.progressLogFinish(extractOperationId, False)
|
||||
raise
|
||||
|
||||
async def _handleCodeGeneration(
|
||||
self,
|
||||
prompt: str,
|
||||
options: AiCallOptions,
|
||||
contentParts: Optional[List[ContentPart]],
|
||||
outputFormat: str,
|
||||
title: str,
|
||||
parentOperationId: Optional[str]
|
||||
) -> AiResponse:
|
||||
"""Handle code generation using code generation path."""
|
||||
from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath
|
||||
|
||||
codePath = CodeGenerationPath(self.services)
|
||||
return await codePath.generateCode(
|
||||
userPrompt=prompt,
|
||||
outputFormat=outputFormat,
|
||||
contentParts=contentParts,
|
||||
title=title or "Generated Code",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
async def _handleDocumentGeneration(
|
||||
self,
|
||||
prompt: str,
|
||||
options: AiCallOptions,
|
||||
documentList: Optional[Any],
|
||||
documentIntents: Optional[List[DocumentIntent]],
|
||||
contentParts: Optional[List[ContentPart]],
|
||||
outputFormat: str,
|
||||
title: str,
|
||||
parentOperationId: Optional[str]
|
||||
) -> AiResponse:
|
||||
"""Handle document generation using document generation path."""
|
||||
from modules.services.serviceGeneration.paths.documentPath import DocumentGenerationPath
|
||||
|
||||
# Set compression options for document generation
|
||||
options.compressPrompt = False
|
||||
options.compressContext = False
|
||||
|
||||
documentPath = DocumentGenerationPath(self.services)
|
||||
return await documentPath.generateDocument(
|
||||
userPrompt=prompt,
|
||||
documentList=documentList,
|
||||
documentIntents=documentIntents,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat,
|
||||
title=title or "Generated Document",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
|
||||
def _determineDocumentName(
|
||||
self,
|
||||
filledStructure: Dict[str, Any],
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, Operati
|
|||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
|
||||
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
||||
from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -28,6 +29,7 @@ class AiCallLooper:
|
|||
self.services = services
|
||||
self.aiService = aiService
|
||||
self.responseParser = responseParser
|
||||
self.useCaseRegistry = LoopingUseCaseRegistry() # Initialize use case registry
|
||||
|
||||
async def callAiWithLooping(
|
||||
self,
|
||||
|
|
@ -38,7 +40,8 @@ class AiCallLooper:
|
|||
promptArgs: Optional[Dict[str, Any]] = None,
|
||||
operationId: Optional[str] = None,
|
||||
userPrompt: Optional[str] = None,
|
||||
contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content
|
||||
contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content
|
||||
useCaseId: str = None # REQUIRED: Explicit use case ID - no auto-detection, no fallback
|
||||
) -> str:
|
||||
"""
|
||||
Shared core function for AI calls with repair-based looping system.
|
||||
|
|
@ -53,10 +56,31 @@ class AiCallLooper:
|
|||
operationId: Optional operation ID for progress tracking
|
||||
userPrompt: Optional user prompt for KPI definition
|
||||
contentParts: Optional content parts for first iteration
|
||||
useCaseId: REQUIRED: Explicit use case ID - no auto-detection, no fallback
|
||||
|
||||
Returns:
|
||||
Complete AI response after all iterations
|
||||
"""
|
||||
# REQUIRED: useCaseId must be provided - no auto-detection, no fallback
|
||||
if not useCaseId:
|
||||
errorMsg = (
|
||||
"useCaseId is REQUIRED for callAiWithLooping. "
|
||||
"No auto-detection - must explicitly specify use case ID. "
|
||||
f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}"
|
||||
)
|
||||
logger.error(errorMsg)
|
||||
raise ValueError(errorMsg)
|
||||
|
||||
# Validate use case exists
|
||||
useCase = self.useCaseRegistry.get(useCaseId)
|
||||
if not useCase:
|
||||
errorMsg = (
|
||||
f"Use case '{useCaseId}' not found in registry. "
|
||||
f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}"
|
||||
)
|
||||
logger.error(errorMsg)
|
||||
raise ValueError(errorMsg)
|
||||
|
||||
maxIterations = 50 # Prevent infinite loops
|
||||
iteration = 0
|
||||
allSections = [] # Accumulate all sections across iterations
|
||||
|
|
@ -199,36 +223,31 @@ class AiCallLooper:
|
|||
# Store raw response for continuation (even if broken)
|
||||
lastRawResponse = result
|
||||
|
||||
# Check if this is section content generation (has "elements" not "sections")
|
||||
# Section content generation returns JSON with "elements" array, not document structure with "sections"
|
||||
isSectionContentGeneration = False
|
||||
parsedJsonForSection = None
|
||||
extractedJsonForSection = None
|
||||
# Parse JSON for use case handling
|
||||
parsedJsonForUseCase = None
|
||||
extractedJsonForUseCase = None
|
||||
|
||||
try:
|
||||
extractedJsonForSection = extractJsonString(result)
|
||||
parsedJson, parseError, _ = tryParseJson(extractedJsonForSection)
|
||||
extractedJsonForUseCase = extractJsonString(result)
|
||||
parsedJson, parseError, _ = tryParseJson(extractedJsonForUseCase)
|
||||
if parseError is None and parsedJson:
|
||||
parsedJsonForSection = parsedJson
|
||||
# Check if JSON has "elements" (section content) or "sections" (document structure)
|
||||
if isinstance(parsedJson, dict):
|
||||
if "elements" in parsedJson:
|
||||
isSectionContentGeneration = True
|
||||
elif isinstance(parsedJson, list) and len(parsedJson) > 0:
|
||||
# Check if it's a list of elements (section content format)
|
||||
if isinstance(parsedJson[0], dict) and "type" in parsedJson[0]:
|
||||
isSectionContentGeneration = True
|
||||
parsedJsonForUseCase = parsedJson
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if isSectionContentGeneration:
|
||||
# This is section content generation - return the JSON directly
|
||||
# No need to extract sections, just return the complete JSON string
|
||||
logger.info(f"Iteration {iteration}: Section content generation detected (elements found), returning JSON directly")
|
||||
# Handle use cases that return JSON directly (no section extraction needed)
|
||||
directReturnUseCases = ["section_content", "chapter_structure", "code_structure", "code_content", "image_batch"]
|
||||
if useCaseId in directReturnUseCases:
|
||||
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - returning JSON directly")
|
||||
if iterationOperationId:
|
||||
self.services.chat.progressLogFinish(iterationOperationId, True)
|
||||
# Note: Debug files (_prompt and _response) are already written above for iteration 1
|
||||
# No need to write _final_result as it's redundant with _response
|
||||
final_json = json.dumps(parsedJsonForSection, indent=2, ensure_ascii=False) if parsedJsonForSection else (extractedJsonForSection or result)
|
||||
|
||||
final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
|
||||
|
||||
# Write final result for chapter structure and code structure (section_content skips it)
|
||||
if useCaseId in ["chapter_structure", "code_structure"]:
|
||||
self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
|
||||
|
||||
return final_json
|
||||
|
||||
# Extract sections from response (handles both valid and broken JSON)
|
||||
|
|
|
|||
231
modules/services/serviceAi/subLoopingUseCases.py
Normal file
231
modules/services/serviceAi/subLoopingUseCases.py
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Generic Looping Use Case System
|
||||
|
||||
Provides parametrized looping infrastructure supporting different JSON formats and use cases.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Any, List, Optional, Callable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class LoopingUseCase:
|
||||
"""Configuration for a specific looping use case."""
|
||||
|
||||
# Identification
|
||||
useCaseId: str # "section_content", "chapter_structure", "document_structure", "code_structure", "code_content", "image_batch"
|
||||
|
||||
# JSON Format Detection
|
||||
jsonTemplate: Dict[str, Any] # Expected JSON structure template
|
||||
detectionKeys: List[str] # Keys to check for format detection (e.g., ["elements"], ["chapters"], ["files"])
|
||||
detectionPath: str # JSONPath to check (e.g., "documents[0].chapters", "files[0].content")
|
||||
|
||||
# Prompt Building
|
||||
initialPromptBuilder: Optional[Callable] = None # Function to build initial prompt
|
||||
continuationPromptBuilder: Optional[Callable] = None # Function to build continuation prompt
|
||||
|
||||
# Accumulation & Merging
|
||||
accumulator: Optional[Callable] = None # Function to accumulate fragments
|
||||
merger: Optional[Callable] = None # Function to merge accumulated data
|
||||
|
||||
# Continuation Context
|
||||
continuationContextBuilder: Optional[Callable] = None # Build continuation context for this format
|
||||
|
||||
# Result Building
|
||||
resultBuilder: Optional[Callable] = None # Build final result from accumulated data
|
||||
|
||||
# Metadata
|
||||
supportsAccumulation: bool = True # Whether this use case supports accumulation
|
||||
requiresExtraction: bool = False # Whether this requires extraction (like sections)
|
||||
|
||||
|
||||
class LoopingUseCaseRegistry:
|
||||
"""Registry of all looping use cases."""
|
||||
|
||||
def __init__(self):
|
||||
self.useCases: Dict[str, LoopingUseCase] = {}
|
||||
self._registerDefaultUseCases()
|
||||
|
||||
def register(self, useCase: LoopingUseCase):
|
||||
"""Register a new use case."""
|
||||
self.useCases[useCase.useCaseId] = useCase
|
||||
logger.debug(f"Registered looping use case: {useCase.useCaseId}")
|
||||
|
||||
def get(self, useCaseId: str) -> Optional[LoopingUseCase]:
|
||||
"""Get use case by ID."""
|
||||
return self.useCases.get(useCaseId)
|
||||
|
||||
def detectUseCase(self, parsedJson: Dict[str, Any]) -> Optional[str]:
|
||||
"""Detect which use case matches the JSON structure."""
|
||||
for useCaseId, useCase in self.useCases.items():
|
||||
if self._matchesFormat(parsedJson, useCase):
|
||||
return useCaseId
|
||||
return None
|
||||
|
||||
def _matchesFormat(self, json: Dict[str, Any], useCase: LoopingUseCase) -> bool:
|
||||
"""Check if JSON matches use case format."""
|
||||
# Check top-level keys
|
||||
for key in useCase.detectionKeys:
|
||||
if key in json:
|
||||
return True
|
||||
|
||||
# Check nested path using simple dictionary traversal (no jsonpath_ng needed)
|
||||
if useCase.detectionPath:
|
||||
try:
|
||||
# Simple path matching without jsonpath_ng
|
||||
# Format: "documents[0].chapters" or "files[0].content"
|
||||
pathParts = useCase.detectionPath.split(".")
|
||||
current = json
|
||||
|
||||
for part in pathParts:
|
||||
# Handle array indices like "documents[0]"
|
||||
if "[" in part and "]" in part:
|
||||
key = part.split("[")[0]
|
||||
index = int(part.split("[")[1].split("]")[0])
|
||||
if isinstance(current, dict) and key in current:
|
||||
if isinstance(current[key], list) and 0 <= index < len(current[key]):
|
||||
current = current[key][index]
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
# Regular key access
|
||||
if isinstance(current, dict) and part in current:
|
||||
current = current[part]
|
||||
else:
|
||||
return False
|
||||
|
||||
# If we successfully traversed the path, it matches
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug(f"Path matching failed for {useCase.useCaseId}: {e}")
|
||||
|
||||
return False
|
||||
|
||||
def _registerDefaultUseCases(self):
|
||||
"""Register default use cases."""
|
||||
|
||||
# Use Case 1: Section Content Generation
|
||||
# Returns JSON with "elements" array directly
|
||||
self.register(LoopingUseCase(
|
||||
useCaseId="section_content",
|
||||
jsonTemplate={"elements": []},
|
||||
detectionKeys=["elements"],
|
||||
detectionPath="",
|
||||
initialPromptBuilder=None, # Will use default prompt builder
|
||||
continuationPromptBuilder=None, # Will use default continuation builder
|
||||
accumulator=None, # Direct return, no accumulation
|
||||
merger=None,
|
||||
continuationContextBuilder=None, # Will use default continuation context
|
||||
resultBuilder=None, # Return JSON directly
|
||||
supportsAccumulation=False,
|
||||
requiresExtraction=False
|
||||
))
|
||||
|
||||
# Use Case 2: Chapter Structure Generation
|
||||
# Returns JSON with "documents[0].chapters" structure
|
||||
self.register(LoopingUseCase(
|
||||
useCaseId="chapter_structure",
|
||||
jsonTemplate={"documents": [{"chapters": []}]},
|
||||
detectionKeys=["chapters"],
|
||||
detectionPath="documents[0].chapters",
|
||||
initialPromptBuilder=None,
|
||||
continuationPromptBuilder=None,
|
||||
accumulator=None, # Direct return, no accumulation
|
||||
merger=None,
|
||||
continuationContextBuilder=None,
|
||||
resultBuilder=None, # Return JSON directly
|
||||
supportsAccumulation=False,
|
||||
requiresExtraction=False
|
||||
))
|
||||
|
||||
# Use Case 3: Document Structure Generation
|
||||
# Returns JSON with "documents[0].sections" structure, requires extraction and accumulation
|
||||
self.register(LoopingUseCase(
|
||||
useCaseId="document_structure",
|
||||
jsonTemplate={"documents": [{"sections": []}]},
|
||||
detectionKeys=["sections"],
|
||||
detectionPath="documents[0].sections",
|
||||
initialPromptBuilder=None,
|
||||
continuationPromptBuilder=None,
|
||||
accumulator=None, # Will use default accumulator
|
||||
merger=None, # Will use default merger
|
||||
continuationContextBuilder=None,
|
||||
resultBuilder=None, # Will use default result builder
|
||||
supportsAccumulation=True,
|
||||
requiresExtraction=True
|
||||
))
|
||||
|
||||
# Use Case 4: Code Structure Generation (NEW)
|
||||
self.register(LoopingUseCase(
|
||||
useCaseId="code_structure",
|
||||
jsonTemplate={
|
||||
"metadata": {
|
||||
"language": "",
|
||||
"projectType": "single_file|multi_file",
|
||||
"projectName": ""
|
||||
},
|
||||
"files": [
|
||||
{
|
||||
"id": "",
|
||||
"filename": "",
|
||||
"fileType": "",
|
||||
"dependencies": [],
|
||||
"imports": [],
|
||||
"functions": [],
|
||||
"classes": []
|
||||
}
|
||||
]
|
||||
},
|
||||
detectionKeys=["files"],
|
||||
detectionPath="files",
|
||||
initialPromptBuilder=None,
|
||||
continuationPromptBuilder=None,
|
||||
accumulator=None, # Direct return
|
||||
merger=None,
|
||||
continuationContextBuilder=None,
|
||||
resultBuilder=None,
|
||||
supportsAccumulation=False,
|
||||
requiresExtraction=False
|
||||
))
|
||||
|
||||
# Use Case 5: Code Content Generation (NEW)
|
||||
self.register(LoopingUseCase(
|
||||
useCaseId="code_content",
|
||||
jsonTemplate={"files": [{"content": "", "functions": []}]},
|
||||
detectionKeys=["content", "functions"],
|
||||
detectionPath="files[0].content",
|
||||
initialPromptBuilder=None,
|
||||
continuationPromptBuilder=None,
|
||||
accumulator=None, # Will use default accumulator
|
||||
merger=None, # Will use default merger
|
||||
continuationContextBuilder=None,
|
||||
resultBuilder=None, # Will use default result builder
|
||||
supportsAccumulation=True,
|
||||
requiresExtraction=False
|
||||
))
|
||||
|
||||
# Use Case 6: Image Batch Generation (NEW)
|
||||
self.register(LoopingUseCase(
|
||||
useCaseId="image_batch",
|
||||
jsonTemplate={"images": []},
|
||||
detectionKeys=["images"],
|
||||
detectionPath="images",
|
||||
initialPromptBuilder=None,
|
||||
continuationPromptBuilder=None,
|
||||
accumulator=None, # Direct return
|
||||
merger=None,
|
||||
continuationContextBuilder=None,
|
||||
resultBuilder=None,
|
||||
supportsAccumulation=False,
|
||||
requiresExtraction=False
|
||||
))
|
||||
|
||||
logger.info(f"Registered {len(self.useCases)} default looping use cases")
|
||||
|
||||
|
|
@ -23,11 +23,20 @@ logger = logging.getLogger(__name__)
|
|||
class StructureFiller:
|
||||
"""Handles filling document structure with content."""
|
||||
|
||||
# Default concurrency limit for parallel generation (chapters/sections)
|
||||
DEFAULT_MAX_CONCURRENT_GENERATION = 16
|
||||
|
||||
def __init__(self, services, aiService):
|
||||
"""Initialize StructureFiller with service center and AI service access."""
|
||||
self.services = services
|
||||
self.aiService = aiService
|
||||
|
||||
def _getMaxConcurrentGeneration(self, options: Optional[AiCallOptions] = None) -> int:
|
||||
"""Get max concurrent generation limit, configurable via options."""
|
||||
if options and hasattr(options, 'maxConcurrentGeneration'):
|
||||
return options.maxConcurrentGeneration
|
||||
return self.DEFAULT_MAX_CONCURRENT_GENERATION
|
||||
|
||||
def _getUserLanguage(self) -> str:
|
||||
"""Get user language for document generation"""
|
||||
try:
|
||||
|
|
@ -101,14 +110,19 @@ class StructureFiller:
|
|||
try:
|
||||
filledStructure = copy.deepcopy(structure)
|
||||
|
||||
# Get options from AI service if available (for concurrency control)
|
||||
# Default concurrency limit (16) will be used if options is None
|
||||
options = None
|
||||
# Note: Options can be passed via fillStructure if needed in the future
|
||||
|
||||
# Phase 5D.1: Sections-Struktur für jedes Chapter generieren
|
||||
filledStructure = await self._generateChapterSectionsStructure(
|
||||
filledStructure, contentParts, userPrompt, fillOperationId, language
|
||||
filledStructure, contentParts, userPrompt, fillOperationId, language, options
|
||||
)
|
||||
|
||||
# Phase 5D.2: Sections mit ContentParts füllen
|
||||
filledStructure = await self._fillChapterSections(
|
||||
filledStructure, contentParts, userPrompt, fillOperationId, language
|
||||
filledStructure, contentParts, userPrompt, fillOperationId, language, options
|
||||
)
|
||||
|
||||
# Flattening: Chapters zu Sections konvertieren
|
||||
|
|
@ -243,7 +257,8 @@ class StructureFiller:
|
|||
contentParts: List[ContentPart],
|
||||
userPrompt: str,
|
||||
parentOperationId: str,
|
||||
language: str
|
||||
language: str,
|
||||
options: Optional[AiCallOptions] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content) in parallel.
|
||||
|
|
@ -252,6 +267,10 @@ class StructureFiller:
|
|||
# Count total chapters for progress tracking
|
||||
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
|
||||
|
||||
# Get concurrency limit
|
||||
maxConcurrent = self._getMaxConcurrentGeneration(options)
|
||||
semaphore = asyncio.Semaphore(maxConcurrent)
|
||||
|
||||
# Collect all chapters with their indices for parallel processing
|
||||
chapterTasks = []
|
||||
chapterIndex = 0
|
||||
|
|
@ -266,25 +285,31 @@ class StructureFiller:
|
|||
contentPartIds = chapter.get("contentPartIds", [])
|
||||
contentPartInstructions = chapter.get("contentPartInstructions", {})
|
||||
|
||||
# Create task for parallel processing
|
||||
task = self._generateSingleChapterSectionsStructure(
|
||||
chapter=chapter,
|
||||
chapterIndex=chapterIndex,
|
||||
chapterId=chapterId,
|
||||
chapterLevel=chapterLevel,
|
||||
chapterTitle=chapterTitle,
|
||||
generationHint=generationHint,
|
||||
contentPartIds=contentPartIds,
|
||||
contentPartInstructions=contentPartInstructions,
|
||||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
language=language,
|
||||
parentOperationId=parentOperationId,
|
||||
totalChapters=totalChapters
|
||||
# Create task for parallel processing with semaphore
|
||||
async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions):
|
||||
async with semaphore:
|
||||
return await self._generateSingleChapterSectionsStructure(
|
||||
chapter=chapter,
|
||||
chapterIndex=chapterIndex,
|
||||
chapterId=chapterId,
|
||||
chapterLevel=chapterLevel,
|
||||
chapterTitle=chapterTitle,
|
||||
generationHint=generationHint,
|
||||
contentPartIds=contentPartIds,
|
||||
contentPartInstructions=contentPartInstructions,
|
||||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
language=language,
|
||||
parentOperationId=parentOperationId,
|
||||
totalChapters=totalChapters
|
||||
)
|
||||
|
||||
task = processChapterWithSemaphore(
|
||||
chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions
|
||||
)
|
||||
chapterTasks.append((chapterIndex, chapter, task))
|
||||
|
||||
# Execute all chapter tasks in parallel
|
||||
# Execute all chapter tasks in parallel with concurrency control
|
||||
if chapterTasks:
|
||||
# Create list of tasks (without indices for gather)
|
||||
tasks = [task for _, _, task in chapterTasks]
|
||||
|
|
@ -386,11 +411,25 @@ class StructureFiller:
|
|||
if generatedElements:
|
||||
elements.extend(generatedElements)
|
||||
else:
|
||||
# Fallback: Try to parse JSON response directly
|
||||
# Fallback: Try to parse JSON response directly with repair logic
|
||||
try:
|
||||
fallbackElements = json.loads(
|
||||
self.services.utils.jsonExtractString(aiResponse.content)
|
||||
)
|
||||
from modules.shared.jsonUtils import tryParseJson, repairBrokenJson
|
||||
|
||||
# Use tryParseJson which handles extraction and basic parsing
|
||||
fallbackElements, parseError, cleanedStr = tryParseJson(aiResponse.content)
|
||||
|
||||
# If parsing failed, try repair
|
||||
if parseError and isinstance(aiResponse.content, str):
|
||||
logger.warning(f"Initial JSON parse failed for section {sectionId}, attempting repair: {str(parseError)}")
|
||||
repairedJson = repairBrokenJson(aiResponse.content)
|
||||
if repairedJson:
|
||||
fallbackElements = repairedJson
|
||||
parseError = None
|
||||
logger.info(f"Successfully repaired JSON for section {sectionId}")
|
||||
|
||||
if parseError:
|
||||
raise parseError
|
||||
|
||||
if isinstance(fallbackElements, list):
|
||||
elements.extend(fallbackElements)
|
||||
elif isinstance(fallbackElements, dict) and "elements" in fallbackElements:
|
||||
|
|
@ -621,7 +660,7 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
|
||||
aiResponseJson = await self.aiService._callAiWithLooping(
|
||||
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||
prompt=generationPrompt,
|
||||
options=options,
|
||||
debugPrefix=f"{chapterId}_section_{sectionId}",
|
||||
|
|
@ -638,11 +677,28 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
},
|
||||
operationId=sectionOperationId,
|
||||
userPrompt=userPrompt,
|
||||
contentParts=extractedParts
|
||||
contentParts=extractedParts,
|
||||
useCaseId="section_content" # REQUIRED: Explicit use case ID
|
||||
)
|
||||
|
||||
try:
|
||||
parsedResponse = json.loads(self.services.utils.jsonExtractString(aiResponseJson))
|
||||
# Use tryParseJson which handles extraction and basic parsing
|
||||
from modules.shared.jsonUtils import tryParseJson, repairBrokenJson
|
||||
|
||||
parsedResponse, parseError, cleanedStr = tryParseJson(aiResponseJson)
|
||||
|
||||
# If parsing failed, try repair
|
||||
if parseError and isinstance(aiResponseJson, str):
|
||||
logger.warning(f"Initial JSON parse failed for section {sectionId}, attempting repair: {str(parseError)}")
|
||||
repairedJson = repairBrokenJson(aiResponseJson)
|
||||
if repairedJson:
|
||||
parsedResponse = repairedJson
|
||||
parseError = None
|
||||
logger.info(f"Successfully repaired JSON for section {sectionId}")
|
||||
|
||||
if parseError:
|
||||
raise parseError
|
||||
|
||||
if isinstance(parsedResponse, list):
|
||||
generatedElements = parsedResponse
|
||||
elif isinstance(parsedResponse, dict):
|
||||
|
|
@ -824,7 +880,7 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
|
||||
aiResponseJson = await self.aiService._callAiWithLooping(
|
||||
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||
prompt=generationPrompt,
|
||||
options=options,
|
||||
debugPrefix=f"{chapterId}_section_{sectionId}",
|
||||
|
|
@ -841,7 +897,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
},
|
||||
operationId=sectionOperationId,
|
||||
userPrompt=userPrompt,
|
||||
contentParts=[]
|
||||
contentParts=[],
|
||||
useCaseId="section_content" # REQUIRED: Explicit use case ID
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
@ -1060,7 +1117,7 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
|
||||
aiResponseJson = await self.aiService._callAiWithLooping(
|
||||
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||
prompt=generationPrompt,
|
||||
options=options,
|
||||
debugPrefix=f"{chapterId}_section_{sectionId}",
|
||||
|
|
@ -1077,7 +1134,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
},
|
||||
operationId=sectionOperationId,
|
||||
userPrompt=userPrompt,
|
||||
contentParts=[part]
|
||||
contentParts=[part],
|
||||
useCaseId="section_content" # REQUIRED: Explicit use case ID
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
@ -1200,7 +1258,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
contentParts: List[ContentPart],
|
||||
userPrompt: str,
|
||||
parentOperationId: str,
|
||||
language: str
|
||||
language: str,
|
||||
options: Optional[AiCallOptions] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Phase 5D.2: Füllt Sections mit ContentParts.
|
||||
|
|
@ -1217,6 +1276,10 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
|
||||
fillOperationId = parentOperationId
|
||||
|
||||
# Get concurrency limit for sections
|
||||
maxConcurrent = self._getMaxConcurrentGeneration(options)
|
||||
sectionSemaphore = asyncio.Semaphore(maxConcurrent)
|
||||
|
||||
# Helper function to calculate overall progress
|
||||
def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections):
|
||||
"""Calculate overall progress: 0.0 to 1.0"""
|
||||
|
|
@ -1251,28 +1314,34 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
parentOperationId=fillOperationId
|
||||
)
|
||||
|
||||
# Process sections within chapter in parallel
|
||||
# Process sections within chapter in parallel with concurrency control
|
||||
sectionTasks = []
|
||||
for sectionIndex, section in enumerate(sections):
|
||||
# Create task for parallel processing
|
||||
task = self._processSingleSection(
|
||||
section=section,
|
||||
sectionIndex=sectionIndex,
|
||||
totalSections=totalSections,
|
||||
chapterIndex=chapterIndex,
|
||||
totalChapters=totalChapters,
|
||||
chapterId=chapterId,
|
||||
chapterOperationId=chapterOperationId,
|
||||
fillOperationId=fillOperationId,
|
||||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
all_sections_list=all_sections_list,
|
||||
language=language,
|
||||
calculateOverallProgress=calculateOverallProgress
|
||||
# Create task wrapper with semaphore for parallel processing
|
||||
async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress):
|
||||
async with sectionSemaphore:
|
||||
return await self._processSingleSection(
|
||||
section=section,
|
||||
sectionIndex=sectionIndex,
|
||||
totalSections=totalSections,
|
||||
chapterIndex=chapterIndex,
|
||||
totalChapters=totalChapters,
|
||||
chapterId=chapterId,
|
||||
chapterOperationId=chapterOperationId,
|
||||
fillOperationId=fillOperationId,
|
||||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
all_sections_list=all_sections_list,
|
||||
language=language,
|
||||
calculateOverallProgress=calculateOverallProgress
|
||||
)
|
||||
|
||||
task = processSectionWithSemaphore(
|
||||
section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress
|
||||
)
|
||||
sectionTasks.append((sectionIndex, section, task))
|
||||
|
||||
# Execute all section tasks in parallel
|
||||
# Execute all section tasks in parallel with concurrency control
|
||||
if sectionTasks:
|
||||
# Create list of tasks (without indices for gather)
|
||||
tasks = [task for _, _, task in sectionTasks]
|
||||
|
|
|
|||
|
|
@ -9,9 +9,10 @@ Handles document structure generation, including:
|
|||
"""
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, List
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -82,28 +83,89 @@ class StructureGenerator:
|
|||
outputFormat=outputFormat
|
||||
)
|
||||
|
||||
# AI-Call für Chapter-Struktur-Generierung
|
||||
# Note: Debug logging is handled by callAiPlanning
|
||||
aiResponse = await self.aiService.callAiPlanning(
|
||||
prompt=structurePrompt,
|
||||
debugType="chapter_structure_generation"
|
||||
# AI-Call für Chapter-Struktur-Generierung mit Looping-Unterstützung
|
||||
# Use _callAiWithLooping instead of callAiPlanning to support continuation if response is cut
|
||||
options = AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_GENERATE,
|
||||
priority=PriorityEnum.QUALITY,
|
||||
processingMode=ProcessingModeEnum.DETAILED,
|
||||
compressPrompt=False,
|
||||
compressContext=False,
|
||||
resultFormat="json"
|
||||
)
|
||||
|
||||
# Parse Struktur
|
||||
# Use tryParseJson which handles malformed JSON and unterminated strings
|
||||
extractedJson = self.services.utils.jsonExtractString(aiResponse)
|
||||
# Create prompt builder for continuation support
|
||||
async def buildChapterStructurePromptWithContinuation(
|
||||
continuationContext: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
) -> str:
|
||||
"""Build chapter structure prompt with optional continuation context."""
|
||||
basePrompt = self._buildChapterStructurePrompt(
|
||||
userPrompt=userPrompt,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat
|
||||
)
|
||||
|
||||
if continuationContext:
|
||||
# Add continuation instructions
|
||||
deliveredSummary = continuationContext.get("delivered_summary", "")
|
||||
elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
|
||||
cutOffElement = continuationContext.get("cut_off_element", "")
|
||||
|
||||
continuationText = f"{deliveredSummary}\n\n"
|
||||
continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
|
||||
|
||||
if elementBeforeCutoff:
|
||||
continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
|
||||
continuationText += f"{elementBeforeCutoff}\n\n"
|
||||
|
||||
if cutOffElement:
|
||||
continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
|
||||
continuationText += f"{cutOffElement}\n\n"
|
||||
|
||||
continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
|
||||
continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
|
||||
continuationText += "Start directly with the next chapter that should follow.\n\n"
|
||||
|
||||
return f"""{basePrompt}
|
||||
|
||||
{continuationText}
|
||||
|
||||
Continue generating the remaining chapters now.
|
||||
"""
|
||||
else:
|
||||
return basePrompt
|
||||
|
||||
# Call AI with looping support
|
||||
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||
prompt=structurePrompt,
|
||||
options=options,
|
||||
debugPrefix="chapter_structure_generation",
|
||||
promptBuilder=buildChapterStructurePromptWithContinuation,
|
||||
promptArgs={
|
||||
"userPrompt": userPrompt,
|
||||
"outputFormat": outputFormat,
|
||||
"services": self.services
|
||||
},
|
||||
useCaseId="chapter_structure", # REQUIRED: Explicit use case ID
|
||||
operationId=structureOperationId,
|
||||
userPrompt=userPrompt,
|
||||
contentParts=contentParts
|
||||
)
|
||||
|
||||
# Parse the complete JSON response (looping system already handles completion)
|
||||
extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
|
||||
parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
|
||||
|
||||
if parseError is not None:
|
||||
# Try to repair broken JSON (handles unterminated strings, incomplete structures, etc.)
|
||||
logger.warning(f"Initial JSON parsing failed: {str(parseError)}. Attempting repair...")
|
||||
# Even with looping, try repair as fallback
|
||||
logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
|
||||
from modules.shared import jsonUtils
|
||||
repairedJson = jsonUtils.repairBrokenJson(extractedJson)
|
||||
if repairedJson:
|
||||
# Try parsing repaired JSON
|
||||
parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
|
||||
if parseError is None:
|
||||
logger.info("Successfully repaired and parsed JSON structure")
|
||||
logger.info("Successfully repaired and parsed JSON structure after looping")
|
||||
structure = parsedJson
|
||||
else:
|
||||
logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
|
||||
|
|
|
|||
|
|
@ -1254,9 +1254,10 @@ class ExtractionService:
|
|||
aiObjects, # Pass interface for AI calls
|
||||
progressCallback=None
|
||||
) -> AiCallResponse:
|
||||
"""Process content parts with model-aware chunking and AI calls.
|
||||
"""Process content parts with model-aware chunking and AI calls in parallel.
|
||||
|
||||
Moved from interfaceAiObjects.callWithContentParts() - entry point for content parts processing.
|
||||
Uses parallel processing similar to section generation for better performance.
|
||||
"""
|
||||
prompt = request.prompt
|
||||
options = request.options
|
||||
|
|
@ -1269,13 +1270,65 @@ class ExtractionService:
|
|||
if not failoverModelList:
|
||||
return self._createErrorResponse("No suitable models found", 0, 0)
|
||||
|
||||
# Process each content part
|
||||
totalParts = len(contentParts)
|
||||
if totalParts == 0:
|
||||
return self._createErrorResponse("No content parts to process", 0, 0)
|
||||
|
||||
# Thread-safe counter for progress tracking
|
||||
completedCount = [0] # Use list to allow modification in nested function
|
||||
|
||||
# Process parts in parallel with concurrency control
|
||||
maxConcurrent = 5
|
||||
if options and hasattr(options, 'maxConcurrentParts'):
|
||||
maxConcurrent = options.maxConcurrentParts
|
||||
|
||||
semaphore = asyncio.Semaphore(maxConcurrent)
|
||||
|
||||
async def processSinglePart(contentPart, partIndex: int) -> AiCallResponse:
|
||||
"""Process a single content part with progress logging."""
|
||||
async with semaphore:
|
||||
partLabel = contentPart.label or f"Part {partIndex+1}"
|
||||
partType = contentPart.typeGroup or "unknown"
|
||||
|
||||
# Log start of processing
|
||||
if progressCallback:
|
||||
progressCallback(0.1 + (partIndex / totalParts) * 0.8, f"Processing {partLabel} ({partType}) - {partIndex+1}/{totalParts}")
|
||||
|
||||
try:
|
||||
# Process the part
|
||||
partResult = await self.processContentPartWithFallback(
|
||||
contentPart, prompt, options, failoverModelList, aiObjects, None # Don't pass progressCallback to avoid double logging
|
||||
)
|
||||
|
||||
# Update completed count and log progress
|
||||
completedCount[0] += 1
|
||||
if progressCallback:
|
||||
progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Completed {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
|
||||
|
||||
return partResult
|
||||
except Exception as e:
|
||||
# Update completed count even on error
|
||||
completedCount[0] += 1
|
||||
logger.error(f"Error processing part {partIndex+1} ({partLabel}): {str(e)}")
|
||||
if progressCallback:
|
||||
progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Error processing {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
|
||||
# Return error response
|
||||
return self._createErrorResponse(f"Error processing part: {str(e)}", 0, 0)
|
||||
|
||||
# Create tasks for all parts
|
||||
tasks = [processSinglePart(contentPart, i) for i, contentPart in enumerate(contentParts)]
|
||||
|
||||
# Execute all tasks in parallel with error handling
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Process results and handle exceptions
|
||||
allResults = []
|
||||
for contentPart in contentParts:
|
||||
partResult = await self.processContentPartWithFallback(
|
||||
contentPart, prompt, options, failoverModelList, aiObjects, progressCallback
|
||||
)
|
||||
allResults.append(partResult)
|
||||
for i, result in enumerate(results):
|
||||
if isinstance(result, Exception):
|
||||
logger.error(f"Exception processing part {i+1}: {str(result)}")
|
||||
allResults.append(self._createErrorResponse(f"Exception: {str(result)}", 0, 0))
|
||||
elif result is not None:
|
||||
allResults.append(result)
|
||||
|
||||
# Merge all results using unified mergePartResults
|
||||
mergedContent = self.mergePartResults(allResults)
|
||||
|
|
|
|||
584
modules/services/serviceGeneration/paths/codePath.py
Normal file
584
modules/services/serviceGeneration/paths/codePath.py
Normal file
|
|
@ -0,0 +1,584 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Code Generation Path
|
||||
|
||||
Handles code generation with multi-file project support, dependency handling,
|
||||
and proper cross-file references.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional
|
||||
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CodeGenerationPath:
|
||||
"""Code generation path."""
|
||||
|
||||
def __init__(self, services):
|
||||
self.services = services
|
||||
|
||||
async def generateCode(
|
||||
self,
|
||||
userPrompt: str,
|
||||
outputFormat: str = None,
|
||||
contentParts: Optional[List[ContentPart]] = None,
|
||||
title: str = "Generated Code",
|
||||
parentOperationId: Optional[str] = None
|
||||
) -> AiResponse:
|
||||
"""
|
||||
Generate code files with multi-file project support.
|
||||
|
||||
Returns: AiResponse with code files as documents
|
||||
"""
|
||||
# Create operation ID
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
codeOperationId = f"code_gen_{workflowId}_{int(time.time())}"
|
||||
|
||||
# Start progress tracking
|
||||
self.services.chat.progressLogStart(
|
||||
codeOperationId,
|
||||
"Code Generation",
|
||||
"Code Generation",
|
||||
f"Format: {outputFormat or 'txt'}",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
try:
|
||||
# Detect language and project type from prompt or outputFormat
|
||||
language, projectType = self._detectLanguageAndProjectType(userPrompt, outputFormat)
|
||||
|
||||
# Phase 1: Code structure generation (with looping)
|
||||
self.services.chat.progressLogUpdate(codeOperationId, 0.2, "Generating code structure")
|
||||
codeStructure = await self._generateCodeStructure(
|
||||
userPrompt=userPrompt,
|
||||
language=language,
|
||||
outputFormat=outputFormat,
|
||||
contentParts=contentParts
|
||||
)
|
||||
|
||||
# Phase 2: Code content generation (with dependency handling)
|
||||
self.services.chat.progressLogUpdate(codeOperationId, 0.5, "Generating code content")
|
||||
codeFiles = await self._generateCodeContent(codeStructure, codeOperationId)
|
||||
|
||||
# Phase 3: Code formatting & validation
|
||||
self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files")
|
||||
formattedFiles = await self._formatAndValidateCode(codeFiles)
|
||||
|
||||
# Convert to unified document format
|
||||
documents = []
|
||||
for file in formattedFiles:
|
||||
mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt"))
|
||||
content = file.get("content", "")
|
||||
if isinstance(content, str):
|
||||
contentBytes = content.encode('utf-8')
|
||||
else:
|
||||
contentBytes = content
|
||||
|
||||
documents.append(DocumentData(
|
||||
documentName=file.get("filename", "generated.txt"),
|
||||
documentData=contentBytes,
|
||||
mimeType=mimeType,
|
||||
sourceJson=file
|
||||
))
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
title=title,
|
||||
operationType=OperationTypeEnum.DATA_GENERATE.value
|
||||
)
|
||||
|
||||
self.services.chat.progressLogFinish(codeOperationId, True)
|
||||
|
||||
return AiResponse(
|
||||
documents=documents,
|
||||
content=None,
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in code generation: {str(e)}")
|
||||
self.services.chat.progressLogFinish(codeOperationId, False)
|
||||
raise
|
||||
|
||||
def _detectLanguageAndProjectType(self, userPrompt: str, outputFormat: Optional[str]) -> tuple:
|
||||
"""Detect programming language and project type from prompt or format."""
|
||||
promptLower = userPrompt.lower()
|
||||
|
||||
# Detect language
|
||||
language = None
|
||||
if outputFormat:
|
||||
if outputFormat == "py":
|
||||
language = "python"
|
||||
elif outputFormat in ["js", "ts"]:
|
||||
language = outputFormat
|
||||
elif outputFormat == "html":
|
||||
language = "html"
|
||||
|
||||
if not language:
|
||||
if "python" in promptLower or ".py" in promptLower:
|
||||
language = "python"
|
||||
elif "javascript" in promptLower or ".js" in promptLower:
|
||||
language = "javascript"
|
||||
elif "typescript" in promptLower or ".ts" in promptLower:
|
||||
language = "typescript"
|
||||
elif "html" in promptLower:
|
||||
language = "html"
|
||||
else:
|
||||
language = "python" # Default
|
||||
|
||||
# Detect project type
|
||||
projectType = "single_file"
|
||||
if "multi" in promptLower or "multiple files" in promptLower or "project" in promptLower:
|
||||
projectType = "multi_file"
|
||||
|
||||
return language, projectType
|
||||
|
||||
async def _generateCodeStructure(
    self,
    userPrompt: str,
    language: str,
    outputFormat: Optional[str],
    contentParts: Optional[List[ContentPart]]
) -> Dict[str, Any]:
    """Generate code structure using looping system.

    Asks the AI (generic looping system, use case "code_structure") for
    a JSON project plan: project metadata plus one entry per planned
    file with filename, dependencies, imports, functions and classes.

    Args:
        userPrompt: The user's code-generation request.
        language: Target programming language (already detected).
        outputFormat: Requested output extension; currently unused in
            this method beyond the signature.  # NOTE(review): confirm
            whether outputFormat should influence the structure prompt.
        contentParts: Optional extra content forwarded to the AI call.

    Returns:
        Parsed structure dict with "metadata" and "files" keys.

    Raises:
        json.JSONDecodeError: If the AI response is not valid JSON.
    """

    # Build structure generation prompt
    structurePrompt = f"""Analyze the following code generation request and create a project structure.

Request: {userPrompt}

Language: {language}

Create a JSON structure with:
1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
2. files: Array of file structures, each with:
- id: Unique identifier
- filename: File name (e.g., "main.py", "utils.py")
- fileType: File extension (e.g., "py", "js")
- dependencies: List of file IDs this file depends on (for multi-file projects)
- imports: List of import statements (for dependency extraction)
- functions: Array of function signatures {{"name": "...", "signature": "..."}}
- classes: Array of class definitions {{"name": "...", "signature": "..."}}

For single-file projects, return one file. For multi-file projects, break down into logical modules.

Return ONLY valid JSON in this format:
{{
"metadata": {{
"language": "{language}",
"projectType": "single_file",
"projectName": "generated-project"
}},
"files": [
{{
"id": "file_1",
"filename": "main.py",
"fileType": "py",
"dependencies": [],
"imports": [],
"functions": [],
"classes": []
}}
]
}}
"""

    # Use generic looping system with code_structure use case
    options = AiCallOptions(
        operationType=OperationTypeEnum.DATA_GENERATE,
        resultFormat="json"
    )

    structureJson = await self.services.ai.callAiWithLooping(
        prompt=structurePrompt,
        options=options,
        useCaseId="code_structure",
        debugPrefix="code_structure_generation",
        contentParts=contentParts
    )

    # Parse the looped AI response; raises if the model returned non-JSON.
    parsed = json.loads(structureJson)
    return parsed
|
||||
|
||||
async def _generateCodeContent(
    self,
    codeStructure: Dict[str, Any],
    parentOperationId: str
) -> List[Dict[str, Any]]:
    """Generate code content for each file with dependency handling.

    Resolves a generation order from the declared dependencies, emits
    package-manager manifests first, then generates every file in turn
    while feeding a summary of already-generated files into the next
    file's prompt so cross-file imports line up.

    Raises:
        ValueError: If the structure contains no files.
    """
    plannedFiles = codeStructure.get("files", [])
    projectMetadata = codeStructure.get("metadata", {})

    if not plannedFiles:
        raise ValueError("No files found in code structure")

    # Step 1: Resolve dependency order
    orderedFiles = self._resolveDependencyOrder(plannedFiles)

    # Step 2: Generate dependency files first (requirements.txt, package.json, ...)
    manifestFiles = await self._generateDependencyFiles(projectMetadata, orderedFiles)

    # Step 3: Generate code files sequentially in dependency order.
    generatedFiles: List[Dict[str, Any]] = []
    priorFileContext: Dict[str, Dict[str, Any]] = {}  # Cross-file reference info
    totalCount = len(orderedFiles)

    for position, plannedFile in enumerate(orderedFiles):
        # Progress spans 0.5 .. 0.9 across all files.
        self.services.chat.progressLogUpdate(
            parentOperationId,
            0.5 + (0.4 * (position / totalCount)),
            f"Generating {plannedFile.get('filename', 'file')}"
        )

        # Summarize previously generated files so imports can be correct.
        contextForFile = self._buildFileContext(priorFileContext, plannedFile)

        generated = await self._generateSingleFileContent(
            plannedFile,
            fileContext=contextForFile,
            allFilesStructure=orderedFiles,
            metadata=projectMetadata
        )
        generatedFiles.append(generated)

        # Record what this file exposes for the benefit of later files.
        priorFileContext[plannedFile["id"]] = {
            "filename": generated.get("filename", plannedFile.get("filename")),
            "functions": generated.get("functions", []),
            "classes": generated.get("classes", []),
            "exports": generated.get("exports", [])
        }

    # Manifests first, then the generated source files.
    return manifestFiles + generatedFiles
|
||||
|
||||
def _resolveDependencyOrder(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Resolve file generation order based on dependencies using topological sort."""
|
||||
# Build dependency graph
|
||||
fileMap = {f["id"]: f for f in files}
|
||||
dependencies = {}
|
||||
|
||||
for file in files:
|
||||
fileId = file["id"]
|
||||
deps = file.get("dependencies", []) # List of file IDs this file depends on
|
||||
dependencies[fileId] = deps
|
||||
|
||||
# Topological sort
|
||||
ordered = []
|
||||
visited = set()
|
||||
tempMark = set()
|
||||
|
||||
def visit(fileId: str):
|
||||
if fileId in tempMark:
|
||||
# Circular dependency detected - break it
|
||||
logger.warning(f"Circular dependency detected involving {fileId}")
|
||||
return
|
||||
if fileId in visited:
|
||||
return
|
||||
|
||||
tempMark.add(fileId)
|
||||
for depId in dependencies.get(fileId, []):
|
||||
if depId in fileMap:
|
||||
visit(depId)
|
||||
tempMark.remove(fileId)
|
||||
visited.add(fileId)
|
||||
ordered.append(fileMap[fileId])
|
||||
|
||||
for file in files:
|
||||
if file["id"] not in visited:
|
||||
visit(file["id"])
|
||||
|
||||
return ordered
|
||||
|
||||
async def _generateDependencyFiles(
    self,
    metadata: Dict[str, Any],
    files: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Build package-manager manifest files for the detected language.

    Python projects get a requirements.txt, JavaScript/TypeScript
    projects a package.json; any other language yields no manifests.
    """
    language = metadata.get("language", "").lower()
    manifests: List[Dict[str, Any]] = []

    if language in ("python", "py"):
        requirements = await self._generateRequirementsTxt(files)
        if requirements:
            manifests.append({
                "filename": "requirements.txt",
                "content": requirements,
                "fileType": "txt",
                "id": "requirements_txt"
            })
    elif language in ("javascript", "typescript", "js", "ts"):
        packageSpec = await self._generatePackageJson(files, metadata)
        if packageSpec:
            manifests.append({
                "filename": "package.json",
                "content": json.dumps(packageSpec, indent=2),
                "fileType": "json",
                "id": "package_json"
            })

    return manifests
|
||||
|
||||
async def _generateRequirementsTxt(
|
||||
self,
|
||||
files: List[Dict[str, Any]]
|
||||
) -> Optional[str]:
|
||||
"""Generate requirements.txt content from Python imports."""
|
||||
pythonPackages = set()
|
||||
|
||||
for file in files:
|
||||
imports = file.get("imports", [])
|
||||
if isinstance(imports, list):
|
||||
for imp in imports:
|
||||
if isinstance(imp, str):
|
||||
# Extract package name from import
|
||||
# Handle: "from flask import", "import flask", "from flask import Flask"
|
||||
imp = imp.strip()
|
||||
if "import" in imp:
|
||||
if "from" in imp:
|
||||
# "from package import ..."
|
||||
parts = imp.split("from")
|
||||
if len(parts) > 1:
|
||||
package = parts[1].split("import")[0].strip()
|
||||
if package and not package.startswith("."):
|
||||
pythonPackages.add(package.split(".")[0]) # Get root package
|
||||
else:
|
||||
# "import package" or "import package.module"
|
||||
parts = imp.split("import")
|
||||
if len(parts) > 1:
|
||||
package = parts[1].strip().split(".")[0].strip()
|
||||
if package and not package.startswith("."):
|
||||
pythonPackages.add(package)
|
||||
|
||||
if pythonPackages:
|
||||
return "\n".join(sorted(pythonPackages))
|
||||
return None
|
||||
|
||||
async def _generatePackageJson(
|
||||
self,
|
||||
files: List[Dict[str, Any]],
|
||||
metadata: Dict[str, Any]
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Generate package.json content from JavaScript/TypeScript imports."""
|
||||
npmPackages = {}
|
||||
|
||||
for file in files:
|
||||
imports = file.get("imports", [])
|
||||
if isinstance(imports, list):
|
||||
for imp in imports:
|
||||
if isinstance(imp, str):
|
||||
# Extract npm package from import
|
||||
# Handle: "import express from 'express'", "const express = require('express')"
|
||||
imp = imp.strip()
|
||||
if "from" in imp:
|
||||
# ES6 import: "import ... from 'package'"
|
||||
parts = imp.split("from")
|
||||
if len(parts) > 1:
|
||||
package = parts[1].strip().strip("'\"")
|
||||
if package and not package.startswith(".") and not package.startswith("/"):
|
||||
npmPackages[package] = "*"
|
||||
elif "require" in imp:
|
||||
# CommonJS: "require('package')"
|
||||
match = re.search(r"require\(['\"]([^'\"]+)['\"]\)", imp)
|
||||
if match:
|
||||
package = match.group(1)
|
||||
if not package.startswith(".") and not package.startswith("/"):
|
||||
npmPackages[package] = "*"
|
||||
|
||||
if npmPackages:
|
||||
return {
|
||||
"name": metadata.get("projectName", "generated-project"),
|
||||
"version": "1.0.0",
|
||||
"dependencies": npmPackages
|
||||
}
|
||||
return None
|
||||
|
||||
def _buildFileContext(
|
||||
self,
|
||||
generatedFileContext: Dict[str, Dict[str, Any]],
|
||||
currentFile: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""Build context about other files for proper imports/references."""
|
||||
context = {
|
||||
"availableFiles": [],
|
||||
"availableFunctions": {},
|
||||
"availableClasses": {}
|
||||
}
|
||||
|
||||
# Add info about already-generated files
|
||||
for fileId, fileInfo in generatedFileContext.items():
|
||||
context["availableFiles"].append({
|
||||
"id": fileId,
|
||||
"filename": fileInfo["filename"],
|
||||
"functions": fileInfo.get("functions", []),
|
||||
"classes": fileInfo.get("classes", []),
|
||||
"exports": fileInfo.get("exports", [])
|
||||
})
|
||||
|
||||
# Build function/class maps for easy lookup
|
||||
for func in fileInfo.get("functions", []):
|
||||
funcName = func.get("name", "")
|
||||
if funcName:
|
||||
context["availableFunctions"][funcName] = {
|
||||
"file": fileInfo["filename"],
|
||||
"signature": func.get("signature", "")
|
||||
}
|
||||
|
||||
for cls in fileInfo.get("classes", []):
|
||||
className = cls.get("name", "")
|
||||
if className:
|
||||
context["availableClasses"][className] = {
|
||||
"file": fileInfo["filename"]
|
||||
}
|
||||
|
||||
return context
|
||||
|
||||
async def _generateSingleFileContent(
    self,
    fileStructure: Dict[str, Any],
    fileContext: Optional[Dict[str, Any]] = None,
    allFilesStructure: Optional[List[Dict[str, Any]]] = None,
    metadata: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Generate code content for a single file with context about other files.

    Builds a generation prompt from the planned file structure plus a
    summary of already-generated sibling files (so imports line up),
    calls the looping AI system (use case "code_content") and returns a
    dict with filename, content, fileType, functions, classes and id.

    Raises:
        json.JSONDecodeError: If the AI response is not valid JSON.
    """

    # Build prompt with context about other files for proper imports
    filename = fileStructure.get("filename", "generated.py")
    fileType = fileStructure.get("fileType", "py")
    dependencies = fileStructure.get("dependencies", [])
    functions = fileStructure.get("functions", [])
    classes = fileStructure.get("classes", [])

    contextInfo = ""
    if fileContext and fileContext.get("availableFiles"):
        contextInfo = "\n\nAvailable files and their exports:\n"
        for fileInfo in fileContext["availableFiles"]:
            contextInfo += f"- {fileInfo['filename']}: "
            funcs = [f.get("name", "") for f in fileInfo.get("functions", [])]
            cls = [c.get("name", "") for c in fileInfo.get("classes", [])]
            exports = []
            if funcs:
                exports.extend(funcs)
            if cls:
                exports.extend(cls)
            if exports:
                contextInfo += ", ".join(exports)
            contextInfo += "\n"

    # BUGFIX: the prompt previously contained the literal placeholder
    # "(unknown)" where the target filename belongs (the computed
    # `filename` local was never interpolated); the model now sees
    # which file it is generating.
    contentPrompt = f"""Generate complete, executable code for the file: {filename}

File Type: {fileType}
Language: {metadata.get('language', 'python') if metadata else 'python'}

Required functions:
{json.dumps(functions, indent=2) if functions else 'None specified'}

Required classes:
{json.dumps(classes, indent=2) if classes else 'None specified'}

Dependencies on other files: {', '.join(dependencies) if dependencies else 'None'}
{contextInfo}

Generate complete, production-ready code with:
1. Proper imports (including imports from other files in the project if dependencies exist)
2. All required functions and classes
3. Error handling
4. Documentation/docstrings
5. Type hints where appropriate

Return ONLY valid JSON in this format:
{{
"files": [
{{
"filename": "{filename}",
"content": "// Complete code here",
"functions": {json.dumps(functions, indent=2) if functions else '[]'},
"classes": {json.dumps(classes, indent=2) if classes else '[]'}
}}
]
}}
"""

    # Use generic looping system with code_content use case
    options = AiCallOptions(
        operationType=OperationTypeEnum.DATA_GENERATE,
        resultFormat="json"
    )

    contentJson = await self.services.ai.callAiWithLooping(
        prompt=contentPrompt,
        options=options,
        useCaseId="code_content",
        debugPrefix=f"code_content_{fileStructure.get('id', 'file')}",
    )

    parsed = json.loads(contentJson)

    # Extract file content and metadata
    files = parsed.get("files", [])
    if files and len(files) > 0:
        fileData = files[0]
        return {
            "filename": fileData.get("filename", filename),
            "content": fileData.get("content", ""),
            "fileType": fileType,
            "functions": fileData.get("functions", functions),
            "classes": fileData.get("classes", classes),
            "id": fileStructure.get("id")
        }

    # Fallback if the model returned a flat object instead of a files list
    return {
        "filename": filename,
        "content": parsed.get("content", ""),
        "fileType": fileType,
        "functions": functions,
        "classes": classes,
        "id": fileStructure.get("id")
    }
|
||||
|
||||
async def _formatAndValidateCode(self, codeFiles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Format and validate generated code files."""
|
||||
# For now, just return files as-is
|
||||
# TODO: Add code formatting (black, prettier, etc.) and validation
|
||||
formatted = []
|
||||
for file in codeFiles:
|
||||
content = file.get("content", "")
|
||||
# Basic cleanup: remove markdown code fences if present
|
||||
if isinstance(content, str):
|
||||
content = re.sub(r'^```[\w]*\n', '', content, flags=re.MULTILINE)
|
||||
content = re.sub(r'\n```$', '', content, flags=re.MULTILINE)
|
||||
file["content"] = content.strip()
|
||||
formatted.append(file)
|
||||
return formatted
|
||||
|
||||
def _getMimeType(self, fileType: str) -> str:
|
||||
"""Get MIME type for file type."""
|
||||
mimeTypes = {
|
||||
"py": "text/x-python",
|
||||
"js": "application/javascript",
|
||||
"ts": "application/typescript",
|
||||
"html": "text/html",
|
||||
"css": "text/css",
|
||||
"json": "application/json",
|
||||
"txt": "text/plain",
|
||||
"md": "text/markdown",
|
||||
"java": "text/x-java-source",
|
||||
"cpp": "text/x-c++src",
|
||||
"c": "text/x-csrc"
|
||||
}
|
||||
return mimeTypes.get(fileType.lower(), "text/plain")
|
||||
258
modules/services/serviceGeneration/paths/documentPath.py
Normal file
258
modules/services/serviceGeneration/paths/documentPath.py
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Document Generation Path
|
||||
|
||||
Handles document generation using existing chapter/section model.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional
|
||||
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DocumentGenerationPath:
    """Document generation path (existing functionality, refactored).

    Orchestrates the full generation pipeline: clarify document
    intents, extract/prepare content, AI-assisted content extraction,
    structure generation, structure filling and final rendering. All
    progress is reported through the chat service's progress log.
    """

    def __init__(self, services):
        # Service container providing ai/chat/utils/workflow services.
        self.services = services

    async def generateDocument(
        self,
        userPrompt: str,
        documentList: Optional[Any] = None,  # DocumentReferenceList
        documentIntents: Optional[List[DocumentIntent]] = None,
        contentParts: Optional[List[ContentPart]] = None,
        outputFormat: str = "txt",
        title: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> AiResponse:
        """
        Generate document using existing chapter/section model.

        Pipeline steps (5A-5E, numbering kept from the original flow):
        clarify intents -> extract & prepare content -> AI extraction
        over content parts -> generate structure -> fill structure ->
        render result.

        Returns: AiResponse with documents list

        Raises:
            ValueError: If rendering produced no documents.
        """
        # Create operation ID (workflow id or a timestamp-based fallback)
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        docOperationId = f"doc_gen_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            docOperationId,
            "Document Generation",
            "Document Generation",
            f"Format: {outputFormat}",
            parentOperationId=parentOperationId
        )

        try:
            # Step 5A: Clarify document intents
            documents = []
            if documentList:
                documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)

            # Only ask the AI for intents when none were supplied.
            if not documentIntents and documents:
                documentIntents = await self.services.ai.clarifyDocumentIntents(
                    documents,
                    userPrompt,
                    {"outputFormat": outputFormat},
                    docOperationId
                )

            # Step 5B: Extract and prepare content from the input documents
            if documents:
                preparedContentParts = await self.services.ai.extractAndPrepareContent(
                    documents,
                    documentIntents or [],
                    docOperationId
                )

                # Merge with caller-provided contentParts (if any)
                if contentParts:
                    # Check for pre-extracted content
                    for part in contentParts:
                        if part.metadata.get("skipExtraction", False):
                            # Already extracted - use as-is; ensure metadata is complete
                            part.metadata.setdefault("contentFormat", "extracted")
                            part.metadata.setdefault("isPreExtracted", True)
                    preparedContentParts.extend(contentParts)

                contentParts = preparedContentParts

            # Step 5B.5: Process contentParts with AI extraction (if provided)
            # This extracts text from images, processes content, and updates contentParts with extracted data
            # This matches the original flow: extract content first (no AI), then process with AI
            if contentParts:
                # Filter out binary/other parts that shouldn't be processed
                processableParts = []
                skippedParts = []
                for p in contentParts:
                    if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
                        processableParts.append(p)
                    else:
                        skippedParts.append(p)

                if skippedParts:
                    logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")

                if processableParts:
                    # Count images for progress update
                    imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
                    if imageCount > 0:
                        self.services.chat.progressLogUpdate(docOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")

                    # Build proper extraction prompt using buildExtractionPrompt
                    # This creates a focused extraction prompt, not the user's generation prompt
                    from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
                    from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

                    # Determine renderer for format-specific guidelines
                    renderer = None
                    if outputFormat:
                        try:
                            # Local import avoids a circular dependency with the generation service.
                            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                            generationService = GenerationService(self.services)
                            renderer = generationService.getRendererForFormat(outputFormat)
                        except Exception as e:
                            logger.debug(f"Could not get renderer for format {outputFormat}: {e}")

                    extractionPrompt = await buildExtractionPrompt(
                        outputFormat=outputFormat or "txt",
                        userPrompt=userPrompt,  # User's prompt as context for what to extract
                        title=title or "Document",
                        aiService=self.services.ai if hasattr(self.services.ai, 'aiObjects') and self.services.ai.aiObjects else None,
                        services=self.services,
                        renderer=renderer
                    )

                    logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")

                    # Update progress - starting extraction
                    self.services.chat.progressLogUpdate(docOperationId, 0.26, f"Starting AI extraction from {len(processableParts)} content parts")

                    # Use DATA_EXTRACT operation type for extraction
                    extractionOptions = AiCallOptions(
                        operationType=OperationTypeEnum.DATA_EXTRACT,  # Use DATA_EXTRACT for extraction
                        compressPrompt=False,
                        compressContext=False
                    )

                    # Create progress callback for per-part progress updates
                    def extractionProgressCallback(progress: float, message: str):
                        """Progress callback for extraction - updates parent operation."""
                        # Map progress from 0.0-1.0 to 0.26-0.35 range (extraction phase)
                        mappedProgress = 0.26 + (progress * 0.09)  # 0.26 to 0.35
                        self.services.chat.progressLogUpdate(docOperationId, mappedProgress, message)

                    extractionRequest = AiCallRequest(
                        prompt=extractionPrompt,  # Use proper extraction prompt, not user's generation prompt
                        context="",
                        options=extractionOptions,
                        contentParts=processableParts
                    )

                    # Write debug file for extraction prompt (all parts)
                    self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")

                    # Call AI to extract content from contentParts (with progress callback)
                    extractionResponse = await self.services.ai.callAi(extractionRequest, progressCallback=extractionProgressCallback)

                    # Update progress - extraction completed
                    self.services.chat.progressLogUpdate(docOperationId, 0.35, f"Completed AI extraction from {len(processableParts)} content parts")

                    # Write debug file for extraction response
                    if extractionResponse.content:
                        self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
                    else:
                        self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
                        logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")

                    # Update contentParts with extracted content (matching original flow)
                    if extractionResponse.errorCount == 0 and extractionResponse.content:
                        # The extracted content is already merged - update the first processable part with it
                        # This matches the original behavior where extracted text was used for generation
                        if processableParts:
                            # Store extracted content in metadata for use in structure generation
                            processableParts[0].metadata["extractedContent"] = extractionResponse.content
                            logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars)")
                    else:
                        # Extraction failed - log warning but continue
                        logger.warning(f"Content extraction failed, continuing with original contentParts")

            # Step 5C: Generate structure
            structure = await self.services.ai.generateStructure(
                userPrompt,
                contentParts or [],
                outputFormat,
                docOperationId
            )

            # Step 5D: Fill structure
            # Language will be extracted from services (user intention analysis) in fillStructure
            filledStructure = await self.services.ai.fillStructure(
                structure,
                contentParts or [],
                userPrompt,
                docOperationId
            )

            # Step 5E: Render result
            # Each document is rendered individually and may return 1..n files (e.g. HTML + images)
            renderedDocuments = await self.services.ai.renderResult(
                filledStructure,
                outputFormat,
                title or "Generated Document",
                userPrompt,
                docOperationId
            )

            # Build response: convert every rendered document to DocumentData
            documentDataList = []
            for renderedDoc in renderedDocuments:
                try:
                    # Create DocumentData for each rendered document
                    docDataObj = DocumentData(
                        documentName=renderedDoc.filename,
                        documentData=renderedDoc.documentData,
                        mimeType=renderedDoc.mimeType,
                        sourceJson=filledStructure if len(documentDataList) == 0 else None  # Only for the first document
                    )
                    documentDataList.append(docDataObj)
                    logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
                except Exception as e:
                    # A single failed document does not abort the whole response.
                    logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")

            if not documentDataList:
                raise ValueError("No documents were rendered")

            metadata = AiResponseMetadata(
                title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
                operationType=OperationTypeEnum.DATA_GENERATE.value
            )

            # Debug log (harmonized with other generation paths)
            self.services.utils.writeDebugFile(
                json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
                "document_generation_response"
            )

            self.services.chat.progressLogFinish(docOperationId, True)

            return AiResponse(
                content=json.dumps(filledStructure),
                metadata=metadata,
                documents=documentDataList
            )

        except Exception as e:
            # Mark the operation failed before propagating to the caller.
            logger.error(f"Error in document generation: {str(e)}")
            self.services.chat.progressLogFinish(docOperationId, False)
            raise
|
||||
|
||||
132
modules/services/serviceGeneration/paths/imagePath.py
Normal file
132
modules/services/serviceGeneration/paths/imagePath.py
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Image Generation Path
|
||||
|
||||
Handles image generation with support for single and batch generation.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import List, Optional
|
||||
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallRequest
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ImageGenerationPath:
    """Image generation path.

    Calls the AI with an IMAGE_GENERATE operation, normalizes the
    returned payload (base64 string or raw bytes) to bytes and wraps it
    in an AiResponse containing one image document.
    """

    def __init__(self, services):
        # Service container providing ai/chat/workflow services.
        self.services = services

    async def generateImages(
        self,
        userPrompt: str,
        count: int = 1,
        style: Optional[str] = None,
        format: str = "png",
        title: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> AiResponse:
        """
        Generate image files.

        Args:
            userPrompt: Description of the image to generate.
            count: Requested number of images.
                # NOTE(review): count is currently unused - exactly one
                # image is produced per call; confirm intended behavior.
            style: Optional style hint appended to the prompt.
            format: Image file extension (e.g. "png", "jpg").
            title: Optional title for the response metadata.
            parentOperationId: Optional parent progress-log operation.

        Returns: AiResponse with image files as documents

        Raises:
            ValueError: If the AI returned no image data.
        """
        # Create operation ID (workflow id or a timestamp-based fallback)
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        imageOperationId = f"image_gen_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            imageOperationId,
            "Image Generation",
            "Image Generation",
            f"Format: {format}",
            parentOperationId=parentOperationId
        )

        try:
            self.services.chat.progressLogUpdate(imageOperationId, 0.4, "Calling AI for image generation")

            # Build prompt with style if provided
            imagePrompt = userPrompt
            if style:
                imagePrompt = f"{userPrompt}\n\nStyle: {style}"

            # Use IMAGE_GENERATE operation
            options = AiCallOptions(
                operationType=OperationTypeEnum.IMAGE_GENERATE,
                resultFormat=format
            )

            request = AiCallRequest(
                prompt=imagePrompt,
                context="",
                options=options
            )

            response = await self.services.ai.callAi(request)

            if not response.content:
                errorMsg = f"No image data returned: {response.content}"
                logger.error(f"Error in AI image generation: {errorMsg}")
                self.services.chat.progressLogFinish(imageOperationId, False)
                raise ValueError(errorMsg)

            # Handle response content (could be base64 string or bytes)
            imageData = response.content
            if isinstance(imageData, str):
                # Assume base64 encoded string
                import base64
                try:
                    imageData = base64.b64decode(imageData)
                except Exception:
                    # If not base64, try encoding as bytes
                    imageData = imageData.encode('utf-8')
            elif not isinstance(imageData, bytes):
                imageData = bytes(imageData)

            # BUGFIX: map extensions to proper MIME subtypes - "jpg"
            # files are "image/jpeg", not "image/jpg"; same for svg/tif.
            mimeSubtype = {"jpg": "jpeg", "svg": "svg+xml", "tif": "tiff"}.get(format.lower(), format.lower())

            # Create document
            imageDoc = DocumentData(
                documentName=f"generated_image.{format}",
                documentData=imageData,
                mimeType=f"image/{mimeSubtype}"
            )

            metadata = AiResponseMetadata(
                title=title or "Generated Image",
                operationType=OperationTypeEnum.IMAGE_GENERATE.value
            )

            self.services.chat.storeWorkflowStat(
                self.services.workflow,
                response,
                "ai.generate.image"
            )

            self.services.chat.progressLogUpdate(imageOperationId, 0.9, "Image generated")
            self.services.chat.progressLogFinish(imageOperationId, True)

            # Create content string describing the image generation
            import json
            contentJson = json.dumps({
                "type": "image",
                "format": format,
                "prompt": userPrompt,
                "filename": imageDoc.documentName
            }, ensure_ascii=False)

            return AiResponse(
                content=contentJson,  # JSON string describing the image generation
                metadata=metadata,
                documents=[imageDoc]
            )

        except Exception as e:
            # Mark the operation failed before propagating to the caller.
            logger.error(f"Error in image generation: {str(e)}")
            self.services.chat.progressLogFinish(imageOperationId, False)
            raise
|
||||
|
||||
|
|
@ -1,742 +0,0 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
AI processing method module.
|
||||
Handles direct AI calls for any type of task.
|
||||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, UTC
|
||||
|
||||
from modules.workflows.methods.methodBase import MethodBase, action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
from modules.datamodels.datamodelWorkflow import ExtractContentParameters
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MethodAi(MethodBase):
|
||||
"""AI processing methods."""
|
||||
|
||||
def __init__(self, services):
    """Set up the AI processing method group on top of the shared service container."""
    super().__init__(services)
    # Method-group identity used by the workflow registry.
    self.name, self.description = "ai", "AI processing methods"
def _format_timestamp_for_filename(self) -> str:
|
||||
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
||||
return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||
|
||||
|
||||
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
    - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
    - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
    - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.

    Parameters:
    - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
    - documentList (list, optional): Document reference(s) in any format to use as input/context.
    - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
    """
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_process_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Generate",
            "AI Processing",
            f"Format: {parameters.get('resultType', 'txt')}",
            parentOperationId=parentOperationId
        )

        aiPrompt = parameters.get("aiPrompt")
        logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")

        # Update progress - preparing parameters
        self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

        from modules.datamodels.datamodelDocref import DocumentReferenceList

        documentListParam = parameters.get("documentList")
        # Normalize the caller-supplied documentList (None / list / str / already-typed)
        # into a DocumentReferenceList; anything unexpected degrades to an empty list.
        if documentListParam is None:
            documentList = DocumentReferenceList(references=[])
        elif isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            logger.error(f"Invalid documentList type: {type(documentListParam)}")
            documentList = DocumentReferenceList(references=[])

        resultType = parameters.get("resultType", "txt")

        if not aiPrompt:
            logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
            return ActionResult.isFailure(
                error="AI prompt is required"
            )

        # Determine output extension and default MIME type without duplicating service logic
        normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
        output_extension = f".{normalized_result_type}"
        output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
        logger.info(f"Using result type: {resultType} -> {output_extension}")

        # Phase 7.3: Extract content first if documents provided, then use contentParts
        # Check if contentParts are already provided (preferred path)
        contentParts: Optional[List[ContentPart]] = None
        if "contentParts" in parameters:
            contentParts = parameters.get("contentParts")
            if contentParts and not isinstance(contentParts, list):
                # Try to extract from ContentExtracted if it's an ActionDocument
                if hasattr(contentParts, 'parts'):
                    contentParts = contentParts.parts
                else:
                    logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
                    contentParts = None

        # If contentParts not provided but documentList is, extract content first
        if not contentParts and documentList.references:
            self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")

            # Get ChatDocuments
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                logger.warning("No documents found in documentList")
            else:
                logger.info(f"Extracting content from {len(chatDocuments)} documents")

                # Prepare extraction options (use defaults if not provided)
                extractionOptions = parameters.get("extractionOptions")
                if not extractionOptions:
                    extractionOptions = ExtractionOptions(
                        prompt="Extract all content from the document",
                        mergeStrategy=MergeStrategy(
                            mergeType="concatenate",
                            groupBy="typeGroup",
                            orderBy="id"
                        ),
                        processDocumentsIndividually=True
                    )

                # Extract content using extraction service with hierarchical progress logging
                # Pass operationId for per-document progress tracking
                extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)

                # Combine all ContentParts from all extracted results
                contentParts = []
                for extracted in extractedResults:
                    if extracted.parts:
                        contentParts.extend(extracted.parts)

                logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")

        # Update progress - preparing AI call
        self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")

        # Build options with only resultFormat - let service layer handle all other parameters
        output_format = output_extension.replace('.', '') or 'txt'
        options = AiCallOptions(
            resultFormat=output_format
            # Removed all model parameters - service layer will analyze prompt and determine optimal parameters
        )

        # Update progress - calling AI
        self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")

        # Use unified callAiContent method with contentParts (extraction is now separate)
        aiResponse = await self.services.ai.callAiContent(
            prompt=aiPrompt,
            options=options,
            contentParts=contentParts,  # Already extracted (or None if no documents)
            outputFormat=output_format,
            parentOperationId=operationId
        )

        # Update progress - processing result
        self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

        from modules.datamodels.datamodelChat import ActionDocument

        # Extract documents from AiResponse
        if aiResponse.documents and len(aiResponse.documents) > 0:
            action_documents = []
            for doc in aiResponse.documents:
                validationMetadata = {
                    "actionType": "ai.process",
                    "resultType": normalized_result_type,
                    "outputFormat": output_format,
                    "hasDocuments": True,
                    "documentCount": len(aiResponse.documents)
                }
                action_documents.append(ActionDocument(
                    documentName=doc.documentName,
                    documentData=doc.documentData,
                    mimeType=doc.mimeType or output_mime_type,
                    sourceJson=getattr(doc, 'sourceJson', None),  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                ))

            final_documents = action_documents
        else:
            # Text response - create document from content
            extension = output_extension.lstrip('.')
            meaningful_name = self._generateMeaningfulFileName(
                base_name="ai",
                extension=extension,
                action_name="result"
            )
            validationMetadata = {
                "actionType": "ai.process",
                "resultType": normalized_result_type,
                "outputFormat": output_format,
                "hasDocuments": False,
                "contentType": "text"
            }
            action_document = ActionDocument(
                documentName=meaningful_name,
                documentData=aiResponse.content,
                mimeType=output_mime_type,
                validationMetadata=validationMetadata
            )
            final_documents = [action_document]

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=final_documents)

    except Exception as e:
        # FIX: logger.exception preserves the traceback that logger.error(f"...") dropped.
        logger.exception(f"Error in AI processing: {str(e)}")

        # Complete progress tracking with failure
        try:
            self.services.chat.progressLogFinish(operationId, False)
        # FIX: bare "except:" also swallowed SystemExit/KeyboardInterrupt; narrow to Exception.
        except Exception:
            pass  # Don't fail on progress logging errors

        return ActionResult.isFailure(
            error=str(e)
        )
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research with two-step process: search for URLs, then crawl content.
    - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
    - Output format: JSON with research results including URLs and content.

    Parameters:
    - prompt (str, required): Natural language research instruction.
    - urlList (list, optional): Specific URLs to crawl, if needed.
    - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
    - language (str, optional): Language code (lowercase, e.g., de, en, fr).
    - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
    """
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parentOperationId
        )

        # Call webcrawl service - service handles all AI intention analysis and processing
        result = await self.services.web.performWebResearch(
            prompt=prompt,
            urls=parameters.get("urlList", []),
            country=parameters.get("country"),
            language=parameters.get("language"),
            researchDepth=parameters.get("researchDepth", "general"),
            operationId=operationId
        )

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        # Get meaningful filename from research result (generated by intent analyzer).
        # FIX: the generic fallback was previously duplicated verbatim in two branches;
        # compute it once when no valid suggestion survives cleaning/validation.
        meaningfulName = None
        suggestedFilename = result.get("suggested_filename")
        if suggestedFilename:
            # Clean and validate filename
            import re
            cleaned = suggestedFilename.strip().strip('"\'')
            cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
            # Ensure it doesn't already have extension
            if cleaned.lower().endswith('.json'):
                cleaned = cleaned[:-5]
            # Validate: should be reasonable length and contain only safe characters
            if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
                meaningfulName = f"{cleaned}.json"
        if meaningfulName is None:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        from modules.datamodels.datamodelChat import ActionDocument
        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": parameters.get("urlList", []),
            "country": parameters.get("country"),
            "language": parameters.get("language"),
            "researchDepth": parameters.get("researchDepth", "general"),
            "resultFormat": "json"
        }
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=result,
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        # FIX: logger.exception keeps the traceback; narrow the cleanup handler from a
        # bare "except:" (which also caught SystemExit/KeyboardInterrupt) to Exception.
        logger.exception(f"Error in web research: {str(e)}")
        try:
            self.services.chat.progressLogFinish(operationId, False)
        except Exception:
            pass
        return ActionResult.isFailure(error=str(e))
# ============================================================================
|
||||
# Document Transformation Wrappers
|
||||
# ============================================================================
|
||||
|
||||
@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Summarize one or more documents, extracting key points and main ideas.
    - Input requirements: documentList (required); optional summaryLength, focus.
    - Output format: Text document with summary (default: txt, can be overridden with resultType).

    Parameters:
    - documentList (list, required): Document reference(s) to summarize.
    - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
    - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
    - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
    """
    # Guard clause: the document list is the only mandatory input.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    summaryLength = parameters.get("summaryLength", "medium")
    focus = parameters.get("focus")
    resultType = parameters.get("resultType", "txt")

    # Map the requested length to a concrete instruction; unknown values fall back to "medium".
    lengthInstructions = {
        "brief": "Create a brief summary (2-3 paragraphs)",
        "medium": "Create a medium-length summary (comprehensive but concise)",
        "detailed": "Create a detailed summary covering all major points"
    }
    lengthInstruction = lengthInstructions.get(summaryLength.lower(), lengthInstructions["medium"])

    # Assemble the prompt from fixed fragments and join them without separators,
    # so every fragment stays byte-identical to the documented wording.
    promptParts = [f"Summarize the provided document(s). {lengthInstruction}."]
    if focus:
        promptParts.append(f" Focus specifically on: {focus}.")
    promptParts.append(" Extract and present the key points, main ideas, and important information in a clear, well-structured format.")

    # Delegate the heavy lifting to the universal process action.
    return await self.process({
        "aiPrompt": "".join(promptParts),
        "documentList": documentList,
        "resultType": resultType
    })
@action
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Translate documents to a target language while preserving formatting and structure.
    - Input requirements: documentList (required); targetLanguage (required).
    - Output format: Translated document in same format as input (default) or specified resultType.

    Parameters:
    - documentList (list, required): Document reference(s) to translate.
    - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
    - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
    - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
    - resultType (str, optional): Output file extension. If not specified, uses same format as input.
    """
    # Guard clauses for the two mandatory parameters.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    targetLanguage = parameters.get("targetLanguage")
    if not targetLanguage:
        return ActionResult.isFailure(error="targetLanguage is required")

    sourceLanguage = parameters.get("sourceLanguage")
    preserveFormatting = parameters.get("preserveFormatting", True)
    resultType = parameters.get("resultType")

    # Build the instruction from fragments; each fragment is byte-identical to the contract wording.
    fragments = [f"Translate the provided document(s) to {targetLanguage}."]
    if sourceLanguage:
        fragments.append(f" The source language is {sourceLanguage}.")
    if preserveFormatting:
        fragments.append(" Preserve all formatting, structure, tables, and layout exactly as they appear in the original document.")
    else:
        fragments.append(" Focus on accurate translation of content.")
    fragments.append(" Maintain the same document structure, headings, and organization.")

    processParams = {
        "aiPrompt": "".join(fragments),
        "documentList": documentList
    }
    # Only forward resultType when explicitly requested; otherwise process() keeps its own default.
    if resultType:
        processParams["resultType"] = resultType

    return await self.process(processParams)
@action
async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
    - Input requirements: documentList (required); inputFormat and outputFormat (required).
    - Output format: Document in target format with specified formatting options.
    - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
    - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
    - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
    - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
    - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
    - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
    """
    # Validate required inputs before doing any work.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    inputFormat = parameters.get("inputFormat")
    outputFormat = parameters.get("outputFormat")
    if not inputFormat or not outputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Normalize formats (remove leading dot if present)
    normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
    normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()

    # Get documents: accept a typed list, a plain list of refs, or a single ref.
    from modules.datamodels.datamodelDocref import DocumentReferenceList
    if isinstance(documentList, DocumentReferenceList):
        docRefList = documentList
    elif isinstance(documentList, list):
        docRefList = DocumentReferenceList.from_string_list(documentList)
    else:
        docRefList = DocumentReferenceList.from_string_list([documentList])

    chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
    if not chatDocuments:
        return ActionResult.isFailure(error="No documents found in documentList")

    # Check if input is standardized JSON format - if so, use direct rendering
    # (only attempted for exactly one JSON input; anything else goes to the AI path).
    if normalizedInputFormat == "json" and len(chatDocuments) == 1:
        try:
            import json
            doc = chatDocuments[0]
            # ChatDocument doesn't have documentData - need to load file content using fileId
            docBytes = self.services.chat.getFileData(doc.fileId)
            if not docBytes:
                raise ValueError(f"No file data found for fileId={doc.fileId}")

            # Decode bytes to string
            docData = docBytes.decode('utf-8')

            # Try to parse as JSON
            # NOTE(review): docData is always a str after .decode() above, so the
            # dict branch below looks unreachable from here - confirm before removing.
            if isinstance(docData, str):
                jsonData = json.loads(docData)
            elif isinstance(docData, dict):
                jsonData = docData
            else:
                jsonData = None

            # Check if it's standardized JSON format (has "documents" or "sections")
            if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)):
                # Use direct rendering - no AI call needed!
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)

                # Ensure format is "documents" array
                if "documents" not in jsonData:
                    jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}

                # Get title
                # NOTE(review): when the input used the "sections" form, jsonData was just
                # re-wrapped above, so "metadata" now sits under documents[0] and this
                # top-level lookup falls back to the filename - confirm that is intended.
                title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document")

                # Render with options
                renderOptions = {}
                if normalizedOutputFormat == "csv":
                    renderOptions["delimiter"] = parameters.get("delimiter", ",")
                    renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
                    renderOptions["includeHeader"] = parameters.get("includeHeader", True)

                rendered_content, mime_type = await generationService.renderReport(
                    jsonData, normalizedOutputFormat, title, None, None
                )

                # Apply CSV options if needed (renderer will handle them)
                if normalizedOutputFormat == "csv" and renderOptions:
                    rendered_content = self._applyCsvOptions(rendered_content, renderOptions)

                from modules.datamodels.datamodelChat import ActionDocument
                validationMetadata = {
                    "actionType": "ai.convert",
                    "inputFormat": normalizedInputFormat,
                    "outputFormat": normalizedOutputFormat,
                    "hasSourceJson": True,
                    "conversionType": "direct_rendering"
                }
                actionDoc = ActionDocument(
                    documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
                    documentData=rendered_content,
                    mimeType=mime_type,
                    sourceJson=jsonData,  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                )

                return ActionResult.isSuccess(documents=[actionDoc])

        except Exception as e:
            logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")
            # Fall through to AI-based conversion

    # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
    columnsPerRow = parameters.get("columnsPerRow")
    delimiter = parameters.get("delimiter", ",")
    includeHeader = parameters.get("includeHeader", True)
    language = parameters.get("language", "en")

    aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."

    # CSV-specific formatting hints are only appended for CSV output.
    if normalizedOutputFormat == "csv":
        aiPrompt += f" Use '{delimiter}' as the delimiter character."
        if columnsPerRow:
            aiPrompt += f" Format the output with {columnsPerRow} columns per row."
        if not includeHeader:
            aiPrompt += " Do not include a header row."
        else:
            aiPrompt += " Include a header row with column names."

    if language and language != "en":
        aiPrompt += f" Use language: {language}."

    aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."

    # Delegate the actual conversion to the universal process action.
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": normalizedOutputFormat
    })
def _applyCsvOptions(self, csvContent: str, options: Dict[str, Any]) -> str:
|
||||
"""Apply CSV formatting options to rendered CSV content."""
|
||||
delimiter = options.get("delimiter", ",")
|
||||
columnsPerRow = options.get("columnsPerRow")
|
||||
includeHeader = options.get("includeHeader", True)
|
||||
|
||||
# Check if any options need to be applied
|
||||
needsProcessing = (delimiter != ",") or (columnsPerRow is not None) or (not includeHeader)
|
||||
|
||||
if not needsProcessing:
|
||||
return csvContent
|
||||
|
||||
import csv
|
||||
import io
|
||||
# Re-read CSV with comma, write with new delimiter
|
||||
reader = csv.reader(io.StringIO(csvContent))
|
||||
output = io.StringIO()
|
||||
writer = csv.writer(output, delimiter=delimiter)
|
||||
|
||||
rows = list(reader)
|
||||
|
||||
# Handle header
|
||||
if not includeHeader and rows:
|
||||
rows = rows[1:] # Skip header
|
||||
|
||||
# Handle columnsPerRow
|
||||
if columnsPerRow:
|
||||
newRows = []
|
||||
for row in rows:
|
||||
# Split row into chunks of columnsPerRow
|
||||
for i in range(0, len(row), columnsPerRow):
|
||||
chunk = row[i:i+columnsPerRow]
|
||||
# Pad to columnsPerRow if needed
|
||||
while len(chunk) < columnsPerRow:
|
||||
chunk.append("")
|
||||
newRows.append(chunk)
|
||||
rows = newRows
|
||||
|
||||
for row in rows:
|
||||
writer.writerow(row)
|
||||
|
||||
return output.getvalue()
|
||||
|
||||
|
||||
@action
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
    - Input requirements: documentList (required); targetFormat (required).
    - Output format: Document in target format.

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
    - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
    """
    # Guard clauses for the two mandatory parameters.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    targetFormat = parameters.get("targetFormat")
    if not targetFormat:
        return ActionResult.isFailure(error="targetFormat is required")

    preserveStructure = parameters.get("preserveStructure", True)

    # Normalize "docx", ".DOCX", " docx " etc. to a bare lowercase extension.
    normalizedFormat = targetFormat.strip().lstrip('.').lower()

    # Compose the instruction from fragments that match the contract wording exactly.
    pieces = [f"Convert the provided document(s) to {normalizedFormat.upper()} format."]
    if preserveStructure:
        pieces.append(" Preserve all document structure including headings, tables, formatting, lists, and layout.")
    pieces.append(" Ensure the converted document maintains the same content and information as the original.")

    # Delegate to the universal process action with the normalized target format.
    return await self.process({
        "aiPrompt": "".join(pieces),
        "documentList": documentList,
        "resultType": normalizedFormat
    })
@action
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
    - Input requirements: documentList (required); optional dataStructure, fields.
    - Output format: JSON by default, or specified resultType.

    Parameters:
    - documentList (list, required): Document reference(s) to extract data from.
    - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
    - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
    - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
    """
    # Guard clause: documents are the only mandatory input.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    dataStructure = parameters.get("dataStructure", "nested")
    fields = parameters.get("fields", [])
    resultType = parameters.get("resultType", "json")

    # Build the prompt from fragments; joined without separators so the wording
    # stays byte-identical to the documented contract.
    segments = ["Extract structured data from the provided document(s)."]
    if fields:
        segments.append(f" Extract the following specific fields: {', '.join(fields)}.")
    else:
        segments.append(" Extract all relevant data including names, dates, amounts, entities, and key information.")

    # Unknown dataStructure values fall back to the "nested" instruction.
    structureInstructions = {
        "flat": "Use a flat key-value structure with simple properties.",
        "nested": "Use a nested JSON structure with logical grouping of related data.",
        "list": "Structure the data as a list/array of objects, one per entity or record."
    }
    segments.append(f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}")
    segments.append(" Ensure all extracted data is accurate and complete.")

    return await self.process({
        "aiPrompt": "".join(segments),
        "documentList": documentList,
        "resultType": resultType
    })
# ============================================================================
|
||||
# Content Generation Wrapper
|
||||
# ============================================================================
|
||||
|
||||
@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate documents from scratch or based on templates/inputs.
    - Input requirements: prompt or description (required); optional documentList (for templates/references).
    - Output format: Document in specified format (default: docx).

    Parameters:
    - prompt (str, required): Description of the document to generate.
    - documentList (list, optional): Template documents or reference documents to use as a guide.
    - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
    - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
    """
    # Guard clause: a generation prompt is mandatory.
    prompt = parameters.get("prompt")
    if not prompt:
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
    documentType = parameters.get("documentType")
    resultType = parameters.get("resultType", "docx")

    # Compose the instruction from fragments identical to the contract wording.
    parts = [f"Generate a document based on the following requirements: {prompt}"]
    if documentType:
        parts.append(f" Document type: {documentType}.")
    if documentList:
        parts.append(" Use the provided template/reference documents as a guide for structure, format, and style.")
    parts.append(" Create a professional, well-structured document with appropriate formatting and organization.")

    processParams = {
        "aiPrompt": "".join(parts),
        "resultType": resultType
    }
    # Forward references only when present so process() does not receive an empty list.
    if documentList:
        processParams["documentList"] = documentList

    return await self.process(processParams)
|
@ -10,6 +10,7 @@ from .summarizeDocument import summarizeDocument
|
|||
from .translateDocument import translateDocument
|
||||
from .convertDocument import convertDocument
|
||||
from .generateDocument import generateDocument
|
||||
from .generateCode import generateCode
|
||||
|
||||
__all__ = [
|
||||
'process',
|
||||
|
|
@ -18,5 +19,6 @@ __all__ = [
|
|||
'translateDocument',
|
||||
'convertDocument',
|
||||
'generateDocument',
|
||||
'generateCode',
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -1,31 +1,13 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Convert Document action for AI operations.
|
||||
Converts documents between different formats (PDF→Word, Excel→CSV, etc.).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
|
||||
- Input requirements: documentList (required); targetFormat (required).
|
||||
- Output format: Document in target format.
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to convert.
|
||||
- targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
|
||||
- preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
|
|
|||
135
modules/workflows/methods/methodAi/actions/generateCode.py
Normal file
135
modules/workflows/methods/methodAi/actions/generateCode.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, Any, Optional, List
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||
from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
prompt = parameters.get("prompt")
|
||||
if not prompt:
|
||||
return ActionResult.isFailure(error="prompt is required")
|
||||
|
||||
documentList = parameters.get("documentList", [])
|
||||
resultType = parameters.get("resultType")
|
||||
|
||||
# Auto-detect format from prompt if not provided
|
||||
if not resultType:
|
||||
promptLower = prompt.lower()
|
||||
if ".html" in promptLower or "html file" in promptLower:
|
||||
resultType = "html"
|
||||
elif ".js" in promptLower or "javascript" in promptLower:
|
||||
resultType = "js"
|
||||
elif ".py" in promptLower or "python" in promptLower:
|
||||
resultType = "py"
|
||||
elif ".ts" in promptLower or "typescript" in promptLower:
|
||||
resultType = "ts"
|
||||
elif ".java" in promptLower:
|
||||
resultType = "java"
|
||||
elif ".cpp" in promptLower or ".c++" in promptLower:
|
||||
resultType = "cpp"
|
||||
else:
|
||||
resultType = "txt" # Default
|
||||
|
||||
# Create operation ID for progress tracking
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
operationId = f"code_gen_{workflowId}_{int(time.time())}"
|
||||
parentOperationId = parameters.get('parentOperationId')
|
||||
|
||||
try:
|
||||
# Convert documentList to DocumentReferenceList if needed
|
||||
docRefList = None
|
||||
if documentList:
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
|
||||
if isinstance(documentList, DocumentReferenceList):
|
||||
docRefList = documentList
|
||||
elif isinstance(documentList, str):
|
||||
docRefList = DocumentReferenceList.from_string_list([documentList])
|
||||
elif isinstance(documentList, list):
|
||||
docRefList = DocumentReferenceList.from_string_list(documentList)
|
||||
else:
|
||||
docRefList = DocumentReferenceList(references=[])
|
||||
|
||||
# Prepare title
|
||||
title = "Generated Code"
|
||||
|
||||
# Call AI service with explicit code intent
|
||||
options = AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_GENERATE,
|
||||
priority=PriorityEnum.BALANCED,
|
||||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
|
||||
aiResponse: AiResponse = await self.services.ai.callAiContent(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
documentList=docRefList,
|
||||
outputFormat=resultType,
|
||||
title=title,
|
||||
parentOperationId=parentOperationId,
|
||||
generationIntent="code" # Explicit intent, skips detection
|
||||
)
|
||||
|
||||
# Convert AiResponse to ActionResult
|
||||
documents = []
|
||||
|
||||
# Convert DocumentData to ActionDocument
|
||||
if aiResponse.documents:
|
||||
for docData in aiResponse.documents:
|
||||
documents.append(ActionDocument(
|
||||
documentName=docData.documentName,
|
||||
documentData=docData.documentData,
|
||||
mimeType=docData.mimeType,
|
||||
sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None
|
||||
))
|
||||
|
||||
# If no documents but content exists, create a document from content
|
||||
if not documents and aiResponse.content:
|
||||
# Determine document name from metadata
|
||||
docName = f"code.{resultType}"
|
||||
if aiResponse.metadata and aiResponse.metadata.filename:
|
||||
docName = aiResponse.metadata.filename
|
||||
elif aiResponse.metadata and aiResponse.metadata.title:
|
||||
import re
|
||||
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
|
||||
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||
if sanitized:
|
||||
if not sanitized.lower().endswith(f".{resultType}"):
|
||||
docName = f"{sanitized}.{resultType}"
|
||||
else:
|
||||
docName = sanitized
|
||||
|
||||
# Determine mime type
|
||||
mimeType = "text/plain"
|
||||
if resultType == "html":
|
||||
mimeType = "text/html"
|
||||
elif resultType == "js":
|
||||
mimeType = "application/javascript"
|
||||
elif resultType == "py":
|
||||
mimeType = "text/x-python"
|
||||
elif resultType == "ts":
|
||||
mimeType = "application/typescript"
|
||||
elif resultType == "java":
|
||||
mimeType = "text/x-java-source"
|
||||
elif resultType == "cpp":
|
||||
mimeType = "text/x-c++src"
|
||||
|
||||
documents.append(ActionDocument(
|
||||
documentName=docName,
|
||||
documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
|
||||
mimeType=mimeType
|
||||
))
|
||||
|
||||
return ActionResult.isSuccess(documents=documents)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in code generation: {str(e)}")
|
||||
return ActionResult.isFailure(error=str(e))
|
||||
|
||||
|
|
@ -1,15 +1,9 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Generate Document action for AI operations.
|
||||
Wrapper around AI service callAiContent method.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, Any, Optional, List
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||
|
|
@ -17,23 +11,7 @@ from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
|
||||
- Input requirements: prompt or description (required); optional documentList (for templates/references).
|
||||
- Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt).
|
||||
|
||||
Parameters:
|
||||
- prompt (str, required): Description of the document to generate.
|
||||
- documentList (list, optional): Template documents or reference documents to use as a guide.
|
||||
- documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
|
||||
- resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
|
||||
- maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
|
||||
- parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
|
||||
- progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
|
||||
"""
|
||||
prompt = parameters.get("prompt")
|
||||
if not prompt:
|
||||
return ActionResult.isFailure(error="prompt is required")
|
||||
|
|
@ -97,7 +75,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E
|
||||
outputFormat=resultType,
|
||||
title=title,
|
||||
parentOperationId=parentOperationId
|
||||
parentOperationId=parentOperationId,
|
||||
generationIntent="document" # NEW: Explicit intent, skips detection
|
||||
)
|
||||
|
||||
# Convert AiResponse to ActionResult
|
||||
|
|
|
|||
|
|
@ -1,36 +1,17 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Process action for AI operations.
|
||||
Universal AI document processing action.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelAi import AiCallOptions
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
|
||||
- Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
|
||||
- Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
|
||||
- Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
|
||||
|
||||
Parameters:
|
||||
- aiPrompt (str, required): Instruction for the AI describing what processing to perform.
|
||||
- documentList (list, optional): Document reference(s) in any format to use as input/context.
|
||||
- resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
|
||||
"""
|
||||
try:
|
||||
# Init progress logger
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
|
|
@ -88,7 +69,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
|
||||
logger.info(f"Using result type: {resultType} -> {output_extension}")
|
||||
|
||||
# Check if contentParts are already provided (from context.extractContent or other sources)
|
||||
# Phase 7.3: Extract content first if documents provided, then use contentParts
|
||||
# Check if contentParts are already provided (preferred path)
|
||||
contentParts: Optional[List[ContentPart]] = None
|
||||
if "contentParts" in parameters:
|
||||
contentParts = parameters.get("contentParts")
|
||||
|
|
@ -100,42 +82,96 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
|
||||
contentParts = None
|
||||
|
||||
# If contentParts not provided but documentList is, extract content first
|
||||
if not contentParts and documentList.references:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
|
||||
|
||||
# Get ChatDocuments
|
||||
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
if not chatDocuments:
|
||||
logger.warning("No documents found in documentList")
|
||||
else:
|
||||
logger.info(f"Extracting content from {len(chatDocuments)} documents")
|
||||
|
||||
# Prepare extraction options (use defaults if not provided)
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
||||
extractionOptions = parameters.get("extractionOptions")
|
||||
if not extractionOptions:
|
||||
extractionOptions = ExtractionOptions(
|
||||
prompt="Extract all content from the document",
|
||||
mergeStrategy=MergeStrategy(
|
||||
mergeType="concatenate",
|
||||
groupBy="typeGroup",
|
||||
orderBy="id"
|
||||
),
|
||||
processDocumentsIndividually=True
|
||||
)
|
||||
|
||||
# Extract content using extraction service
|
||||
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
|
||||
|
||||
# Combine all ContentParts from all extracted results
|
||||
contentParts = []
|
||||
for extracted in extractedResults:
|
||||
if extracted.parts:
|
||||
contentParts.extend(extracted.parts)
|
||||
|
||||
logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
|
||||
|
||||
# Update progress - preparing AI call
|
||||
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
|
||||
|
||||
# Build options
|
||||
# Detect image generation from resultType
|
||||
imageFormats = ["png", "jpg", "jpeg", "gif", "webp"]
|
||||
isImageGeneration = normalized_result_type in imageFormats
|
||||
|
||||
# Build options with correct operationType
|
||||
output_format = output_extension.replace('.', '') or 'txt'
|
||||
from modules.datamodels.datamodelAi import OperationTypeEnum
|
||||
options = AiCallOptions(
|
||||
resultFormat=output_format
|
||||
resultFormat=output_format,
|
||||
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
|
||||
)
|
||||
|
||||
# Get generationIntent from parameters
|
||||
generationIntent = parameters.get("generationIntent")
|
||||
|
||||
# For DATA_GENERATE, generationIntent is REQUIRED
|
||||
# If not provided, default to "document" for document formats (xlsx, docx, pdf, txt, html, etc.)
|
||||
# This is format-based defaulting, not prompt-based auto-detection
|
||||
if options.operationType == OperationTypeEnum.DATA_GENERATE and not generationIntent:
|
||||
# Document formats (default to document generation)
|
||||
documentFormats = ["xlsx", "docx", "pdf", "txt", "md", "html", "csv", "xml", "json", "pptx"]
|
||||
# Code formats (should use ai.generateCode instead, but default to code if ai.process is used)
|
||||
codeFormats = ["py", "js", "ts", "java", "cpp", "c", "go", "rs", "rb", "php", "swift", "kt"]
|
||||
|
||||
if normalized_result_type in documentFormats:
|
||||
generationIntent = "document"
|
||||
logger.info(f"Defaulting generationIntent to 'document' for resultType '{normalized_result_type}'")
|
||||
elif normalized_result_type in codeFormats:
|
||||
generationIntent = "code"
|
||||
logger.info(f"Defaulting generationIntent to 'code' for resultType '{normalized_result_type}'")
|
||||
else:
|
||||
# Unknown format - default to document (most common use case)
|
||||
generationIntent = "document"
|
||||
logger.warning(
|
||||
f"Unknown resultType '{normalized_result_type}', defaulting generationIntent to 'document'. "
|
||||
f"For code generation, use ai.generateCode action or explicitly pass generationIntent='code'."
|
||||
)
|
||||
|
||||
# Update progress - calling AI
|
||||
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
|
||||
|
||||
# Use unified callAiContent method
|
||||
# If contentParts provided (pre-extracted), use them directly
|
||||
# Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
|
||||
# Note: ContentExtracted documents (from context.extractContent) are now handled
|
||||
# automatically in _extractAndPrepareContent() (Phase 5B)
|
||||
if contentParts:
|
||||
# Pre-extracted ContentParts - use them directly
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=aiPrompt,
|
||||
options=options,
|
||||
contentParts=contentParts, # Pre-extracted ContentParts
|
||||
outputFormat=output_format,
|
||||
parentOperationId=operationId
|
||||
)
|
||||
else:
|
||||
# Pass documentList - callAiContent handles Phases 5A-5E internally
|
||||
# This includes automatic detection of ContentExtracted documents
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=aiPrompt,
|
||||
options=options,
|
||||
documentList=documentList, # callAiContent macht Phasen 5A-5E
|
||||
outputFormat=output_format,
|
||||
parentOperationId=operationId
|
||||
)
|
||||
# Use unified callAiContent method with contentParts (extraction is now separate)
|
||||
# ContentParts are already extracted above (or None if no documents)
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=aiPrompt,
|
||||
options=options,
|
||||
contentParts=contentParts, # Already extracted (or None if no documents)
|
||||
outputFormat=output_format,
|
||||
parentOperationId=operationId,
|
||||
generationIntent=generationIntent # REQUIRED for DATA_GENERATE
|
||||
)
|
||||
|
||||
# Update progress - processing result
|
||||
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
|
||||
|
|
|
|||
|
|
@ -1,32 +1,13 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Summarize Document action for AI operations.
|
||||
Summarizes one or more documents, extracting key points and main ideas.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Summarize one or more documents, extracting key points and main ideas.
|
||||
- Input requirements: documentList (required); optional summaryLength, focus.
|
||||
- Output format: Text document with summary (default: txt, can be overridden with resultType).
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to summarize.
|
||||
- summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
|
||||
- focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
|
||||
- resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
|
@ -50,6 +31,7 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
return await self.process({
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList,
|
||||
"resultType": resultType
|
||||
"resultType": resultType,
|
||||
"generationIntent": "document" # NEW: Explicit intent
|
||||
})
|
||||
|
||||
|
|
|
|||
|
|
@ -1,33 +1,13 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Translate Document action for AI operations.
|
||||
Translates documents to a target language while preserving formatting and structure.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Translate documents to a target language while preserving formatting and structure.
|
||||
- Input requirements: documentList (required); targetLanguage (required).
|
||||
- Output format: Translated document in same format as input (default) or specified resultType.
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to translate.
|
||||
- targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
|
||||
- sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
|
||||
- preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
|
||||
- resultType (str, optional): Output file extension. If not specified, uses same format as input.
|
||||
"""
|
||||
documentList = parameters.get("documentList", [])
|
||||
if not documentList:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
|
@ -51,7 +31,8 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
|
||||
processParams = {
|
||||
"aiPrompt": aiPrompt,
|
||||
"documentList": documentList
|
||||
"documentList": documentList,
|
||||
"generationIntent": "document" # NEW: Explicit intent
|
||||
}
|
||||
if resultType:
|
||||
processParams["resultType"] = resultType
|
||||
|
|
|
|||
|
|
@ -1,35 +1,15 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Web Research action for AI operations.
|
||||
Web research with two-step process: search for URLs, then crawl content.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Web research with two-step process: search for URLs, then crawl content.
|
||||
- Input requirements: prompt (required); optional list(url), country, language, researchDepth.
|
||||
- Output format: JSON with research results including URLs and content.
|
||||
|
||||
Parameters:
|
||||
- prompt (str, required): Natural language research instruction.
|
||||
- urlList (list, optional): Specific URLs to crawl, if needed.
|
||||
- country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
|
||||
- language (str, optional): Language code (lowercase, e.g., de, en, fr).
|
||||
- researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
|
||||
"""
|
||||
try:
|
||||
prompt = parameters.get("prompt")
|
||||
if not prompt:
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from .actions.summarizeDocument import summarizeDocument
|
|||
from .actions.translateDocument import translateDocument
|
||||
from .actions.convertDocument import convertDocument
|
||||
from .actions.generateDocument import generateDocument
|
||||
from .actions.generateCode import generateCode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -59,6 +60,14 @@ class MethodAi(MethodBase):
|
|||
required=False,
|
||||
default="txt",
|
||||
description="Output file extension. All output documents will use this format"
|
||||
),
|
||||
"generationIntent": WorkflowActionParameter(
|
||||
name="generationIntent",
|
||||
type="str",
|
||||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["document", "code", "image"],
|
||||
required=False,
|
||||
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). For DATA_GENERATE operations, if not provided, defaults based on resultType: document formats (xlsx, docx, pdf, etc.) → \"document\", code formats (py, js, ts, etc.) → \"code\". For IMAGE_GENERATE operations, this parameter is ignored. Best practice: Use qualified actions (ai.generateDocument, ai.generateCode) instead of ai.process."
|
||||
)
|
||||
},
|
||||
execute=process.__get__(self, self.__class__)
|
||||
|
|
@ -256,6 +265,35 @@ class MethodAi(MethodBase):
|
|||
)
|
||||
},
|
||||
execute=generateDocument.__get__(self, self.__class__)
|
||||
),
|
||||
"generateCode": WorkflowActionDefinition(
|
||||
actionId="ai.generateCode",
|
||||
description="Generate code files - explicitly sets intent to 'code'",
|
||||
parameters={
|
||||
"prompt": WorkflowActionParameter(
|
||||
name="prompt",
|
||||
type="str",
|
||||
frontendType=FrontendType.TEXTAREA,
|
||||
required=True,
|
||||
description="Description of code to generate"
|
||||
),
|
||||
"documentList": WorkflowActionParameter(
|
||||
name="documentList",
|
||||
type="List[str]",
|
||||
frontendType=FrontendType.DOCUMENT_REFERENCE,
|
||||
required=False,
|
||||
description="Reference documents"
|
||||
),
|
||||
"resultType": WorkflowActionParameter(
|
||||
name="resultType",
|
||||
type="str",
|
||||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
|
||||
required=False,
|
||||
description="Output format (html, js, py, etc.). Default: based on prompt"
|
||||
)
|
||||
},
|
||||
execute=generateCode.__get__(self, self.__class__)
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -269,6 +307,7 @@ class MethodAi(MethodBase):
|
|||
self.translateDocument = translateDocument.__get__(self, self.__class__)
|
||||
self.convertDocument = convertDocument.__get__(self, self.__class__)
|
||||
self.generateDocument = generateDocument.__get__(self, self.__class__)
|
||||
self.generateCode = generateCode.__get__(self, self.__class__)
|
||||
|
||||
def _format_timestamp_for_filename(self) -> str:
|
||||
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
||||
|
|
|
|||
|
|
@ -1,460 +0,0 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Context and workflow information method module.
|
||||
Handles workflow context queries and document indexing.
|
||||
"""
|
||||
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
import aiohttp
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime, UTC
|
||||
|
||||
from modules.workflows.methods.methodBase import MethodBase, action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MethodContext(MethodBase):
|
||||
"""Context and workflow information methods."""
|
||||
|
||||
def __init__(self, services):
|
||||
super().__init__(services)
|
||||
self.name = "context"
|
||||
self.description = "Context and workflow information methods"
|
||||
|
||||
@action
|
||||
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
|
||||
- Input requirements: No input documents required. Optional resultType parameter.
|
||||
- Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
|
||||
|
||||
Parameters:
|
||||
- resultType (str, optional): Output format (json, txt, md). Default: json.
|
||||
"""
|
||||
try:
|
||||
workflow = self.services.workflow
|
||||
if not workflow:
|
||||
return ActionResult.isFailure(
|
||||
error="No workflow available"
|
||||
)
|
||||
|
||||
resultType = parameters.get("resultType", "json").lower().strip().lstrip('.')
|
||||
|
||||
# Get available documents index from chat service
|
||||
documentsIndex = self.services.chat.getAvailableDocuments(workflow)
|
||||
|
||||
if not documentsIndex or documentsIndex == "No documents available" or documentsIndex == "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.":
|
||||
# Return empty index structure
|
||||
if resultType == "json":
|
||||
indexData = {
|
||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
||||
"totalDocuments": 0,
|
||||
"rounds": [],
|
||||
"documentReferences": []
|
||||
}
|
||||
indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
|
||||
else:
|
||||
indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
|
||||
else:
|
||||
# Parse the document index string to extract structured information
|
||||
indexData = self._parseDocumentIndex(documentsIndex, workflow)
|
||||
|
||||
if resultType == "json":
|
||||
indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
|
||||
elif resultType == "md":
|
||||
indexContent = self._formatAsMarkdown(indexData)
|
||||
else: # txt
|
||||
indexContent = self._formatAsText(indexData, documentsIndex)
|
||||
|
||||
# Generate meaningful filename
|
||||
workflowContext = self.services.chat.getWorkflowContext()
|
||||
filename = self._generateMeaningfulFileName(
|
||||
"document_index",
|
||||
resultType if resultType in ["json", "txt", "md"] else "json",
|
||||
workflowContext,
|
||||
"getDocumentIndex"
|
||||
)
|
||||
|
||||
validationMetadata = {
|
||||
"actionType": "context.getDocumentIndex",
|
||||
"resultType": resultType,
|
||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
||||
"totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
|
||||
}
|
||||
|
||||
# Create ActionDocument
|
||||
document = ActionDocument(
|
||||
documentName=filename,
|
||||
documentData=indexContent,
|
||||
mimeType="application/json" if resultType == "json" else "text/plain",
|
||||
validationMetadata=validationMetadata
|
||||
)
|
||||
|
||||
return ActionResult.isSuccess(documents=[document])
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating document index: {str(e)}")
|
||||
return ActionResult.isFailure(
|
||||
error=f"Failed to generate document index: {str(e)}"
|
||||
)
|
||||
|
||||
def _parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]:
|
||||
"""Parse the document index string into structured data."""
|
||||
try:
|
||||
indexData = {
|
||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
||||
"generatedAt": datetime.now(UTC).isoformat(),
|
||||
"totalDocuments": 0,
|
||||
"rounds": [],
|
||||
"documentReferences": []
|
||||
}
|
||||
|
||||
# Extract document references from the index string
|
||||
lines = documentsIndex.split('\n')
|
||||
currentRound = None
|
||||
currentDocList = None
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
# Check for round headers
|
||||
if "Current round documents:" in line:
|
||||
currentRound = "current"
|
||||
continue
|
||||
elif "Past rounds documents:" in line:
|
||||
currentRound = "past"
|
||||
continue
|
||||
|
||||
# Check for document list references (docList:...)
|
||||
if line.startswith("- docList:"):
|
||||
docListRef = line.replace("- docList:", "").strip()
|
||||
currentDocList = {
|
||||
"reference": docListRef,
|
||||
"round": currentRound,
|
||||
"documents": []
|
||||
}
|
||||
indexData["rounds"].append(currentDocList)
|
||||
continue
|
||||
|
||||
# Check for individual document references (docItem:...)
|
||||
if line.startswith(" - docItem:") or line.startswith("- docItem:"):
|
||||
docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip()
|
||||
indexData["documentReferences"].append({
|
||||
"reference": docItemRef,
|
||||
"round": currentRound,
|
||||
"docList": currentDocList["reference"] if currentDocList else None
|
||||
})
|
||||
indexData["totalDocuments"] += 1
|
||||
if currentDocList:
|
||||
currentDocList["documents"].append(docItemRef)
|
||||
|
||||
return indexData
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing document index: {str(e)}")
|
||||
return {
|
||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
||||
"error": f"Failed to parse document index: {str(e)}",
|
||||
"rawIndex": documentsIndex
|
||||
}
|
||||
|
||||
def _formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
|
||||
"""Format document index as Markdown."""
|
||||
try:
|
||||
md = f"# Document Index\n\n"
|
||||
md += f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n"
|
||||
md += f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n"
|
||||
md += f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n"
|
||||
|
||||
if indexData.get('rounds'):
|
||||
md += "## Documents by Round\n\n"
|
||||
for roundInfo in indexData['rounds']:
|
||||
roundLabel = roundInfo.get('round', 'unknown').title()
|
||||
md += f"### {roundLabel} Round\n\n"
|
||||
md += f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n"
|
||||
if roundInfo.get('documents'):
|
||||
md += "**Documents:**\n\n"
|
||||
for docRef in roundInfo['documents']:
|
||||
md += f"- `{docRef}`\n"
|
||||
md += "\n"
|
||||
|
||||
if indexData.get('documentReferences'):
|
||||
md += "## All Document References\n\n"
|
||||
for docRef in indexData['documentReferences']:
|
||||
md += f"- `{docRef.get('reference', 'unknown')}`\n"
|
||||
|
||||
return md
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error formatting as Markdown: {str(e)}")
|
||||
return f"# Document Index\n\nError formatting index: {str(e)}\n"
|
||||
|
||||
def _formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
|
||||
"""Format document index as plain text."""
|
||||
try:
|
||||
text = "Document Index\n"
|
||||
text += "=" * 50 + "\n\n"
|
||||
text += f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n"
|
||||
text += f"Generated At: {indexData.get('generatedAt', 'unknown')}\n"
|
||||
text += f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n"
|
||||
|
||||
# Include the raw formatted index for readability
|
||||
text += rawIndex
|
||||
|
||||
return text
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error formatting as text: {str(e)}")
|
||||
return f"Document Index\n\nError formatting index: {str(e)}\n\nRaw index:\n{rawIndex}\n"
|
||||
|
||||
@action
|
||||
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Extract content from documents (separate from AI calls).
|
||||
|
||||
This action performs pure content extraction without AI processing.
|
||||
The extracted ContentParts can then be used by subsequent AI processing actions.
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to extract content from.
|
||||
- extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing ContentExtracted objects
|
||||
- ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
|
||||
"""
|
||||
try:
|
||||
# Init progress logger
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
operationId = f"context_extract_{workflowId}_{int(time.time())}"
|
||||
|
||||
# Extract documentList from parameters dict
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
documentListParam = parameters.get("documentList")
|
||||
if not documentListParam:
|
||||
return ActionResult.isFailure(error="documentList is required")
|
||||
|
||||
# Convert to DocumentReferenceList if needed
|
||||
if isinstance(documentListParam, DocumentReferenceList):
|
||||
documentList = documentListParam
|
||||
elif isinstance(documentListParam, str):
|
||||
documentList = DocumentReferenceList.from_string_list([documentListParam])
|
||||
elif isinstance(documentListParam, list):
|
||||
documentList = DocumentReferenceList.from_string_list(documentListParam)
|
||||
else:
|
||||
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
|
||||
|
||||
# Start progress tracking
|
||||
parentOperationId = parameters.get('parentOperationId')
|
||||
self.services.chat.progressLogStart(
|
||||
operationId,
|
||||
"Extracting content from documents",
|
||||
"Content Extraction",
|
||||
f"Documents: {len(documentList.references)}",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
# Get ChatDocuments from documentList
|
||||
self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
|
||||
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
|
||||
if not chatDocuments:
|
||||
self.services.chat.progressLogFinish(operationId, False)
|
||||
return ActionResult.isFailure(error="No documents found in documentList")
|
||||
|
||||
logger.info(f"Extracting content from {len(chatDocuments)} documents")
|
||||
|
||||
# Prepare extraction options
|
||||
self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
|
||||
extractionOptionsParam = parameters.get("extractionOptions")
|
||||
|
||||
# Convert dict to ExtractionOptions object if needed, or create defaults
|
||||
if extractionOptionsParam:
|
||||
if isinstance(extractionOptionsParam, dict):
|
||||
# Convert dict to ExtractionOptions object
|
||||
extractionOptions = ExtractionOptions(**extractionOptionsParam)
|
||||
elif isinstance(extractionOptionsParam, ExtractionOptions):
|
||||
extractionOptions = extractionOptionsParam
|
||||
else:
|
||||
# Invalid type, use defaults
|
||||
extractionOptions = None
|
||||
else:
|
||||
extractionOptions = None
|
||||
|
||||
# If extractionOptions not provided, create defaults
|
||||
if not extractionOptions:
|
||||
# Default extraction options for pure content extraction (no AI processing)
|
||||
extractionOptions = ExtractionOptions(
|
||||
prompt="Extract all content from the document",
|
||||
mergeStrategy=MergeStrategy(
|
||||
mergeType="concatenate",
|
||||
groupBy="typeGroup",
|
||||
orderBy="id"
|
||||
),
|
||||
processDocumentsIndividually=True
|
||||
)
|
||||
|
||||
# Call extraction service with hierarchical progress logging
|
||||
self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
|
||||
self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
|
||||
# Pass operationId for hierarchical per-document progress logging
|
||||
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
|
||||
|
||||
# Build ActionDocuments from ContentExtracted results
|
||||
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
|
||||
actionDocuments = []
|
||||
# Map extracted results back to original documents by index (results are in same order)
|
||||
for i, extracted in enumerate(extractedResults):
|
||||
# Get original document name if available
|
||||
originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
|
||||
if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
|
||||
# Use original filename with "extracted_" prefix
|
||||
baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
|
||||
documentName = f"{baseName}_extracted_{extracted.id}.json"
|
||||
else:
|
||||
# Fallback to generic name with index
|
||||
documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
|
||||
|
||||
# Store ContentExtracted object in ActionDocument.documentData
|
||||
validationMetadata = {
|
||||
"actionType": "context.extractContent",
|
||||
"documentIndex": i,
|
||||
"extractedId": extracted.id,
|
||||
"partCount": len(extracted.parts) if extracted.parts else 0,
|
||||
"originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
|
||||
}
|
||||
actionDoc = ActionDocument(
|
||||
documentName=documentName,
|
||||
documentData=extracted, # ContentExtracted object
|
||||
mimeType="application/json",
|
||||
validationMetadata=validationMetadata
|
||||
)
|
||||
actionDocuments.append(actionDoc)
|
||||
|
||||
self.services.chat.progressLogFinish(operationId, True)
|
||||
|
||||
return ActionResult.isSuccess(documents=actionDocuments)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in content extraction: {str(e)}")
|
||||
|
||||
# Complete progress tracking with failure
|
||||
try:
|
||||
self.services.chat.progressLogFinish(operationId, False)
|
||||
except:
|
||||
pass # Don't fail on progress logging errors
|
||||
|
||||
return ActionResult.isFailure(error=str(e))
|
||||
|
||||
@action
|
||||
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Trigger preprocessing server at customer tenant to update database with configuration.
|
||||
|
||||
This action makes a POST request to the preprocessing server endpoint with the provided
|
||||
configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
|
||||
|
||||
Parameters:
|
||||
- endpoint (str, required): The full URL endpoint for the preprocessing server API.
|
||||
- configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
|
||||
- authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing "ok" on success, or error message on failure.
|
||||
"""
|
||||
try:
|
||||
endpoint = parameters.get("endpoint")
|
||||
if not endpoint:
|
||||
return ActionResult.isFailure(error="endpoint parameter is required")
|
||||
|
||||
configJsonParam = parameters.get("configJson")
|
||||
if not configJsonParam:
|
||||
return ActionResult.isFailure(error="configJson parameter is required")
|
||||
|
||||
authSecretConfigKey = parameters.get("authSecretConfigKey")
|
||||
if not authSecretConfigKey:
|
||||
return ActionResult.isFailure(error="authSecretConfigKey parameter is required")
|
||||
|
||||
# Handle configJson as either dict or JSON string
|
||||
if isinstance(configJsonParam, str):
|
||||
try:
|
||||
configJson = json.loads(configJsonParam)
|
||||
except json.JSONDecodeError as e:
|
||||
return ActionResult.isFailure(error=f"configJson is not valid JSON: {str(e)}")
|
||||
elif isinstance(configJsonParam, dict):
|
||||
configJson = configJsonParam
|
||||
else:
|
||||
return ActionResult.isFailure(error=f"configJson must be a dict or JSON string, got {type(configJsonParam)}")
|
||||
|
||||
# Get authorization secret from APP_CONFIG using the provided config key
|
||||
authSecret = APP_CONFIG.get(authSecretConfigKey)
|
||||
if not authSecret:
|
||||
errorMsg = f"{authSecretConfigKey} not found in APP_CONFIG"
|
||||
logger.error(errorMsg)
|
||||
return ActionResult.isFailure(error=errorMsg)
|
||||
|
||||
# Prepare headers with authorization (default headers as in original function)
|
||||
headers = {
|
||||
"X-PP-API-Key": authSecret,
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
# Make POST request
|
||||
timeout = aiohttp.ClientTimeout(total=60)
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
async with session.post(
|
||||
endpoint,
|
||||
headers=headers,
|
||||
json=configJson
|
||||
) as response:
|
||||
if response.status in [200, 201]:
|
||||
responseText = await response.text()
|
||||
logger.info(f"Preprocessing server trigger successful: {response.status}")
|
||||
logger.debug(f"Response: {responseText}")
|
||||
|
||||
# Generate meaningful filename
|
||||
workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
|
||||
filename = self._generateMeaningfulFileName(
|
||||
"preprocessing_result",
|
||||
"txt",
|
||||
workflowContext,
|
||||
"triggerPreprocessingServer"
|
||||
)
|
||||
|
||||
# Create validation metadata
|
||||
validationMetadata = self._createValidationMetadata(
|
||||
"triggerPreprocessingServer",
|
||||
endpoint=endpoint,
|
||||
statusCode=response.status,
|
||||
responseText=responseText
|
||||
)
|
||||
|
||||
# Return success with "ok" document
|
||||
document = ActionDocument(
|
||||
documentName=filename,
|
||||
documentData="ok",
|
||||
mimeType="text/plain",
|
||||
validationMetadata=validationMetadata
|
||||
)
|
||||
|
||||
return ActionResult.isSuccess(documents=[document])
|
||||
else:
|
||||
errorText = await response.text()
|
||||
errorMsg = f"Preprocessing server trigger failed: {response.status} - {errorText}"
|
||||
logger.error(errorMsg)
|
||||
return ActionResult.isFailure(error=errorMsg)
|
||||
|
||||
except Exception as e:
|
||||
errorMsg = f"Error triggering preprocessing server: {str(e)}"
|
||||
logger.error(errorMsg)
|
||||
return ActionResult.isFailure(error=errorMsg)
|
||||
|
||||
|
|
@ -1,49 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Extract Content action for Context operations.
|
||||
Extracts content from documents (separate from AI calls).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentExtracted, ContentPart
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Extract raw content parts from documents without AI processing.
|
||||
|
||||
This action performs pure content extraction WITHOUT AI/OCR processing.
|
||||
It returns ContentParts with different typeGroups:
|
||||
- "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.)
|
||||
- "image": Images as base64-encoded data (NOT converted to text, no OCR)
|
||||
- "table": Tables as structured data
|
||||
- "structure": Structured content (JSON, etc.)
|
||||
- "container": Container elements (PDF pages, etc.)
|
||||
|
||||
IMPORTANT:
|
||||
- Images are returned as base64 data, NOT as extracted text
|
||||
- No OCR is performed - images are preserved as visual elements
|
||||
- Text extraction only works for text-based formats (not images)
|
||||
- The extracted ContentParts can then be used by subsequent AI processing actions
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to extract content from.
|
||||
- extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing ContentExtracted objects
|
||||
- ContentExtracted.parts contains List[ContentPart] with various typeGroups
|
||||
- Each ContentPart has a typeGroup indicating its type (text, image, table, etc.)
|
||||
"""
|
||||
try:
|
||||
# Init progress logger
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
|
|
|
|||
|
|
@ -1,30 +1,14 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Get Document Index action for Context operations.
|
||||
Generates a comprehensive index of all documents available in the current workflow.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
|
||||
- Input requirements: No input documents required. Optional resultType parameter.
|
||||
- Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
|
||||
|
||||
Parameters:
|
||||
- resultType (str, optional): Output format (json, txt, md). Default: json.
|
||||
"""
|
||||
try:
|
||||
workflow = self.services.workflow
|
||||
if not workflow:
|
||||
|
|
|
|||
|
|
@ -1,35 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Neutralize Data action for Context operations.
|
||||
Neutralizes extracted content data from ContentExtracted documents.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Neutralize data from ContentExtracted documents.
|
||||
|
||||
This action takes documents containing ContentExtracted objects (from extractContent)
|
||||
and neutralizes the text data in ContentPart.data fields.
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) containing ContentExtracted objects.
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing neutralized ContentExtracted objects
|
||||
"""
|
||||
try:
|
||||
# Init progress logger
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
|
|
|
|||
|
|
@ -1,37 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Trigger Preprocessing Server action for Context operations.
|
||||
Triggers preprocessing server at customer tenant to update database with configuration.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import aiohttp
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Trigger preprocessing server at customer tenant to update database with configuration.
|
||||
|
||||
This action makes a POST request to the preprocessing server endpoint with the provided
|
||||
configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
|
||||
|
||||
Parameters:
|
||||
- endpoint (str, required): The full URL endpoint for the preprocessing server API.
|
||||
- configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
|
||||
- authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing "ok" on success, or error message on failure.
|
||||
"""
|
||||
try:
|
||||
endpoint = parameters.get("endpoint")
|
||||
if not endpoint:
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,37 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Connect JIRA action for JIRA operations.
|
||||
Connects to JIRA instance and creates ticket interface.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import uuid
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Connect to JIRA instance and create ticket interface.
|
||||
|
||||
Parameters:
|
||||
- apiUsername (str, required): JIRA API username/email
|
||||
- apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
|
||||
- apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
|
||||
- projectCode (str, required): JIRA project code (e.g., "DCS")
|
||||
- issueType (str, required): JIRA issue type (e.g., "Task")
|
||||
- taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing connection ID
|
||||
"""
|
||||
try:
|
||||
apiUsername = parameters.get("apiUsername")
|
||||
if not apiUsername:
|
||||
|
|
|
|||
|
|
@ -1,11 +1,6 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Create CSV Content action for JIRA operations.
|
||||
Creates CSV content with custom headers.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
|
|
@ -14,25 +9,11 @@ import csv as csv_module
|
|||
from io import StringIO
|
||||
from datetime import datetime, UTC
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Create CSV content with custom headers.
|
||||
|
||||
Parameters:
|
||||
- data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
|
||||
- headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
|
||||
- columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
|
||||
- taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing CSV content as bytes
|
||||
"""
|
||||
try:
|
||||
dataParam = parameters.get("data")
|
||||
if not dataParam:
|
||||
|
|
|
|||
|
|
@ -1,11 +1,6 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Create Excel Content action for JIRA operations.
|
||||
Creates Excel content with custom headers.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
|
|
@ -14,25 +9,11 @@ import csv as csv_module
|
|||
from io import BytesIO
|
||||
from datetime import datetime, UTC
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Create Excel content with custom headers.
|
||||
|
||||
Parameters:
|
||||
- data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
|
||||
- headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
|
||||
- columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
|
||||
- taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing Excel content as bytes
|
||||
"""
|
||||
try:
|
||||
dataParam = parameters.get("data")
|
||||
if not dataParam:
|
||||
|
|
|
|||
|
|
@ -1,31 +1,14 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Export Tickets As JSON action for JIRA operations.
|
||||
Exports tickets from JIRA as JSON list.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Export tickets from JIRA as JSON list.
|
||||
|
||||
Parameters:
|
||||
- connectionId (str, required): Connection ID from connectJira action result
|
||||
- taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing list of tickets as JSON
|
||||
"""
|
||||
try:
|
||||
connectionIdParam = parameters.get("connectionId")
|
||||
if not connectionIdParam:
|
||||
|
|
|
|||
|
|
@ -1,32 +1,14 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Import Tickets From JSON action for JIRA operations.
|
||||
Imports ticket data from JSON back to JIRA.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Import ticket data from JSON back to JIRA.
|
||||
|
||||
Parameters:
|
||||
- connectionId (str, required): Connection ID from connectJira action result
|
||||
- ticketData (str, required): Document reference containing ticket data as JSON
|
||||
- taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing import result with counts
|
||||
"""
|
||||
try:
|
||||
connectionIdParam = parameters.get("connectionId")
|
||||
if not connectionIdParam:
|
||||
|
|
|
|||
|
|
@ -1,33 +1,14 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Merge Ticket Data action for JIRA operations.
|
||||
Merges JIRA export data with existing SharePoint data.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any, List
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Merge JIRA export data with existing SharePoint data.
|
||||
|
||||
Parameters:
|
||||
- jiraData (str, required): Document reference containing JIRA ticket data as JSON array
|
||||
- existingData (str, required): Document reference containing existing SharePoint data as JSON array
|
||||
- taskSyncDefinition (str or dict, required): Field mapping definition
|
||||
- idField (str, optional): Field name to use as ID for merging (default: "ID")
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing merged data and merge details
|
||||
"""
|
||||
try:
|
||||
jiraDataParam = parameters.get("jiraData")
|
||||
if not jiraDataParam:
|
||||
|
|
|
|||
|
|
@ -1,34 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Parse CSV Content action for JIRA operations.
|
||||
Parses CSV content with custom headers.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import io
|
||||
import pandas as pd
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Parse CSV content with custom headers.
|
||||
|
||||
Parameters:
|
||||
- csvContent (str, required): Document reference containing CSV file content as bytes
|
||||
- skipRows (int, optional): Number of header rows to skip (default: 2)
|
||||
- hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing parsed data and headers as JSON
|
||||
"""
|
||||
try:
|
||||
csvContentParam = parameters.get("csvContent")
|
||||
if not csvContentParam:
|
||||
|
|
|
|||
|
|
@ -1,34 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Parse Excel Content action for JIRA operations.
|
||||
Parses Excel content with custom headers.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import pandas as pd
|
||||
from io import BytesIO
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Parse Excel content with custom headers.
|
||||
|
||||
Parameters:
|
||||
- excelContent (str, required): Document reference containing Excel file content as bytes
|
||||
- skipRows (int, optional): Number of header rows to skip (default: 3)
|
||||
- hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing parsed data and headers as JSON
|
||||
"""
|
||||
try:
|
||||
excelContentParam = parameters.get("excelContent")
|
||||
if not excelContentParam:
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,39 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Compose And Draft Email With Context action for Outlook operations.
|
||||
Composes email content using AI from context and optional documents, then creates a draft.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
import requests
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Compose email content using AI from context and optional documents, then create a draft.
|
||||
- Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
|
||||
- Output format: JSON confirmation with AI-generated draft metadata.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- to (list, required): Recipient email addresses.
|
||||
- context (str, required): Detailled context for composing the email.
|
||||
- documentList (list, optional): Document references for context/attachments.
|
||||
- cc (list, optional): CC recipients.
|
||||
- bcc (list, optional): BCC recipients.
|
||||
- emailStyle (str, optional): formal | casual | business. Default: business.
|
||||
- maxLength (int, optional): Maximum length for generated content. Default: 1000.
|
||||
"""
|
||||
try:
|
||||
connectionReference = parameters.get("connectionReference")
|
||||
to = parameters.get("to")
|
||||
|
|
|
|||
|
|
@ -1,36 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Read Emails action for Outlook operations.
|
||||
Reads emails and metadata from a mailbox folder.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
import requests
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Read emails and metadata from a mailbox folder.
|
||||
- Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
|
||||
- Output format: JSON with emails and metadata.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- folder (str, optional): Folder to read from. Default: Inbox.
|
||||
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
|
||||
- filter (str, optional): Sender, query operators, or subject text.
|
||||
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
||||
"""
|
||||
operationId = None
|
||||
try:
|
||||
# Init progress logger
|
||||
|
|
|
|||
|
|
@ -1,35 +1,15 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Search Emails action for Outlook operations.
|
||||
Searches emails by query and returns matching items with metadata.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import requests
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Search emails by query and return matching items with metadata.
|
||||
- Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
|
||||
- Output format: JSON with search results and metadata.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- query (str, required): Search expression.
|
||||
- folder (str, optional): Folder scope or All. Default: All.
|
||||
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
|
||||
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
||||
"""
|
||||
try:
|
||||
connectionReference = parameters.get("connectionReference")
|
||||
query = parameters.get("query")
|
||||
|
|
|
|||
|
|
@ -1,33 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Send Draft Email action for Outlook operations.
|
||||
Sends draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
import requests
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
|
||||
- Input requirements: connectionReference (required); documentList with draft email JSON documents (required).
|
||||
- Output format: JSON confirmation with sent mail metadata for all emails.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function).
|
||||
"""
|
||||
operationId = None
|
||||
try:
|
||||
# Init progress logger
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,36 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Analyze Folder Usage action for SharePoint operations.
|
||||
Analyzes usage intensity of folders and files in SharePoint.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Analyze usage intensity of folders and files in SharePoint.
|
||||
- Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
|
||||
- Output format: JSON with usage analytics grouped by time intervals.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- documentList (list, required): Document list reference(s) containing findDocumentPath result.
|
||||
- startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
|
||||
- endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
|
||||
- interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
|
||||
"""
|
||||
operationId = None
|
||||
try:
|
||||
# Init progress logger
|
||||
|
|
|
|||
|
|
@ -1,35 +1,14 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Copy File action for SharePoint operations.
|
||||
Copies file within SharePoint.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Copy file within SharePoint.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
|
||||
- sourceFolder (str, required): Source folder path relative to site root
|
||||
- sourceFile (str, required): Source file name
|
||||
- destFolder (str, required): Destination folder path relative to site root
|
||||
- destFile (str, required): Destination file name
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing copy result
|
||||
"""
|
||||
try:
|
||||
connectionReference = parameters.get("connectionReference")
|
||||
if not connectionReference:
|
||||
|
|
|
|||
|
|
@ -1,34 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Download File By Path action for SharePoint operations.
|
||||
Downloads file from SharePoint by exact file path.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Download file from SharePoint by exact file path.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
|
||||
- filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing file content as base64-encoded bytes
|
||||
"""
|
||||
try:
|
||||
connectionReference = parameters.get("connectionReference")
|
||||
if not connectionReference:
|
||||
|
|
|
|||
|
|
@ -1,35 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Find Document Path action for SharePoint operations.
|
||||
Finds documents and folders by name/path across SharePoint sites.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
import urllib.parse
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Find documents and folders by name/path across sites.
|
||||
- Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
|
||||
- Output format: JSON with found items and paths.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- site (str, optional): Site hint.
|
||||
- searchQuery (str, required): Search terms or path.
|
||||
- maxResults (int, optional): Maximum items to return. Default: 1000.
|
||||
"""
|
||||
operationId = None
|
||||
try:
|
||||
# Init progress logger
|
||||
|
|
|
|||
|
|
@ -1,32 +1,14 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Find Site By URL action for SharePoint operations.
|
||||
Finds SharePoint site by hostname and site path.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Find SharePoint site by hostname and site path.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
|
||||
- sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
|
||||
"""
|
||||
try:
|
||||
connectionReference = parameters.get("connectionReference")
|
||||
if not connectionReference:
|
||||
|
|
|
|||
|
|
@ -1,34 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
List Documents action for SharePoint operations.
|
||||
Lists documents and folders in SharePoint paths across sites.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
import urllib.parse
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: List documents and folders in SharePoint paths across sites.
|
||||
- Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional).
|
||||
- Output format: JSON with folder items and metadata.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- documentList (list, required): Document list reference(s) containing findDocumentPath result.
|
||||
- includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
|
||||
"""
|
||||
operationId = None
|
||||
try:
|
||||
# Init progress logger
|
||||
|
|
|
|||
|
|
@ -1,44 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Read Documents action for SharePoint operations.
|
||||
Reads documents from SharePoint and extracts content/metadata.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
import base64
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Read documents from SharePoint and extract content/metadata.
|
||||
- Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
|
||||
- Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
|
||||
- Binary files (PDFs, etc.) are Base64-encoded in documentData.
|
||||
- Text files are stored as plain text in documentData.
|
||||
- Returns ActionResult with documents list for template processing.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- documentList (list, optional): Document list reference(s) containing findDocumentPath result.
|
||||
- pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
|
||||
- includeMetadata (bool, optional): Include metadata. Default: True.
|
||||
|
||||
Returns:
|
||||
- ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
|
||||
- documentName: File name
|
||||
- documentData: Base64-encoded content (binary files) or plain text (text files)
|
||||
- mimeType: MIME type (e.g., application/pdf, text/plain)
|
||||
"""
|
||||
operationId = None
|
||||
try:
|
||||
# Init progress logger
|
||||
|
|
|
|||
|
|
@ -1,34 +1,16 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Upload Document action for SharePoint operations.
|
||||
Uploads documents to SharePoint.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
import urllib.parse
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
GENERAL:
|
||||
- Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
|
||||
- Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
|
||||
- Output format: JSON with upload status and file info.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
|
||||
- pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
|
||||
"""
|
||||
operationId = None
|
||||
try:
|
||||
# Init progress logger
|
||||
|
|
|
|||
|
|
@ -1,35 +1,15 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Upload File action for SharePoint operations.
|
||||
Uploads raw file content (bytes) to SharePoint.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
|
||||
async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Upload raw file content (bytes) to SharePoint.
|
||||
|
||||
Parameters:
|
||||
- connectionReference (str, required): Microsoft connection label.
|
||||
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
|
||||
- folderPath (str, required): Folder path relative to site root
|
||||
- fileName (str, required): File name
|
||||
- content (str, required): Document reference containing file content as base64-encoded bytes
|
||||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing upload result
|
||||
"""
|
||||
try:
|
||||
connectionReference = parameters.get("connectionReference")
|
||||
if not connectionReference:
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class ContentValidator:
|
|||
self.services = services
|
||||
self.learningEngine = learningEngine
|
||||
|
||||
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
|
||||
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
|
||||
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
|
||||
|
||||
Args:
|
||||
|
|
@ -34,8 +34,9 @@ class ContentValidator:
|
|||
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
|
||||
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
|
||||
actionHistory: Optional list of previously executed actions in the workflow (for multi-step workflow context)
|
||||
context: Optional context object to access all documents delivered in the current round
|
||||
"""
|
||||
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory)
|
||||
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory, context)
|
||||
|
||||
def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]:
|
||||
"""Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs."""
|
||||
|
|
@ -533,7 +534,7 @@ class ContentValidator:
|
|||
|
||||
return False
|
||||
|
||||
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
|
||||
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
|
||||
"""AI-based comprehensive validation - generic approach"""
|
||||
try:
|
||||
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
|
||||
|
|
@ -636,9 +637,46 @@ class ContentValidator:
|
|||
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
|
||||
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
|
||||
|
||||
# Build document index context (all documents delivered in current round)
|
||||
documentIndexContext = ""
|
||||
if context and self.services and hasattr(self.services, 'chat') and hasattr(self.services, 'workflow') and self.services.workflow:
|
||||
try:
|
||||
documentIndex = self.services.chat.getAvailableDocuments(self.services.workflow)
|
||||
if documentIndex and documentIndex.strip() and documentIndex != "No documents available":
|
||||
# Extract only "Current round documents" section if present
|
||||
lines = documentIndex.split('\n')
|
||||
currentRoundSection = []
|
||||
inCurrentRound = False
|
||||
for line in lines:
|
||||
if "Current round documents:" in line:
|
||||
inCurrentRound = True
|
||||
currentRoundSection.append(line)
|
||||
elif inCurrentRound:
|
||||
if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
|
||||
currentRoundSection.append(line)
|
||||
elif line.strip() == "":
|
||||
# Empty line is okay, continue
|
||||
continue
|
||||
elif "Past rounds documents:" in line or "AVAILABLE_CONNECTIONS_INDEX:" in line:
|
||||
# End of current round section
|
||||
break
|
||||
else:
|
||||
# Still in current round section
|
||||
currentRoundSection.append(line)
|
||||
|
||||
if currentRoundSection:
|
||||
documentIndexContext = "\n\n=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection)
|
||||
documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round, not just the ones being validated in this step. Use this to check if all required formats/documents are present across the entire round."
|
||||
except Exception as e:
|
||||
logger.warning(f"Error extracting document index for validation: {str(e)}")
|
||||
# Continue without document index - not critical
|
||||
|
||||
# Transform criteria that require data access into metadata-only checks
|
||||
transformedCriteria = self._transformCriteriaForMetadataOnly(successCriteria)
|
||||
|
||||
# Format success criteria for display with index numbers
|
||||
if successCriteria:
|
||||
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
|
||||
if transformedCriteria:
|
||||
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(transformedCriteria)])
|
||||
else:
|
||||
criteriaDisplay = "[]"
|
||||
|
||||
|
|
@ -647,7 +685,7 @@ class ContentValidator:
|
|||
=== TASK INFORMATION ===
|
||||
{objectiveLabel}: '{objectiveText}'
|
||||
EXPECTED DATA TYPE: {dataType}
|
||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}
|
||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}{documentIndexContext}
|
||||
|
||||
=== VALIDATION INSTRUCTIONS ===
|
||||
|
||||
|
|
@ -661,6 +699,7 @@ VALIDATION RULES:
|
|||
5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done").
|
||||
6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria.
|
||||
7. NO ASSUMPTIONS: Do NOT assume content was AI-generated vs extracted. If a section exists with content_type, the content was delivered. Only validate what is present in the metadata.
|
||||
8. DATA-LEVEL CRITERIA TRANSFORMATION: Criteria mentioning accuracy percentages (e.g., "95% accuracy"), completeness percentages (e.g., "98% completeness"), or "all X extracted" have been transformed to metadata-only checks. For accuracy/completeness: Check if contentPartIds reference all source documents and if structure metadata shows expected data types (tables, lists, etc.) exist. For "all X extracted": Check if contentPartIds reference all source documents mentioned in ACTION HISTORY or document index. NEVER attempt to verify accuracy/completeness by comparing actual data values - only use metadata indicators.
|
||||
|
||||
VALIDATION STEPS:
|
||||
- Check ACTION HISTORY for process-oriented criteria
|
||||
|
|
@ -812,6 +851,52 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
|||
logger.error(f"AI validation failed: {str(e)}")
|
||||
raise
|
||||
|
||||
def _transformCriteriaForMetadataOnly(self, criteria: List[str]) -> List[str]:
|
||||
"""
|
||||
Transform criteria that require data access into metadata-only checks.
|
||||
|
||||
Preserves original criterion intent while converting data-level checks to metadata checks.
|
||||
Examples:
|
||||
- "95% accuracy" → "[METADATA ONLY] Data structure indicates extraction completed (check contentPartIds reference all source documents)"
|
||||
- "98% completeness" → "[METADATA ONLY] All source documents referenced in contentPartIds (verify source count matches)"
|
||||
- "all transactions extracted" → "[METADATA ONLY] All source documents referenced in contentPartIds (verify source count matches)"
|
||||
"""
|
||||
if not criteria:
|
||||
return []
|
||||
|
||||
transformed = []
|
||||
for criterion in criteria:
|
||||
original = criterion.strip()
|
||||
transformed_criterion = original
|
||||
|
||||
# Pattern: accuracy percentage (e.g., "95% accuracy", "accuracy meets or exceeds 95% threshold")
|
||||
if re.search(r'\d+%?\s*accuracy|accuracy.*\d+%', original, re.IGNORECASE):
|
||||
# Extract the main subject (e.g., "transactions", "data", etc.)
|
||||
subject_match = re.search(r'(transactions?|data|items?|records?|entries?)', original, re.IGNORECASE)
|
||||
subject = subject_match.group(1).lower() if subject_match else "data"
|
||||
|
||||
transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference all source documents and jsonStructure shows expected {subject} structure exists (tables/lists with rowCount/itemCount > 0). Cannot verify actual {subject} accuracy values from metadata."
|
||||
|
||||
# Pattern: completeness percentage or "all X extracted" (e.g., "98% completeness", "all transactions extracted")
|
||||
elif re.search(r'\d+%?\s*completeness|completeness.*\d+%|all\s+.*extracted|extract.*all', original, re.IGNORECASE):
|
||||
# Extract the main subject
|
||||
subject_match = re.search(r'(transactions?|data|items?|records?|entries?|statements?|documents?)', original, re.IGNORECASE)
|
||||
subject = subject_match.group(1).lower() if subject_match else "items"
|
||||
|
||||
transformed_criterion = f"[METADATA ONLY] {original}: Verify that contentPartIds reference all source documents mentioned in ACTION HISTORY/document index, and jsonStructure shows {subject} structure exists (check rowCount/itemCount in tables/lists). Cannot verify actual {subject} count from metadata."
|
||||
|
||||
# Pattern: "no missing data" or "no incorrect data"
|
||||
elif re.search(r'no\s+missing|no\s+incorrect|no\s+errors?', original, re.IGNORECASE):
|
||||
transformed_criterion = f"[METADATA ONLY] {original}: Check that jsonStructure.content_type shows expected data types present (tables, lists, etc.) and contentPreview.looksLikeRenderedContent=true. Cannot verify actual data values from metadata."
|
||||
|
||||
# Pattern: data accuracy without percentage (e.g., "data is accurate", "accurate data")
|
||||
elif re.search(r'data.*accurate|accurate.*data', original, re.IGNORECASE) and '%' not in original:
|
||||
transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference source documents and jsonStructure shows expected data structure exists. Cannot verify actual data accuracy values from metadata."
|
||||
|
||||
transformed.append(transformed_criterion)
|
||||
|
||||
return transformed
|
||||
|
||||
def _createFailedValidationResult(self, errorMessage: str) -> Dict[str, Any]:
|
||||
"""Create a standardized failed validation result"""
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ class DynamicMode(BaseMode):
|
|||
actionName = selection.get('action', 'unknown')
|
||||
actionParameters = selection.get('parameters', {})
|
||||
actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
|
||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory)
|
||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory, context)
|
||||
observation.contentValidation = validationResult
|
||||
quality_score = validationResult.get('qualityScore', 0.0)
|
||||
if quality_score is None:
|
||||
|
|
@ -194,6 +194,31 @@ class DynamicMode(BaseMode):
|
|||
if decision: # Only append if decision is not None
|
||||
context.previousReviewResult.append(decision)
|
||||
|
||||
# Send ChatLog message if userMessage is present in refinement response
|
||||
if decision and decision.userMessage:
|
||||
try:
|
||||
currentRound = getattr(workflow, 'currentRound', 0)
|
||||
currentTask = getattr(workflow, 'currentTask', 0)
|
||||
|
||||
messageData = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
"message": decision.userMessage,
|
||||
"status": "refinement",
|
||||
"sequenceNr": len(workflow.messages) + 1,
|
||||
"publishedAt": self.services.utils.timestampGetUtc(),
|
||||
"documentsLabel": None,
|
||||
"documents": [],
|
||||
"roundNumber": currentRound,
|
||||
"taskNumber": currentTask,
|
||||
"actionNumber": step
|
||||
}
|
||||
|
||||
self.services.chat.storeMessageWithDocuments(workflow, messageData, [])
|
||||
logger.info(f"Sent refinement userMessage to UI: {decision.userMessage[:100]}...")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to send refinement userMessage to UI: {str(e)}")
|
||||
|
||||
# Store next action guidance from decision for use in next iteration
|
||||
if decision and decision.status == "continue" and decision.nextAction:
|
||||
# Set nextActionGuidance directly (now defined in TaskContext model)
|
||||
|
|
|
|||
|
|
@ -413,12 +413,11 @@ class DocumentGenerationFormatsTester10:
|
|||
async def testAllFormats(self) -> Dict[str, Any]:
|
||||
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
|
||||
print("\n" + "="*80)
|
||||
print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
|
||||
print("TESTING DOCUMENT GENERATION IN ALL FORMATS")
|
||||
print("="*80)
|
||||
|
||||
# Only test HTML format
|
||||
formats = ["html"]
|
||||
# formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats
|
||||
# Test all document formats
|
||||
formats = ["docx", "xlsx", "pptx", "pdf", "html"]
|
||||
results = {}
|
||||
|
||||
for format in formats:
|
||||
|
|
@ -471,7 +470,7 @@ class DocumentGenerationFormatsTester10:
|
|||
async def runTest(self):
|
||||
"""Run the complete test."""
|
||||
print("\n" + "="*80)
|
||||
print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
|
||||
print("DOCUMENT GENERATION FORMATS TEST 10 - ALL FORMATS")
|
||||
print("="*80)
|
||||
|
||||
try:
|
||||
|
|
|
|||
Loading…
Reference in a new issue